/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Janne Grunau
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

#define PREP_BIAS 8192

// avg: combine 16 halfwords from [x2] with 16 halfwords from [x3].
//
//   d0, d1   destination vectors (8 halfwords each)
//   t0..t3   scratch vectors, overwritten
//
// Both pointers are post-incremented by 32 bytes. Expects the constants that
// bidir_fn sets up for the avg variant:
//   v28.8h = -2*PREP_BIAS - (1 << intermediate_bits)
//   v29.8h = -(intermediate_bits+1)   (negative count => sshl shifts right)
// The saturated sum is clamped from below to v28 and v28 is then subtracted,
// so the value that gets shifted is never negative.
.macro avg d0, d1, t0, t1, t2, t3
        ld1             {\t0\().8h,\t1\().8h},  [x2],  32
        ld1             {\t2\().8h,\t3\().8h},  [x3],  32
        sqadd           \t0\().8h,  \t0\().8h,  \t2\().8h
        sqadd           \t1\().8h,  \t1\().8h,  \t3\().8h
        smax            \t0\().8h,  \t0\().8h,  v28.8h // -2*PREP_BIAS - (1 << intermediate_bits)
        smax            \t1\().8h,  \t1\().8h,  v28.8h // -2*PREP_BIAS - (1 << intermediate_bits)
        sqsub           \t0\().8h,  \t0\().8h,  v28.8h // -2*PREP_BIAS - (1 << intermediate_bits)
        sqsub           \t1\().8h,  \t1\().8h,  v28.8h // -2*PREP_BIAS - (1 << intermediate_bits)
        sshl            \d0\().8h,  \t0\().8h,  v29.8h // -(intermediate_bits+1)
        sshl            \d1\().8h,  \t1\().8h,  v29.8h // -(intermediate_bits+1)
.endm

// w_avg: weighted blend of 16 halfwords from [x2] (a) and [x3] (b).
//
//   d0, d1   destination vectors (8 halfwords each)
//   t0..t3   scratch vectors, overwritten
//
// Computes b + (((b - a) * v27) >> 4) in 32 bit, keeps the low 16 bits of each
// lane, applies a rounding right shift, adds the bias and clamps to
// [0, bitdepth_max]. Both pointers are post-incremented by 32 bytes.
// Expects the constants set up by bidir_fn for the non-avg variants:
//   v27.4s = negated weight (bidir_fn negates w6)
//   v28.8h = PREP_BIAS >> intermediate_bits
//   v29.8h = -intermediate_bits
//   v30.8h = 0, v31.8h = bitdepth_max
.macro w_avg d0, d1, t0, t1, t2, t3
        ld1             {\t0\().8h,\t1\().8h},  [x2],  32
        ld1             {\t2\().8h,\t3\().8h},  [x3],  32
        // This difference requires a 17 bit range, and all bits are
        // significant for the following multiplication.
        ssubl           \d0\().4s,  \t2\().4h,  \t0\().4h
        ssubl2          \t0\().4s,  \t2\().8h,  \t0\().8h
        ssubl           \d1\().4s,  \t3\().4h,  \t1\().4h
        ssubl2          \t1\().4s,  \t3\().8h,  \t1\().8h
        mul             \d0\().4s,  \d0\().4s,  v27.4s
        mul             \t0\().4s,  \t0\().4s,  v27.4s
        mul             \d1\().4s,  \d1\().4s,  v27.4s
        mul             \t1\().4s,  \t1\().4s,  v27.4s
        sshr            \d0\().4s,  \d0\().4s,  #4
        sshr            \t0\().4s,  \t0\().4s,  #4
        sshr            \d1\().4s,  \d1\().4s,  #4
        sshr            \t1\().4s,  \t1\().4s,  #4
        saddw           \d0\().4s,  \d0\().4s,  \t2\().4h
        saddw2          \t0\().4s,  \t0\().4s,  \t2\().8h
        saddw           \d1\().4s,  \d1\().4s,  \t3\().4h
        saddw2          \t1\().4s,  \t1\().4s,  \t3\().8h
        uzp1            \d0\().8h,  \d0\().8h,  \t0\().8h // Same as xtn, xtn2
        uzp1            \d1\().8h,  \d1\().8h,  \t1\().8h // Ditto
        srshl           \d0\().8h,  \d0\().8h,  v29.8h // -intermediate_bits
        srshl           \d1\().8h,  \d1\().8h,  v29.8h // -intermediate_bits
        add             \d0\().8h,  \d0\().8h,  v28.8h // PREP_BIAS >> intermediate_bits
        add             \d1\().8h,  \d1\().8h,  v28.8h // PREP_BIAS >> intermediate_bits
        smin            \d0\().8h,  \d0\().8h,  v31.8h // bitdepth_max
        smin            \d1\().8h,  \d1\().8h,  v31.8h // bitdepth_max
        smax            \d0\().8h,  \d0\().8h,  v30.8h // 0
        smax            \d1\().8h,  \d1\().8h,  v30.8h // 0
.endm

// mask: per-pixel weighted blend of 16 halfwords from [x2] (a) and [x3] (b),
// using 16 weight bytes read from [x6].
//
//   d0, d1   destination vectors (8 halfwords each)
//   t0..t3   scratch vectors, overwritten
//
// Same data flow as w_avg, except that the multiplier is the negated,
// sign-extended mask byte of each lane and the product is shifted right by 6.
// x2/x3 are post-incremented by 32 bytes and x6 by 16 bytes.
// Clobbers v24-v27 (the widened negated mask). Expects v28-v31 as for w_avg.
.macro mask d0, d1, t0, t1, t2, t3
        ld1             {v27.16b}, [x6],  16
        ld1             {\t0\().8h,\t1\().8h},  [x2],  32
        neg             v27.16b, v27.16b
        ld1             {\t2\().8h,\t3\().8h},  [x3],  32
        sxtl            v26.8h,  v27.8b
        sxtl2           v27.8h,  v27.16b
        sxtl            v24.4s,  v26.4h
        sxtl2           v25.4s,  v26.8h
        sxtl            v26.4s,  v27.4h
        sxtl2           v27.4s,  v27.8h
        ssubl           \d0\().4s,  \t2\().4h,  \t0\().4h
        ssubl2          \t0\().4s,  \t2\().8h,  \t0\().8h
        ssubl           \d1\().4s,  \t3\().4h,  \t1\().4h
        ssubl2          \t1\().4s,  \t3\().8h,  \t1\().8h
        mul             \d0\().4s,  \d0\().4s,  v24.4s
        mul             \t0\().4s,  \t0\().4s,  v25.4s
        mul             \d1\().4s,  \d1\().4s,  v26.4s
        mul             \t1\().4s,  \t1\().4s,  v27.4s
        sshr            \d0\().4s,  \d0\().4s,  #6
        sshr            \t0\().4s,  \t0\().4s,  #6
        sshr            \d1\().4s,  \d1\().4s,  #6
        sshr            \t1\().4s,  \t1\().4s,  #6
        saddw           \d0\().4s,  \d0\().4s,  \t2\().4h
        saddw2          \t0\().4s,  \t0\().4s,  \t2\().8h
        saddw           \d1\().4s,  \d1\().4s,  \t3\().4h
        saddw2          \t1\().4s,  \t1\().4s,  \t3\().8h
        uzp1            \d0\().8h,  \d0\().8h,  \t0\().8h // Same as xtn, xtn2
        uzp1            \d1\().8h,  \d1\().8h,  \t1\().8h // Ditto
        srshl           \d0\().8h,  \d0\().8h,  v29.8h // -intermediate_bits
        srshl           \d1\().8h,  \d1\().8h,  v29.8h // -intermediate_bits
        add             \d0\().8h,  \d0\().8h,  v28.8h // PREP_BIAS >> intermediate_bits
        add             \d1\().8h,  \d1\().8h,  v28.8h // PREP_BIAS >> intermediate_bits
        smin            \d0\().8h,  \d0\().8h,  v31.8h // bitdepth_max
        smin            \d1\().8h,  \d1\().8h,  v31.8h // bitdepth_max
        smax            \d0\().8h,  \d0\().8h,  v30.8h // 0
        smax            \d1\().8h,  \d1\().8h,  v30.8h // 0
.endm

// bidir_fn type, bdmax
//
// Emits the exported function \type\()_16bpc_neon together with its jump
// table \type\()_tbl. \type names one of the macros above (avg, w_avg, mask)
// and \bdmax is the register that carries bitdepth_max on entry.
//
// Register use, as visible in the code below:
//   x0      destination pointer (16 bit pixels are stored here)
//   x1      destination stride in bytes
//   x2, x3  the two intermediate input buffers, read sequentially
//   w4      width; clz(w4) - 24 indexes the jump table, whose entries run
//           from 1280 (index 0) down to 40 (index 5), i.e. widths 128..4
//   w5      number of rows left to produce
//   avg:    w6 = bitdepth_max
//   w_avg:  w6 = weight, w7 = bitdepth_max
//   mask:   x6 = pointer to mask bytes, w7 = bitdepth_max
// Clobbers x4, x7, x8, w9, v0-v7, v16-v19, v27-v31 (mask also v24-v26).
//
// The first batch of 16 pixels is computed into v4/v5 before the indirect
// branch; every loop body stores the pending batch and computes the next one
// only if rows remain.
.macro bidir_fn type, bdmax
function \type\()_16bpc_neon, export=1
        clz             w4,  w4
.ifnc \type, avg
        dup             v31.8h,  \bdmax // bitdepth_max
        movi            v30.8h,  #0
.endif
        clz             w7,  \bdmax
        sub             w7,  w7,  #18   // intermediate_bits = clz(bitdepth_max) - 18
.ifc \type, avg
        mov             w9,  #1
        mov             w8,  #-2*PREP_BIAS
        lsl             w9,  w9,  w7    // 1 << intermediate_bits
        add             w7,  w7,  #1
        sub             w8,  w8,  w9    // -2*PREP_BIAS - (1 << intermediate_bits)
        neg             w7,  w7         // -(intermediate_bits+1)
        dup             v28.8h,  w8     // -2*PREP_BIAS - (1 << intermediate_bits)
        dup             v29.8h,  w7     // -(intermediate_bits+1)
.else
        mov             w8,  #PREP_BIAS
        lsr             w8,  w8,  w7    // PREP_BIAS >> intermediate_bits
        neg             w7,  w7         // -intermediate_bits
        dup             v28.8h,  w8     // PREP_BIAS >> intermediate_bits
        dup             v29.8h,  w7     // -intermediate_bits
.endif
.ifc \type, w_avg
        dup             v27.4s,  w6
        neg             v27.4s,  v27.4s // multiplier used by w_avg is -weight
.endif
        movrel          x7,  \type\()_tbl
        sub             w4,  w4,  #24   // table index = clz(w) - 24
        \type           v4,  v5,  v0,  v1,  v2,  v3
        ldrsw           x4,  [x7, x4, lsl #2]
        add             x7,  x7,  x4
        br              x7
40:
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  x1    // x7 = odd rows, x0 = even rows
        lsl             x1,  x1,  #1
4:
        // One batch holds four rows of four pixels.
        subs            w5,  w5,  #4
        st1             {v4.8b},    [x0], x1
        st1             {v4.d}[1],  [x7], x1
        st1             {v5.8b},    [x0], x1
        st1             {v5.d}[1],  [x7], x1
        b.le            0f
        \type           v4,  v5,  v0,  v1,  v2,  v3
        b               4b
80:
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  x1    // x7 = odd rows, x0 = even rows
        lsl             x1,  x1,  #1
8:
        // One batch holds two rows of eight pixels.
        st1             {v4.8h},  [x0], x1
        subs            w5,  w5,  #2
        st1             {v5.8h},  [x7], x1
        b.le            0f
        \type           v4,  v5,  v0,  v1,  v2,  v3
        b               8b
160:
        AARCH64_VALID_JUMP_TARGET
16:
        // Two batches per iteration: two rows of sixteen pixels.
        \type           v6,  v7,  v0,  v1,  v2,  v3
        st1             {v4.8h, v5.8h},  [x0], x1
        subs            w5,  w5,  #2
        st1             {v6.8h, v7.8h},  [x0], x1
        b.le            0f
        \type           v4,  v5,  v0,  v1,  v2,  v3
        b               16b
320:
        AARCH64_VALID_JUMP_TARGET
32:
        // Two batches per iteration: one row of 32 pixels.
        \type           v6,  v7,  v0,  v1,  v2,  v3
        subs            w5,  w5,  #1
        st1             {v4.8h, v5.8h, v6.8h, v7.8h},  [x0], x1
        b.le            0f
        \type           v4,  v5,  v0,  v1,  v2,  v3
        b               32b
640:
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  #64   // x7 = second 64-byte half of the row
64:
        // Four batches per iteration: one row of 64 pixels.
        \type           v6,  v7,  v0,  v1,  v2,  v3
        \type           v16, v17, v0,  v1,  v2,  v3
        st1             {v4.8h, v5.8h, v6.8h, v7.8h},  [x0], x1
        \type           v18, v19, v0,  v1,  v2,  v3
        subs            w5,  w5,  #1
        st1             {v16.8h,v17.8h,v18.8h,v19.8h}, [x7], x1
        b.le            0f
        \type           v4,  v5,  v0,  v1,  v2,  v3
        b               64b
1280:
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  #64
        mov             x8,  #128
        sub             x1,  x1,  #128  // first store pair advances by 128, second by stride-128
128:
        // Eight batches per iteration: one row of 128 pixels.
        \type           v6,  v7,  v0,  v1,  v2,  v3
        \type           v16, v17, v0,  v1,  v2,  v3
        st1             {v4.8h, v5.8h, v6.8h, v7.8h},  [x0], x8
        \type           v18, v19, v0,  v1,  v2,  v3
        st1             {v16.8h,v17.8h,v18.8h,v19.8h}, [x7], x8
        \type           v4,  v5,  v0,  v1,  v2,  v3
        \type           v6,  v7,  v0,  v1,  v2,  v3
        \type           v16, v17, v0,  v1,  v2,  v3
        subs            w5,  w5,  #1
        st1             {v4.8h, v5.8h, v6.8h, v7.8h},  [x0], x1
        \type           v18, v19, v0,  v1,  v2,  v3
        st1             {v16.8h,v17.8h,v18.8h,v19.8h}, [x7], x1
        b.le            0f
        \type           v4,  v5,  v0,  v1,  v2,  v3
        b               128b
0:
        ret
endfunc

jumptable \type\()_tbl
        .word 1280b - \type\()_tbl
        .word 640b  - \type\()_tbl
        .word 320b  - \type\()_tbl
        .word 160b  - \type\()_tbl
        .word 80b   - \type\()_tbl
        .word 40b   - \type\()_tbl
endjumptable
.endm

bidir_fn avg, w6
bidir_fn w_avg, w7
bidir_fn mask, w7


.macro w_mask_fn type
function w_mask_\type\()_16bpc_neon, export=1
        ldr             w8,  [sp]
        clz             w9,  w4
        movrel          x10, w_mask_\type\()_tbl
        dup             v31.8h,  w8   // bitdepth_max
        sub             w9,  w9,  #24
        clz             w8,  w8       // clz(bitdepth_max)
        ldrsw           x9,  [x10,  x9,  lsl #2]
        add             x10, x10, x9
        sub             w8,  w8,  #12 // sh = intermediate_bits + 6 = clz(bitdepth_max) - 12
        mov             w9,  #PREP_BIAS*64
        neg             w8,  w8       // -sh
        mov             w11, #27615   // (64 + 1 - 38)<<mask_sh - 1 - mask_rnd
        dup             v30.4s,  w9   // PREP_BIAS*64
dup v29.4s, w8 // -sh 266*c0909341SAndroid Build Coastguard Worker dup v0.8h, w11 267*c0909341SAndroid Build Coastguard Worker.if \type == 444 268*c0909341SAndroid Build Coastguard Worker movi v1.16b, #64 269*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 270*c0909341SAndroid Build Coastguard Worker dup v2.8b, w7 271*c0909341SAndroid Build Coastguard Worker movi v3.8b, #129 272*c0909341SAndroid Build Coastguard Worker sub v3.8b, v3.8b, v2.8b 273*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 274*c0909341SAndroid Build Coastguard Worker dup v2.8h, w7 275*c0909341SAndroid Build Coastguard Worker movi v3.8h, #1, lsl #8 276*c0909341SAndroid Build Coastguard Worker sub v3.8h, v3.8h, v2.8h 277*c0909341SAndroid Build Coastguard Worker.endif 278*c0909341SAndroid Build Coastguard Worker add x12, x0, x1 279*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 280*c0909341SAndroid Build Coastguard Worker br x10 281*c0909341SAndroid Build Coastguard Worker40: 282*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 283*c0909341SAndroid Build Coastguard Worker4: 284*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x2], #32 // tmp1 (four rows at once) 285*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h, v7.8h}, [x3], #32 // tmp2 (four rows at once) 286*c0909341SAndroid Build Coastguard Worker subs w5, w5, #4 287*c0909341SAndroid Build Coastguard Worker sabd v20.8h, v4.8h, v6.8h // abs(tmp1 - tmp2) 288*c0909341SAndroid Build Coastguard Worker sabd v21.8h, v5.8h, v7.8h 289*c0909341SAndroid Build Coastguard Worker ssubl v16.4s, v6.4h, v4.4h // tmp2 - tmp1 (requires 17 bit) 290*c0909341SAndroid Build Coastguard Worker ssubl2 v17.4s, v6.8h, v4.8h 291*c0909341SAndroid Build Coastguard Worker ssubl v18.4s, v7.4h, v5.4h 292*c0909341SAndroid Build Coastguard Worker ssubl2 v19.4s, v7.8h, v5.8h 293*c0909341SAndroid Build Coastguard Worker uqsub v20.8h, v0.8h, v20.8h // 27615 - abs() 294*c0909341SAndroid Build Coastguard Worker 
uqsub v21.8h, v0.8h, v21.8h 295*c0909341SAndroid Build Coastguard Worker sshll2 v7.4s, v5.8h, #6 // tmp1 << 6 296*c0909341SAndroid Build Coastguard Worker sshll v6.4s, v5.4h, #6 297*c0909341SAndroid Build Coastguard Worker sshll2 v5.4s, v4.8h, #6 298*c0909341SAndroid Build Coastguard Worker sshll v4.4s, v4.4h, #6 299*c0909341SAndroid Build Coastguard Worker ushr v20.8h, v20.8h, #10 // 64-m = (27615 - abs()) >> mask_sh 300*c0909341SAndroid Build Coastguard Worker ushr v21.8h, v21.8h, #10 301*c0909341SAndroid Build Coastguard Worker add v4.4s, v4.4s, v30.4s // += PREP_BIAS*64 302*c0909341SAndroid Build Coastguard Worker add v5.4s, v5.4s, v30.4s 303*c0909341SAndroid Build Coastguard Worker add v6.4s, v6.4s, v30.4s 304*c0909341SAndroid Build Coastguard Worker add v7.4s, v7.4s, v30.4s 305*c0909341SAndroid Build Coastguard Worker uxtl v22.4s, v20.4h 306*c0909341SAndroid Build Coastguard Worker uxtl2 v23.4s, v20.8h 307*c0909341SAndroid Build Coastguard Worker uxtl v24.4s, v21.4h 308*c0909341SAndroid Build Coastguard Worker uxtl2 v25.4s, v21.8h 309*c0909341SAndroid Build Coastguard Worker mla v4.4s, v16.4s, v22.4s // (tmp2-tmp1)*(64-m) 310*c0909341SAndroid Build Coastguard Worker mla v5.4s, v17.4s, v23.4s 311*c0909341SAndroid Build Coastguard Worker mla v6.4s, v18.4s, v24.4s 312*c0909341SAndroid Build Coastguard Worker mla v7.4s, v19.4s, v25.4s 313*c0909341SAndroid Build Coastguard Worker srshl v4.4s, v4.4s, v29.4s // (tmp1<<6 + (tmp2-tmp1)*(64-m) + (1 << (sh-1)) + PREP_BIAS*64) >> sh 314*c0909341SAndroid Build Coastguard Worker srshl v5.4s, v5.4s, v29.4s 315*c0909341SAndroid Build Coastguard Worker srshl v6.4s, v6.4s, v29.4s 316*c0909341SAndroid Build Coastguard Worker srshl v7.4s, v7.4s, v29.4s 317*c0909341SAndroid Build Coastguard Worker sqxtun v4.4h, v4.4s // iclip_pixel 318*c0909341SAndroid Build Coastguard Worker sqxtun2 v4.8h, v5.4s 319*c0909341SAndroid Build Coastguard Worker sqxtun v5.4h, v6.4s 320*c0909341SAndroid Build Coastguard Worker sqxtun2 v5.8h, v7.4s 
321*c0909341SAndroid Build Coastguard Worker umin v4.8h, v4.8h, v31.8h // iclip_pixel 322*c0909341SAndroid Build Coastguard Worker umin v5.8h, v5.8h, v31.8h 323*c0909341SAndroid Build Coastguard Worker.if \type == 444 324*c0909341SAndroid Build Coastguard Worker uzp1 v20.16b, v20.16b, v21.16b // 64 - m 325*c0909341SAndroid Build Coastguard Worker sub v20.16b, v1.16b, v20.16b // m 326*c0909341SAndroid Build Coastguard Worker st1 {v20.16b}, [x6], #16 327*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 328*c0909341SAndroid Build Coastguard Worker addp v20.8h, v20.8h, v21.8h // (64 - m) + (64 - n) (column wise addition) 329*c0909341SAndroid Build Coastguard Worker xtn v20.8b, v20.8h 330*c0909341SAndroid Build Coastguard Worker uhsub v20.8b, v3.8b, v20.8b // ((129 - sign) - ((64 - m) + (64 - n)) >> 1 331*c0909341SAndroid Build Coastguard Worker st1 {v20.8b}, [x6], #8 332*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 333*c0909341SAndroid Build Coastguard Worker trn1 v24.2d, v20.2d, v21.2d 334*c0909341SAndroid Build Coastguard Worker trn2 v25.2d, v20.2d, v21.2d 335*c0909341SAndroid Build Coastguard Worker add v24.8h, v24.8h, v25.8h // (64 - my1) + (64 - my2) (row wise addition) 336*c0909341SAndroid Build Coastguard Worker addp v20.8h, v24.8h, v24.8h // (128 - m) + (128 - n) (column wise addition) 337*c0909341SAndroid Build Coastguard Worker sub v20.4h, v3.4h, v20.4h // (256 - sign) - ((128 - m) + (128 - n)) 338*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #2 // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2 339*c0909341SAndroid Build Coastguard Worker str s20, [x6], #4 340*c0909341SAndroid Build Coastguard Worker.endif 341*c0909341SAndroid Build Coastguard Worker st1 {v4.8b}, [x0], x1 342*c0909341SAndroid Build Coastguard Worker st1 {v4.d}[1], [x12], x1 343*c0909341SAndroid Build Coastguard Worker st1 {v5.8b}, [x0], x1 344*c0909341SAndroid Build Coastguard Worker st1 {v5.d}[1], [x12], x1 345*c0909341SAndroid Build 
Coastguard Worker b.gt 4b 346*c0909341SAndroid Build Coastguard Worker ret 347*c0909341SAndroid Build Coastguard Worker80: 348*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 349*c0909341SAndroid Build Coastguard Worker8: 350*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x2], #32 // tmp1 351*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h, v7.8h}, [x3], #32 // tmp2 352*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 353*c0909341SAndroid Build Coastguard Worker sabd v20.8h, v4.8h, v6.8h // abs(tmp1 - tmp2) 354*c0909341SAndroid Build Coastguard Worker sabd v21.8h, v5.8h, v7.8h 355*c0909341SAndroid Build Coastguard Worker ssubl v16.4s, v6.4h, v4.4h // tmp2 - tmp1 (requires 17 bit) 356*c0909341SAndroid Build Coastguard Worker ssubl2 v17.4s, v6.8h, v4.8h 357*c0909341SAndroid Build Coastguard Worker ssubl v18.4s, v7.4h, v5.4h 358*c0909341SAndroid Build Coastguard Worker ssubl2 v19.4s, v7.8h, v5.8h 359*c0909341SAndroid Build Coastguard Worker uqsub v20.8h, v0.8h, v20.8h // 27615 - abs() 360*c0909341SAndroid Build Coastguard Worker uqsub v21.8h, v0.8h, v21.8h 361*c0909341SAndroid Build Coastguard Worker sshll2 v7.4s, v5.8h, #6 // tmp1 << 6 362*c0909341SAndroid Build Coastguard Worker sshll v6.4s, v5.4h, #6 363*c0909341SAndroid Build Coastguard Worker sshll2 v5.4s, v4.8h, #6 364*c0909341SAndroid Build Coastguard Worker sshll v4.4s, v4.4h, #6 365*c0909341SAndroid Build Coastguard Worker ushr v20.8h, v20.8h, #10 // 64-m = (27615 - abs()) >> mask_sh 366*c0909341SAndroid Build Coastguard Worker ushr v21.8h, v21.8h, #10 367*c0909341SAndroid Build Coastguard Worker add v4.4s, v4.4s, v30.4s // += PREP_BIAS*64 368*c0909341SAndroid Build Coastguard Worker add v5.4s, v5.4s, v30.4s 369*c0909341SAndroid Build Coastguard Worker add v6.4s, v6.4s, v30.4s 370*c0909341SAndroid Build Coastguard Worker add v7.4s, v7.4s, v30.4s 371*c0909341SAndroid Build Coastguard Worker uxtl v22.4s, v20.4h 372*c0909341SAndroid Build Coastguard Worker uxtl2 
v23.4s, v20.8h 373*c0909341SAndroid Build Coastguard Worker uxtl v24.4s, v21.4h 374*c0909341SAndroid Build Coastguard Worker uxtl2 v25.4s, v21.8h 375*c0909341SAndroid Build Coastguard Worker mla v4.4s, v16.4s, v22.4s // (tmp2-tmp1)*(64-m) 376*c0909341SAndroid Build Coastguard Worker mla v5.4s, v17.4s, v23.4s 377*c0909341SAndroid Build Coastguard Worker mla v6.4s, v18.4s, v24.4s 378*c0909341SAndroid Build Coastguard Worker mla v7.4s, v19.4s, v25.4s 379*c0909341SAndroid Build Coastguard Worker srshl v4.4s, v4.4s, v29.4s // (tmp1<<6 + (tmp2-tmp1)*(64-m) + (1 << (sh-1)) + PREP_BIAS*64) >> sh 380*c0909341SAndroid Build Coastguard Worker srshl v5.4s, v5.4s, v29.4s 381*c0909341SAndroid Build Coastguard Worker srshl v6.4s, v6.4s, v29.4s 382*c0909341SAndroid Build Coastguard Worker srshl v7.4s, v7.4s, v29.4s 383*c0909341SAndroid Build Coastguard Worker sqxtun v4.4h, v4.4s // iclip_pixel 384*c0909341SAndroid Build Coastguard Worker sqxtun2 v4.8h, v5.4s 385*c0909341SAndroid Build Coastguard Worker sqxtun v5.4h, v6.4s 386*c0909341SAndroid Build Coastguard Worker sqxtun2 v5.8h, v7.4s 387*c0909341SAndroid Build Coastguard Worker umin v4.8h, v4.8h, v31.8h // iclip_pixel 388*c0909341SAndroid Build Coastguard Worker umin v5.8h, v5.8h, v31.8h 389*c0909341SAndroid Build Coastguard Worker.if \type == 444 390*c0909341SAndroid Build Coastguard Worker uzp1 v20.16b, v20.16b, v21.16b // 64 - m 391*c0909341SAndroid Build Coastguard Worker sub v20.16b, v1.16b, v20.16b // m 392*c0909341SAndroid Build Coastguard Worker st1 {v20.16b}, [x6], #16 393*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 394*c0909341SAndroid Build Coastguard Worker addp v20.8h, v20.8h, v21.8h // (64 - m) + (64 - n) (column wise addition) 395*c0909341SAndroid Build Coastguard Worker xtn v20.8b, v20.8h 396*c0909341SAndroid Build Coastguard Worker uhsub v20.8b, v3.8b, v20.8b // ((129 - sign) - ((64 - m) + (64 - n)) >> 1 397*c0909341SAndroid Build Coastguard Worker st1 {v20.8b}, [x6], #8 398*c0909341SAndroid 
Build Coastguard Worker.elseif \type == 420 399*c0909341SAndroid Build Coastguard Worker add v20.8h, v20.8h, v21.8h // (64 - my1) + (64 - my2) (row wise addition) 400*c0909341SAndroid Build Coastguard Worker addp v20.8h, v20.8h, v20.8h // (128 - m) + (128 - n) (column wise addition) 401*c0909341SAndroid Build Coastguard Worker sub v20.4h, v3.4h, v20.4h // (256 - sign) - ((128 - m) + (128 - n)) 402*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #2 // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2 403*c0909341SAndroid Build Coastguard Worker str s20, [x6], #4 404*c0909341SAndroid Build Coastguard Worker.endif 405*c0909341SAndroid Build Coastguard Worker st1 {v4.8h}, [x0], x1 406*c0909341SAndroid Build Coastguard Worker st1 {v5.8h}, [x12], x1 407*c0909341SAndroid Build Coastguard Worker b.gt 8b 408*c0909341SAndroid Build Coastguard Worker ret 409*c0909341SAndroid Build Coastguard Worker1280: 410*c0909341SAndroid Build Coastguard Worker640: 411*c0909341SAndroid Build Coastguard Worker320: 412*c0909341SAndroid Build Coastguard Worker160: 413*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 414*c0909341SAndroid Build Coastguard Worker mov w11, w4 415*c0909341SAndroid Build Coastguard Worker sub x1, x1, w4, uxtw #1 416*c0909341SAndroid Build Coastguard Worker.if \type == 444 417*c0909341SAndroid Build Coastguard Worker add x10, x6, w4, uxtw 418*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 419*c0909341SAndroid Build Coastguard Worker add x10, x6, x11, lsr #1 420*c0909341SAndroid Build Coastguard Worker.endif 421*c0909341SAndroid Build Coastguard Worker add x9, x3, w4, uxtw #1 422*c0909341SAndroid Build Coastguard Worker add x7, x2, w4, uxtw #1 423*c0909341SAndroid Build Coastguard Worker161: 424*c0909341SAndroid Build Coastguard Worker mov w8, w4 425*c0909341SAndroid Build Coastguard Worker16: 426*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x2], #32 // tmp1 427*c0909341SAndroid Build Coastguard Worker 
ld1 {v16.8h, v17.8h}, [x3], #32 // tmp2 428*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h, v7.8h}, [x7], #32 429*c0909341SAndroid Build Coastguard Worker ld1 {v18.8h, v19.8h}, [x9], #32 430*c0909341SAndroid Build Coastguard Worker subs w8, w8, #16 431*c0909341SAndroid Build Coastguard Worker sabd v20.8h, v4.8h, v16.8h // abs(tmp1 - tmp2) 432*c0909341SAndroid Build Coastguard Worker sabd v21.8h, v5.8h, v17.8h 433*c0909341SAndroid Build Coastguard Worker ssubl v22.4s, v16.4h, v4.4h // tmp2 - tmp1 (requires 17 bit) 434*c0909341SAndroid Build Coastguard Worker ssubl2 v23.4s, v16.8h, v4.8h 435*c0909341SAndroid Build Coastguard Worker ssubl v24.4s, v17.4h, v5.4h 436*c0909341SAndroid Build Coastguard Worker ssubl2 v25.4s, v17.8h, v5.8h 437*c0909341SAndroid Build Coastguard Worker uqsub v20.8h, v0.8h, v20.8h // 27615 - abs() 438*c0909341SAndroid Build Coastguard Worker uqsub v21.8h, v0.8h, v21.8h 439*c0909341SAndroid Build Coastguard Worker sshll2 v27.4s, v5.8h, #6 // tmp1 << 6 440*c0909341SAndroid Build Coastguard Worker sshll v26.4s, v5.4h, #6 441*c0909341SAndroid Build Coastguard Worker sshll2 v5.4s, v4.8h, #6 442*c0909341SAndroid Build Coastguard Worker sshll v4.4s, v4.4h, #6 443*c0909341SAndroid Build Coastguard Worker ushr v20.8h, v20.8h, #10 // 64-m = (27615 - abs()) >> mask_sh 444*c0909341SAndroid Build Coastguard Worker ushr v21.8h, v21.8h, #10 445*c0909341SAndroid Build Coastguard Worker add v4.4s, v4.4s, v30.4s // += PREP_BIAS*64 446*c0909341SAndroid Build Coastguard Worker add v5.4s, v5.4s, v30.4s 447*c0909341SAndroid Build Coastguard Worker add v26.4s, v26.4s, v30.4s 448*c0909341SAndroid Build Coastguard Worker add v27.4s, v27.4s, v30.4s 449*c0909341SAndroid Build Coastguard Worker uxtl v16.4s, v20.4h 450*c0909341SAndroid Build Coastguard Worker uxtl2 v17.4s, v20.8h 451*c0909341SAndroid Build Coastguard Worker uxtl v28.4s, v21.4h 452*c0909341SAndroid Build Coastguard Worker mla v4.4s, v22.4s, v16.4s // (tmp2-tmp1)*(64-m) 453*c0909341SAndroid Build 
Coastguard Worker uxtl2 v16.4s, v21.8h 454*c0909341SAndroid Build Coastguard Worker mla v5.4s, v23.4s, v17.4s 455*c0909341SAndroid Build Coastguard Worker mla v26.4s, v24.4s, v28.4s 456*c0909341SAndroid Build Coastguard Worker mla v27.4s, v25.4s, v16.4s 457*c0909341SAndroid Build Coastguard Worker srshl v4.4s, v4.4s, v29.4s // (tmp1<<6 + (tmp2-tmp1)*(64-m) + (1 << (sh-1)) + PREP_BIAS*64) >> sh 458*c0909341SAndroid Build Coastguard Worker srshl v5.4s, v5.4s, v29.4s 459*c0909341SAndroid Build Coastguard Worker srshl v26.4s, v26.4s, v29.4s 460*c0909341SAndroid Build Coastguard Worker srshl v27.4s, v27.4s, v29.4s 461*c0909341SAndroid Build Coastguard Worker sqxtun v4.4h, v4.4s // iclip_pixel 462*c0909341SAndroid Build Coastguard Worker sqxtun2 v4.8h, v5.4s 463*c0909341SAndroid Build Coastguard Worker sqxtun v5.4h, v26.4s 464*c0909341SAndroid Build Coastguard Worker sqxtun2 v5.8h, v27.4s 465*c0909341SAndroid Build Coastguard Worker 466*c0909341SAndroid Build Coastguard Worker // Start of other half 467*c0909341SAndroid Build Coastguard Worker sabd v22.8h, v6.8h, v18.8h // abs(tmp1 - tmp2) 468*c0909341SAndroid Build Coastguard Worker sabd v23.8h, v7.8h, v19.8h 469*c0909341SAndroid Build Coastguard Worker 470*c0909341SAndroid Build Coastguard Worker umin v4.8h, v4.8h, v31.8h // iclip_pixel 471*c0909341SAndroid Build Coastguard Worker umin v5.8h, v5.8h, v31.8h 472*c0909341SAndroid Build Coastguard Worker 473*c0909341SAndroid Build Coastguard Worker ssubl v16.4s, v18.4h, v6.4h // tmp2 - tmp1 (requires 17 bit) 474*c0909341SAndroid Build Coastguard Worker ssubl2 v17.4s, v18.8h, v6.8h 475*c0909341SAndroid Build Coastguard Worker ssubl v18.4s, v19.4h, v7.4h 476*c0909341SAndroid Build Coastguard Worker ssubl2 v19.4s, v19.8h, v7.8h 477*c0909341SAndroid Build Coastguard Worker uqsub v22.8h, v0.8h, v22.8h // 27615 - abs() 478*c0909341SAndroid Build Coastguard Worker uqsub v23.8h, v0.8h, v23.8h 479*c0909341SAndroid Build Coastguard Worker sshll v24.4s, v6.4h, #6 // tmp1 << 6 
480*c0909341SAndroid Build Coastguard Worker sshll2 v25.4s, v6.8h, #6 481*c0909341SAndroid Build Coastguard Worker sshll v26.4s, v7.4h, #6 482*c0909341SAndroid Build Coastguard Worker sshll2 v27.4s, v7.8h, #6 483*c0909341SAndroid Build Coastguard Worker ushr v22.8h, v22.8h, #10 // 64-m = (27615 - abs()) >> mask_sh 484*c0909341SAndroid Build Coastguard Worker ushr v23.8h, v23.8h, #10 485*c0909341SAndroid Build Coastguard Worker add v24.4s, v24.4s, v30.4s // += PREP_BIAS*64 486*c0909341SAndroid Build Coastguard Worker add v25.4s, v25.4s, v30.4s 487*c0909341SAndroid Build Coastguard Worker add v26.4s, v26.4s, v30.4s 488*c0909341SAndroid Build Coastguard Worker add v27.4s, v27.4s, v30.4s 489*c0909341SAndroid Build Coastguard Worker uxtl v6.4s, v22.4h 490*c0909341SAndroid Build Coastguard Worker uxtl2 v7.4s, v22.8h 491*c0909341SAndroid Build Coastguard Worker uxtl v28.4s, v23.4h 492*c0909341SAndroid Build Coastguard Worker mla v24.4s, v16.4s, v6.4s // (tmp2-tmp1)*(64-m) 493*c0909341SAndroid Build Coastguard Worker uxtl2 v6.4s, v23.8h 494*c0909341SAndroid Build Coastguard Worker mla v25.4s, v17.4s, v7.4s 495*c0909341SAndroid Build Coastguard Worker mla v26.4s, v18.4s, v28.4s 496*c0909341SAndroid Build Coastguard Worker mla v27.4s, v19.4s, v6.4s 497*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v29.4s // (tmp1<<6 + (tmp2-tmp1)*(64-m) + (1 << (sh-1)) + PREP_BIAS*64) >> sh 498*c0909341SAndroid Build Coastguard Worker srshl v25.4s, v25.4s, v29.4s 499*c0909341SAndroid Build Coastguard Worker srshl v26.4s, v26.4s, v29.4s 500*c0909341SAndroid Build Coastguard Worker srshl v27.4s, v27.4s, v29.4s 501*c0909341SAndroid Build Coastguard Worker sqxtun v6.4h, v24.4s // iclip_pixel 502*c0909341SAndroid Build Coastguard Worker sqxtun2 v6.8h, v25.4s 503*c0909341SAndroid Build Coastguard Worker sqxtun v7.4h, v26.4s 504*c0909341SAndroid Build Coastguard Worker sqxtun2 v7.8h, v27.4s 505*c0909341SAndroid Build Coastguard Worker umin v6.8h, v6.8h, v31.8h // iclip_pixel 
506*c0909341SAndroid Build Coastguard Worker umin v7.8h, v7.8h, v31.8h 507*c0909341SAndroid Build Coastguard Worker.if \type == 444 508*c0909341SAndroid Build Coastguard Worker uzp1 v20.16b, v20.16b, v21.16b // 64 - m 509*c0909341SAndroid Build Coastguard Worker uzp1 v21.16b, v22.16b, v23.16b 510*c0909341SAndroid Build Coastguard Worker sub v20.16b, v1.16b, v20.16b // m 511*c0909341SAndroid Build Coastguard Worker sub v21.16b, v1.16b, v21.16b 512*c0909341SAndroid Build Coastguard Worker st1 {v20.16b}, [x6], #16 513*c0909341SAndroid Build Coastguard Worker st1 {v21.16b}, [x10], #16 514*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 515*c0909341SAndroid Build Coastguard Worker addp v20.8h, v20.8h, v21.8h // (64 - m) + (64 - n) (column wise addition) 516*c0909341SAndroid Build Coastguard Worker addp v21.8h, v22.8h, v23.8h 517*c0909341SAndroid Build Coastguard Worker xtn v20.8b, v20.8h 518*c0909341SAndroid Build Coastguard Worker xtn v21.8b, v21.8h 519*c0909341SAndroid Build Coastguard Worker uhsub v20.8b, v3.8b, v20.8b // ((129 - sign) - ((64 - m) + (64 - n)) >> 1 520*c0909341SAndroid Build Coastguard Worker uhsub v21.8b, v3.8b, v21.8b 521*c0909341SAndroid Build Coastguard Worker st1 {v20.8b}, [x6], #8 522*c0909341SAndroid Build Coastguard Worker st1 {v21.8b}, [x10], #8 523*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 524*c0909341SAndroid Build Coastguard Worker add v20.8h, v20.8h, v22.8h // (64 - my1) + (64 - my2) (row wise addition) 525*c0909341SAndroid Build Coastguard Worker add v21.8h, v21.8h, v23.8h 526*c0909341SAndroid Build Coastguard Worker addp v20.8h, v20.8h, v21.8h // (128 - m) + (128 - n) (column wise addition) 527*c0909341SAndroid Build Coastguard Worker sub v20.8h, v3.8h, v20.8h // (256 - sign) - ((128 - m) + (128 - n)) 528*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #2 // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2 529*c0909341SAndroid Build Coastguard Worker st1 {v20.8b}, [x6], #8 
530*c0909341SAndroid Build Coastguard Worker.endif 531*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h}, [x0], #32 532*c0909341SAndroid Build Coastguard Worker st1 {v6.8h, v7.8h}, [x12], #32 533*c0909341SAndroid Build Coastguard Worker b.gt 16b 534*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 535*c0909341SAndroid Build Coastguard Worker add x2, x2, w4, uxtw #1 536*c0909341SAndroid Build Coastguard Worker add x3, x3, w4, uxtw #1 537*c0909341SAndroid Build Coastguard Worker add x7, x7, w4, uxtw #1 538*c0909341SAndroid Build Coastguard Worker add x9, x9, w4, uxtw #1 539*c0909341SAndroid Build Coastguard Worker.if \type == 444 540*c0909341SAndroid Build Coastguard Worker add x6, x6, w4, uxtw 541*c0909341SAndroid Build Coastguard Worker add x10, x10, w4, uxtw 542*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 543*c0909341SAndroid Build Coastguard Worker add x6, x6, x11, lsr #1 544*c0909341SAndroid Build Coastguard Worker add x10, x10, x11, lsr #1 545*c0909341SAndroid Build Coastguard Worker.endif 546*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 547*c0909341SAndroid Build Coastguard Worker add x12, x12, x1 548*c0909341SAndroid Build Coastguard Worker b.gt 161b 549*c0909341SAndroid Build Coastguard Worker ret 550*c0909341SAndroid Build Coastguard Workerendfunc 551*c0909341SAndroid Build Coastguard Worker 552*c0909341SAndroid Build Coastguard Workerjumptable w_mask_\type\()_tbl 553*c0909341SAndroid Build Coastguard Worker .word 1280b - w_mask_\type\()_tbl 554*c0909341SAndroid Build Coastguard Worker .word 640b - w_mask_\type\()_tbl 555*c0909341SAndroid Build Coastguard Worker .word 320b - w_mask_\type\()_tbl 556*c0909341SAndroid Build Coastguard Worker .word 160b - w_mask_\type\()_tbl 557*c0909341SAndroid Build Coastguard Worker .word 80b - w_mask_\type\()_tbl 558*c0909341SAndroid Build Coastguard Worker .word 40b - w_mask_\type\()_tbl 559*c0909341SAndroid Build Coastguard Workerendjumptable 560*c0909341SAndroid Build 
.endm

// Instantiate w_mask_fn once for each \type value it handles.
w_mask_fn 444
w_mask_fn 422
w_mask_fn 420


// blend_16bpc_neon: blend a 16-bit buffer into dst, in place, using one
// mask byte per pixel:
//
//   dst[x] = dst[x] + (((dst[x] - tmp[x]) * -m[x] + 32) >> 6)
//
// The mask is negated, sign-extended to 16 bits and shifted left by 9 so
// that sqrdmulh, which computes (2*a*b + 0x8000) >> 16, yields exactly
// ((a - b) * -m + 32) >> 6.
//
// Register use, as read from the code below:
//   x0 = dst (read and written)
//   x1 = dst stride in bytes
//   x2 = tmp, 16-bit elements, read contiguously
//   w3 = w; only used to select the code path, index = clz(w) - 26
//        (0..3 -> 32, 16, 8, 4 pixels per row)
//        NOTE(review): presumably callers pass only these widths - confirm
//   w4 = h, row counter
//   x5 = mask, one byte per pixel, read contiguously
//
// The 4, 8 and 16 pixel paths handle two rows per iteration (x0 = even row,
// x8 = odd row, x1 doubled); the 32 pixel path handles one row per iteration.
// Modifies x0-x6, x8, v0-v7, v16-v19 and the condition flags.
function blend_16bpc_neon, export=1
        movrel          x6,  blend_tbl
        clz             w3,  w3
        sub             w3,  w3,  #26
        ldrsw           x3,  [x6,  x3,  lsl #2]
        add             x6,  x6,  x3
        add             x8,  x0,  x1             // x8 = second row of dst
        br              x6
40:
        AARCH64_VALID_JUMP_TARGET
        lsl             x1,  x1,  #1             // step two rows at a time
4:
        // Two rows of 4 pixels: row 0 in the low half, row 1 in the high half.
        ld1             {v2.8b},   [x5], #8
        ld1             {v1.8h},   [x2], #16
        ldr             d0,  [x0]
        neg             v2.8b,   v2.8b           // -m
        subs            w4,  w4,  #2
        ld1             {v0.d}[1], [x8]
        sxtl            v2.8h,   v2.8b
        shl             v2.8h,   v2.8h,   #9     // -m << 9
        sub             v1.8h,   v0.8h,   v1.8h  // a - b
        sqrdmulh        v1.8h,   v1.8h,   v2.8h  // ((a-b)*-m + 32) >> 6
        add             v0.8h,   v0.8h,   v1.8h
        st1             {v0.8b}, [x0], x1
        st1             {v0.d}[1], [x8], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
        lsl             x1,  x1,  #1             // step two rows at a time
8:
        // Two rows of 8 pixels: v0/v2/v4 = row 0, v1/v3/v5 = row 1.
        ld1             {v4.16b},       [x5], #16
        ld1             {v2.8h, v3.8h}, [x2], #32
        neg             v5.16b,  v4.16b          // -m
        ld1             {v0.8h},   [x0]
        ld1             {v1.8h},   [x8]
        sxtl            v4.8h,   v5.8b
        sxtl2           v5.8h,   v5.16b
        shl             v4.8h,   v4.8h,   #9     // -m << 9
        shl             v5.8h,   v5.8h,   #9
        sub             v2.8h,   v0.8h,   v2.8h  // a - b
        sub             v3.8h,   v1.8h,   v3.8h
        subs            w4,  w4,  #2
        sqrdmulh        v2.8h,   v2.8h,   v4.8h  // ((a-b)*-m + 32) >> 6
        sqrdmulh        v3.8h,   v3.8h,   v5.8h
        add             v0.8h,   v0.8h,   v2.8h
        add             v1.8h,   v1.8h,   v3.8h
        st1             {v0.8h}, [x0], x1
        st1             {v1.8h}, [x8], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
        lsl             x1,  x1,  #1             // step two rows at a time
16:
        // Two rows of 16 pixels: v0-v1 = row 0, v2-v3 = row 1.
        ld1             {v16.16b, v17.16b},           [x5], #32
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x2], #64
        subs            w4,  w4,  #2
        neg             v18.16b, v16.16b         // -m
        neg             v19.16b, v17.16b
        ld1             {v0.8h, v1.8h}, [x0]
        sxtl            v16.8h,  v18.8b
        sxtl2           v17.8h,  v18.16b
        sxtl            v18.8h,  v19.8b
        sxtl2           v19.8h,  v19.16b
        ld1             {v2.8h, v3.8h}, [x8]
        shl             v16.8h,  v16.8h,  #9     // -m << 9
        shl             v17.8h,  v17.8h,  #9
        shl             v18.8h,  v18.8h,  #9
        shl             v19.8h,  v19.8h,  #9
        sub             v4.8h,   v0.8h,   v4.8h  // a - b
        sub             v5.8h,   v1.8h,   v5.8h
        sub             v6.8h,   v2.8h,   v6.8h
        sub             v7.8h,   v3.8h,   v7.8h
        sqrdmulh        v4.8h,   v4.8h,   v16.8h // ((a-b)*-m + 32) >> 6
        sqrdmulh        v5.8h,   v5.8h,   v17.8h
        sqrdmulh        v6.8h,   v6.8h,   v18.8h
        sqrdmulh        v7.8h,   v7.8h,   v19.8h
        add             v0.8h,   v0.8h,   v4.8h
        add             v1.8h,   v1.8h,   v5.8h
        add             v2.8h,   v2.8h,   v6.8h
        add             v3.8h,   v3.8h,   v7.8h
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v2.8h, v3.8h}, [x8], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
32:
        // One row of 32 pixels per iteration; x1 is the plain stride here
        // and x8 is not used.
        ld1             {v16.16b, v17.16b},           [x5], #32
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x2], #64
        subs            w4,  w4,  #1
        neg             v18.16b, v16.16b         // -m
        neg             v19.16b, v17.16b
        sxtl            v16.8h,  v18.8b
        sxtl2           v17.8h,  v18.16b
        sxtl            v18.8h,  v19.8b
        sxtl2           v19.8h,  v19.16b
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0]
        shl             v16.8h,  v16.8h,  #9     // -m << 9
        shl             v17.8h,  v17.8h,  #9
        shl             v18.8h,  v18.8h,  #9
        shl             v19.8h,  v19.8h,  #9
        sub             v4.8h,   v0.8h,   v4.8h  // a - b
        sub             v5.8h,   v1.8h,   v5.8h
        sub             v6.8h,   v2.8h,   v6.8h
        sub             v7.8h,   v3.8h,   v7.8h
        sqrdmulh        v4.8h,   v4.8h,   v16.8h // ((a-b)*-m + 32) >> 6
        sqrdmulh        v5.8h,   v5.8h,   v17.8h
        sqrdmulh        v6.8h,   v6.8h,   v18.8h
        sqrdmulh        v7.8h,   v7.8h,   v19.8h
        add             v0.8h,   v0.8h,   v4.8h
        add             v1.8h,   v1.8h,   v5.8h
        add             v2.8h,   v2.8h,   v6.8h
        add             v3.8h,   v3.8h,   v7.8h
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        b.gt            32b
        ret
endfunc

// Offsets of the blend_16bpc_neon width paths relative to this table,
// indexed by clz(w) - 26 (widest path first).
jumptable blend_tbl
        .word 320b - blend_tbl
        .word 160b - blend_tbl
        .word 80b  - blend_tbl
        .word 40b  - blend_tbl
endjumptable

// blend_h_16bpc_neon: same blend formula as blend_16bpc_neon,
//
//   dst[x] = dst[x] + (((dst[x] - tmp[x]) * -m + 32) >> 6)
//
// but with a single mask value m per row, read from the obmc_masks table
// rather than from a per-pixel mask buffer.
//
// Register use, as read from the code below:
//   x0 = dst (read and written)
//   x1 = dst stride in bytes
//   x2 = tmp, 16-bit elements, read contiguously
//   w3 = w; index = clz(w) - 24 selects the code path
//        (0..6 -> 128, 64, 32, 16, 8, 4, 2 pixels per row; the three widest
//        share one path that loops over the row in 32 pixel chunks)
//   w4 = h
//
// The mask pointer starts at obmc_masks + h and advances one byte per row.
// Only h - (h >> 2) rows are processed; the rows after that are left as is.
// Every path handles two rows per iteration (x0 = even row, x8 = odd row).
// Modifies x0-x2, x4-x8, v0-v7, v16-v25 and the condition flags.
function blend_h_16bpc_neon, export=1
        movrel          x6,  blend_h_tbl
        movrel          x5,  X(obmc_masks)
        add             x5,  x5,  w4,  uxtw      // mask = &obmc_masks[h]
        sub             w4,  w4,  w4,  lsr #2    // rows to process = h - h/4
        clz             w7,  w3
        add             x8,  x0,  x1             // x8 = second row of dst
        lsl             x1,  x1,  #1             // step two rows at a time
        sub             w7,  w7,  #24
        ldrsw           x7,  [x6,  x7,  lsl #2]
        add             x6,  x6,  x7
        br              x6
20:
        AARCH64_VALID_JUMP_TARGET
2:
        // ld2r broadcasts the two row masks; ext leaves {m0, m0, m1, m1, ...}
        // in the low bytes, matching the row 0 / row 1 pixel pairs in v0/v1.
        ld2r            {v2.8b, v3.8b}, [x5], #2
        ld1             {v1.4h},        [x2], #8
        ext             v2.8b,   v2.8b,   v3.8b,   #6
        subs            w4,  w4,  #2
        neg             v2.8b,   v2.8b           // -m
        ldr             s0,  [x0]
        ld1             {v0.s}[1], [x8]
        sxtl            v2.8h,   v2.8b
        shl             v2.4h,   v2.4h,   #9     // -m << 9
        sub             v1.4h,   v0.4h,   v1.4h  // a - b
        sqrdmulh        v1.4h,   v1.4h,   v2.4h  // ((a-b)*-m + 32) >> 6
        add             v0.4h,   v0.4h,   v1.4h
        st1             {v0.s}[0], [x0], x1
        st1             {v0.s}[1], [x8], x1
        b.gt            2b
        ret
40:
        AARCH64_VALID_JUMP_TARGET
4:
        // ext leaves {m0 x4, m1 x4}: row 0 in the low half, row 1 in the high.
        ld2r            {v2.8b, v3.8b}, [x5], #2
        ld1             {v1.8h},        [x2], #16
        ext             v2.8b,   v2.8b,   v3.8b,   #4
        subs            w4,  w4,  #2
        neg             v2.8b,   v2.8b           // -m
        ldr             d0,  [x0]
        ld1             {v0.d}[1], [x8]
        sxtl            v2.8h,   v2.8b
        shl             v2.8h,   v2.8h,   #9     // -m << 9
        sub             v1.8h,   v0.8h,   v1.8h  // a - b
        sqrdmulh        v1.8h,   v1.8h,   v2.8h  // ((a-b)*-m + 32) >> 6
        add             v0.8h,   v0.8h,   v1.8h
        st1             {v0.8b}, [x0], x1
        st1             {v0.d}[1], [x8], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        // v4 = mask for row 0, v5 = mask for row 1.
        ld2r            {v4.8b, v5.8b}, [x5], #2
        ld1             {v2.8h, v3.8h}, [x2], #32
        neg             v4.8b,   v4.8b           // -m
        neg             v5.8b,   v5.8b
        ld1             {v0.8h}, [x0]
        subs            w4,  w4,  #2
        sxtl            v4.8h,   v4.8b
        sxtl            v5.8h,   v5.8b
        ld1             {v1.8h}, [x8]
        shl             v4.8h,   v4.8h,   #9     // -m << 9
        shl             v5.8h,   v5.8h,   #9
        sub             v2.8h,   v0.8h,   v2.8h  // a - b
        sub             v3.8h,   v1.8h,   v3.8h
        sqrdmulh        v2.8h,   v2.8h,   v4.8h  // ((a-b)*-m + 32) >> 6
        sqrdmulh        v3.8h,   v3.8h,   v5.8h
        add             v0.8h,   v0.8h,   v2.8h
        add             v1.8h,   v1.8h,   v3.8h
        st1             {v0.8h}, [x0], x1
        st1             {v1.8h}, [x8], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
16:
        // v16 = mask for row 0 (v4, v5), v17 = mask for row 1 (v6, v7).
        ld2r            {v16.8b, v17.8b}, [x5], #2
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x2], #64
        neg             v16.8b,  v16.8b          // -m
        neg             v17.8b,  v17.8b
        ld1             {v0.8h, v1.8h}, [x0]
        ld1             {v2.8h, v3.8h}, [x8]
        subs            w4,  w4,  #2
        sxtl            v16.8h,  v16.8b
        sxtl            v17.8h,  v17.8b
        shl             v16.8h,  v16.8h,  #9     // -m << 9
        shl             v17.8h,  v17.8h,  #9
        sub             v4.8h,   v0.8h,   v4.8h  // a - b
        sub             v5.8h,   v1.8h,   v5.8h
        sub             v6.8h,   v2.8h,   v6.8h
        sub             v7.8h,   v3.8h,   v7.8h
        sqrdmulh        v4.8h,   v4.8h,   v16.8h // ((a-b)*-m + 32) >> 6
        sqrdmulh        v5.8h,   v5.8h,   v16.8h
        sqrdmulh        v6.8h,   v6.8h,   v17.8h
        sqrdmulh        v7.8h,   v7.8h,   v17.8h
        add             v0.8h,   v0.8h,   v4.8h
        add             v1.8h,   v1.8h,   v5.8h
        add             v2.8h,   v2.8h,   v6.8h
        add             v3.8h,   v3.8h,   v7.8h
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v2.8h, v3.8h}, [x8], x1
        b.gt            16b
        ret
1280:
640:
320:
        AARCH64_VALID_JUMP_TARGET
        // The inner loop advances x0/x8 by w*2 bytes, so remove that from the
        // two-row stride. x7 walks the second tmp row, w*2 bytes after x2.
        sub             x1,  x1,  w3,  uxtw #1
        add             x7,  x2,  w3,  uxtw #1
321:
        // Per row pair: v24 = mask for row 0, v25 = mask for row 1.
        ld2r            {v24.8b, v25.8b}, [x5], #2
        mov             w6,  w3                  // w6 = pixels left in this row
        neg             v24.8b,  v24.8b          // -m
        neg             v25.8b,  v25.8b
        sxtl            v24.8h,  v24.8b
        sxtl            v25.8h,  v25.8b
        shl             v24.8h,  v24.8h,  #9     // -m << 9
        shl             v25.8h,  v25.8h,  #9
32:
        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x2], #64
        ld1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x0]
        subs            w6,  w6,  #32
        sub             v16.8h,  v0.8h,   v16.8h // a - b
        sub             v17.8h,  v1.8h,   v17.8h
        sub             v18.8h,  v2.8h,   v18.8h
        sub             v19.8h,  v3.8h,   v19.8h
        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x7], #64
        ld1             {v4.8h,  v5.8h,  v6.8h,  v7.8h},  [x8]
        sqrdmulh        v16.8h,  v16.8h,  v24.8h // ((a-b)*-m + 32) >> 6
        sqrdmulh        v17.8h,  v17.8h,  v24.8h
        sqrdmulh        v18.8h,  v18.8h,  v24.8h
        sqrdmulh        v19.8h,  v19.8h,  v24.8h
        sub             v20.8h,  v4.8h,   v20.8h // a - b
        sub             v21.8h,  v5.8h,   v21.8h
        sub             v22.8h,  v6.8h,   v22.8h
        sub             v23.8h,  v7.8h,   v23.8h
        add             v0.8h,   v0.8h,   v16.8h
        add             v1.8h,   v1.8h,   v17.8h
        add             v2.8h,   v2.8h,   v18.8h
        add             v3.8h,   v3.8h,   v19.8h
        sqrdmulh        v20.8h,  v20.8h,  v25.8h // ((a-b)*-m + 32) >> 6
        sqrdmulh        v21.8h,  v21.8h,  v25.8h
        sqrdmulh        v22.8h,  v22.8h,  v25.8h
        sqrdmulh        v23.8h,  v23.8h,  v25.8h
        st1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x0], #64
        add             v4.8h,   v4.8h,   v20.8h
        add             v5.8h,   v5.8h,   v21.8h
        add             v6.8h,   v6.8h,   v22.8h
        add             v7.8h,   v7.8h,   v23.8h
        st1             {v4.8h,  v5.8h,  v6.8h,  v7.8h},  [x8], #64
        b.gt            32b
        subs            w4,  w4,  #2
        add             x0,  x0,  x1
        add             x8,  x8,  x1
        add             x2,  x2,  w3,  uxtw #1   // skip the row x7 just consumed
        add             x7,  x7,  w3,  uxtw #1   // skip the row x2 will consume
        b.gt            321b
        ret
endfunc

// Offsets of the blend_h_16bpc_neon width paths relative to this table,
// indexed by clz(w) - 24 (widest path first).
jumptable blend_h_tbl
        .word 1280b - blend_h_tbl
        .word 640b  - blend_h_tbl
        .word 320b  - blend_h_tbl
        .word 160b  - blend_h_tbl
        .word 80b   - blend_h_tbl
        .word 40b   - blend_h_tbl
        .word 20b   - blend_h_tbl
endjumptable

function blend_v_16bpc_neon, export=1
        movrel          x6,  blend_v_tbl
        movrel          x5,  X(obmc_masks)
        add             x5,  x5,  w3,  uxtw
        clz             w3,  w3
        add             x8,  x0,  x1
        lsl             x1,  x1,  #1
        sub             w3,  w3,  #26
        ldrsw           x3,  [x6,  x3,  lsl #2]
        add             x6,  x6,  x3
        br              x6
20:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v2.8b}, [x5]
        neg             v2.8b,   v2.8b           // -m
        sxtl            v2.8h,   v2.8b
        shl             v2.4h,   v2.4h,   #9     // -m << 9
2:
        ldr             s1,  [x2], #4
        ldr             h0,  [x0]
        subs            w4,  w4,  #2
        ld1             {v1.h}[1], [x2]
        ld1             {v0.h}[1], [x8]
Build Coastguard Worker add x2, x2, #4 889*c0909341SAndroid Build Coastguard Worker sub v1.4h, v0.4h, v1.4h // a - b 890*c0909341SAndroid Build Coastguard Worker sqrdmulh v1.4h, v1.4h, v2.4h // ((a-b)*-m + 32) >> 6 891*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v1.4h 892*c0909341SAndroid Build Coastguard Worker st1 {v0.h}[0], [x0], x1 893*c0909341SAndroid Build Coastguard Worker st1 {v0.h}[1], [x8], x1 894*c0909341SAndroid Build Coastguard Worker b.gt 2b 895*c0909341SAndroid Build Coastguard Worker ret 896*c0909341SAndroid Build Coastguard Worker40: 897*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 898*c0909341SAndroid Build Coastguard Worker ld1r {v2.2s}, [x5] 899*c0909341SAndroid Build Coastguard Worker sub x1, x1, #4 900*c0909341SAndroid Build Coastguard Worker neg v2.8b, v2.8b // -m 901*c0909341SAndroid Build Coastguard Worker sxtl v2.8h, v2.8b 902*c0909341SAndroid Build Coastguard Worker shl v2.8h, v2.8h, #9 // -m << 9 903*c0909341SAndroid Build Coastguard Worker4: 904*c0909341SAndroid Build Coastguard Worker ld1 {v1.8h}, [x2], #16 905*c0909341SAndroid Build Coastguard Worker ldr d0, [x0] 906*c0909341SAndroid Build Coastguard Worker ld1 {v0.d}[1], [x8] 907*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 908*c0909341SAndroid Build Coastguard Worker sub v1.8h, v0.8h, v1.8h // a - b 909*c0909341SAndroid Build Coastguard Worker sqrdmulh v1.8h, v1.8h, v2.8h // ((a-b)*-m + 32) >> 6 910*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v1.8h 911*c0909341SAndroid Build Coastguard Worker str s0, [x0], #4 912*c0909341SAndroid Build Coastguard Worker st1 {v0.s}[2], [x8], #4 913*c0909341SAndroid Build Coastguard Worker st1 {v0.h}[2], [x0], x1 914*c0909341SAndroid Build Coastguard Worker st1 {v0.h}[6], [x8], x1 915*c0909341SAndroid Build Coastguard Worker b.gt 4b 916*c0909341SAndroid Build Coastguard Worker ret 917*c0909341SAndroid Build Coastguard Worker80: 918*c0909341SAndroid Build Coastguard Worker 
AARCH64_VALID_JUMP_TARGET 919*c0909341SAndroid Build Coastguard Worker ld1 {v4.8b}, [x5] 920*c0909341SAndroid Build Coastguard Worker sub x1, x1, #8 921*c0909341SAndroid Build Coastguard Worker neg v4.8b, v4.8b // -m 922*c0909341SAndroid Build Coastguard Worker sxtl v4.8h, v4.8b 923*c0909341SAndroid Build Coastguard Worker shl v4.8h, v4.8h, #9 // -m << 9 924*c0909341SAndroid Build Coastguard Worker8: 925*c0909341SAndroid Build Coastguard Worker ld1 {v2.8h, v3.8h}, [x2], #32 926*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h}, [x0] 927*c0909341SAndroid Build Coastguard Worker ld1 {v1.8h}, [x8] 928*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 929*c0909341SAndroid Build Coastguard Worker sub v2.8h, v0.8h, v2.8h // a - b 930*c0909341SAndroid Build Coastguard Worker sub v3.8h, v1.8h, v3.8h 931*c0909341SAndroid Build Coastguard Worker sqrdmulh v2.8h, v2.8h, v4.8h // ((a-b)*-m + 32) >> 6 932*c0909341SAndroid Build Coastguard Worker sqrdmulh v3.8h, v3.8h, v4.8h 933*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v2.8h 934*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v3.8h 935*c0909341SAndroid Build Coastguard Worker str d0, [x0], #8 936*c0909341SAndroid Build Coastguard Worker str d1, [x8], #8 937*c0909341SAndroid Build Coastguard Worker st1 {v0.s}[2], [x0], x1 938*c0909341SAndroid Build Coastguard Worker st1 {v1.s}[2], [x8], x1 939*c0909341SAndroid Build Coastguard Worker b.gt 8b 940*c0909341SAndroid Build Coastguard Worker ret 941*c0909341SAndroid Build Coastguard Worker160: 942*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 943*c0909341SAndroid Build Coastguard Worker ld1 {v16.16b}, [x5] 944*c0909341SAndroid Build Coastguard Worker sub x1, x1, #16 945*c0909341SAndroid Build Coastguard Worker neg v17.16b, v16.16b // -m 946*c0909341SAndroid Build Coastguard Worker sxtl v16.8h, v17.8b 947*c0909341SAndroid Build Coastguard Worker sxtl2 v17.8h, v17.16b 948*c0909341SAndroid Build Coastguard Worker shl v16.8h, 
v16.8h, #9 // -m << 9 949*c0909341SAndroid Build Coastguard Worker shl v17.4h, v17.4h, #9 950*c0909341SAndroid Build Coastguard Worker16: 951*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x2], #64 952*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h}, [x0] 953*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 954*c0909341SAndroid Build Coastguard Worker ld1 {v2.8h, v3.8h}, [x8] 955*c0909341SAndroid Build Coastguard Worker sub v4.8h, v0.8h, v4.8h // a - b 956*c0909341SAndroid Build Coastguard Worker sub v5.4h, v1.4h, v5.4h 957*c0909341SAndroid Build Coastguard Worker sub v6.8h, v2.8h, v6.8h 958*c0909341SAndroid Build Coastguard Worker sub v7.4h, v3.4h, v7.4h 959*c0909341SAndroid Build Coastguard Worker sqrdmulh v4.8h, v4.8h, v16.8h // ((a-b)*-m + 32) >> 6 960*c0909341SAndroid Build Coastguard Worker sqrdmulh v5.4h, v5.4h, v17.4h 961*c0909341SAndroid Build Coastguard Worker sqrdmulh v6.8h, v6.8h, v16.8h 962*c0909341SAndroid Build Coastguard Worker sqrdmulh v7.4h, v7.4h, v17.4h 963*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v4.8h 964*c0909341SAndroid Build Coastguard Worker add v1.4h, v1.4h, v5.4h 965*c0909341SAndroid Build Coastguard Worker add v2.8h, v2.8h, v6.8h 966*c0909341SAndroid Build Coastguard Worker add v3.4h, v3.4h, v7.4h 967*c0909341SAndroid Build Coastguard Worker st1 {v0.8h}, [x0], #16 968*c0909341SAndroid Build Coastguard Worker st1 {v2.8h}, [x8], #16 969*c0909341SAndroid Build Coastguard Worker st1 {v1.4h}, [x0], x1 970*c0909341SAndroid Build Coastguard Worker st1 {v3.4h}, [x8], x1 971*c0909341SAndroid Build Coastguard Worker b.gt 16b 972*c0909341SAndroid Build Coastguard Worker ret 973*c0909341SAndroid Build Coastguard Worker320: 974*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 975*c0909341SAndroid Build Coastguard Worker ld1 {v24.16b, v25.16b}, [x5] 976*c0909341SAndroid Build Coastguard Worker neg v26.16b, v24.16b // -m 977*c0909341SAndroid Build Coastguard Worker 
neg v27.8b, v25.8b 978*c0909341SAndroid Build Coastguard Worker sxtl v24.8h, v26.8b 979*c0909341SAndroid Build Coastguard Worker sxtl2 v25.8h, v26.16b 980*c0909341SAndroid Build Coastguard Worker sxtl v26.8h, v27.8b 981*c0909341SAndroid Build Coastguard Worker shl v24.8h, v24.8h, #9 // -m << 9 982*c0909341SAndroid Build Coastguard Worker shl v25.8h, v25.8h, #9 983*c0909341SAndroid Build Coastguard Worker shl v26.8h, v26.8h, #9 984*c0909341SAndroid Build Coastguard Worker32: 985*c0909341SAndroid Build Coastguard Worker ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [x2], #64 986*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h}, [x0] 987*c0909341SAndroid Build Coastguard Worker ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [x2], #64 988*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h, v6.8h}, [x8] 989*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 990*c0909341SAndroid Build Coastguard Worker sub v16.8h, v0.8h, v16.8h // a - b 991*c0909341SAndroid Build Coastguard Worker sub v17.8h, v1.8h, v17.8h 992*c0909341SAndroid Build Coastguard Worker sub v18.8h, v2.8h, v18.8h 993*c0909341SAndroid Build Coastguard Worker sub v20.8h, v4.8h, v20.8h 994*c0909341SAndroid Build Coastguard Worker sub v21.8h, v5.8h, v21.8h 995*c0909341SAndroid Build Coastguard Worker sub v22.8h, v6.8h, v22.8h 996*c0909341SAndroid Build Coastguard Worker sqrdmulh v16.8h, v16.8h, v24.8h // ((a-b)*-m + 32) >> 6 997*c0909341SAndroid Build Coastguard Worker sqrdmulh v17.8h, v17.8h, v25.8h 998*c0909341SAndroid Build Coastguard Worker sqrdmulh v18.8h, v18.8h, v26.8h 999*c0909341SAndroid Build Coastguard Worker sqrdmulh v20.8h, v20.8h, v24.8h 1000*c0909341SAndroid Build Coastguard Worker sqrdmulh v21.8h, v21.8h, v25.8h 1001*c0909341SAndroid Build Coastguard Worker sqrdmulh v22.8h, v22.8h, v26.8h 1002*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v16.8h 1003*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v17.8h 1004*c0909341SAndroid Build Coastguard Worker add 
v2.8h, v2.8h, v18.8h 1005*c0909341SAndroid Build Coastguard Worker add v4.8h, v4.8h, v20.8h 1006*c0909341SAndroid Build Coastguard Worker add v5.8h, v5.8h, v21.8h 1007*c0909341SAndroid Build Coastguard Worker add v6.8h, v6.8h, v22.8h 1008*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h}, [x0], x1 1009*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h, v6.8h}, [x8], x1 1010*c0909341SAndroid Build Coastguard Worker b.gt 32b 1011*c0909341SAndroid Build Coastguard Worker ret 1012*c0909341SAndroid Build Coastguard Workerendfunc 1013*c0909341SAndroid Build Coastguard Worker 1014*c0909341SAndroid Build Coastguard Workerjumptable blend_v_tbl 1015*c0909341SAndroid Build Coastguard Worker .word 320b - blend_v_tbl 1016*c0909341SAndroid Build Coastguard Worker .word 160b - blend_v_tbl 1017*c0909341SAndroid Build Coastguard Worker .word 80b - blend_v_tbl 1018*c0909341SAndroid Build Coastguard Worker .word 40b - blend_v_tbl 1019*c0909341SAndroid Build Coastguard Worker .word 20b - blend_v_tbl 1020*c0909341SAndroid Build Coastguard Workerendjumptable 1021*c0909341SAndroid Build Coastguard Worker 1022*c0909341SAndroid Build Coastguard Worker 1023*c0909341SAndroid Build Coastguard Worker// This has got the same signature as the put_8tap functions, 1024*c0909341SAndroid Build Coastguard Worker// and assumes that x9 is set to (clz(w)-24). 
// Plain row-by-row copy, with one code path per row width.
// Register roles, as used by the code below:
//   x0  store pointer            x1  byte step between stored rows
//   x2  load pointer             x3  byte step between loaded rows
//   w5  rows still to copy
//   x9  index into put_16bpc_tbl (entry 0 is the 256 bytes per row path,
//       entry 6 the 4 bytes per row path)
// Scratch: x6-x13, v0-v7 and v16-v23, depending on the path taken.
function put_16bpc_neon, export=1
        movrel          x10, put_16bpc_tbl
        ldrsw           x9,  [x10, x9, lsl #2]  // 32 bit offset, relative to the table
        add             x10, x10, x9
        br              x10

20:     // 4 bytes per row, two rows per iteration
        AARCH64_VALID_JUMP_TARGET
2:
        ld1r            {v0.4s}, [x2], x3
        ld1r            {v1.4s}, [x2], x3
        st1             {v0.s}[0], [x0], x1
        st1             {v1.s}[0], [x0], x1
        subs            w5,  w5,  #2
        b.gt            2b
        ret
40:     // 8 bytes per row, two rows per iteration
        AARCH64_VALID_JUMP_TARGET
4:
        ld1             {v0.4h}, [x2], x3
        ld1             {v1.4h}, [x2], x3
        st1             {v0.4h}, [x0], x1
        st1             {v1.4h}, [x0], x1
        subs            w5,  w5,  #2
        b.gt            4b
        ret
80:     // 16 bytes per row; x0/x2 walk every other row, x8/x9 the rows between
        AARCH64_VALID_JUMP_TARGET
        add             x8,  x0,  x1            // second store pointer, one row down
        add             x9,  x2,  x3            // second load pointer, one row down
        lsl             x1,  x1,  #1            // every pointer now steps two rows
        lsl             x3,  x3,  #1
8:
        ld1             {v0.8h}, [x2], x3
        ld1             {v1.8h}, [x9], x3
        st1             {v0.8h}, [x0], x1
        st1             {v1.8h}, [x8], x1
        subs            w5,  w5,  #2
        b.gt            8b
        ret
160:    // 32 bytes per row, moved through general purpose register pairs
        AARCH64_VALID_JUMP_TARGET
16:
        ldp             x6,  x7,  [x2]
        ldp             x8,  x9,  [x2, #16]
        subs            w5,  w5,  #1
        stp             x6,  x7,  [x0]
        stp             x8,  x9,  [x0, #16]
        add             x2,  x2,  x3
        add             x0,  x0,  x1
        b.gt            16b
        ret
320:    // 64 bytes per row, moved through general purpose register pairs
        AARCH64_VALID_JUMP_TARGET
32:
        ldp             x6,  x7,  [x2]
        ldp             x8,  x9,  [x2, #16]
        stp             x6,  x7,  [x0]
        ldp             x10, x11, [x2, #32]
        stp             x8,  x9,  [x0, #16]
        ldp             x12, x13, [x2, #48]
        stp             x10, x11, [x0, #32]
        stp             x12, x13, [x0, #48]
        subs            w5,  w5,  #1
        add             x2,  x2,  x3
        add             x0,  x0,  x1
        b.gt            32b
        ret
640:    // 128 bytes per row, moved through vector register pairs
        AARCH64_VALID_JUMP_TARGET
64:
        ldp             q0,  q1,  [x2]
        ldp             q2,  q3,  [x2, #32]
        stp             q0,  q1,  [x0]
        ldp             q4,  q5,  [x2, #64]
        stp             q2,  q3,  [x0, #32]
        ldp             q6,  q7,  [x2, #96]
        stp             q4,  q5,  [x0, #64]
        stp             q6,  q7,  [x0, #96]
        subs            w5,  w5,  #1
        add             x2,  x2,  x3
        add             x0,  x0,  x1
        b.gt            64b
        ret
1280:   // 256 bytes per row, moved through vector register pairs
        AARCH64_VALID_JUMP_TARGET
128:
        ldp             q0,  q1,  [x2]
        ldp             q2,  q3,  [x2, #32]
        stp             q0,  q1,  [x0]
        ldp             q4,  q5,  [x2, #64]
        stp             q2,  q3,  [x0, #32]
        ldp             q6,  q7,  [x2, #96]
        stp             q4,  q5,  [x0, #64]
        ldp             q16, q17, [x2, #128]
        stp             q6,  q7,  [x0, #96]
        ldp             q18, q19, [x2, #160]
        stp             q16, q17, [x0, #128]
        ldp             q20, q21, [x2, #192]
        stp             q18, q19, [x0, #160]
        ldp             q22, q23, [x2, #224]
        stp             q20, q21, [x0, #192]
        stp             q22, q23, [x0, #224]
        subs            w5,  w5,  #1
        add             x2,  x2,  x3
        add             x0,  x0,  x1
        b.gt            128b
        ret
endfunc

// Offsets of the width specific entry points, widest path first.
jumptable put_16bpc_tbl
        .word 1280b - put_16bpc_tbl
        .word 640b  - put_16bpc_tbl
        .word 320b  - put_16bpc_tbl
        .word 160b  - put_16bpc_tbl
        .word 80b   - put_16bpc_tbl
        .word 40b   - put_16bpc_tbl
        .word 20b   - put_16bpc_tbl
endjumptable


// This has got the same signature as the prep_8tap functions,
// and assumes that x9 is set to (clz(w)-24), w7 to intermediate_bits and
// x8 to w*2.
// Converts rows of 16 bit samples into the intermediate format: every
// halfword is shifted by the per-lane count in v31 (w7, intermediate_bits),
// then PREP_BIAS is subtracted, and the result is written to [x0].
// Register roles, as used by the code below:
//   x0  store pointer; rows are written back to back in the narrow paths,
//       and advanced by x8 per row in the 640/1280 paths
//   x1  load pointer             x2  byte step between loaded rows
//   w4  rows still to process    w7  shift count
//   x9  index into prep_16bpc_tbl (entry 0 is the 256 bytes per row path,
//       entry 5 the 8 bytes per row path)
// Scratch: x9, x10, v0-v7, v16-v23, v30, v31.
function prep_16bpc_neon
        movrel          x10, prep_16bpc_tbl
        ldrsw           x9,  [x10, x9, lsl #2]  // 32 bit offset, relative to the table
        dup             v31.8h,  w7             // intermediate_bits
        movi            v30.8h,  #(PREP_BIAS >> 8), lsl #8 // PREP_BIAS in every lane
        add             x10, x10, x9
        br              x10

40:     // 8 bytes per row, two rows packed into one vector
        AARCH64_VALID_JUMP_TARGET
        add             x9,  x1,  x2            // second load pointer, one row down
        lsl             x2,  x2,  #1            // both pointers step two rows
4:
        ld1             {v0.8b}, [x1], x2       // first row  -> low half
        ld1             {v0.d}[1], [x9], x2     // second row -> high half
        sshl            v0.8h,   v0.8h,   v31.8h
        sub             v0.8h,   v0.8h,   v30.8h
        st1             {v0.8h}, [x0], #16
        subs            w4,  w4,  #2
        b.gt            4b
        ret
80:     // 16 bytes per row, two rows per iteration
        AARCH64_VALID_JUMP_TARGET
        add             x9,  x1,  x2            // second load pointer, one row down
        lsl             x2,  x2,  #1            // both pointers step two rows
8:
        ld1             {v0.8h}, [x1], x2
        ld1             {v1.8h}, [x9], x2
        sshl            v0.8h,   v0.8h,   v31.8h
        sshl            v1.8h,   v1.8h,   v31.8h
        sub             v0.8h,   v0.8h,   v30.8h
        sub             v1.8h,   v1.8h,   v30.8h
        st1             {v0.8h, v1.8h}, [x0], #32
        subs            w4,  w4,  #2
        b.gt            8b
        ret
160:    // 32 bytes per row, two rows per iteration
        AARCH64_VALID_JUMP_TARGET
16:
        ldp             q0,  q1,  [x1]
        add             x1,  x1,  x2
        sshl            v0.8h,   v0.8h,   v31.8h
        ldp             q2,  q3,  [x1]
        add             x1,  x1,  x2
        subs            w4,  w4,  #2
        sshl            v1.8h,   v1.8h,   v31.8h
        sshl            v2.8h,   v2.8h,   v31.8h
        sshl            v3.8h,   v3.8h,   v31.8h
        sub             v0.8h,   v0.8h,   v30.8h
        sub             v1.8h,   v1.8h,   v30.8h
        sub             v2.8h,   v2.8h,   v30.8h
        sub             v3.8h,   v3.8h,   v30.8h
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        b.gt            16b
        ret
320:    // 64 bytes per row, one row per iteration
        AARCH64_VALID_JUMP_TARGET
32:
        ldp             q0,  q1,  [x1]
        sshl            v0.8h,   v0.8h,   v31.8h
        ldp             q2,  q3,  [x1, #32]
        add             x1,  x1,  x2
        sshl            v1.8h,   v1.8h,   v31.8h
        sshl            v2.8h,   v2.8h,   v31.8h
        sshl            v3.8h,   v3.8h,   v31.8h
        subs            w4,  w4,  #1
        sub             v0.8h,   v0.8h,   v30.8h
        sub             v1.8h,   v1.8h,   v30.8h
        sub             v2.8h,   v2.8h,   v30.8h
        sub             v3.8h,   v3.8h,   v30.8h
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        b.gt            32b
        ret
640:    // 128 bytes per row; loads, shifts and stores are interleaved
        AARCH64_VALID_JUMP_TARGET
64:
        ldp             q0,  q1,  [x1]
        subs            w4,  w4,  #1
        sshl            v0.8h,   v0.8h,   v31.8h
        ldp             q2,  q3,  [x1, #32]
        sshl            v1.8h,   v1.8h,   v31.8h
        ldp             q4,  q5,  [x1, #64]
        sshl            v2.8h,   v2.8h,   v31.8h
        sshl            v3.8h,   v3.8h,   v31.8h
        ldp             q6,  q7,  [x1, #96]
        add             x1,  x1,  x2
        sshl            v4.8h,   v4.8h,   v31.8h
        sshl            v5.8h,   v5.8h,   v31.8h
        sshl            v6.8h,   v6.8h,   v31.8h
        sshl            v7.8h,   v7.8h,   v31.8h
        sub             v0.8h,   v0.8h,   v30.8h
        sub             v1.8h,   v1.8h,   v30.8h
        sub             v2.8h,   v2.8h,   v30.8h
        sub             v3.8h,   v3.8h,   v30.8h
        stp             q0,  q1,  [x0]
        sub             v4.8h,   v4.8h,   v30.8h
        sub             v5.8h,   v5.8h,   v30.8h
        stp             q2,  q3,  [x0, #32]
        sub             v6.8h,   v6.8h,   v30.8h
        sub             v7.8h,   v7.8h,   v30.8h
        stp             q4,  q5,  [x0, #64]
        stp             q6,  q7,  [x0, #96]
        add             x0,  x0,  x8            // next output row
        b.gt            64b
        ret
1280:   // 256 bytes per row; loads, shifts and stores are interleaved
        AARCH64_VALID_JUMP_TARGET
128:
        ldp             q0,  q1,  [x1]
        subs            w4,  w4,  #1
        sshl            v0.8h,   v0.8h,   v31.8h
        ldp             q2,  q3,  [x1, #32]
        sshl            v1.8h,   v1.8h,   v31.8h
        ldp             q4,  q5,  [x1, #64]
        sshl            v2.8h,   v2.8h,   v31.8h
        sshl            v3.8h,   v3.8h,   v31.8h
        ldp             q6,  q7,  [x1, #96]
        sshl            v4.8h,   v4.8h,   v31.8h
        sshl            v5.8h,   v5.8h,   v31.8h
        ldp             q16, q17, [x1, #128]
        sshl            v6.8h,   v6.8h,   v31.8h
        sshl            v7.8h,   v7.8h,   v31.8h
        ldp             q18, q19, [x1, #160]
        sshl            v16.8h,  v16.8h,  v31.8h
        sshl            v17.8h,  v17.8h,  v31.8h
        ldp             q20, q21, [x1, #192]
        sshl            v18.8h,  v18.8h,  v31.8h
        sshl            v19.8h,  v19.8h,  v31.8h
        ldp             q22, q23, [x1, #224]
        add             x1,  x1,  x2
        sshl            v20.8h,  v20.8h,  v31.8h
        sshl            v21.8h,  v21.8h,  v31.8h
        sshl            v22.8h,  v22.8h,  v31.8h
        sshl            v23.8h,  v23.8h,  v31.8h
        sub             v0.8h,   v0.8h,   v30.8h
        sub             v1.8h,   v1.8h,   v30.8h
        sub             v2.8h,   v2.8h,   v30.8h
        sub             v3.8h,   v3.8h,   v30.8h
        stp             q0,  q1,  [x0]
        sub             v4.8h,   v4.8h,   v30.8h
        sub             v5.8h,   v5.8h,   v30.8h
        stp             q2,  q3,  [x0, #32]
        sub             v6.8h,   v6.8h,   v30.8h
        sub             v7.8h,   v7.8h,   v30.8h
        stp             q4,  q5,  [x0, #64]
        sub             v16.8h,  v16.8h,  v30.8h
        sub             v17.8h,  v17.8h,  v30.8h
        stp             q6,  q7,  [x0, #96]
        sub             v18.8h,  v18.8h,  v30.8h
        sub             v19.8h,  v19.8h,  v30.8h
        stp             q16, q17, [x0, #128]
        sub             v20.8h,  v20.8h,  v30.8h
        sub             v21.8h,  v21.8h,  v30.8h
        stp             q18, q19, [x0, #160]
        sub             v22.8h,  v22.8h,  v30.8h
        sub             v23.8h,  v23.8h,  v30.8h
        stp             q20, q21, [x0, #192]
        stp             q22, q23, [x0, #224]
        add             x0,  x0,  x8            // next output row
        b.gt            128b
        ret
endfunc

// Offsets of the width specific entry points, widest path first.
jumptable prep_16bpc_tbl
        .word 1280b - prep_16bpc_tbl
        .word 640b  - prep_16bpc_tbl
        .word 320b  - prep_16bpc_tbl
        .word 160b  - prep_16bpc_tbl
        .word 80b   - prep_16bpc_tbl
        .word 40b   - prep_16bpc_tbl
endjumptable


// Loads lane 0 (element size \wd) of up to seven registers, alternating
// between the pointers \s0 and \s1; each load post-increments its pointer
// by \strd. \d0/\d1 are always loaded, \d2/\d3 as a pair, \d4-\d6 singly.
.macro load_slice s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
        ld1             {\d0\wd}[0], [\s0], \strd
        ld1             {\d1\wd}[0], [\s1], \strd
.ifnb \d2
        ld1             {\d2\wd}[0], [\s0], \strd
        ld1             {\d3\wd}[0], [\s1], \strd
.endif
.ifnb \d4
        ld1             {\d4\wd}[0], [\s0], \strd
.endif
.ifnb \d5
        ld1             {\d5\wd}[0], [\s1], \strd
.endif
.ifnb \d6
        ld1             {\d6\wd}[0], [\s0], \strd
.endif
.endm
// Same access pattern as load_slice, but each load fills a whole register
// with arrangement \wd.
.macro load_reg s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
        ld1             {\d0\wd}, [\s0], \strd
        ld1             {\d1\wd}, [\s1], \strd
.ifnb \d2
        ld1             {\d2\wd}, [\s0], \strd
        ld1             {\d3\wd}, [\s1], \strd
.endif
.ifnb \d4
        ld1             {\d4\wd}, [\s0], \strd
.endif
.ifnb \d5
        ld1             {\d5\wd}, [\s1], \strd
.endif
.ifnb \d6
        ld1             {\d6\wd}, [\s0], \strd
.endif
.endm
// Loads up to three register pairs: \d0/\d1 from \s0, \d2/\d3 from \s1 and
// \d4/\d5 from \s0 again, post-incrementing the pointer used by \strd.
.macro load_regpair s0, s1, strd, wd, d0, d1, d2, d3, d4, d5
        ld1             {\d0\wd, \d1\wd}, [\s0], \strd
.ifnb \d2
        ld1             {\d2\wd, \d3\wd}, [\s1], \strd
.endif
.ifnb \d4
        ld1             {\d4\wd, \d5\wd}, [\s0], \strd
.endif
.endm
// Width specific front ends for the three loaders above.
.macro load_s s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, .s, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
.macro load_4h s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_reg        \s0, \s1, \strd, .4h, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
.macro load_8h s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_reg        \s0, \s1, \strd, .8h, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
.macro load_16h s0, s1, strd, d0, d1, d2, d3, d4, d5
        load_regpair    \s0, \s1, \strd, .8h, \d0, \d1, \d2, \d3, \d4, \d5
.endm
// Pairs every register with its successor using trn1. The order matters:
// each register is read as a successor before it is overwritten itself.
// \r4 is only read.
.macro interleave_1 wd, r0, r1, r2, r3, r4
        trn1            \r0\wd, \r0\wd, \r1\wd
        trn1            \r1\wd, \r1\wd, \r2\wd
.ifnb \r3
        trn1            \r2\wd, \r2\wd, \r3\wd
        trn1            \r3\wd, \r3\wd, \r4\wd
.endif
.endm
.macro interleave_1_s r0, r1, r2, r3, r4
        interleave_1    .2s, \r0, \r1, \r2, \r3, \r4
.endm
// Unsigned minimum of \r0 (and optionally \r1, or \r2 with \r3) against \c.
.macro umin_h c, wd, r0, r1, r2, r3
        umin            \r0\wd,  \r0\wd,  \c\wd
.ifnb \r1
        umin            \r1\wd,  \r1\wd,  \c\wd
.endif
.ifnb \r2
        umin            \r2\wd,  \r2\wd,  \c\wd
        umin            \r3\wd,  \r3\wd,  \c\wd
.endif
.endm
// Subtracts \c from \r0 (and optionally \r1, or \r2 with \r3).
.macro sub_h c, wd, r0, r1, r2, r3
        sub             \r0\wd,  \r0\wd,  \c\wd
.ifnb \r1
        sub             \r1\wd,  \r1\wd,  \c\wd
.endif
.ifnb \r2
        sub             \r2\wd,  \r2\wd,  \c\wd
        sub             \r3\wd,  \r3\wd,  \c\wd
.endif
.endm
// \d = \s0*v0.h[0] + \s1*v0.h[1] + \s2*v0.h[2] + \s3*v0.h[3], widening the
// low four halfwords of every source to 32 bit.
.macro smull_smlal_4tap d, s0, s1, s2, s3
        smull           \d\().4s,  \s0\().4h,  v0.h[0]
        smlal           \d\().4s,  \s1\().4h,  v0.h[1]
        smlal           \d\().4s,  \s2\().4h,  v0.h[2]
        smlal           \d\().4s,  \s3\().4h,  v0.h[3]
.endm
// 4-tap widening multiply-accumulate on the high four 16-bit lanes
// (smull2/smlal2 variant of smull_smlal_4tap).
.macro smull2_smlal2_4tap d, s0, s1, s2, s3
        smull2          \d\().4s,  \s0\().8h,  v0.h[0]
        smlal2          \d\().4s,  \s1\().8h,  v0.h[1]
        smlal2          \d\().4s,  \s2\().8h,  v0.h[2]
        smlal2          \d\().4s,  \s3\().8h,  v0.h[3]
.endm

// 6-tap widening multiply-accumulate on the low four lanes.
// Takes the same eight source arguments as the 8-tap macro so both can be
// invoked through one call form, but only s1..s6 are used, paired with
// coefficients v0.h[1]..v0.h[6]; s0 and s7 are ignored.
.macro smull_smlal_6tap d, s0, s1, s2, s3, s4, s5, s6, s7
        smull           \d\().4s,  \s1\().4h,  v0.h[1]
        smlal           \d\().4s,  \s2\().4h,  v0.h[2]
        smlal           \d\().4s,  \s3\().4h,  v0.h[3]
        smlal           \d\().4s,  \s4\().4h,  v0.h[4]
        smlal           \d\().4s,  \s5\().4h,  v0.h[5]
        smlal           \d\().4s,  \s6\().4h,  v0.h[6]
.endm

// High-lane (smull2/smlal2) variant of smull_smlal_6tap; s0 and s7 ignored.
.macro smull2_smlal2_6tap d, s0, s1, s2, s3, s4, s5, s6, s7
        smull2          \d\().4s,  \s1\().8h,  v0.h[1]
        smlal2          \d\().4s,  \s2\().8h,  v0.h[2]
        smlal2          \d\().4s,  \s3\().8h,  v0.h[3]
        smlal2          \d\().4s,  \s4\().8h,  v0.h[4]
        smlal2          \d\().4s,  \s5\().8h,  v0.h[5]
        smlal2          \d\().4s,  \s6\().8h,  v0.h[6]
.endm

// 8-tap widening multiply-accumulate on the low four lanes:
//   d.4s = sum over i = 0..7 of sI.4h * v0.h[i]
.macro smull_smlal_8tap d, s0, s1, s2, s3, s4, s5, s6, s7
        smull           \d\().4s,  \s0\().4h,  v0.h[0]
        smlal           \d\().4s,  \s1\().4h,  v0.h[1]
        smlal           \d\().4s,  \s2\().4h,  v0.h[2]
        smlal           \d\().4s,  \s3\().4h,  v0.h[3]
        smlal           \d\().4s,  \s4\().4h,  v0.h[4]
        smlal           \d\().4s,  \s5\().4h,  v0.h[5]
        smlal           \d\().4s,  \s6\().4h,  v0.h[6]
        smlal           \d\().4s,  \s7\().4h,  v0.h[7]
.endm

// High-lane (smull2/smlal2) variant of smull_smlal_8tap.
.macro smull2_smlal2_8tap d, s0, s1, s2, s3, s4, s5, s6, s7
        smull2          \d\().4s,  \s0\().8h,  v0.h[0]
        smlal2          \d\().4s,  \s1\().8h,  v0.h[1]
        smlal2          \d\().4s,  \s2\().8h,  v0.h[2]
        smlal2          \d\().4s,  \s3\().8h,  v0.h[3]
        smlal2          \d\().4s,  \s4\().8h,  v0.h[4]
        smlal2          \d\().4s,  \s5\().8h,  v0.h[5]
        smlal2          \d\().4s,  \s6\().8h,  v0.h[6]
        smlal2          \d\().4s,  \s7\().8h,  v0.h[7]
.endm

// Saturating, rounding, unsigned-narrowing right shift from 32 to 16 bits.
//   shift   immediate shift amount
//   r0      r0.4s is narrowed in place into r0.4h
//   r1      optional; r1.4s is narrowed into the upper half of r0.8h
//   r2, r3  optional pair handled like r0/r1, result in r2
.macro sqrshrun_h shift, r0, r1, r2, r3
        sqrshrun        \r0\().4h, \r0\().4s,  #\shift
.ifnb \r1
        sqrshrun2       \r0\().8h, \r1\().4s,  #\shift
.endif
.ifnb \r2
        sqrshrun        \r2\().4h, \r2\().4s,  #\shift
        sqrshrun2       \r2\().8h, \r3\().4s,  #\shift
.endif
.endm

// Truncating narrow of two .4s registers into one .8h register. uzp1 keeps
// the even-indexed halfwords of the r0:r1 pair; result in r0 (and in r2 for
// the optional r2/r3 pair).
.macro xtn_h r0, r1, r2, r3
        uzp1            \r0\().8h,  \r0\().8h,  \r1\().8h // Same as xtn, xtn2
.ifnb \r2
        uzp1            \r2\().8h,  \r2\().8h,  \r3\().8h // Ditto
.endif
.endm

// Signed rounding shift of .4s registers by the per-lane amounts in the
// vector register given as shift. r0 and r1 are both required; r2 and r3
// are an optional pair.
.macro srshl_s shift, r0, r1, r2, r3
        srshl           \r0\().4s,  \r0\().4s,  \shift\().4s
        srshl           \r1\().4s,  \r1\().4s,  \shift\().4s
.ifnb \r2
        srshl           \r2\().4s,  \r2\().4s,  \shift\().4s
        srshl           \r3\().4s,  \r3\().4s,  \shift\().4s
.endif
.endm

// Store 32-bit lanes of reg alternately through the fixed pointers x0 and
// x9, post-incrementing each by strd. Lanes 0 and 1 are always stored;
// lanes 2 and 3 only when lanes > 2.
.macro st_s strd, reg, lanes
        st1             {\reg\().s}[0], [x0], \strd
        st1             {\reg\().s}[1], [x9], \strd
.if \lanes > 2
        st1             {\reg\().s}[2], [x0], \strd
        st1             {\reg\().s}[3], [x9], \strd
.endif
.endm

// Store the low 8 bytes of r0 through x0 and its high 64-bit lane through
// x9 (both post-incremented by strd); the same again for the optional r1.
.macro st_d strd, r0, r1
        st1             {\r0\().8b},   [x0], \strd
        st1             {\r0\().d}[1], [x9], \strd
.ifnb \r1
        st1             {\r1\().8b},   [x0], \strd
        st1             {\r1\().d}[1], [x9], \strd
.endif
.endm

// Finish four 32-bit accumulators (r0..r3) and store them as 8-byte rows
// via st_d.
//   type == put:  narrow with a rounding shift by 6 and clamp against v31.
//   otherwise:    rounding-shift by v30, narrow, then subtract v29.
// The caller must have set up v31 / v30 / v29 accordingly. Results end up
// in r0 and r2.
.macro shift_store_4 type, strd, r0, r1, r2, r3
.ifc \type, put
        sqrshrun_h      6,   \r0, \r1, \r2, \r3
        umin_h          v31, .8h, \r0, \r2
.else
        srshl_s         v30, \r0, \r1, \r2, \r3  // -(6-intermediate_bits)
        xtn_h           \r0, \r1, \r2, \r3
        sub_h           v29, .8h, \r0, \r2       // PREP_BIAS
.endif
        st_d            \strd, \r0, \r2
.endm

// Store whole registers (arrangement wd) alternately through x0 and x9,
// post-incrementing by strd. r0/r1 are required; r2/r3 are an optional
// pair; r4..r7 are an optional group of four.
.macro st_reg strd, wd, r0, r1, r2, r3, r4, r5, r6, r7
        st1             {\r0\wd}, [x0], \strd
        st1             {\r1\wd}, [x9], \strd
.ifnb \r2
        st1             {\r2\wd}, [x0], \strd
        st1             {\r3\wd}, [x9], \strd
.endif
.ifnb \r4
        st1             {\r4\wd}, [x0], \strd
        st1             {\r5\wd}, [x9], \strd
        st1             {\r6\wd}, [x0], \strd
        st1             {\r7\wd}, [x9], \strd
.endif
.endm

// st_reg with the .8h arrangement.
.macro st_8h strd, r0, r1, r2, r3, r4, r5, r6, r7
        st_reg          \strd, .8h, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
.endm

// Same processing as shift_store_4, but the two resulting .8h registers
// (r0 and r2) are stored as full 16-byte rows via st_8h.
.macro shift_store_8 type, strd, r0, r1, r2, r3
.ifc \type, put
        sqrshrun_h      6,   \r0, \r1, \r2, \r3
        umin_h          v31, .8h, \r0, \r2
.else
        srshl_s         v30, \r0, \r1, \r2, \r3  // -(6-intermediate_bits)
        xtn_h           \r0, \r1, \r2, \r3
        sub_h           v29, .8h, \r0, \r2       // PREP_BIAS
.endif
        st_8h           \strd, \r0, \r2
.endm

// Same processing as shift_store_8, but both results are written with a
// single 32-byte store through the explicit pointer dst (post-incremented
// by strd). The second result is moved from r2 into r1 by the final
// umin/sub so that r0 and r1 form the register pair for st1; r1 is
// therefore overwritten.
.macro shift_store_16 type, strd, dst, r0, r1, r2, r3
.ifc \type, put
        sqrshrun_h      6,   \r0, \r1, \r2, \r3
        umin            \r0\().8h, \r0\().8h, v31.8h
        umin            \r1\().8h, \r2\().8h, v31.8h
.else
        srshl_s         v30, \r0, \r1, \r2, \r3  // -(6-intermediate_bits)
        xtn_h           \r0, \r1, \r2, \r3
        sub             \r0\().8h, \r0\().8h, v29.8h
        sub             \r1\().8h, \r2\().8h, v29.8h
.endif
        st1             {\r0\().8h, \r1\().8h}, [\dst], \strd
.endm

// Emit an exported entry point op_8tap_type_16bpc_neon that loads the packed
// filter-type constants (type_h into w9, type_v into w10) and tail-branches
// to the shared implementation op_taps_neon.
.macro make_8tap_fn op, type, type_h, type_v, taps
function \op\()_8tap_\type\()_16bpc_neon, export=1
        mov             w9,  \type_h
        mov             w10, \type_v
        b               \op\()_\taps\()_neon
endfunc
.endm

// Packed filter-set offsets: one value in bits 7 and up, another in the low
// 7 bits. The shared function below picks the low field for blocks with
// w (or h) <= 4 and the upper field otherwise.
// No spaces in these expressions, due to gas-preprocessor.
#define REGULAR ((0*15<<7)|3*15)
#define SMOOTH  ((1*15<<7)|4*15)
#define SHARP   ((2*15<<7)|3*15)

// filter_fn: emits the shared subpel filter function type_taps_neon (entry
// code and the horizontal-only paths), its jump table, and the vertical
// function that follows.
//   type          put or prep; selects the output stage
//   dst, d_strd   destination pointer / stride registers
//   src, s_strd   source pointer / stride registers
//   w, h          block width / height registers (32-bit)
//   mx, xmx       horizontal subpel position (32-bit register) and the
//                 64-bit register that receives the filter address
//   my, xmy       the same for the vertical direction
//   bdmax         register holding bitdepth_max; when it is w8 the value is
//                 first loaded from [sp]
//   ds2, sr2      registers used for the second-row dst / src pointers
//   taps          6tap or 8tap
// Expects w9 / w10 to hold the packed filter-type constants on entry.
.macro filter_fn type, dst, d_strd, src, s_strd, w, h, mx, xmx, my, xmy, bdmax, ds2, sr2, taps
function \type\()_\taps\()_neon
.ifc \bdmax, w8
        ldr             w8,  [sp]
.endif
        // Replicate the subpel position into three 7-bit fields, then add
        // the packed filter offsets to the two lower fields. The top field
        // (bits 14..20) keeps the raw position for the is-it-zero tests.
        mov             w11,  #0x4081  // (1 << 14) | (1 << 7) | (1 << 0)
        mul             \mx,  \mx, w11
        mul             \my,  \my, w11
        add             \mx,  \mx, w9  // mx, 8tap_h, 4tap_h
        add             \my,  \my, w10 // my, 8tap_v, 4tap_v
.ifc \type, prep
        // prep: the output stride is derived from the width (2 bytes/pixel).
        uxtw            \d_strd, \w
        lsl             \d_strd, \d_strd, #1
.endif

        dup             v31.8h,  \bdmax        // bitdepth_max
        clz             \bdmax,  \bdmax
        clz             w9,  \w
        sub             \bdmax,  \bdmax,  #18  // intermediate_bits = clz(bitdepth_max) - 18
        mov             w12, #6
        tst             \mx, #(0x7f << 14)
        sub             w9,  w9,  #24          // jump table index: clz(w) - 24
        add             w13, w12, \bdmax       // 6 + intermediate_bits
        sub             w12, w12, \bdmax       // 6 - intermediate_bits
        // Table base is biased by -8, i.e. one 8-byte filter entry.
        movrel          x11, X(mc_subpel_filters), -8
        b.ne            L(\type\()_\taps\()_h)
        tst             \my, #(0x7f << 14)
        b.ne            L(\type\()_\taps\()_v)
        // Neither direction has a fractional position: plain put/prep.
        b               \type\()_16bpc_neon

L(\type\()_\taps\()_h):
        cmp             \w,   #4
        ubfx            w10,  \mx, #7, #7
        and             \mx,  \mx, #0x7f
        b.le            4f
        mov             \mx,  w10
4:
        tst             \my,  #(0x7f << 14)
        add             \xmx, x11, \mx, uxtw #3
        b.ne            L(\type\()_\taps\()_hv)

        movrel          x10, \type\()_\taps\()_h_tbl
        ldrsw           x9,  [x10, x9, lsl #2]
.ifc \type, put
        mov             w12, #34 // rounding for 10-bit
        mov             w13, #40 // rounding for 12-bit
        cmp             \bdmax, #2 // 10-bit: 4, 12-bit: 2
        csel            w12, w12, w13, ne // select rounding based on \bdmax
.else
        neg             w12, w12 // -(6 - intermediate_bits)
        movi            v28.8h,  #(PREP_BIAS >> 8), lsl #8
.endif
        add             x10, x10, x9
        dup             v30.4s,  w12 // rounding or shift amount
        br              x10

20:     // 2xN h
        AARCH64_VALID_JUMP_TARGET
.ifc \type, put
        // Load the middle four of the eight coefficient bytes.
        ldur            s0,  [\xmx, #2]
        sub             \src,  \src,  #2
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1
        lsl             \s_strd,  \s_strd,  #1
        sxtl            v0.8h,   v0.8b
2:
        ld1             {v4.8h},  [\src], \s_strd
        ld1             {v6.8h},  [\sr2], \s_strd
        mov             v2.16b,  v30.16b
        ext             v5.16b,  v4.16b,  v4.16b,  #2
        ext             v7.16b,  v6.16b,  v6.16b,  #2
        subs            \h,  \h,  #2
        trn1            v3.2s,   v4.2s,   v6.2s
        trn2            v6.2s,   v4.2s,   v6.2s
        trn1            v4.2s,   v5.2s,   v7.2s
        trn2            v7.2s,   v5.2s,   v7.2s
        smlal           v2.4s,   v3.4h,   v0.h[0]
        smlal           v2.4s,   v4.4h,   v0.h[1]
        smlal           v2.4s,   v6.4h,   v0.h[2]
        smlal           v2.4s,   v7.4h,   v0.h[3]
        sqshrun         v2.4h,   v2.4s,   #6
        umin            v2.4h,   v2.4h,   v31.4h
        st1             {v2.s}[0], [\dst], \d_strd
        st1             {v2.s}[1], [\ds2], \d_strd
        b.gt            2b
        ret
.endif

40:     // 4xN h
        AARCH64_VALID_JUMP_TARGET
        ldur            s0,  [\xmx, #2]
        sub             \src,  \src,  #2
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1
        lsl             \s_strd,  \s_strd,  #1
        sxtl            v0.8h,   v0.8b
4:
        ld1             {v16.8h}, [\src], \s_strd
        ld1             {v20.8h}, [\sr2], \s_strd
.ifc \type, put
        // put: seed both accumulators with the rounding constant.
        mov             v2.16b,  v30.16b
        mov             v3.16b,  v30.16b
.endif
        ext             v17.16b, v16.16b, v16.16b, #2
        ext             v18.16b, v16.16b, v16.16b, #4
        ext             v19.16b, v16.16b, v16.16b, #6
        ext             v21.16b, v20.16b, v20.16b, #2
        ext             v22.16b, v20.16b, v20.16b, #4
        ext             v23.16b, v20.16b, v20.16b, #6
        subs            \h,  \h,  #2
.ifc \type, put
        smlal           v2.4s,   v16.4h,  v0.h[0]
.else
        smull           v2.4s,   v16.4h,  v0.h[0]
.endif
        smlal           v2.4s,   v17.4h,  v0.h[1]
        smlal           v2.4s,   v18.4h,  v0.h[2]
        smlal           v2.4s,   v19.4h,  v0.h[3]
.ifc \type, put
        smlal           v3.4s,   v20.4h,  v0.h[0]
.else
        smull           v3.4s,   v20.4h,  v0.h[0]
.endif
        smlal           v3.4s,   v21.4h,  v0.h[1]
        smlal           v3.4s,   v22.4h,  v0.h[2]
        smlal           v3.4s,   v23.4h,  v0.h[3]
.ifc \type, put
        sqshrun         v16.4h,  v2.4s,   #6
        sqshrun2        v16.8h,  v3.4s,   #6
        umin            v16.8h,  v16.8h,  v31.8h
.else
        srshl           v16.4s,  v2.4s,   v30.4s // -(6-intermediate_bits)
        srshl           v20.4s,  v3.4s,   v30.4s // -(6-intermediate_bits)
        uzp1            v16.8h,  v16.8h,  v20.8h // Same as xtn, xtn2
        sub             v16.8h,  v16.8h,  v28.8h // PREP_BIAS
.endif
        st1             {v16.8b},   [\dst], \d_strd
        st1             {v16.d}[1], [\ds2], \d_strd
        b.gt            4b
        ret

80:
160:
320:
640:
1280:   // 8xN, 16xN, 32xN, ... h
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8b}, [\xmx]
.ifc \taps, 6tap
        sub             \src,  \src,  #4
.else
        sub             \src,  \src,  #6
.endif
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1
        sxtl            v0.8h,   v0.8b

        // Each row pass reads 2*w + 16 bytes through post-incrementing
        // loads; fold that into the stride so that adding it at label 9
        // lands two rows down.
        sub             \s_strd,  \s_strd,  \w, uxtw #1
        sub             \s_strd,  \s_strd,  #16
.ifc \type, put
        lsl             \d_strd,  \d_strd,  #1
        sub             \d_strd,  \d_strd,  \w, uxtw #1
.endif
81:
        ld1             {v16.8h, v17.8h}, [\src], #32
        ld1             {v20.8h, v21.8h}, [\sr2], #32
        mov             \mx, \w

8:
.ifc \taps, 6tap
    .ifc \type, put
        mov             v18.16b, v30.16b
        mov             v19.16b, v30.16b
        smlal           v18.4s,  v16.4h,  v0.h[1]
        smlal2          v19.4s,  v16.8h,  v0.h[1]
        mov             v22.16b, v30.16b
        mov             v23.16b, v30.16b
        smlal           v22.4s,  v20.4h,  v0.h[1]
        smlal2          v23.4s,  v20.8h,  v0.h[1]
    .else
        smull           v18.4s,  v16.4h,  v0.h[1]
        smull2          v19.4s,  v16.8h,  v0.h[1]
        smull           v22.4s,  v20.4h,  v0.h[1]
        smull2          v23.4s,  v20.8h,  v0.h[1]
    .endif
    .irpc i, 23456
        ext             v24.16b, v16.16b, v17.16b, #(2*\i-2)
        ext             v25.16b, v20.16b, v21.16b, #(2*\i-2)
        smlal           v18.4s,  v24.4h,  v0.h[\i]
        smlal2          v19.4s,  v24.8h,  v0.h[\i]
        smlal           v22.4s,  v25.4h,  v0.h[\i]
        smlal2          v23.4s,  v25.8h,  v0.h[\i]
    .endr
.else   // 8tap
    .ifc \type, put
        mov             v18.16b, v30.16b
        mov             v19.16b, v30.16b
        smlal           v18.4s,  v16.4h,  v0.h[0]
        smlal2          v19.4s,  v16.8h,  v0.h[0]
        mov             v22.16b, v30.16b
        mov             v23.16b, v30.16b
        smlal           v22.4s,  v20.4h,  v0.h[0]
        smlal2          v23.4s,  v20.8h,  v0.h[0]
    .else
        smull           v18.4s,  v16.4h,  v0.h[0]
        smull2          v19.4s,  v16.8h,  v0.h[0]
        smull           v22.4s,  v20.4h,  v0.h[0]
        smull2          v23.4s,  v20.8h,  v0.h[0]
    .endif
    .irpc i, 1234567
        ext             v24.16b, v16.16b, v17.16b, #(2*\i)
        ext             v25.16b, v20.16b, v21.16b, #(2*\i)
        smlal           v18.4s,  v24.4h,  v0.h[\i]
        smlal2          v19.4s,  v24.8h,  v0.h[\i]
        smlal           v22.4s,  v25.4h,  v0.h[\i]
        smlal2          v23.4s,  v25.8h,  v0.h[\i]
    .endr
.endif
        subs            \mx, \mx, #8
.ifc \type, put
        sqshrun         v18.4h,  v18.4s,  #6
        sqshrun2        v18.8h,  v19.4s,  #6
        sqshrun         v22.4h,  v22.4s,  #6
        sqshrun2        v22.8h,  v23.4s,  #6
        umin            v18.8h,  v18.8h,  v31.8h
        umin            v22.8h,  v22.8h,  v31.8h
.else
        srshl           v18.4s,  v18.4s,  v30.4s // -(6-intermediate_bits)
        srshl           v19.4s,  v19.4s,  v30.4s // -(6-intermediate_bits)
        srshl           v22.4s,  v22.4s,  v30.4s // -(6-intermediate_bits)
        srshl           v23.4s,  v23.4s,  v30.4s // -(6-intermediate_bits)
        uzp1            v18.8h,  v18.8h,  v19.8h // Same as xtn, xtn2
        uzp1            v22.8h,  v22.8h,  v23.8h // Ditto
        sub             v18.8h,  v18.8h,  v28.8h // PREP_BIAS
        sub             v22.8h,  v22.8h,  v28.8h // PREP_BIAS
.endif
        st1             {v18.8h}, [\dst], #16
        st1             {v22.8h}, [\ds2], #16
        b.le            9f

        // More columns in this row pair: slide the window by 8 pixels.
        mov             v16.16b, v17.16b
        mov             v20.16b, v21.16b
        ld1             {v17.8h}, [\src], #16
        ld1             {v21.8h}, [\sr2], #16
        b               8b

9:
        add             \dst,  \dst,  \d_strd
        add             \ds2,  \ds2,  \d_strd
        add             \src,  \src,  \s_strd
        add             \sr2,  \sr2,  \s_strd

        subs            \h,  \h,  #2
        b.gt            81b
        ret
endfunc

// Offsets of the horizontal width handlers, indexed by clz(w) - 24
// (w = 128 first, w = 2 last).
jumptable \type\()_\taps\()_h_tbl
        .word 1280b - \type\()_\taps\()_h_tbl
        .word 640b  - \type\()_\taps\()_h_tbl
        .word 320b  - \type\()_\taps\()_h_tbl
        .word 160b  - \type\()_\taps\()_h_tbl
        .word 80b   - \type\()_\taps\()_h_tbl
        .word 40b   - \type\()_\taps\()_h_tbl
        .word 20b   - \type\()_\taps\()_h_tbl
endjumptable


function L(\type\()_\taps\()_v)
        cmp             \h,  #4
        ubfx            w10, \my, #7, #7
        and             \my, \my, #0x7f
        b.le            4f
        mov             \my, w10
4:
        add             \xmy, x11, \my, uxtw #3

.ifc \type, prep
        dup             v30.4s,  w12 // 6 - intermediate_bits
        movi            v29.8h,  #(PREP_BIAS >> 8), lsl #8
.endif
        movrel          x10, \type\()_\taps\()_v_tbl
        ldrsw           x9,  [x10, x9, lsl #2]
.ifc \type, prep
        neg             v30.4s,  v30.4s // -(6-intermediate_bits)
.endif
        add             x10, x10, x9
        br              x10

20:     // 2xN v
        AARCH64_VALID_JUMP_TARGET
1856*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1857*c0909341SAndroid Build Coastguard Worker b.gt 28f 1858*c0909341SAndroid Build Coastguard Worker 1859*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1860*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 1861*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1862*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1863*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1864*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1865*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1866*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1867*c0909341SAndroid Build Coastguard Worker 1868*c0909341SAndroid Build Coastguard Worker // 2x2 v 1869*c0909341SAndroid Build Coastguard Worker load_s \src, \sr2, \s_strd, v1, v2, v3, v4, v5 1870*c0909341SAndroid Build Coastguard Worker interleave_1_s v1, v2, v3, v4, v5 1871*c0909341SAndroid Build Coastguard Worker b.gt 24f 1872*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v6, v1, v2, v3, v4 1873*c0909341SAndroid Build Coastguard Worker sqrshrun_h 6, v6 1874*c0909341SAndroid Build Coastguard Worker umin_h v31, .8h, v6 1875*c0909341SAndroid Build Coastguard Worker st_s \d_strd, v6, 2 1876*c0909341SAndroid Build Coastguard Worker ret 1877*c0909341SAndroid Build Coastguard Worker 1878*c0909341SAndroid Build Coastguard Worker24: // 2x4 v 1879*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v6, v7 1880*c0909341SAndroid Build Coastguard Worker interleave_1_s v5, v6, v7 1881*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v16, v1, v2, v3, v4 1882*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v17, v3, v4, v5, v6 1883*c0909341SAndroid Build Coastguard Worker sqrshrun_h 6, v16, v17 1884*c0909341SAndroid Build Coastguard Worker umin_h v31, .8h, v16 1885*c0909341SAndroid Build Coastguard Worker st_s \d_strd, v16, 4 
1886*c0909341SAndroid Build Coastguard Worker ret 1887*c0909341SAndroid Build Coastguard Worker 1888*c0909341SAndroid Build Coastguard Worker28: // 2x6, 2x8, 2x12, 2x16 v 1889*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmy] 1890*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 1891*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1892*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 1893*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1894*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1895*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1896*c0909341SAndroid Build Coastguard Worker 1897*c0909341SAndroid Build Coastguard Worker load_s \src, \sr2, \s_strd, v1, v2, v3, v4, v5, v6, v7 1898*c0909341SAndroid Build Coastguard Worker interleave_1_s v1, v2, v3, v4, v5 1899*c0909341SAndroid Build Coastguard Worker interleave_1_s v5, v6, v7 1900*c0909341SAndroid Build Coastguard Worker216: 1901*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1902*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v16, v17, v18, v19 1903*c0909341SAndroid Build Coastguard Worker interleave_1_s v7, v16, v17, v18, v19 1904*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v24, v1, v2, v3, v4, v5, v6, v7, v16 1905*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v25, v3, v4, v5, v6, v7, v16, v17, v18 1906*c0909341SAndroid Build Coastguard Worker sqrshrun_h 6, v24, v25 1907*c0909341SAndroid Build Coastguard Worker umin_h v31, .8h, v24 1908*c0909341SAndroid Build Coastguard Worker st_s \d_strd, v24, 4 1909*c0909341SAndroid Build Coastguard Worker b.le 0f 1910*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1911*c0909341SAndroid Build Coastguard Worker mov v1.16b, v5.16b 1912*c0909341SAndroid Build Coastguard Worker mov v2.16b, v6.16b 1913*c0909341SAndroid Build Coastguard Worker mov v3.16b, v7.16b 1914*c0909341SAndroid Build 
Coastguard Worker mov v4.16b, v16.16b 1915*c0909341SAndroid Build Coastguard Worker mov v5.16b, v17.16b 1916*c0909341SAndroid Build Coastguard Worker mov v6.16b, v18.16b 1917*c0909341SAndroid Build Coastguard Worker mov v7.16b, v19.16b 1918*c0909341SAndroid Build Coastguard Worker b.eq 26f 1919*c0909341SAndroid Build Coastguard Worker b 216b 1920*c0909341SAndroid Build Coastguard Worker26: 1921*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v16, v17 1922*c0909341SAndroid Build Coastguard Worker interleave_1_s v7, v16, v17 1923*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v24, v1, v2, v3, v4, v5, v6, v7, v16 1924*c0909341SAndroid Build Coastguard Worker sqrshrun_h 6, v24 1925*c0909341SAndroid Build Coastguard Worker umin_h v31, .4h, v24 1926*c0909341SAndroid Build Coastguard Worker st_s \d_strd, v24, 2 1927*c0909341SAndroid Build Coastguard Worker0: 1928*c0909341SAndroid Build Coastguard Worker ret 1929*c0909341SAndroid Build Coastguard Worker.endif 1930*c0909341SAndroid Build Coastguard Worker 1931*c0909341SAndroid Build Coastguard Worker40: 1932*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1933*c0909341SAndroid Build Coastguard Worker b.gt 480f 1934*c0909341SAndroid Build Coastguard Worker 1935*c0909341SAndroid Build Coastguard Worker // 4x2, 4x4 v 1936*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1937*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 1938*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1939*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1940*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1941*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1942*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1943*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1944*c0909341SAndroid Build Coastguard Worker 1945*c0909341SAndroid Build Coastguard Worker load_4h \src, \sr2, \s_strd, v1, v2, v3, 
v4, v5 1946*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v6, v1, v2, v3, v4 1947*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v7, v2, v3, v4, v5 1948*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v6, v7 1949*c0909341SAndroid Build Coastguard Worker b.le 0f 1950*c0909341SAndroid Build Coastguard Worker load_4h \sr2, \src, \s_strd, v6, v7 1951*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v1, v3, v4, v5, v6 1952*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v2, v4, v5, v6, v7 1953*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v1, v2 1954*c0909341SAndroid Build Coastguard Worker0: 1955*c0909341SAndroid Build Coastguard Worker ret 1956*c0909341SAndroid Build Coastguard Worker 1957*c0909341SAndroid Build Coastguard Worker480: // 4x6, 4x8, 4x12, 4x16 v 1958*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmy] 1959*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 1960*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1961*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 1962*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1963*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1964*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1965*c0909341SAndroid Build Coastguard Worker 1966*c0909341SAndroid Build Coastguard Worker load_4h \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22 1967*c0909341SAndroid Build Coastguard Worker 1968*c0909341SAndroid Build Coastguard Worker48: 1969*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1970*c0909341SAndroid Build Coastguard Worker load_4h \sr2, \src, \s_strd, v23, v24, v25, v26 1971*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v1, v16, v17, v18, v19, v20, v21, v22, v23 1972*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v2, v17, v18, v19, v20, v21, v22, v23, v24 1973*c0909341SAndroid Build 
Coastguard Worker smull_smlal_\taps v3, v18, v19, v20, v21, v22, v23, v24, v25 1974*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v4, v19, v20, v21, v22, v23, v24, v25, v26 1975*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v1, v2, v3, v4 1976*c0909341SAndroid Build Coastguard Worker b.le 0f 1977*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1978*c0909341SAndroid Build Coastguard Worker mov v16.8b, v20.8b 1979*c0909341SAndroid Build Coastguard Worker mov v17.8b, v21.8b 1980*c0909341SAndroid Build Coastguard Worker mov v18.8b, v22.8b 1981*c0909341SAndroid Build Coastguard Worker mov v19.8b, v23.8b 1982*c0909341SAndroid Build Coastguard Worker mov v20.8b, v24.8b 1983*c0909341SAndroid Build Coastguard Worker mov v21.8b, v25.8b 1984*c0909341SAndroid Build Coastguard Worker mov v22.8b, v26.8b 1985*c0909341SAndroid Build Coastguard Worker b.eq 46f 1986*c0909341SAndroid Build Coastguard Worker b 48b 1987*c0909341SAndroid Build Coastguard Worker46: 1988*c0909341SAndroid Build Coastguard Worker load_4h \sr2, \src, \s_strd, v23, v24 1989*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v1, v16, v17, v18, v19, v20, v21, v22, v23 1990*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v2, v17, v18, v19, v20, v21, v22, v23, v24 1991*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v1, v2 1992*c0909341SAndroid Build Coastguard Worker0: 1993*c0909341SAndroid Build Coastguard Worker ret 1994*c0909341SAndroid Build Coastguard Worker 1995*c0909341SAndroid Build Coastguard Worker80: 1996*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1997*c0909341SAndroid Build Coastguard Worker b.gt 880f 1998*c0909341SAndroid Build Coastguard Worker 1999*c0909341SAndroid Build Coastguard Worker // 8x2, 8x4 v 2000*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2001*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 2002*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 
2003*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2004*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2005*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2006*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2007*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2008*c0909341SAndroid Build Coastguard Worker 2009*c0909341SAndroid Build Coastguard Worker load_8h \src, \sr2, \s_strd, v1, v2, v3, v4, v5 2010*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v16, v1, v2, v3, v4 2011*c0909341SAndroid Build Coastguard Worker smull2_smlal2_4tap v17, v1, v2, v3, v4 2012*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v18, v2, v3, v4, v5 2013*c0909341SAndroid Build Coastguard Worker smull2_smlal2_4tap v19, v2, v3, v4, v5 2014*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v16, v17, v18, v19 2015*c0909341SAndroid Build Coastguard Worker b.le 0f 2016*c0909341SAndroid Build Coastguard Worker load_8h \sr2, \src, \s_strd, v6, v7 2017*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v16, v3, v4, v5, v6 2018*c0909341SAndroid Build Coastguard Worker smull2_smlal2_4tap v17, v3, v4, v5, v6 2019*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v18, v4, v5, v6, v7 2020*c0909341SAndroid Build Coastguard Worker smull2_smlal2_4tap v19, v4, v5, v6, v7 2021*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v16, v17, v18, v19 2022*c0909341SAndroid Build Coastguard Worker0: 2023*c0909341SAndroid Build Coastguard Worker ret 2024*c0909341SAndroid Build Coastguard Worker 2025*c0909341SAndroid Build Coastguard Worker880: // 8x6, 8x8, 8x16, 8x32 v 2026*c0909341SAndroid Build Coastguard Worker1680: // 16x8, 16x16, ... 2027*c0909341SAndroid Build Coastguard Worker320: // 32x8, 32x16, ... 
2028*c0909341SAndroid Build Coastguard Worker640: 2029*c0909341SAndroid Build Coastguard Worker1280: 2030*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2031*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmy] 2032*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 2033*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2034*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2035*c0909341SAndroid Build Coastguard Worker mov \my, \h 2036*c0909341SAndroid Build Coastguard Worker168: 2037*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2038*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2039*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2040*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2041*c0909341SAndroid Build Coastguard Worker 2042*c0909341SAndroid Build Coastguard Worker load_8h \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22 2043*c0909341SAndroid Build Coastguard Worker 2044*c0909341SAndroid Build Coastguard Worker88: 2045*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2046*c0909341SAndroid Build Coastguard Worker load_8h \sr2, \src, \s_strd, v23, v24 2047*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v1, v16, v17, v18, v19, v20, v21, v22, v23 2048*c0909341SAndroid Build Coastguard Worker smull2_smlal2_\taps v2, v16, v17, v18, v19, v20, v21, v22, v23 2049*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v3, v17, v18, v19, v20, v21, v22, v23, v24 2050*c0909341SAndroid Build Coastguard Worker smull2_smlal2_\taps v4, v17, v18, v19, v20, v21, v22, v23, v24 2051*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v1, v2, v3, v4 2052*c0909341SAndroid Build Coastguard Worker b.le 9f 2053*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2054*c0909341SAndroid Build Coastguard Worker load_8h \sr2, \src, \s_strd, v25, v26 2055*c0909341SAndroid Build Coastguard 
Worker smull_smlal_\taps v1, v18, v19, v20, v21, v22, v23, v24, v25 2056*c0909341SAndroid Build Coastguard Worker smull2_smlal2_\taps v2, v18, v19, v20, v21, v22, v23, v24, v25 2057*c0909341SAndroid Build Coastguard Worker smull_smlal_\taps v3, v19, v20, v21, v22, v23, v24, v25, v26 2058*c0909341SAndroid Build Coastguard Worker smull2_smlal2_\taps v4, v19, v20, v21, v22, v23, v24, v25, v26 2059*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v1, v2, v3, v4 2060*c0909341SAndroid Build Coastguard Worker b.le 9f 2061*c0909341SAndroid Build Coastguard Worker mov v16.16b, v20.16b 2062*c0909341SAndroid Build Coastguard Worker mov v17.16b, v21.16b 2063*c0909341SAndroid Build Coastguard Worker mov v18.16b, v22.16b 2064*c0909341SAndroid Build Coastguard Worker mov v19.16b, v23.16b 2065*c0909341SAndroid Build Coastguard Worker mov v20.16b, v24.16b 2066*c0909341SAndroid Build Coastguard Worker mov v21.16b, v25.16b 2067*c0909341SAndroid Build Coastguard Worker mov v22.16b, v26.16b 2068*c0909341SAndroid Build Coastguard Worker b 88b 2069*c0909341SAndroid Build Coastguard Worker9: 2070*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 2071*c0909341SAndroid Build Coastguard Worker b.le 0f 2072*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2073*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2074*c0909341SAndroid Build Coastguard Worker msub \src, \s_strd, \xmy, \src 2075*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 2076*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #3 2077*c0909341SAndroid Build Coastguard Worker mov \h, \my 2078*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 2079*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2080*c0909341SAndroid Build Coastguard Worker b 168b 2081*c0909341SAndroid Build Coastguard Worker0: 2082*c0909341SAndroid Build Coastguard Worker ret 2083*c0909341SAndroid Build Coastguard Worker 
2084*c0909341SAndroid Build Coastguard Worker160: 2085*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2086*c0909341SAndroid Build Coastguard Worker b.gt 1680b 2087*c0909341SAndroid Build Coastguard Worker 2088*c0909341SAndroid Build Coastguard Worker // 16x2, 16x4 v 2089*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 2090*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 2091*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2092*c0909341SAndroid Build Coastguard Worker 2093*c0909341SAndroid Build Coastguard Worker load_16h \src, \src, \s_strd, v16, v17, v18, v19, v20, v21 2094*c0909341SAndroid Build Coastguard Worker16: 2095*c0909341SAndroid Build Coastguard Worker load_16h \src, \src, \s_strd, v22, v23 2096*c0909341SAndroid Build Coastguard Worker subs \h, \h, #1 2097*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v1, v16, v18, v20, v22 2098*c0909341SAndroid Build Coastguard Worker smull2_smlal2_4tap v2, v16, v18, v20, v22 2099*c0909341SAndroid Build Coastguard Worker smull_smlal_4tap v3, v17, v19, v21, v23 2100*c0909341SAndroid Build Coastguard Worker smull2_smlal2_4tap v4, v17, v19, v21, v23 2101*c0909341SAndroid Build Coastguard Worker shift_store_16 \type, \d_strd, x0, v1, v2, v3, v4 2102*c0909341SAndroid Build Coastguard Worker b.le 0f 2103*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 2104*c0909341SAndroid Build Coastguard Worker mov v17.16b, v19.16b 2105*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2106*c0909341SAndroid Build Coastguard Worker mov v19.16b, v21.16b 2107*c0909341SAndroid Build Coastguard Worker mov v20.16b, v22.16b 2108*c0909341SAndroid Build Coastguard Worker mov v21.16b, v23.16b 2109*c0909341SAndroid Build Coastguard Worker b 16b 2110*c0909341SAndroid Build Coastguard Worker0: 2111*c0909341SAndroid Build Coastguard Worker ret 2112*c0909341SAndroid Build Coastguard Workerendfunc 2113*c0909341SAndroid Build Coastguard Worker 
// \type\()_\taps\()_v_tbl: dispatch table for the vertical-only filter function that ends just above.
// Each .word is the distance from the table's own label to one of that function's numeric local
// labels (the "b" suffix is a backward reference), listed from 1280 down to 20. The size comments
// on those labels (e.g. "4x2, 4x4 v", "8x6, 8x8, 8x16, 8x32 v") indicate one entry per block width.
// Labels 320, 640 and 1280 are stacked on the same code as 880/1680, so those entries resolve to a
// single shared path. Because entries are table-relative, the table holds no absolute addresses.
// NOTE(review): the index used to pick an entry is computed in the _v prologue, which is not visible
// here; presumably it mirrors the _hv prologue that follows on this line (ldrsw of entry x9 scaled
// by 4, later added to the table base and branched to) -- confirm before reordering entries.
// The remainder of this line is the opening of the _hv function and is left untouched.
2114*c0909341SAndroid Build Coastguard Workerjumptable \type\()_\taps\()_v_tbl 2115*c0909341SAndroid Build Coastguard Worker .word 1280b - \type\()_\taps\()_v_tbl 2116*c0909341SAndroid Build Coastguard Worker .word 640b - \type\()_\taps\()_v_tbl 2117*c0909341SAndroid Build Coastguard Worker .word 320b - \type\()_\taps\()_v_tbl 2118*c0909341SAndroid Build Coastguard Worker .word 160b - \type\()_\taps\()_v_tbl 2119*c0909341SAndroid Build Coastguard Worker .word 80b - \type\()_\taps\()_v_tbl 2120*c0909341SAndroid Build Coastguard Worker .word 40b - \type\()_\taps\()_v_tbl 2121*c0909341SAndroid Build Coastguard Worker .word 20b - \type\()_\taps\()_v_tbl 2122*c0909341SAndroid Build Coastguard Workerendjumptable 2123*c0909341SAndroid Build Coastguard Worker 2124*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_\taps\()_hv) 2125*c0909341SAndroid Build Coastguard Worker cmp \h, #4 2126*c0909341SAndroid Build Coastguard Worker ubfx w10, \my, #7, #7 2127*c0909341SAndroid Build Coastguard Worker and \my, \my, #0x7f 2128*c0909341SAndroid Build Coastguard Worker b.le 4f 2129*c0909341SAndroid Build Coastguard Worker mov \my, w10 2130*c0909341SAndroid Build Coastguard Worker4: 2131*c0909341SAndroid Build Coastguard Worker add \xmy, x11, \my, uxtw #3 2132*c0909341SAndroid Build Coastguard Worker 2133*c0909341SAndroid Build Coastguard Worker movrel x10, \type\()_\taps\()_hv_tbl 2134*c0909341SAndroid Build Coastguard Worker dup v30.4s, w12 // 6 - intermediate_bits 2135*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x10, x9, lsl #2] 2136*c0909341SAndroid Build Coastguard Worker neg v30.4s, v30.4s // -(6-intermediate_bits) 2137*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2138*c0909341SAndroid Build Coastguard Worker dup v29.4s, w13 // 6 + intermediate_bits 2139*c0909341SAndroid Build Coastguard Worker.else 2140*c0909341SAndroid Build Coastguard Worker movi v29.8h, #(PREP_BIAS >> 8), lsl #8 2141*c0909341SAndroid Build Coastguard Worker.endif
2142*c0909341SAndroid Build Coastguard Worker add x10, x10, x9 2143*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2144*c0909341SAndroid Build Coastguard Worker neg v29.4s, v29.4s // -(6+intermediate_bits) 2145*c0909341SAndroid Build Coastguard Worker.endif 2146*c0909341SAndroid Build Coastguard Worker br x10 2147*c0909341SAndroid Build Coastguard Worker 2148*c0909341SAndroid Build Coastguard Worker20: 2149*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2150*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2151*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmx, #2] 2152*c0909341SAndroid Build Coastguard Worker b.gt 280f 2153*c0909341SAndroid Build Coastguard Worker ldur s1, [\xmy, #2] 2154*c0909341SAndroid Build Coastguard Worker 2155*c0909341SAndroid Build Coastguard Worker // 2x2, 2x4 hv 2156*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, #2 2157*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 2158*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2159*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2160*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2161*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2162*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2163*c0909341SAndroid Build Coastguard Worker mov x15, x30 2164*c0909341SAndroid Build Coastguard Worker 2165*c0909341SAndroid Build Coastguard Worker ld1 {v27.8h}, [\src], \s_strd 2166*c0909341SAndroid Build Coastguard Worker ext v28.16b, v27.16b, v27.16b, #2 2167*c0909341SAndroid Build Coastguard Worker smull v27.4s, v27.4h, v0.4h 2168*c0909341SAndroid Build Coastguard Worker smull v28.4s, v28.4h, v0.4h 2169*c0909341SAndroid Build Coastguard Worker addp v27.4s, v27.4s, v28.4s 2170*c0909341SAndroid Build Coastguard Worker addp v16.4s, v27.4s, v27.4s 2171*c0909341SAndroid Build Coastguard Worker srshl v16.2s, v16.2s, v30.2s // -(6-intermediate_bits) 2172*c0909341SAndroid 
Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2173*c0909341SAndroid Build Coastguard Worker // The intermediates from the horizontal pass fit in 16 bit without 2174*c0909341SAndroid Build Coastguard Worker // any bias; we could just as well keep them as .4s, but narrowing 2175*c0909341SAndroid Build Coastguard Worker // them to .4h gives a significant speedup on out of order cores 2176*c0909341SAndroid Build Coastguard Worker // (at the cost of a smaller slowdown on in-order cores such as A53). 2177*c0909341SAndroid Build Coastguard Worker xtn v16.4h, v16.4s 2178*c0909341SAndroid Build Coastguard Worker 2179*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v24.2s 2180*c0909341SAndroid Build Coastguard Worker mov v17.8b, v24.8b 2181*c0909341SAndroid Build Coastguard Worker 2182*c0909341SAndroid Build Coastguard Worker2: 2183*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2184*c0909341SAndroid Build Coastguard Worker 2185*c0909341SAndroid Build Coastguard Worker ext v18.8b, v17.8b, v24.8b, #4 2186*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2187*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2188*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2189*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v24.4h, v1.h[3] 2190*c0909341SAndroid Build Coastguard Worker 2191*c0909341SAndroid Build Coastguard Worker srshl v2.4s, v2.4s, v29.4s // -(6+intermediate_bits) 2192*c0909341SAndroid Build Coastguard Worker sqxtun v2.4h, v2.4s 2193*c0909341SAndroid Build Coastguard Worker umin v2.4h, v2.4h, v31.4h 2194*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2195*c0909341SAndroid Build Coastguard Worker st1 {v2.s}[0], [\dst], \d_strd 2196*c0909341SAndroid Build Coastguard Worker st1 {v2.s}[1], [\ds2], \d_strd 2197*c0909341SAndroid Build Coastguard Worker b.le 0f 2198*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2199*c0909341SAndroid 
Build Coastguard Worker mov v17.8b, v24.8b 2200*c0909341SAndroid Build Coastguard Worker b 2b 2201*c0909341SAndroid Build Coastguard Worker 2202*c0909341SAndroid Build Coastguard Worker280: // 2x8, 2x16, 2x32 hv 2203*c0909341SAndroid Build Coastguard Worker ld1 {v1.8b}, [\xmy] 2204*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 2205*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 2206*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 2207*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2208*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2209*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2210*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2211*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2212*c0909341SAndroid Build Coastguard Worker mov x15, x30 2213*c0909341SAndroid Build Coastguard Worker 2214*c0909341SAndroid Build Coastguard Worker ld1 {v27.8h}, [\src], \s_strd 2215*c0909341SAndroid Build Coastguard Worker ext v28.16b, v27.16b, v27.16b, #2 2216*c0909341SAndroid Build Coastguard Worker smull v27.4s, v27.4h, v0.4h 2217*c0909341SAndroid Build Coastguard Worker smull v28.4s, v28.4h, v0.4h 2218*c0909341SAndroid Build Coastguard Worker addp v27.4s, v27.4s, v28.4s 2219*c0909341SAndroid Build Coastguard Worker addp v16.4s, v27.4s, v27.4s 2220*c0909341SAndroid Build Coastguard Worker srshl v16.2s, v16.2s, v30.2s // -(6-intermediate_bits) 2221*c0909341SAndroid Build Coastguard Worker // The intermediates from the horizontal pass fit in 16 bit without 2222*c0909341SAndroid Build Coastguard Worker // any bias; we could just as well keep them as .4s, but narrowing 2223*c0909341SAndroid Build Coastguard Worker // them to .4h gives a significant speedup on out of order cores 2224*c0909341SAndroid Build Coastguard Worker // (at the cost of a smaller slowdown on in-order cores such as A53). 
2225*c0909341SAndroid Build Coastguard Worker 2226*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2227*c0909341SAndroid Build Coastguard Worker xtn v16.4h, v16.4s 2228*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v24.2s 2229*c0909341SAndroid Build Coastguard Worker mov v17.8b, v24.8b 2230*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2231*c0909341SAndroid Build Coastguard Worker ext v18.8b, v17.8b, v24.8b, #4 2232*c0909341SAndroid Build Coastguard Worker mov v19.8b, v24.8b 2233*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2234*c0909341SAndroid Build Coastguard Worker ext v20.8b, v19.8b, v24.8b, #4 2235*c0909341SAndroid Build Coastguard Worker mov v21.8b, v24.8b 2236*c0909341SAndroid Build Coastguard Worker 2237*c0909341SAndroid Build Coastguard Worker28: 2238*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2239*c0909341SAndroid Build Coastguard Worker ext v22.8b, v21.8b, v24.8b, #4 2240*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2241*c0909341SAndroid Build Coastguard Worker smull v3.4s, v17.4h, v1.h[1] 2242*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v18.4h, v1.h[2] 2243*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v19.4h, v1.h[3] 2244*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v20.4h, v1.h[4] 2245*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v21.4h, v1.h[5] 2246*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v22.4h, v1.h[6] 2247*c0909341SAndroid Build Coastguard Worker.else // 8tap 2248*c0909341SAndroid Build Coastguard Worker smull v3.4s, v16.4h, v1.h[0] 2249*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v17.4h, v1.h[1] 2250*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v18.4h, v1.h[2] 2251*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v19.4h, v1.h[3] 2252*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v20.4h, v1.h[4] 2253*c0909341SAndroid Build 
Coastguard Worker smlal v3.4s, v21.4h, v1.h[5] 2254*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v22.4h, v1.h[6] 2255*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v24.4h, v1.h[7] 2256*c0909341SAndroid Build Coastguard Worker.endif 2257*c0909341SAndroid Build Coastguard Worker 2258*c0909341SAndroid Build Coastguard Worker srshl v3.4s, v3.4s, v29.4s // -(6+intermediate_bits) 2259*c0909341SAndroid Build Coastguard Worker sqxtun v3.4h, v3.4s 2260*c0909341SAndroid Build Coastguard Worker umin v3.4h, v3.4h, v31.4h 2261*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2262*c0909341SAndroid Build Coastguard Worker st1 {v3.s}[0], [\dst], \d_strd 2263*c0909341SAndroid Build Coastguard Worker st1 {v3.s}[1], [\ds2], \d_strd 2264*c0909341SAndroid Build Coastguard Worker b.le 0f 2265*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2266*c0909341SAndroid Build Coastguard Worker mov v17.8b, v19.8b 2267*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2268*c0909341SAndroid Build Coastguard Worker mov v19.8b, v21.8b 2269*c0909341SAndroid Build Coastguard Worker mov v20.8b, v22.8b 2270*c0909341SAndroid Build Coastguard Worker mov v21.8b, v24.8b 2271*c0909341SAndroid Build Coastguard Worker b 28b 2272*c0909341SAndroid Build Coastguard Worker 2273*c0909341SAndroid Build Coastguard Worker0: 2274*c0909341SAndroid Build Coastguard Worker ret x15 2275*c0909341SAndroid Build Coastguard Worker 2276*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_2): 2277*c0909341SAndroid Build Coastguard Worker ld1 {v25.8h}, [\sr2], \s_strd 2278*c0909341SAndroid Build Coastguard Worker ld1 {v27.8h}, [\src], \s_strd 2279*c0909341SAndroid Build Coastguard Worker ext v26.16b, v25.16b, v25.16b, #2 2280*c0909341SAndroid Build Coastguard Worker ext v28.16b, v27.16b, v27.16b, #2 2281*c0909341SAndroid Build Coastguard Worker trn1 v24.2s, v25.2s, v27.2s 2282*c0909341SAndroid Build Coastguard Worker trn2 v27.2s, v25.2s, v27.2s 2283*c0909341SAndroid 
Build Coastguard Worker trn1 v25.2s, v26.2s, v28.2s 2284*c0909341SAndroid Build Coastguard Worker trn2 v28.2s, v26.2s, v28.2s 2285*c0909341SAndroid Build Coastguard Worker smull v24.4s, v24.4h, v0.h[0] 2286*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v25.4h, v0.h[1] 2287*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v27.4h, v0.h[2] 2288*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v28.4h, v0.h[3] 2289*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v30.4s // -(6-intermediate_bits) 2290*c0909341SAndroid Build Coastguard Worker xtn v24.4h, v24.4s 2291*c0909341SAndroid Build Coastguard Worker ret 2292*c0909341SAndroid Build Coastguard Worker.endif 2293*c0909341SAndroid Build Coastguard Worker 2294*c0909341SAndroid Build Coastguard Worker40: 2295*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2296*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmx, #2] 2297*c0909341SAndroid Build Coastguard Worker b.gt 480f 2298*c0909341SAndroid Build Coastguard Worker ldur s1, [\xmy, #2] 2299*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, #2 2300*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 2301*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2302*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2303*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2304*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2305*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2306*c0909341SAndroid Build Coastguard Worker mov x15, x30 2307*c0909341SAndroid Build Coastguard Worker 2308*c0909341SAndroid Build Coastguard Worker // 4x2, 4x4 hv 2309*c0909341SAndroid Build Coastguard Worker ld1 {v25.8h}, [\src], \s_strd 2310*c0909341SAndroid Build Coastguard Worker ext v26.16b, v25.16b, v25.16b, #2 2311*c0909341SAndroid Build Coastguard Worker ext v27.16b, v25.16b, v25.16b, #4 2312*c0909341SAndroid Build Coastguard Worker ext v28.16b, v25.16b, 
v25.16b, #6 2313*c0909341SAndroid Build Coastguard Worker smull v25.4s, v25.4h, v0.h[0] 2314*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v26.4h, v0.h[1] 2315*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v27.4h, v0.h[2] 2316*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v28.4h, v0.h[3] 2317*c0909341SAndroid Build Coastguard Worker srshl v16.4s, v25.4s, v30.4s // -(6-intermediate_bits) 2318*c0909341SAndroid Build Coastguard Worker // The intermediates from the horizontal pass fit in 16 bit without 2319*c0909341SAndroid Build Coastguard Worker // any bias; we could just as well keep them as .4s, but narrowing 2320*c0909341SAndroid Build Coastguard Worker // them to .4h gives a significant speedup on out of order cores 2321*c0909341SAndroid Build Coastguard Worker // (at the cost of a smaller slowdown on in-order cores such as A53). 2322*c0909341SAndroid Build Coastguard Worker xtn v16.4h, v16.4s 2323*c0909341SAndroid Build Coastguard Worker 2324*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2325*c0909341SAndroid Build Coastguard Worker mov v17.8b, v24.8b 2326*c0909341SAndroid Build Coastguard Worker mov v18.8b, v25.8b 2327*c0909341SAndroid Build Coastguard Worker 2328*c0909341SAndroid Build Coastguard Worker4: 2329*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2330*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2331*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2332*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2333*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v24.4h, v1.h[3] 2334*c0909341SAndroid Build Coastguard Worker smull v3.4s, v17.4h, v1.h[0] 2335*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v18.4h, v1.h[1] 2336*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v24.4h, v1.h[2] 2337*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v25.4h, v1.h[3] 2338*c0909341SAndroid Build 
Coastguard Worker.ifc \type, put 2339*c0909341SAndroid Build Coastguard Worker srshl v2.4s, v2.4s, v29.4s // -(6+intermediate_bits) 2340*c0909341SAndroid Build Coastguard Worker srshl v3.4s, v3.4s, v29.4s // -(6+intermediate_bits) 2341*c0909341SAndroid Build Coastguard Worker sqxtun v2.4h, v2.4s 2342*c0909341SAndroid Build Coastguard Worker sqxtun2 v2.8h, v3.4s 2343*c0909341SAndroid Build Coastguard Worker umin v2.8h, v2.8h, v31.8h 2344*c0909341SAndroid Build Coastguard Worker.else 2345*c0909341SAndroid Build Coastguard Worker rshrn v2.4h, v2.4s, #6 2346*c0909341SAndroid Build Coastguard Worker rshrn2 v2.8h, v3.4s, #6 2347*c0909341SAndroid Build Coastguard Worker sub v2.8h, v2.8h, v29.8h // PREP_BIAS 2348*c0909341SAndroid Build Coastguard Worker.endif 2349*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2350*c0909341SAndroid Build Coastguard Worker 2351*c0909341SAndroid Build Coastguard Worker st1 {v2.8b}, [\dst], \d_strd 2352*c0909341SAndroid Build Coastguard Worker st1 {v2.d}[1], [\ds2], \d_strd 2353*c0909341SAndroid Build Coastguard Worker b.le 0f 2354*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2355*c0909341SAndroid Build Coastguard Worker mov v17.8b, v24.8b 2356*c0909341SAndroid Build Coastguard Worker mov v18.8b, v25.8b 2357*c0909341SAndroid Build Coastguard Worker b 4b 2358*c0909341SAndroid Build Coastguard Worker 2359*c0909341SAndroid Build Coastguard Worker480: // 4x8, 4x16, 4x32 hv 2360*c0909341SAndroid Build Coastguard Worker ld1 {v1.8b}, [\xmy] 2361*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 2362*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2363*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd 2364*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2365*c0909341SAndroid Build Coastguard Worker.else 2366*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 2367*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 
2368*c0909341SAndroid Build Coastguard Worker.endif 2369*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2370*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2371*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2372*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2373*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2374*c0909341SAndroid Build Coastguard Worker mov x15, x30 2375*c0909341SAndroid Build Coastguard Worker 2376*c0909341SAndroid Build Coastguard Worker ld1 {v25.8h}, [\src], \s_strd 2377*c0909341SAndroid Build Coastguard Worker ext v26.16b, v25.16b, v25.16b, #2 2378*c0909341SAndroid Build Coastguard Worker ext v27.16b, v25.16b, v25.16b, #4 2379*c0909341SAndroid Build Coastguard Worker ext v28.16b, v25.16b, v25.16b, #6 2380*c0909341SAndroid Build Coastguard Worker smull v25.4s, v25.4h, v0.h[0] 2381*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v26.4h, v0.h[1] 2382*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v27.4h, v0.h[2] 2383*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v28.4h, v0.h[3] 2384*c0909341SAndroid Build Coastguard Worker srshl v16.4s, v25.4s, v30.4s // -(6-intermediate_bits) 2385*c0909341SAndroid Build Coastguard Worker // The intermediates from the horizontal pass fit in 16 bit without 2386*c0909341SAndroid Build Coastguard Worker // any bias; we could just as well keep them as .4s, but narrowing 2387*c0909341SAndroid Build Coastguard Worker // them to .4h gives a significant speedup on out of order cores 2388*c0909341SAndroid Build Coastguard Worker // (at the cost of a smaller slowdown on in-order cores such as A53). 
2389*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2390*c0909341SAndroid Build Coastguard Worker xtn v18.4h, v16.4s 2391*c0909341SAndroid Build Coastguard Worker.else 2392*c0909341SAndroid Build Coastguard Worker xtn v16.4h, v16.4s 2393*c0909341SAndroid Build Coastguard Worker 2394*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2395*c0909341SAndroid Build Coastguard Worker mov v17.8b, v24.8b 2396*c0909341SAndroid Build Coastguard Worker mov v18.8b, v25.8b 2397*c0909341SAndroid Build Coastguard Worker.endif 2398*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2399*c0909341SAndroid Build Coastguard Worker mov v19.8b, v24.8b 2400*c0909341SAndroid Build Coastguard Worker mov v20.8b, v25.8b 2401*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2402*c0909341SAndroid Build Coastguard Worker mov v21.8b, v24.8b 2403*c0909341SAndroid Build Coastguard Worker mov v22.8b, v25.8b 2404*c0909341SAndroid Build Coastguard Worker 2405*c0909341SAndroid Build Coastguard Worker48: 2406*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2407*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2408*c0909341SAndroid Build Coastguard Worker smull v3.4s, v18.4h, v1.h[1] 2409*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v19.4h, v1.h[2] 2410*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v20.4h, v1.h[3] 2411*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v21.4h, v1.h[4] 2412*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v22.4h, v1.h[5] 2413*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v24.4h, v1.h[6] 2414*c0909341SAndroid Build Coastguard Worker smull v4.4s, v19.4h, v1.h[1] 2415*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v20.4h, v1.h[2] 2416*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v21.4h, v1.h[3] 2417*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v22.4h, v1.h[4] 2418*c0909341SAndroid Build Coastguard Worker 
smlal v4.4s, v24.4h, v1.h[5] 2419*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v25.4h, v1.h[6] 2420*c0909341SAndroid Build Coastguard Worker.else // 8tap 2421*c0909341SAndroid Build Coastguard Worker smull v3.4s, v16.4h, v1.h[0] 2422*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v17.4h, v1.h[1] 2423*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v18.4h, v1.h[2] 2424*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v19.4h, v1.h[3] 2425*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v20.4h, v1.h[4] 2426*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v21.4h, v1.h[5] 2427*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v22.4h, v1.h[6] 2428*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v24.4h, v1.h[7] 2429*c0909341SAndroid Build Coastguard Worker smull v4.4s, v17.4h, v1.h[0] 2430*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v18.4h, v1.h[1] 2431*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v19.4h, v1.h[2] 2432*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v20.4h, v1.h[3] 2433*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v21.4h, v1.h[4] 2434*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v22.4h, v1.h[5] 2435*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v24.4h, v1.h[6] 2436*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v25.4h, v1.h[7] 2437*c0909341SAndroid Build Coastguard Worker.endif 2438*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2439*c0909341SAndroid Build Coastguard Worker srshl v3.4s, v3.4s, v29.4s // -(6+intermediate_bits) 2440*c0909341SAndroid Build Coastguard Worker srshl v4.4s, v4.4s, v29.4s // -(6+intermediate_bits) 2441*c0909341SAndroid Build Coastguard Worker sqxtun v3.4h, v3.4s 2442*c0909341SAndroid Build Coastguard Worker sqxtun2 v3.8h, v4.4s 2443*c0909341SAndroid Build Coastguard Worker umin v3.8h, v3.8h, v31.8h 2444*c0909341SAndroid Build Coastguard Worker.else 2445*c0909341SAndroid Build Coastguard Worker rshrn v3.4h, v3.4s, #6 
2446*c0909341SAndroid Build Coastguard Worker rshrn2 v3.8h, v4.4s, #6 2447*c0909341SAndroid Build Coastguard Worker sub v3.8h, v3.8h, v29.8h // PREP_BIAS 2448*c0909341SAndroid Build Coastguard Worker.endif 2449*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2450*c0909341SAndroid Build Coastguard Worker st1 {v3.8b}, [\dst], \d_strd 2451*c0909341SAndroid Build Coastguard Worker st1 {v3.d}[1], [\ds2], \d_strd 2452*c0909341SAndroid Build Coastguard Worker b.le 0f 2453*c0909341SAndroid Build Coastguard Worker.ifc \taps, 8tap 2454*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2455*c0909341SAndroid Build Coastguard Worker mov v17.8b, v19.8b 2456*c0909341SAndroid Build Coastguard Worker.endif 2457*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2458*c0909341SAndroid Build Coastguard Worker mov v19.8b, v21.8b 2459*c0909341SAndroid Build Coastguard Worker mov v20.8b, v22.8b 2460*c0909341SAndroid Build Coastguard Worker mov v21.8b, v24.8b 2461*c0909341SAndroid Build Coastguard Worker mov v22.8b, v25.8b 2462*c0909341SAndroid Build Coastguard Worker b 48b 2463*c0909341SAndroid Build Coastguard Worker0: 2464*c0909341SAndroid Build Coastguard Worker ret x15 2465*c0909341SAndroid Build Coastguard Worker 2466*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_4): 2467*c0909341SAndroid Build Coastguard Worker ld1 {v24.8h}, [\sr2], \s_strd 2468*c0909341SAndroid Build Coastguard Worker ld1 {v25.8h}, [\src], \s_strd 2469*c0909341SAndroid Build Coastguard Worker ext v26.16b, v24.16b, v24.16b, #2 2470*c0909341SAndroid Build Coastguard Worker ext v27.16b, v24.16b, v24.16b, #4 2471*c0909341SAndroid Build Coastguard Worker ext v28.16b, v24.16b, v24.16b, #6 2472*c0909341SAndroid Build Coastguard Worker smull v24.4s, v24.4h, v0.h[0] 2473*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v26.4h, v0.h[1] 2474*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v27.4h, v0.h[2] 2475*c0909341SAndroid Build Coastguard Worker smlal v24.4s, 
v28.4h, v0.h[3] 2476*c0909341SAndroid Build Coastguard Worker ext v26.16b, v25.16b, v25.16b, #2 2477*c0909341SAndroid Build Coastguard Worker ext v27.16b, v25.16b, v25.16b, #4 2478*c0909341SAndroid Build Coastguard Worker ext v28.16b, v25.16b, v25.16b, #6 2479*c0909341SAndroid Build Coastguard Worker smull v25.4s, v25.4h, v0.h[0] 2480*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v26.4h, v0.h[1] 2481*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v27.4h, v0.h[2] 2482*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v28.4h, v0.h[3] 2483*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v30.4s // -(6-intermediate_bits) 2484*c0909341SAndroid Build Coastguard Worker srshl v25.4s, v25.4s, v30.4s // -(6-intermediate_bits) 2485*c0909341SAndroid Build Coastguard Worker xtn v24.4h, v24.4s 2486*c0909341SAndroid Build Coastguard Worker xtn v25.4h, v25.4s 2487*c0909341SAndroid Build Coastguard Worker ret 2488*c0909341SAndroid Build Coastguard Worker 2489*c0909341SAndroid Build Coastguard Worker80: 2490*c0909341SAndroid Build Coastguard Worker160: 2491*c0909341SAndroid Build Coastguard Worker320: 2492*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2493*c0909341SAndroid Build Coastguard Worker b.gt 880f 2494*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmx] 2495*c0909341SAndroid Build Coastguard Worker ldur s1, [\xmy, #2] 2496*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2497*c0909341SAndroid Build Coastguard Worker sub \src, \src, #4 2498*c0909341SAndroid Build Coastguard Worker.else 2499*c0909341SAndroid Build Coastguard Worker sub \src, \src, #6 2500*c0909341SAndroid Build Coastguard Worker.endif 2501*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 2502*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2503*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2504*c0909341SAndroid Build Coastguard Worker mov x15, x30 2505*c0909341SAndroid Build Coastguard Worker mov 
\my, \h 2506*c0909341SAndroid Build Coastguard Worker 2507*c0909341SAndroid Build Coastguard Worker164: // 8x2, 8x4, 16x2, 16x4, 32x2, 32x4 hv 2508*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2509*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2510*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2511*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2512*c0909341SAndroid Build Coastguard Worker 2513*c0909341SAndroid Build Coastguard Worker ld1 {v27.8h, v28.8h}, [\src], \s_strd 2514*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2515*c0909341SAndroid Build Coastguard Worker smull v24.4s, v27.4h, v0.h[1] 2516*c0909341SAndroid Build Coastguard Worker smull2 v25.4s, v27.8h, v0.h[1] 2517*c0909341SAndroid Build Coastguard Worker .irpc i, 23456 2518*c0909341SAndroid Build Coastguard Worker ext v26.16b, v27.16b, v28.16b, #(2*\i-2) 2519*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v26.4h, v0.h[\i] 2520*c0909341SAndroid Build Coastguard Worker smlal2 v25.4s, v26.8h, v0.h[\i] 2521*c0909341SAndroid Build Coastguard Worker .endr 2522*c0909341SAndroid Build Coastguard Worker.else 2523*c0909341SAndroid Build Coastguard Worker smull v24.4s, v27.4h, v0.h[0] 2524*c0909341SAndroid Build Coastguard Worker smull2 v25.4s, v27.8h, v0.h[0] 2525*c0909341SAndroid Build Coastguard Worker .irpc i, 1234567 2526*c0909341SAndroid Build Coastguard Worker ext v26.16b, v27.16b, v28.16b, #(2*\i) 2527*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v26.4h, v0.h[\i] 2528*c0909341SAndroid Build Coastguard Worker smlal2 v25.4s, v26.8h, v0.h[\i] 2529*c0909341SAndroid Build Coastguard Worker .endr 2530*c0909341SAndroid Build Coastguard Worker.endif 2531*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v30.4s // -(6-intermediate_bits) 2532*c0909341SAndroid Build Coastguard Worker srshl v25.4s, v25.4s, v30.4s // -(6-intermediate_bits) 2533*c0909341SAndroid Build Coastguard Worker // The 
intermediates from the horizontal pass fit in 16 bit without 2534*c0909341SAndroid Build Coastguard Worker // any bias; we could just as well keep them as .4s, but narrowing 2535*c0909341SAndroid Build Coastguard Worker // them to .4h gives a significant speedup on out of order cores 2536*c0909341SAndroid Build Coastguard Worker // (at the cost of a smaller slowdown on in-order cores such as A53), 2537*c0909341SAndroid Build Coastguard Worker // and conserves register space (no need to clobber v8-v15). 2538*c0909341SAndroid Build Coastguard Worker uzp1 v16.8h, v24.8h, v25.8h // Same as xtn, xtn2 2539*c0909341SAndroid Build Coastguard Worker 2540*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2541*c0909341SAndroid Build Coastguard Worker mov v17.16b, v23.16b 2542*c0909341SAndroid Build Coastguard Worker mov v18.16b, v24.16b 2543*c0909341SAndroid Build Coastguard Worker 2544*c0909341SAndroid Build Coastguard Worker8: 2545*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2546*c0909341SAndroid Build Coastguard Worker smull2 v3.4s, v16.8h, v1.h[0] 2547*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2548*c0909341SAndroid Build Coastguard Worker smull v4.4s, v17.4h, v1.h[0] 2549*c0909341SAndroid Build Coastguard Worker smull2 v5.4s, v17.8h, v1.h[0] 2550*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2551*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v17.8h, v1.h[1] 2552*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v18.4h, v1.h[1] 2553*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v18.8h, v1.h[1] 2554*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2555*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v18.8h, v1.h[2] 2556*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v23.4h, v1.h[2] 2557*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v23.8h, v1.h[2] 2558*c0909341SAndroid Build Coastguard Worker smlal v2.4s, 
v23.4h, v1.h[3] 2559*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v23.8h, v1.h[3] 2560*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v24.4h, v1.h[3] 2561*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v24.8h, v1.h[3] 2562*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2563*c0909341SAndroid Build Coastguard Worker srshl v2.4s, v2.4s, v29.4s // -(6+intermediate_bits) 2564*c0909341SAndroid Build Coastguard Worker srshl v3.4s, v3.4s, v29.4s // -(6+intermediate_bits) 2565*c0909341SAndroid Build Coastguard Worker srshl v4.4s, v4.4s, v29.4s // -(6+intermediate_bits) 2566*c0909341SAndroid Build Coastguard Worker srshl v5.4s, v5.4s, v29.4s // -(6+intermediate_bits) 2567*c0909341SAndroid Build Coastguard Worker sqxtun v2.4h, v2.4s 2568*c0909341SAndroid Build Coastguard Worker sqxtun2 v2.8h, v3.4s 2569*c0909341SAndroid Build Coastguard Worker sqxtun v3.4h, v4.4s 2570*c0909341SAndroid Build Coastguard Worker sqxtun2 v3.8h, v5.4s 2571*c0909341SAndroid Build Coastguard Worker umin v2.8h, v2.8h, v31.8h 2572*c0909341SAndroid Build Coastguard Worker umin v3.8h, v3.8h, v31.8h 2573*c0909341SAndroid Build Coastguard Worker.else 2574*c0909341SAndroid Build Coastguard Worker rshrn v2.4h, v2.4s, #6 2575*c0909341SAndroid Build Coastguard Worker rshrn2 v2.8h, v3.4s, #6 2576*c0909341SAndroid Build Coastguard Worker rshrn v3.4h, v4.4s, #6 2577*c0909341SAndroid Build Coastguard Worker rshrn2 v3.8h, v5.4s, #6 2578*c0909341SAndroid Build Coastguard Worker sub v2.8h, v2.8h, v29.8h // PREP_BIAS 2579*c0909341SAndroid Build Coastguard Worker sub v3.8h, v3.8h, v29.8h // PREP_BIAS 2580*c0909341SAndroid Build Coastguard Worker.endif 2581*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2582*c0909341SAndroid Build Coastguard Worker st1 {v2.8h}, [\dst], \d_strd 2583*c0909341SAndroid Build Coastguard Worker st1 {v3.8h}, [\ds2], \d_strd 2584*c0909341SAndroid Build Coastguard Worker b.le 9f 2585*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 
2586*c0909341SAndroid Build Coastguard Worker mov v17.16b, v23.16b 2587*c0909341SAndroid Build Coastguard Worker mov v18.16b, v24.16b 2588*c0909341SAndroid Build Coastguard Worker b 8b 2589*c0909341SAndroid Build Coastguard Worker9: 2590*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 2591*c0909341SAndroid Build Coastguard Worker b.le 0f 2592*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2593*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2594*c0909341SAndroid Build Coastguard Worker msub \src, \s_strd, \xmy, \src 2595*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 2596*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #2 2597*c0909341SAndroid Build Coastguard Worker mov \h, \my 2598*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 2599*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2600*c0909341SAndroid Build Coastguard Worker b 164b 2601*c0909341SAndroid Build Coastguard Worker 2602*c0909341SAndroid Build Coastguard Worker880: // 8x8, 8x16, ..., 16x8, ..., 32x8, ... 
hv 2603*c0909341SAndroid Build Coastguard Worker640: 2604*c0909341SAndroid Build Coastguard Worker1280: 2605*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2606*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmx] 2607*c0909341SAndroid Build Coastguard Worker ld1 {v1.8b}, [\xmy] 2608*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2609*c0909341SAndroid Build Coastguard Worker sub \src, \src, #4 2610*c0909341SAndroid Build Coastguard Worker.else 2611*c0909341SAndroid Build Coastguard Worker sub \src, \src, #6 2612*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 2613*c0909341SAndroid Build Coastguard Worker.endif 2614*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2615*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2616*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2617*c0909341SAndroid Build Coastguard Worker mov x15, x30 2618*c0909341SAndroid Build Coastguard Worker mov \my, \h 2619*c0909341SAndroid Build Coastguard Worker 2620*c0909341SAndroid Build Coastguard Worker168: 2621*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2622*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2623*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2624*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2625*c0909341SAndroid Build Coastguard Worker 2626*c0909341SAndroid Build Coastguard Worker ld1 {v27.8h, v28.8h}, [\src], \s_strd 2627*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2628*c0909341SAndroid Build Coastguard Worker smull v24.4s, v27.4h, v0.h[1] 2629*c0909341SAndroid Build Coastguard Worker smull2 v25.4s, v27.8h, v0.h[1] 2630*c0909341SAndroid Build Coastguard Worker .irpc i, 23456 2631*c0909341SAndroid Build Coastguard Worker ext v26.16b, v27.16b, v28.16b, #(2*\i-2) 2632*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v26.4h, v0.h[\i] 2633*c0909341SAndroid Build Coastguard Worker smlal2 
v25.4s, v26.8h, v0.h[\i] 2634*c0909341SAndroid Build Coastguard Worker .endr 2635*c0909341SAndroid Build Coastguard Worker.else // 8tap 2636*c0909341SAndroid Build Coastguard Worker smull v24.4s, v27.4h, v0.h[0] 2637*c0909341SAndroid Build Coastguard Worker smull2 v25.4s, v27.8h, v0.h[0] 2638*c0909341SAndroid Build Coastguard Worker .irpc i, 1234567 2639*c0909341SAndroid Build Coastguard Worker ext v26.16b, v27.16b, v28.16b, #(2*\i) 2640*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v26.4h, v0.h[\i] 2641*c0909341SAndroid Build Coastguard Worker smlal2 v25.4s, v26.8h, v0.h[\i] 2642*c0909341SAndroid Build Coastguard Worker .endr 2643*c0909341SAndroid Build Coastguard Worker.endif 2644*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v30.4s // -(6-intermediate_bits) 2645*c0909341SAndroid Build Coastguard Worker srshl v25.4s, v25.4s, v30.4s // -(6-intermediate_bits) 2646*c0909341SAndroid Build Coastguard Worker // The intermediates from the horizontal pass fit in 16 bit without 2647*c0909341SAndroid Build Coastguard Worker // any bias; we could just as well keep them as .4s, but narrowing 2648*c0909341SAndroid Build Coastguard Worker // them to .4h gives a significant speedup on out of order cores 2649*c0909341SAndroid Build Coastguard Worker // (at the cost of a smaller slowdown on in-order cores such as A53), 2650*c0909341SAndroid Build Coastguard Worker // and conserves register space (no need to clobber v8-v15). 
2651*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2652*c0909341SAndroid Build Coastguard Worker uzp1 v18.8h, v24.8h, v25.8h // Same as xtn, xtn2 2653*c0909341SAndroid Build Coastguard Worker.else 2654*c0909341SAndroid Build Coastguard Worker uzp1 v16.8h, v24.8h, v25.8h // Same as xtn, xtn2 2655*c0909341SAndroid Build Coastguard Worker 2656*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2657*c0909341SAndroid Build Coastguard Worker mov v17.16b, v23.16b 2658*c0909341SAndroid Build Coastguard Worker mov v18.16b, v24.16b 2659*c0909341SAndroid Build Coastguard Worker.endif 2660*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2661*c0909341SAndroid Build Coastguard Worker mov v19.16b, v23.16b 2662*c0909341SAndroid Build Coastguard Worker mov v20.16b, v24.16b 2663*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2664*c0909341SAndroid Build Coastguard Worker mov v21.16b, v23.16b 2665*c0909341SAndroid Build Coastguard Worker mov v22.16b, v24.16b 2666*c0909341SAndroid Build Coastguard Worker 2667*c0909341SAndroid Build Coastguard Worker88: 2668*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2669*c0909341SAndroid Build Coastguard Worker smull v2.4s, v18.4h, v1.h[1] 2670*c0909341SAndroid Build Coastguard Worker smull2 v3.4s, v18.8h, v1.h[1] 2671*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2672*c0909341SAndroid Build Coastguard Worker smull v4.4s, v19.4h, v1.h[1] 2673*c0909341SAndroid Build Coastguard Worker smull2 v5.4s, v19.8h, v1.h[1] 2674*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[2] 2675*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v19.8h, v1.h[2] 2676*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v20.4h, v1.h[2] 2677*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v20.8h, v1.h[2] 2678*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v20.4h, v1.h[3] 2679*c0909341SAndroid Build Coastguard 
Worker smlal2 v3.4s, v20.8h, v1.h[3] 2680*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v21.4h, v1.h[3] 2681*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v21.8h, v1.h[3] 2682*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[4] 2683*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v21.8h, v1.h[4] 2684*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v22.4h, v1.h[4] 2685*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v22.8h, v1.h[4] 2686*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v22.4h, v1.h[5] 2687*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v22.8h, v1.h[5] 2688*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v23.4h, v1.h[5] 2689*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v23.8h, v1.h[5] 2690*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v23.4h, v1.h[6] 2691*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v23.8h, v1.h[6] 2692*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v24.4h, v1.h[6] 2693*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v24.8h, v1.h[6] 2694*c0909341SAndroid Build Coastguard Worker.else // 8tap 2695*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2696*c0909341SAndroid Build Coastguard Worker smull2 v3.4s, v16.8h, v1.h[0] 2697*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2698*c0909341SAndroid Build Coastguard Worker smull v4.4s, v17.4h, v1.h[0] 2699*c0909341SAndroid Build Coastguard Worker smull2 v5.4s, v17.8h, v1.h[0] 2700*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2701*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v17.8h, v1.h[1] 2702*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v18.4h, v1.h[1] 2703*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v18.8h, v1.h[1] 2704*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2705*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v18.8h, v1.h[2] 
2706*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v19.4h, v1.h[2] 2707*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v19.8h, v1.h[2] 2708*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[3] 2709*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v19.8h, v1.h[3] 2710*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v20.4h, v1.h[3] 2711*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v20.8h, v1.h[3] 2712*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v20.4h, v1.h[4] 2713*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v20.8h, v1.h[4] 2714*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v21.4h, v1.h[4] 2715*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v21.8h, v1.h[4] 2716*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[5] 2717*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v21.8h, v1.h[5] 2718*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v22.4h, v1.h[5] 2719*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v22.8h, v1.h[5] 2720*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v22.4h, v1.h[6] 2721*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v22.8h, v1.h[6] 2722*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v23.4h, v1.h[6] 2723*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v23.8h, v1.h[6] 2724*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v23.4h, v1.h[7] 2725*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v23.8h, v1.h[7] 2726*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v24.4h, v1.h[7] 2727*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v24.8h, v1.h[7] 2728*c0909341SAndroid Build Coastguard Worker.endif 2729*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2730*c0909341SAndroid Build Coastguard Worker srshl v2.4s, v2.4s, v29.4s // -(6+intermediate_bits) 2731*c0909341SAndroid Build Coastguard Worker srshl v3.4s, v3.4s, v29.4s // -(6+intermediate_bits) 2732*c0909341SAndroid Build 
Coastguard Worker srshl v4.4s, v4.4s, v29.4s // -(6+intermediate_bits) 2733*c0909341SAndroid Build Coastguard Worker srshl v5.4s, v5.4s, v29.4s // -(6+intermediate_bits) 2734*c0909341SAndroid Build Coastguard Worker sqxtun v2.4h, v2.4s 2735*c0909341SAndroid Build Coastguard Worker sqxtun2 v2.8h, v3.4s 2736*c0909341SAndroid Build Coastguard Worker sqxtun v3.4h, v4.4s 2737*c0909341SAndroid Build Coastguard Worker sqxtun2 v3.8h, v5.4s 2738*c0909341SAndroid Build Coastguard Worker umin v2.8h, v2.8h, v31.8h 2739*c0909341SAndroid Build Coastguard Worker umin v3.8h, v3.8h, v31.8h 2740*c0909341SAndroid Build Coastguard Worker.else 2741*c0909341SAndroid Build Coastguard Worker rshrn v2.4h, v2.4s, #6 2742*c0909341SAndroid Build Coastguard Worker rshrn2 v2.8h, v3.4s, #6 2743*c0909341SAndroid Build Coastguard Worker rshrn v3.4h, v4.4s, #6 2744*c0909341SAndroid Build Coastguard Worker rshrn2 v3.8h, v5.4s, #6 2745*c0909341SAndroid Build Coastguard Worker sub v2.8h, v2.8h, v29.8h // PREP_BIAS 2746*c0909341SAndroid Build Coastguard Worker sub v3.8h, v3.8h, v29.8h // PREP_BIAS 2747*c0909341SAndroid Build Coastguard Worker.endif 2748*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2749*c0909341SAndroid Build Coastguard Worker st1 {v2.8h}, [\dst], \d_strd 2750*c0909341SAndroid Build Coastguard Worker st1 {v3.8h}, [\ds2], \d_strd 2751*c0909341SAndroid Build Coastguard Worker b.le 9f 2752*c0909341SAndroid Build Coastguard Worker.ifc \taps, 8tap 2753*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 2754*c0909341SAndroid Build Coastguard Worker mov v17.16b, v19.16b 2755*c0909341SAndroid Build Coastguard Worker.endif 2756*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2757*c0909341SAndroid Build Coastguard Worker mov v19.16b, v21.16b 2758*c0909341SAndroid Build Coastguard Worker mov v20.16b, v22.16b 2759*c0909341SAndroid Build Coastguard Worker mov v21.16b, v23.16b 2760*c0909341SAndroid Build Coastguard Worker mov v22.16b, v24.16b 2761*c0909341SAndroid 
Build Coastguard Worker b 88b 2762*c0909341SAndroid Build Coastguard Worker9: 2763*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 2764*c0909341SAndroid Build Coastguard Worker b.le 0f 2765*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2766*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2767*c0909341SAndroid Build Coastguard Worker msub \src, \s_strd, \xmy, \src 2768*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 2769*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #3 2770*c0909341SAndroid Build Coastguard Worker mov \h, \my 2771*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 2772*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2773*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2774*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 2775*c0909341SAndroid Build Coastguard Worker.endif 2776*c0909341SAndroid Build Coastguard Worker b 168b 2777*c0909341SAndroid Build Coastguard Worker0: 2778*c0909341SAndroid Build Coastguard Worker ret x15 2779*c0909341SAndroid Build Coastguard Worker 2780*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_8): 2781*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [\sr2], \s_strd 2782*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h, v7.8h}, [\src], \s_strd 2783*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2784*c0909341SAndroid Build Coastguard Worker smull v25.4s, v4.4h, v0.h[1] 2785*c0909341SAndroid Build Coastguard Worker smull2 v26.4s, v4.8h, v0.h[1] 2786*c0909341SAndroid Build Coastguard Worker smull v27.4s, v6.4h, v0.h[1] 2787*c0909341SAndroid Build Coastguard Worker smull2 v28.4s, v6.8h, v0.h[1] 2788*c0909341SAndroid Build Coastguard Worker.irpc i, 23456 2789*c0909341SAndroid Build Coastguard Worker ext v23.16b, v4.16b, v5.16b, #(2*\i-2) 2790*c0909341SAndroid Build Coastguard Worker ext v24.16b, v6.16b, v7.16b, #(2*\i-2) 
2791*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v23.4h, v0.h[\i] 2792*c0909341SAndroid Build Coastguard Worker smlal2 v26.4s, v23.8h, v0.h[\i] 2793*c0909341SAndroid Build Coastguard Worker smlal v27.4s, v24.4h, v0.h[\i] 2794*c0909341SAndroid Build Coastguard Worker smlal2 v28.4s, v24.8h, v0.h[\i] 2795*c0909341SAndroid Build Coastguard Worker.endr 2796*c0909341SAndroid Build Coastguard Worker.else // 8tap 2797*c0909341SAndroid Build Coastguard Worker smull v25.4s, v4.4h, v0.h[0] 2798*c0909341SAndroid Build Coastguard Worker smull2 v26.4s, v4.8h, v0.h[0] 2799*c0909341SAndroid Build Coastguard Worker smull v27.4s, v6.4h, v0.h[0] 2800*c0909341SAndroid Build Coastguard Worker smull2 v28.4s, v6.8h, v0.h[0] 2801*c0909341SAndroid Build Coastguard Worker.irpc i, 1234567 2802*c0909341SAndroid Build Coastguard Worker ext v23.16b, v4.16b, v5.16b, #(2*\i) 2803*c0909341SAndroid Build Coastguard Worker ext v24.16b, v6.16b, v7.16b, #(2*\i) 2804*c0909341SAndroid Build Coastguard Worker smlal v25.4s, v23.4h, v0.h[\i] 2805*c0909341SAndroid Build Coastguard Worker smlal2 v26.4s, v23.8h, v0.h[\i] 2806*c0909341SAndroid Build Coastguard Worker smlal v27.4s, v24.4h, v0.h[\i] 2807*c0909341SAndroid Build Coastguard Worker smlal2 v28.4s, v24.8h, v0.h[\i] 2808*c0909341SAndroid Build Coastguard Worker.endr 2809*c0909341SAndroid Build Coastguard Worker.endif 2810*c0909341SAndroid Build Coastguard Worker srshl v25.4s, v25.4s, v30.4s // -(6-intermediate_bits) 2811*c0909341SAndroid Build Coastguard Worker srshl v26.4s, v26.4s, v30.4s // -(6-intermediate_bits) 2812*c0909341SAndroid Build Coastguard Worker srshl v27.4s, v27.4s, v30.4s // -(6-intermediate_bits) 2813*c0909341SAndroid Build Coastguard Worker srshl v28.4s, v28.4s, v30.4s // -(6-intermediate_bits) 2814*c0909341SAndroid Build Coastguard Worker uzp1 v23.8h, v25.8h, v26.8h // Same as xtn, xtn2 2815*c0909341SAndroid Build Coastguard Worker uzp1 v24.8h, v27.8h, v28.8h // Ditto 2816*c0909341SAndroid Build Coastguard Worker ret 
endfunc         // closes the function whose body precedes this point

jumptable \type\()_\taps\()_hv_tbl
        .word 1280b - \type\()_\taps\()_hv_tbl
        .word 640b  - \type\()_\taps\()_hv_tbl
        .word 320b  - \type\()_\taps\()_hv_tbl
        .word 160b  - \type\()_\taps\()_hv_tbl
        .word 80b   - \type\()_\taps\()_hv_tbl
        .word 40b   - \type\()_\taps\()_hv_tbl
        .word 20b   - \type\()_\taps\()_hv_tbl
endjumptable
.endm


// filter_bilin_fn
//
// Expands to the exported entry point \type\()_bilin_16bpc_neon (which also
// contains the horizontal-only code) and to the local vertical routine
// L(\type\()_bilin_v). The macro body continues after this block.
//
// Every macro argument is a register name that is substituted textually into
// the instructions below:
//   dst, d_strd  destination pointer / destination stride
//   src, s_strd  source pointer / source stride
//   w, h         block width / height (32-bit registers)
//   mx, my       horizontal / vertical weights (32-bit registers)
//   xmx, xmy     presumably the 64-bit views of mx / my - TODO confirm at the
//                macro's invocation, which is outside this chunk
//   bdmax        bitdepth_max on entry, turned into intermediate_bits below
//   ds2, sr2     scratch pointers used for the second row of each row pair
//
// Register roles set up by the entry code:
//   v0 = 16 - mx, v1 = mx      horizontal weights, one per 16-bit lane
//   v2 = 16 - my, v3 = my      vertical weights, one per 16-bit lane
//   w9  = clz(w) - 24          jump table index (w = 128 -> 0 ... w = 2 -> 6)
//   w11 = 4 - intermediate_bits
//   w12 = 4 + intermediate_bits
//   v31 = -(4 - intermediate_bits), as a shift count for urshl
//   v30 = -intermediate_bits   (put, horizontal path only)
//   v29 = PREP_BIAS            (prep only)
.macro filter_bilin_fn type, dst, d_strd, src, s_strd, w, h, mx, xmx, my, xmy, bdmax, ds2, sr2
function \type\()_bilin_16bpc_neon, export=1
.ifc \bdmax, w8
        // When bdmax is mapped to w8 it is read from the first stack slot.
        ldr             w8,  [sp]
.endif
        dup             v1.8h,   \mx
        dup             v3.8h,   \my
        mov             w10, #16
        sub             w9,  w10, \mx          // 16 - mx
        sub             w10, w10, \my          // 16 - my
        dup             v0.8h,   w9
        dup             v2.8h,   w10
.ifc \type, prep
        // prep: the destination stride is derived from the width, d_strd = w * 2.
        uxtw            \d_strd, \w
        lsl             \d_strd, \d_strd, #1
.endif

        clz             \bdmax,   \bdmax       // bitdepth_max
        clz             w9,  \w
        sub             \bdmax,   \bdmax,  #18 // intermediate_bits = clz(bitdepth_max) - 18
        mov             w11, #4
        sub             w9,  w9,  #24          // jump table index
        sub             w11, w11, \bdmax  // 4 - intermediate_bits
        add             w12, \bdmax, #4   // 4 + intermediate_bits
        // Dispatch: mx != 0 -> h (or hv), else my != 0 -> v, else tail-branch
        // to \type\()_16bpc_neon.
        cbnz            \mx, L(\type\()_bilin_h)
        cbnz            \my, L(\type\()_bilin_v)
        b               \type\()_16bpc_neon

L(\type\()_bilin_h):
        cbnz            \my, L(\type\()_bilin_hv)

        // Horizontal only: load the table offset and set up the shift/bias
        // vectors, then jump to the width-specific loop.
        movrel          x10, \type\()_bilin_h_tbl
        dup             v31.8h,  w11      // 4 - intermediate_bits
        ldrsw           x9,  [x10, x9, lsl #2]
        neg             v31.8h,  v31.8h   // -(4-intermediate_bits)
.ifc \type, put
        dup             v30.8h,  \bdmax   // intermediate_bits
.else
        movi            v29.8h,  #(PREP_BIAS >> 8), lsl #8
.endif
        add             x10, x10, x9
.ifc \type, put
        neg             v30.8h,  v30.8h   // -intermediate_bits
.endif
        br              x10

        // All loops below handle two rows per iteration: \src/\dst address the
        // even row, \sr2/\ds2 the odd row, and the strides are doubled.

20:     // 2xN h
        AARCH64_VALID_JUMP_TARGET
        // The 2xN code is only emitted for put; for prep this label has no
        // code of its own and is directly followed by the 4xN entry.
.ifc \type, put
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1
        lsl             \s_strd,  \s_strd,  #1
2:
        ld1             {v4.4h},  [\src], \s_strd
        ld1             {v6.4h},  [\sr2], \s_strd
        ext             v5.8b,   v4.8b,   v4.8b,   #2  // pixels shifted by one
        ext             v7.8b,   v6.8b,   v6.8b,   #2
        trn1            v4.2s,   v4.2s,   v6.2s        // pack both rows into one vector
        trn1            v5.2s,   v5.2s,   v7.2s
        subs            \h,  \h,  #2
        mul             v4.4h,   v4.4h,   v0.4h
        mla             v4.4h,   v5.4h,   v1.4h
        urshl           v4.4h,   v4.4h,   v31.4h
        urshl           v4.4h,   v4.4h,   v30.4h
        st1             {v4.s}[0], [\dst], \d_strd
        st1             {v4.s}[1], [\ds2], \d_strd
        b.gt            2b
        ret
.endif

40:     // 4xN h
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1
        lsl             \s_strd,  \s_strd,  #1
4:
        ld1             {v4.8h}, [\src], \s_strd
        ld1             {v6.8h}, [\sr2], \s_strd
        ext             v5.16b,  v4.16b,  v4.16b,  #2  // pixels shifted by one
        ext             v7.16b,  v6.16b,  v6.16b,  #2
        trn1            v4.2d,   v4.2d,   v6.2d        // low halves of both rows
        trn1            v5.2d,   v5.2d,   v7.2d
        subs            \h,  \h,  #2
        mul             v4.8h,   v4.8h,   v0.8h
        mla             v4.8h,   v5.8h,   v1.8h
        urshl           v4.8h,   v4.8h,   v31.8h
.ifc \type, put
        urshl           v4.8h,   v4.8h,   v30.8h
.else
        sub             v4.8h,   v4.8h,   v29.8h
.endif
        st1             {v4.8b},   [\dst], \d_strd
        st1             {v4.d}[1], [\ds2], \d_strd
        b.gt            4b
        ret

80:     // 8xN h
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1
        lsl             \s_strd,  \s_strd,  #1
8:
        // The 9th pixel of each row (byte offset 16) is fetched separately,
        // before the post-incrementing 8-pixel load moves the pointer.
        ldr             h5,  [\src, #16]
        ldr             h7,  [\sr2, #16]
        ld1             {v4.8h}, [\src], \s_strd
        ld1             {v6.8h}, [\sr2], \s_strd
        ext             v5.16b,  v4.16b,  v5.16b,  #2
        ext             v7.16b,  v6.16b,  v7.16b,  #2
        subs            \h,  \h,  #2
        mul             v4.8h,   v4.8h,   v0.8h
        mla             v4.8h,   v5.8h,   v1.8h
        mul             v6.8h,   v6.8h,   v0.8h
        mla             v6.8h,   v7.8h,   v1.8h
        urshl           v4.8h,   v4.8h,   v31.8h
        urshl           v6.8h,   v6.8h,   v31.8h
.ifc \type, put
        urshl           v4.8h,   v4.8h,   v30.8h
        urshl           v6.8h,   v6.8h,   v30.8h
.else
        sub             v4.8h,   v4.8h,   v29.8h
        sub             v6.8h,   v6.8h,   v29.8h
.endif
        st1             {v4.8h}, [\dst], \d_strd
        st1             {v6.8h}, [\ds2], \d_strd
        b.gt            8b
        ret
160:
320:
640:
1280:   // 16xN, 32xN, ... h
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1

        // Each row pass advances the source pointers by 16 + 2*w bytes (one
        // initial 16-byte load plus 32 bytes per 16 pixels), so the doubled
        // stride is reduced by that amount.
        sub             \s_strd,  \s_strd,  \w,  uxtw #1
        sub             \s_strd,  \s_strd,  #16
.ifc \type, put
        // put: the destination pointers advance by 2*w bytes per row pass.
        lsl             \d_strd,  \d_strd,  #1
        sub             \d_strd,  \d_strd,  \w,  uxtw #1
.endif
161:
        ld1             {v16.8h}, [\src], #16
        ld1             {v21.8h}, [\sr2], #16
        mov             \mx, \w                // \mx is reused as the column counter

16:
        ld1             {v17.8h, v18.8h}, [\src], #32
        ld1             {v22.8h, v23.8h}, [\sr2], #32
        ext             v19.16b, v16.16b, v17.16b, #2
        ext             v20.16b, v17.16b, v18.16b, #2
        ext             v24.16b, v21.16b, v22.16b, #2
        ext             v25.16b, v22.16b, v23.16b, #2
        mul             v16.8h,  v16.8h,  v0.8h
        mla             v16.8h,  v19.8h,  v1.8h
        mul             v17.8h,  v17.8h,  v0.8h
        mla             v17.8h,  v20.8h,  v1.8h
        mul             v21.8h,  v21.8h,  v0.8h
        mla             v21.8h,  v24.8h,  v1.8h
        mul             v22.8h,  v22.8h,  v0.8h
        mla             v22.8h,  v25.8h,  v1.8h
        urshl           v16.8h,  v16.8h,  v31.8h
        urshl           v17.8h,  v17.8h,  v31.8h
        urshl           v21.8h,  v21.8h,  v31.8h
        urshl           v22.8h,  v22.8h,  v31.8h
        subs            \mx, \mx, #16
.ifc \type, put
        urshl           v16.8h,  v16.8h,  v30.8h
        urshl           v17.8h,  v17.8h,  v30.8h
        urshl           v21.8h,  v21.8h,  v30.8h
        urshl           v22.8h,  v22.8h,  v30.8h
.else
        sub             v16.8h,  v16.8h,  v29.8h
        sub             v17.8h,  v17.8h,  v29.8h
        sub             v21.8h,  v21.8h,  v29.8h
        sub             v22.8h,  v22.8h,  v29.8h
.endif
        st1             {v16.8h, v17.8h}, [\dst], #32
        st1             {v21.8h, v22.8h}, [\ds2], #32
        b.le            9f

        // Carry the last loaded vector over as the start of the next 16 pixels.
        mov             v16.16b, v18.16b
        mov             v21.16b, v23.16b
        b               16b

9:
        // Move all four pointers on to the next pair of rows.
        add             \dst,  \dst,  \d_strd
        add             \ds2,  \ds2,  \d_strd
        add             \src,  \src,  \s_strd
        add             \sr2,  \sr2,  \s_strd

        subs            \h,  \h,  #2
        b.gt            161b
        ret
endfunc

// Offsets of the horizontal width-specific entry points, widest first;
// indexed by clz(w) - 24.
jumptable \type\()_bilin_h_tbl
        .word 1280b - \type\()_bilin_h_tbl
        .word 640b  - \type\()_bilin_h_tbl
        .word 320b  - \type\()_bilin_h_tbl
        .word 160b  - \type\()_bilin_h_tbl
        .word 80b   - \type\()_bilin_h_tbl
        .word 40b   - \type\()_bilin_h_tbl
        .word 20b   - \type\()_bilin_h_tbl
endjumptable


// Vertical-only bilinear filter. Reached from \type\()_bilin_16bpc_neon with
// v2/v3 holding the vertical weights, w9 the jump table index and w11 =
// 4 - intermediate_bits. put rounds with a fixed shift of 4; prep shifts by
// 4 - intermediate_bits and subtracts PREP_BIAS.
function L(\type\()_bilin_v)
        // NOTE(review): no branch visible in this function consumes the flags
        // of this compare; every path re-sets them (cmp/subs) before its first
        // conditional branch. Confirm before removing.
        cmp             \h,  #4
        movrel          x10, \type\()_bilin_v_tbl
.ifc \type, prep
        dup             v31.8h,  w11      // 4 - intermediate_bits
.endif
        ldrsw           x9,  [x10, x9, lsl #2]
.ifc \type, prep
        movi            v29.8h,  #(PREP_BIAS >> 8), lsl #8
        neg             v31.8h,  v31.8h   // -(4-intermediate_bits)
.endif
        add             x10, x10, x9
        br              x10

20:     // 2xN v
        AARCH64_VALID_JUMP_TARGET
        // As in the horizontal path, the 2xN code is only emitted for put.
.ifc \type, put
        cmp             \h,  #2
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1
        lsl             \d_strd,  \d_strd,  #1

        // 2x2 v
        ld1r            {v16.4s}, [\src], \s_strd
        b.gt            24f
22:
        // Final two rows; also entered from 24: when exactly two rows remain.
        ld1r            {v17.4s}, [\sr2], \s_strd
        ld1r            {v18.4s}, [\src], \s_strd
        trn1            v16.2s,  v16.2s,  v17.2s
        trn1            v17.2s,  v17.2s,  v18.2s
        mul             v4.4h,   v16.4h,  v2.4h
        mla             v4.4h,   v17.4h,  v3.4h
        urshr           v4.8h,   v4.8h,   #4
        str             s4,  [\dst]
        st1             {v4.s}[1], [\ds2]
        ret
24:     // 2x4, 2x6, 2x8, ... v
        ld1r            {v17.4s}, [\sr2], \s_strd
        ld1r            {v18.4s}, [\src], \s_strd
        ld1r            {v19.4s}, [\sr2], \s_strd
        ld1r            {v20.4s}, [\src], \s_strd
        sub             \h,  \h,  #4
        trn1            v16.2s,  v16.2s,  v17.2s
        trn1            v17.2s,  v17.2s,  v18.2s
        trn1            v18.2s,  v18.2s,  v19.2s
        trn1            v19.2s,  v19.2s,  v20.2s
        trn1            v16.2d,  v16.2d,  v18.2d
        trn1            v17.2d,  v17.2d,  v19.2d
        mul             v4.8h,   v16.8h,  v2.8h
        mla             v4.8h,   v17.8h,  v3.8h
        cmp             \h,  #2
        urshr           v4.8h,   v4.8h,   #4
        st1             {v4.s}[0], [\dst], \d_strd
        st1             {v4.s}[1], [\ds2], \d_strd
        st1             {v4.s}[2], [\dst], \d_strd
        st1             {v4.s}[3], [\ds2], \d_strd
        b.lt            0f                     // fewer than 2 rows left: done
        mov             v16.8b,  v20.8b        // last row becomes the next top row
        b.eq            22b                    // exactly 2 rows left
        b               24b
0:
        ret
.endif

40:     // 4xN v
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1
        lsl             \d_strd,  \d_strd,  #1
        ld1             {v16.4h}, [\src], \s_strd
4:
        ld1             {v17.4h}, [\sr2], \s_strd
        ld1             {v18.4h}, [\src], \s_strd
        trn1            v16.2d,  v16.2d,  v17.2d
        trn1            v17.2d,  v17.2d,  v18.2d
        mul             v4.8h,   v16.8h,  v2.8h
        mla             v4.8h,   v17.8h,  v3.8h
        subs            \h,  \h,  #2
.ifc \type, put
        urshr           v4.8h,   v4.8h,   #4
.else
        urshl           v4.8h,   v4.8h,   v31.8h
        sub             v4.8h,   v4.8h,   v29.8h
.endif
        st1             {v4.8b},   [\dst], \d_strd
        st1             {v4.d}[1], [\ds2], \d_strd
        b.le            0f
        mov             v16.8b,  v18.8b        // last row becomes the next top row
        b               4b
0:
        ret

80:     // 8xN v
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1
        lsl             \d_strd,  \d_strd,  #1
        ld1             {v16.8h}, [\src], \s_strd
8:
        ld1             {v17.8h}, [\sr2], \s_strd
        ld1             {v18.8h}, [\src], \s_strd
        mul             v4.8h,   v16.8h,  v2.8h
        mla             v4.8h,   v17.8h,  v3.8h
        mul             v5.8h,   v17.8h,  v2.8h
        mla             v5.8h,   v18.8h,  v3.8h
        subs            \h,  \h,  #2
.ifc \type, put
        urshr           v4.8h,   v4.8h,   #4
        urshr           v5.8h,   v5.8h,   #4
.else
        urshl           v4.8h,   v4.8h,   v31.8h
        urshl           v5.8h,   v5.8h,   v31.8h
        sub             v4.8h,   v4.8h,   v29.8h
        sub             v5.8h,   v5.8h,   v29.8h
.endif
        st1             {v4.8h}, [\dst], \d_strd
        st1             {v5.8h}, [\ds2], \d_strd
        b.le            0f
        mov             v16.16b, v18.16b       // last row becomes the next top row
        b               8b
0:
        ret

160:    // 16xN, 32xN, ...
320:
640:
1280:
        AARCH64_VALID_JUMP_TARGET
        // The block is processed in strips of 16 columns; \my keeps the
        // original height so \h can be restored for each strip.
        mov             \my, \h
1:
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1

        ld1             {v16.8h, v17.8h}, [\src], \s_strd
2:
        ld1             {v18.8h, v19.8h}, [\sr2], \s_strd
        ld1             {v20.8h, v21.8h}, [\src], \s_strd
        mul             v4.8h,   v16.8h,  v2.8h
        mla             v4.8h,   v18.8h,  v3.8h
        mul             v5.8h,   v17.8h,  v2.8h
        mla             v5.8h,   v19.8h,  v3.8h
        mul             v6.8h,   v18.8h,  v2.8h
        mla             v6.8h,   v20.8h,  v3.8h
        mul             v7.8h,   v19.8h,  v2.8h
        mla             v7.8h,   v21.8h,  v3.8h
        subs            \h,  \h,  #2
.ifc \type, put
        urshr           v4.8h,   v4.8h,   #4
        urshr           v5.8h,   v5.8h,   #4
        urshr           v6.8h,   v6.8h,   #4
        urshr           v7.8h,   v7.8h,   #4
.else
        urshl           v4.8h,   v4.8h,   v31.8h
        urshl           v5.8h,   v5.8h,   v31.8h
        urshl           v6.8h,   v6.8h,   v31.8h
        urshl           v7.8h,   v7.8h,   v31.8h
        sub             v4.8h,   v4.8h,   v29.8h
        sub             v5.8h,   v5.8h,   v29.8h
        sub             v6.8h,   v6.8h,   v29.8h
        sub             v7.8h,   v7.8h,   v29.8h
.endif
        st1             {v4.8h, v5.8h}, [\dst], \d_strd
        st1             {v6.8h, v7.8h}, [\ds2], \d_strd
        b.le            9f
        mov             v16.16b, v20.16b       // last row becomes the next top row
        mov             v17.16b, v21.16b
        b               2b
9:
        subs            \w,  \w,  #16
        b.le            0f
        // Next strip: undo the stride doubling, rewind \src by h + 2 rows
        // (the loop above read that many) and \dst by h rows, then step
        // 32 bytes (16 pixels) to the right.
        asr             \s_strd, \s_strd, #1
        asr             \d_strd, \d_strd, #1
        msub            \src, \s_strd, \xmy, \src
        msub            \dst, \d_strd, \xmy, \dst
        sub             \src, \src, \s_strd, lsl #1
        mov             \h,  \my
        add             \src, \src, #32
        add             \dst, \dst, #32
        b               1b
0:
        ret
endfunc

3229*c0909341SAndroid Build Coastguard Workerjumptable \type\()_bilin_v_tbl 3230*c0909341SAndroid Build Coastguard Worker .word 1280b - \type\()_bilin_v_tbl 3231*c0909341SAndroid Build Coastguard Worker .word 640b - \type\()_bilin_v_tbl 3232*c0909341SAndroid Build Coastguard Worker .word 320b - \type\()_bilin_v_tbl 3233*c0909341SAndroid Build Coastguard Worker .word 160b - \type\()_bilin_v_tbl 3234*c0909341SAndroid Build Coastguard Worker .word 80b - \type\()_bilin_v_tbl 3235*c0909341SAndroid Build Coastguard Worker .word 40b - \type\()_bilin_v_tbl 3236*c0909341SAndroid Build Coastguard Worker .word 20b - \type\()_bilin_v_tbl 3237*c0909341SAndroid Build Coastguard Workerendjumptable 3238*c0909341SAndroid Build Coastguard Worker 3239*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_bilin_hv) 3240*c0909341SAndroid Build Coastguard Worker movrel x10, \type\()_bilin_hv_tbl 3241*c0909341SAndroid Build Coastguard Worker dup v31.8h, w11 // 4 - intermediate_bits 3242*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x10, x9, lsl #2] 3243*c0909341SAndroid Build Coastguard Worker neg v31.8h, v31.8h // -(4-intermediate_bits) 3244*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3245*c0909341SAndroid Build Coastguard Worker dup v30.4s, w12 // 4 + intermediate_bits 3246*c0909341SAndroid Build Coastguard Worker.else 3247*c0909341SAndroid Build Coastguard Worker movi v29.8h, #(PREP_BIAS >> 8), lsl #8 3248*c0909341SAndroid Build Coastguard Worker.endif 3249*c0909341SAndroid Build Coastguard Worker add x10, x10, x9 3250*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3251*c0909341SAndroid Build Coastguard Worker neg v30.4s, v30.4s // -(4+intermediate_bits) 3252*c0909341SAndroid Build Coastguard Worker.endif 3253*c0909341SAndroid Build Coastguard Worker br x10 3254*c0909341SAndroid Build Coastguard Worker 3255*c0909341SAndroid Build Coastguard Worker20: // 2xN hv 3256*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3257*c0909341SAndroid 
Build Coastguard Worker.ifc \type, put 3258*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 3259*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 3260*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 3261*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 3262*c0909341SAndroid Build Coastguard Worker 3263*c0909341SAndroid Build Coastguard Worker ld1 {v20.4h}, [\src], \s_strd 3264*c0909341SAndroid Build Coastguard Worker ext v21.8b, v20.8b, v20.8b, #2 3265*c0909341SAndroid Build Coastguard Worker mul v16.4h, v20.4h, v0.4h 3266*c0909341SAndroid Build Coastguard Worker mla v16.4h, v21.4h, v1.4h 3267*c0909341SAndroid Build Coastguard Worker urshl v16.4h, v16.4h, v31.4h 3268*c0909341SAndroid Build Coastguard Worker 3269*c0909341SAndroid Build Coastguard Worker2: 3270*c0909341SAndroid Build Coastguard Worker ld1 {v22.4h}, [\sr2], \s_strd 3271*c0909341SAndroid Build Coastguard Worker ld1 {v24.4h}, [\src], \s_strd 3272*c0909341SAndroid Build Coastguard Worker ext v23.8b, v22.8b, v22.8b, #2 3273*c0909341SAndroid Build Coastguard Worker ext v25.8b, v24.8b, v24.8b, #2 3274*c0909341SAndroid Build Coastguard Worker trn1 v22.2s, v22.2s, v24.2s 3275*c0909341SAndroid Build Coastguard Worker trn1 v23.2s, v23.2s, v25.2s 3276*c0909341SAndroid Build Coastguard Worker mul v17.4h, v22.4h, v0.4h 3277*c0909341SAndroid Build Coastguard Worker mla v17.4h, v23.4h, v1.4h 3278*c0909341SAndroid Build Coastguard Worker urshl v17.4h, v17.4h, v31.4h 3279*c0909341SAndroid Build Coastguard Worker 3280*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v17.2s 3281*c0909341SAndroid Build Coastguard Worker 3282*c0909341SAndroid Build Coastguard Worker umull v4.4s, v16.4h, v2.4h 3283*c0909341SAndroid Build Coastguard Worker umlal v4.4s, v17.4h, v3.4h 3284*c0909341SAndroid Build Coastguard Worker urshl v4.4s, v4.4s, v30.4s 3285*c0909341SAndroid Build Coastguard Worker xtn v4.4h, v4.4s 3286*c0909341SAndroid Build Coastguard Worker 
subs \h, \h, #2 3287*c0909341SAndroid Build Coastguard Worker st1 {v4.s}[0], [\dst], \d_strd 3288*c0909341SAndroid Build Coastguard Worker st1 {v4.s}[1], [\ds2], \d_strd 3289*c0909341SAndroid Build Coastguard Worker b.le 0f 3290*c0909341SAndroid Build Coastguard Worker trn2 v16.2s, v17.2s, v17.2s 3291*c0909341SAndroid Build Coastguard Worker b 2b 3292*c0909341SAndroid Build Coastguard Worker0: 3293*c0909341SAndroid Build Coastguard Worker ret 3294*c0909341SAndroid Build Coastguard Worker.endif 3295*c0909341SAndroid Build Coastguard Worker 3296*c0909341SAndroid Build Coastguard Worker40: // 4xN hv 3297*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3298*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 3299*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 3300*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 3301*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 3302*c0909341SAndroid Build Coastguard Worker 3303*c0909341SAndroid Build Coastguard Worker ld1 {v20.8h}, [\src], \s_strd 3304*c0909341SAndroid Build Coastguard Worker ext v21.16b, v20.16b, v20.16b, #2 3305*c0909341SAndroid Build Coastguard Worker mul v16.4h, v20.4h, v0.4h 3306*c0909341SAndroid Build Coastguard Worker mla v16.4h, v21.4h, v1.4h 3307*c0909341SAndroid Build Coastguard Worker urshl v16.4h, v16.4h, v31.4h 3308*c0909341SAndroid Build Coastguard Worker 3309*c0909341SAndroid Build Coastguard Worker4: 3310*c0909341SAndroid Build Coastguard Worker ld1 {v22.8h}, [\sr2], \s_strd 3311*c0909341SAndroid Build Coastguard Worker ld1 {v24.8h}, [\src], \s_strd 3312*c0909341SAndroid Build Coastguard Worker ext v23.16b, v22.16b, v22.16b, #2 3313*c0909341SAndroid Build Coastguard Worker ext v25.16b, v24.16b, v24.16b, #2 3314*c0909341SAndroid Build Coastguard Worker trn1 v22.2d, v22.2d, v24.2d 3315*c0909341SAndroid Build Coastguard Worker trn1 v23.2d, v23.2d, v25.2d 3316*c0909341SAndroid Build Coastguard Worker mul v17.8h, v22.8h, 
v0.8h 3317*c0909341SAndroid Build Coastguard Worker mla v17.8h, v23.8h, v1.8h 3318*c0909341SAndroid Build Coastguard Worker urshl v17.8h, v17.8h, v31.8h 3319*c0909341SAndroid Build Coastguard Worker 3320*c0909341SAndroid Build Coastguard Worker trn1 v16.2d, v16.2d, v17.2d 3321*c0909341SAndroid Build Coastguard Worker 3322*c0909341SAndroid Build Coastguard Worker umull v4.4s, v16.4h, v2.4h 3323*c0909341SAndroid Build Coastguard Worker umlal v4.4s, v17.4h, v3.4h 3324*c0909341SAndroid Build Coastguard Worker umull2 v5.4s, v16.8h, v2.8h 3325*c0909341SAndroid Build Coastguard Worker umlal2 v5.4s, v17.8h, v3.8h 3326*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3327*c0909341SAndroid Build Coastguard Worker urshl v4.4s, v4.4s, v30.4s 3328*c0909341SAndroid Build Coastguard Worker urshl v5.4s, v5.4s, v30.4s 3329*c0909341SAndroid Build Coastguard Worker uzp1 v4.8h, v4.8h, v5.8h // Same as xtn, xtn2 3330*c0909341SAndroid Build Coastguard Worker.else 3331*c0909341SAndroid Build Coastguard Worker rshrn v4.4h, v4.4s, #4 3332*c0909341SAndroid Build Coastguard Worker rshrn2 v4.8h, v5.4s, #4 3333*c0909341SAndroid Build Coastguard Worker sub v4.8h, v4.8h, v29.8h 3334*c0909341SAndroid Build Coastguard Worker.endif 3335*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 3336*c0909341SAndroid Build Coastguard Worker st1 {v4.8b}, [\dst], \d_strd 3337*c0909341SAndroid Build Coastguard Worker st1 {v4.d}[1], [\ds2], \d_strd 3338*c0909341SAndroid Build Coastguard Worker b.le 0f 3339*c0909341SAndroid Build Coastguard Worker trn2 v16.2d, v17.2d, v17.2d 3340*c0909341SAndroid Build Coastguard Worker b 4b 3341*c0909341SAndroid Build Coastguard Worker0: 3342*c0909341SAndroid Build Coastguard Worker ret 3343*c0909341SAndroid Build Coastguard Worker 3344*c0909341SAndroid Build Coastguard Worker80: // 8xN, 16xN, ... 
hv 3345*c0909341SAndroid Build Coastguard Worker160: 3346*c0909341SAndroid Build Coastguard Worker320: 3347*c0909341SAndroid Build Coastguard Worker640: 3348*c0909341SAndroid Build Coastguard Worker1280: 3349*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3350*c0909341SAndroid Build Coastguard Worker mov \my, \h 3351*c0909341SAndroid Build Coastguard Worker 3352*c0909341SAndroid Build Coastguard Worker1: 3353*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 3354*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 3355*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 3356*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 3357*c0909341SAndroid Build Coastguard Worker 3358*c0909341SAndroid Build Coastguard Worker ldr h21, [\src, #16] 3359*c0909341SAndroid Build Coastguard Worker ld1 {v20.8h}, [\src], \s_strd 3360*c0909341SAndroid Build Coastguard Worker ext v21.16b, v20.16b, v21.16b, #2 3361*c0909341SAndroid Build Coastguard Worker mul v16.8h, v20.8h, v0.8h 3362*c0909341SAndroid Build Coastguard Worker mla v16.8h, v21.8h, v1.8h 3363*c0909341SAndroid Build Coastguard Worker urshl v16.8h, v16.8h, v31.8h 3364*c0909341SAndroid Build Coastguard Worker 3365*c0909341SAndroid Build Coastguard Worker2: 3366*c0909341SAndroid Build Coastguard Worker ldr h23, [\sr2, #16] 3367*c0909341SAndroid Build Coastguard Worker ld1 {v22.8h}, [\sr2], \s_strd 3368*c0909341SAndroid Build Coastguard Worker ldr h25, [\src, #16] 3369*c0909341SAndroid Build Coastguard Worker ld1 {v24.8h}, [\src], \s_strd 3370*c0909341SAndroid Build Coastguard Worker ext v23.16b, v22.16b, v23.16b, #2 3371*c0909341SAndroid Build Coastguard Worker ext v25.16b, v24.16b, v25.16b, #2 3372*c0909341SAndroid Build Coastguard Worker mul v17.8h, v22.8h, v0.8h 3373*c0909341SAndroid Build Coastguard Worker mla v17.8h, v23.8h, v1.8h 3374*c0909341SAndroid Build Coastguard Worker mul v18.8h, v24.8h, v0.8h 3375*c0909341SAndroid Build Coastguard Worker 
mla v18.8h, v25.8h, v1.8h 3376*c0909341SAndroid Build Coastguard Worker urshl v17.8h, v17.8h, v31.8h 3377*c0909341SAndroid Build Coastguard Worker urshl v18.8h, v18.8h, v31.8h 3378*c0909341SAndroid Build Coastguard Worker 3379*c0909341SAndroid Build Coastguard Worker umull v4.4s, v16.4h, v2.4h 3380*c0909341SAndroid Build Coastguard Worker umlal v4.4s, v17.4h, v3.4h 3381*c0909341SAndroid Build Coastguard Worker umull2 v5.4s, v16.8h, v2.8h 3382*c0909341SAndroid Build Coastguard Worker umlal2 v5.4s, v17.8h, v3.8h 3383*c0909341SAndroid Build Coastguard Worker umull v6.4s, v17.4h, v2.4h 3384*c0909341SAndroid Build Coastguard Worker umlal v6.4s, v18.4h, v3.4h 3385*c0909341SAndroid Build Coastguard Worker umull2 v7.4s, v17.8h, v2.8h 3386*c0909341SAndroid Build Coastguard Worker umlal2 v7.4s, v18.8h, v3.8h 3387*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3388*c0909341SAndroid Build Coastguard Worker urshl v4.4s, v4.4s, v30.4s 3389*c0909341SAndroid Build Coastguard Worker urshl v5.4s, v5.4s, v30.4s 3390*c0909341SAndroid Build Coastguard Worker urshl v6.4s, v6.4s, v30.4s 3391*c0909341SAndroid Build Coastguard Worker urshl v7.4s, v7.4s, v30.4s 3392*c0909341SAndroid Build Coastguard Worker uzp1 v4.8h, v4.8h, v5.8h // Same as xtn, xtn2 3393*c0909341SAndroid Build Coastguard Worker uzp1 v5.8h, v6.8h, v7.8h // Ditto 3394*c0909341SAndroid Build Coastguard Worker.else 3395*c0909341SAndroid Build Coastguard Worker rshrn v4.4h, v4.4s, #4 3396*c0909341SAndroid Build Coastguard Worker rshrn2 v4.8h, v5.4s, #4 3397*c0909341SAndroid Build Coastguard Worker rshrn v5.4h, v6.4s, #4 3398*c0909341SAndroid Build Coastguard Worker rshrn2 v5.8h, v7.4s, #4 3399*c0909341SAndroid Build Coastguard Worker sub v4.8h, v4.8h, v29.8h 3400*c0909341SAndroid Build Coastguard Worker sub v5.8h, v5.8h, v29.8h 3401*c0909341SAndroid Build Coastguard Worker.endif 3402*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 3403*c0909341SAndroid Build Coastguard Worker st1 {v4.8h}, [\dst], \d_strd 
3404*c0909341SAndroid Build Coastguard Worker st1 {v5.8h}, [\ds2], \d_strd 3405*c0909341SAndroid Build Coastguard Worker b.le 9f 3406*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 3407*c0909341SAndroid Build Coastguard Worker b 2b 3408*c0909341SAndroid Build Coastguard Worker9: 3409*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 3410*c0909341SAndroid Build Coastguard Worker b.le 0f 3411*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 3412*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 3413*c0909341SAndroid Build Coastguard Worker msub \src, \s_strd, \xmy, \src 3414*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 3415*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 3416*c0909341SAndroid Build Coastguard Worker mov \h, \my 3417*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 3418*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 3419*c0909341SAndroid Build Coastguard Worker b 1b 3420*c0909341SAndroid Build Coastguard Worker0: 3421*c0909341SAndroid Build Coastguard Worker ret 3422*c0909341SAndroid Build Coastguard Workerendfunc 3423*c0909341SAndroid Build Coastguard Worker 3424*c0909341SAndroid Build Coastguard Workerjumptable \type\()_bilin_hv_tbl 3425*c0909341SAndroid Build Coastguard Worker .word 1280b - \type\()_bilin_hv_tbl 3426*c0909341SAndroid Build Coastguard Worker .word 640b - \type\()_bilin_hv_tbl 3427*c0909341SAndroid Build Coastguard Worker .word 320b - \type\()_bilin_hv_tbl 3428*c0909341SAndroid Build Coastguard Worker .word 160b - \type\()_bilin_hv_tbl 3429*c0909341SAndroid Build Coastguard Worker .word 80b - \type\()_bilin_hv_tbl 3430*c0909341SAndroid Build Coastguard Worker .word 40b - \type\()_bilin_hv_tbl 3431*c0909341SAndroid Build Coastguard Worker .word 20b - \type\()_bilin_hv_tbl 3432*c0909341SAndroid Build Coastguard Workerendjumptable 3433*c0909341SAndroid Build Coastguard Worker.endm 3434*c0909341SAndroid 
// Instantiations of the subpel filter entry points for the "put" variant and
// for the 8tap part of the "prep" variant.
//
// make_8tap_fn and filter_fn are macros defined outside this part of the file;
// the arguments are forwarded positionally and are reproduced here unchanged.
// NOTE(review): the register lists presumably name, in order, the registers
// the generated code uses for dst, strides, src, w, h, mx, my and scratch
// pointers - confirm against the macro headers before changing any of them.
make_8tap_fn put, regular_sharp, REGULAR, SHARP, 8tap
make_8tap_fn put, smooth_sharp, SMOOTH, SHARP, 8tap
make_8tap_fn put, sharp, SHARP, SHARP, 8tap
make_8tap_fn put, sharp_regular, SHARP, REGULAR, 8tap
make_8tap_fn put, sharp_smooth, SHARP, SMOOTH, 8tap
filter_fn put, x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, w8, x9, x10, 8tap

make_8tap_fn put, regular, REGULAR, REGULAR, 6tap
make_8tap_fn put, regular_smooth, REGULAR, SMOOTH, 6tap
make_8tap_fn put, smooth, SMOOTH, SMOOTH, 6tap
make_8tap_fn put, smooth_regular, SMOOTH, REGULAR, 6tap
filter_fn put, x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, w8, x9, x10, 6tap
filter_bilin_fn put, x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, w8, x9, x10

make_8tap_fn prep, regular_sharp, REGULAR, SHARP, 8tap
make_8tap_fn prep, smooth_sharp, SMOOTH, SHARP, 8tap
make_8tap_fn prep, sharp, SHARP, SHARP, 8tap
make_8tap_fn prep, sharp_regular, SHARP, REGULAR, 8tap
make_8tap_fn prep, sharp_smooth, SHARP, SMOOTH, 8tap
filter_fn prep, x0, x8, x1, x2, w3, w4, w5, x5, w6, x6, w7, x9, x10, 8tap

// Remaining "prep" instantiations (6tap and bilinear); arguments are
// forwarded positionally to macros defined outside this part of the file.
make_8tap_fn prep, regular, REGULAR, REGULAR, 6tap
make_8tap_fn prep, regular_smooth, REGULAR, SMOOTH, 6tap
make_8tap_fn prep, smooth, SMOOTH, SMOOTH, 6tap
make_8tap_fn prep, smooth_regular, SMOOTH, REGULAR, 6tap
filter_fn prep, x0, x8, x1, x2, w3, w4, w5, x5, w6, x6, w7, x9, x10, 6tap
filter_bilin_fn prep, x0, x8, x1, x2, w3, w4, w5, x5, w6, x6, w7, x9, x10


// load_filter_row dst, src, inc
//
// Loads one 8-byte filter row into the 64-bit SIMD register "dst" from
// x11 + ((src >> 10) * 8), where the shift is arithmetic and the index is
// sign-extended, then advances the position "src" by "inc".
//
// In:    x11 = table base, src = 32-bit position register, inc = 32-bit step
// Out:   dst = 8 filter bytes, src += inc
// Clobb: w13
.macro load_filter_row dst, src, inc
        asr             w13, \src, #10
        add             \src, \src, \inc
        ldr             \dst, [x11, w13, sxtw #3]
.endm

// warp_filter_horz_neon
//
// Horizontal pass for one source row: produces 8 filtered samples, each the
// dot product of 8 consecutive 16-bit input samples with its own 8 signed
// 8-bit coefficients.
//
// In:    x2  = source row pointer (16 halfwords are read)
//        x3  = byte increment applied to x2 after the load
//        w5  = filter position for this row
//        w7  = filter position step between adjacent output samples
//        w8  = filter position step between rows
//        x11 = filter table base (see load_filter_row)
//        v14 = per-lane shift for srshl; callers pass -(7 - intermediate_bits)
// Out:   v16.4s = samples 0-3, v17.4s = samples 4-7 (rounded and shifted)
//        x2 += x3, w5 += w8
// Clobb: w12, w13, v0-v11, v18-v23
//
// This is an internal helper, not an AAPCS64-conforming function: it writes
// v8-v11 without saving them, so the caller must preserve d8-d11 itself.
function warp_filter_horz_neon
        add             w12, w5,  #512                  // + half of (1 << 10): the asr #10 in load_filter_row then rounds

        ld1             {v16.8h, v17.8h}, [x2], x3

        // Filter loads are interleaved with the sign extensions and the
        // multiplies; the instruction order below is kept as in the original.
        load_filter_row d0, w12, w7
        load_filter_row d1, w12, w7
        load_filter_row d2, w12, w7
        sxtl            v0.8h,   v0.8b
        load_filter_row d3, w12, w7
        sxtl            v1.8h,   v1.8b
        load_filter_row d4, w12, w7
        sxtl            v2.8h,   v2.8b
        load_filter_row d5, w12, w7
        sxtl            v3.8h,   v3.8b
        load_filter_row d6, w12, w7
        sxtl            v4.8h,   v4.8b
        load_filter_row d7, w12, w7
        sxtl            v5.8h,   v5.8b
        ext             v18.16b, v16.16b, v17.16b, #2*1 // input window for sample 1
        smull           v8.4s,   v16.4h,  v0.4h
        smull2          v9.4s,   v16.8h,  v0.8h
        sxtl            v6.8h,   v6.8b
        ext             v19.16b, v16.16b, v17.16b, #2*2
        smull           v10.4s,  v18.4h,  v1.4h
        smull2          v11.4s,  v18.8h,  v1.8h
        sxtl            v7.8h,   v7.8b
        ext             v20.16b, v16.16b, v17.16b, #2*3
        smull           v0.4s,   v19.4h,  v2.4h
        smull2          v1.4s,   v19.8h,  v2.8h
        ext             v21.16b, v16.16b, v17.16b, #2*4
        addp            v8.4s,   v8.4s,   v9.4s         // 4 partial sums of sample 0
        smull           v2.4s,   v20.4h,  v3.4h
        smull2          v3.4s,   v20.8h,  v3.8h
        ext             v22.16b, v16.16b, v17.16b, #2*5
        addp            v9.4s,   v10.4s,  v11.4s        // 4 partial sums of sample 1
        smull           v10.4s,  v21.4h,  v4.4h
        smull2          v11.4s,  v21.8h,  v4.8h
        ext             v23.16b, v16.16b, v17.16b, #2*6
        addp            v0.4s,   v0.4s,   v1.4s
        smull           v18.4s,  v22.4h,  v5.4h
        smull2          v19.4s,  v22.8h,  v5.8h
        ext             v16.16b, v16.16b, v17.16b, #2*7 // last use of the unshifted row
        addp            v1.4s,   v2.4s,   v3.4s
        addp            v2.4s,   v10.4s,  v11.4s
        smull           v20.4s,  v23.4h,  v6.4h
        smull2          v21.4s,  v23.8h,  v6.8h
        addp            v3.4s,   v18.4s,  v19.4s
        smull           v22.4s,  v16.4h,  v7.4h
        smull2          v23.4s,  v16.8h,  v7.8h
        addp            v4.4s,   v20.4s,  v21.4s
        addp            v5.4s,   v22.4s,  v23.4s

        // Second reduction level: 2 partial sums per sample.
        addp            v8.4s,   v8.4s,   v9.4s
        addp            v0.4s,   v0.4s,   v1.4s
        addp            v2.4s,   v2.4s,   v3.4s
        addp            v4.4s,   v4.4s,   v5.4s

        // Final reduction level: one 32-bit sum per sample.
        addp            v16.4s,  v8.4s,   v0.4s
        addp            v17.4s,  v2.4s,   v4.4s

        add             w5,  w5,  w8                    // advance the filter position to the next row

        srshl           v16.4s,  v16.4s,  v14.4s        // -(7 - intermediate_bits)
        srshl           v17.4s,  v17.4s,  v14.4s        // -(7 - intermediate_bits)

        ret
endfunc

// void dav1d_warp_affine_8x8_16bpc_neon(
//         pixel *dst, const ptrdiff_t dst_stride,
//         const pixel *src, const ptrdiff_t src_stride,
//         const int16_t *const abcd, int mx, int my,
//         const int bitdepth_max)
//
// The macro emits warp_affine_8x8<t>_16bpc_neon. With an empty argument the
// vertical result is rounded by 7 + intermediate_bits, narrowed with unsigned
// saturation and limited to bitdepth_max. With a non-empty argument the
// result is rounded by 7, PREP_BIAS is subtracted and x1 is doubled on entry.
// NOTE(review): the doubling suggests dst_stride is passed in 16-bit elements
// for that variant - confirm against the C prototype.
//
// In:    x0 = dst, x1 = dst_stride, x2 = src, x3 = src_stride, x4 = abcd,
//        w5 = mx, w6 = my, w7 = bitdepth_max
// Live:  w7/w8/w9/w4 = abcd[0..3] (after the sbfx sequence), w10 = rows left,
//        x11 = filter table base, x15 = return address,
//        v24-v31 = sliding window of 8 horizontally filtered rows,
//        v13/v14 = shift amounts, v15 = bitdepth_max or PREP_BIAS
// Stack: 0x40 bytes, used to preserve d8-d15.
.macro warp t
function warp_affine_8x8\t\()_16bpc_neon, export=1
        stp             d8,  d9,  [sp, #-0x40]!
        stp             d10, d11, [sp, #0x10]
        stp             d12, d13, [sp, #0x20]
        stp             d14, d15, [sp, #0x30]

.ifb \t
        dup             v15.8h,  w7                     // bitdepth_max
.else
        movi            v15.8h,  #(PREP_BIAS >> 8), lsl #8
.endif
        clz             w7,  w7
                                                        // intermediate_bits = clz(bitdepth_max) - 18
.ifb \t
        sub             w8,  w7,  #11                   // 7 + intermediate_bits = clz(bitdepth_max) - 18 + 7
.endif
        sub             w7,  w7,  #25                   // -(7 - intermediate_bits)
.ifb \t
        neg             w8,  w8                         // -(7 + intermediate_bits)
.endif
        dup             v14.4s,  w7                     // -(7 - intermediate_bits)
.ifb \t
        dup             v13.4s,  w8                     // -(7 + intermediate_bits)
.endif

        // Unpack the four signed 16-bit abcd entries from a single 64-bit load.
        ldr             x4,  [x4]
        sbfx            x7,  x4,  #0,  #16
        sbfx            x8,  x4,  #16, #16
        sbfx            x9,  x4,  #32, #16
        sbfx            x4,  x4,  #48, #16
        mov             w10, #8
        // Rewind src by 3 rows and 6 bytes (3 samples of 2 bytes).
        sub             x2,  x2,  x3,  lsl #1
        sub             x2,  x2,  x3
        sub             x2,  x2,  #6
        movrel          x11, X(mc_warp_filter), 64*8
        mov             x15, x30                        // bl below overwrites x30
.ifnb \t
        lsl             x1,  x1,  #1
.endif

        // Prime the window with the first 7 horizontally filtered rows.
        bl              warp_filter_horz_neon
        uzp1            v24.8h,  v16.8h,  v17.8h        // Same as xtn, xtn2
        bl              warp_filter_horz_neon
        uzp1            v25.8h,  v16.8h,  v17.8h        // Ditto
        bl              warp_filter_horz_neon
        uzp1            v26.8h,  v16.8h,  v17.8h        // Ditto
        bl              warp_filter_horz_neon
        uzp1            v27.8h,  v16.8h,  v17.8h        // Ditto
        bl              warp_filter_horz_neon
        uzp1            v28.8h,  v16.8h,  v17.8h        // Ditto
        bl              warp_filter_horz_neon
        uzp1            v29.8h,  v16.8h,  v17.8h        // Ditto
        bl              warp_filter_horz_neon
        uzp1            v30.8h,  v16.8h,  v17.8h        // Ditto

1:
        add             w14, w6,  #512                  // rounding offset for the asr #10 in load_filter_row
        bl              warp_filter_horz_neon
        uzp1            v31.8h,  v16.8h,  v17.8h        // Same as xtn, xtn2

        // One filter row per output column, stepping the position by abcd[2].
        load_filter_row d0, w14, w9
        load_filter_row d1, w14, w9
        load_filter_row d2, w14, w9
        load_filter_row d3, w14, w9
        load_filter_row d4, w14, w9
        load_filter_row d5, w14, w9
        load_filter_row d6, w14, w9
        load_filter_row d7, w14, w9
        transpose_8x8b_xtl v0, v1, v2, v3, v4, v5, v6, v7, sxtl

        // This ordering of smull/smlal/smull2/smlal2 is highly
        // beneficial for Cortex A53 here.
        smull           v16.4s,  v24.4h,  v0.4h
        smlal           v16.4s,  v25.4h,  v1.4h
        smlal           v16.4s,  v26.4h,  v2.4h
        smlal           v16.4s,  v27.4h,  v3.4h
        smlal           v16.4s,  v28.4h,  v4.4h
        smlal           v16.4s,  v29.4h,  v5.4h
        smlal           v16.4s,  v30.4h,  v6.4h
        smlal           v16.4s,  v31.4h,  v7.4h
        smull2          v17.4s,  v24.8h,  v0.8h
        smlal2          v17.4s,  v25.8h,  v1.8h
        smlal2          v17.4s,  v26.8h,  v2.8h
        smlal2          v17.4s,  v27.8h,  v3.8h
        smlal2          v17.4s,  v28.8h,  v4.8h
        smlal2          v17.4s,  v29.8h,  v5.8h
        smlal2          v17.4s,  v30.8h,  v6.8h
        smlal2          v17.4s,  v31.8h,  v7.8h

        // The window shift (v24 <- v25 ... v30 <- v31) is interleaved with
        // the output rounding and narrowing below.
        mov             v24.16b, v25.16b
        mov             v25.16b, v26.16b
.ifb \t
        srshl           v16.4s,  v16.4s,  v13.4s        // -(7 + intermediate_bits)
        srshl           v17.4s,  v17.4s,  v13.4s        // -(7 + intermediate_bits)
.else
        rshrn           v16.4h,  v16.4s,  #7
        rshrn2          v16.8h,  v17.4s,  #7
.endif
        mov             v26.16b, v27.16b
.ifb \t
        sqxtun          v16.4h,  v16.4s
        sqxtun2         v16.8h,  v17.4s
.else
        sub             v16.8h,  v16.8h,  v15.8h        // PREP_BIAS
.endif
        mov             v27.16b, v28.16b
        mov             v28.16b, v29.16b
.ifb \t
        umin            v16.8h,  v16.8h,  v15.8h        // bitdepth_max
.endif
        mov             v29.16b, v30.16b
        mov             v30.16b, v31.16b
        subs            w10, w10, #1                    // flags consumed by b.gt below (st1/add leave them intact)
        st1             {v16.8h}, [x0], x1

        add             w6,  w6,  w4                    // my += abcd[3]
        b.gt            1b

        ldp             d14, d15, [sp, #0x30]
        ldp             d12, d13, [sp, #0x20]
        ldp             d10, d11, [sp, #0x10]
        ldp             d8,  d9,  [sp], 0x40

        ret             x15
endfunc
.endm

warp
warp t

// void dav1d_emu_edge_16bpc_neon(
//         const intptr_t bw, const intptr_t bh,
Build Coastguard Worker// const intptr_t iw, const intptr_t ih, 3678*c0909341SAndroid Build Coastguard Worker// const intptr_t x, const intptr_t y, 3679*c0909341SAndroid Build Coastguard Worker// pixel *dst, const ptrdiff_t dst_stride, 3680*c0909341SAndroid Build Coastguard Worker// const pixel *ref, const ptrdiff_t ref_stride) 3681*c0909341SAndroid Build Coastguard Workerfunction emu_edge_16bpc_neon, export=1 3682*c0909341SAndroid Build Coastguard Worker ldp x8, x9, [sp] 3683*c0909341SAndroid Build Coastguard Worker 3684*c0909341SAndroid Build Coastguard Worker // ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride) 3685*c0909341SAndroid Build Coastguard Worker // ref += iclip(x, 0, iw - 1) 3686*c0909341SAndroid Build Coastguard Worker sub x12, x3, #1 // ih - 1 3687*c0909341SAndroid Build Coastguard Worker cmp x5, x3 3688*c0909341SAndroid Build Coastguard Worker sub x13, x2, #1 // iw - 1 3689*c0909341SAndroid Build Coastguard Worker csel x12, x12, x5, ge // min(y, ih - 1) 3690*c0909341SAndroid Build Coastguard Worker cmp x4, x2 3691*c0909341SAndroid Build Coastguard Worker bic x12, x12, x12, asr #63 // max(min(y, ih - 1), 0) 3692*c0909341SAndroid Build Coastguard Worker csel x13, x13, x4, ge // min(x, iw - 1) 3693*c0909341SAndroid Build Coastguard Worker bic x13, x13, x13, asr #63 // max(min(x, iw - 1), 0) 3694*c0909341SAndroid Build Coastguard Worker madd x8, x12, x9, x8 // ref += iclip() * stride 3695*c0909341SAndroid Build Coastguard Worker add x8, x8, x13, lsl #1 // ref += iclip() 3696*c0909341SAndroid Build Coastguard Worker 3697*c0909341SAndroid Build Coastguard Worker // bottom_ext = iclip(y + bh - ih, 0, bh - 1) 3698*c0909341SAndroid Build Coastguard Worker // top_ext = iclip(-y, 0, bh - 1) 3699*c0909341SAndroid Build Coastguard Worker add x10, x5, x1 // y + bh 3700*c0909341SAndroid Build Coastguard Worker neg x5, x5 // -y 3701*c0909341SAndroid Build Coastguard Worker sub x10, x10, x3 // y + bh - ih 3702*c0909341SAndroid Build Coastguard Worker sub x12, x1, 
#1 // bh - 1 3703*c0909341SAndroid Build Coastguard Worker cmp x10, x1 3704*c0909341SAndroid Build Coastguard Worker bic x5, x5, x5, asr #63 // max(-y, 0) 3705*c0909341SAndroid Build Coastguard Worker csel x10, x10, x12, lt // min(y + bh - ih, bh-1) 3706*c0909341SAndroid Build Coastguard Worker cmp x5, x1 3707*c0909341SAndroid Build Coastguard Worker bic x10, x10, x10, asr #63 // max(min(y + bh - ih, bh-1), 0) 3708*c0909341SAndroid Build Coastguard Worker csel x5, x5, x12, lt // min(max(-y, 0), bh-1) 3709*c0909341SAndroid Build Coastguard Worker 3710*c0909341SAndroid Build Coastguard Worker // right_ext = iclip(x + bw - iw, 0, bw - 1) 3711*c0909341SAndroid Build Coastguard Worker // left_ext = iclip(-x, 0, bw - 1) 3712*c0909341SAndroid Build Coastguard Worker add x11, x4, x0 // x + bw 3713*c0909341SAndroid Build Coastguard Worker neg x4, x4 // -x 3714*c0909341SAndroid Build Coastguard Worker sub x11, x11, x2 // x + bw - iw 3715*c0909341SAndroid Build Coastguard Worker sub x13, x0, #1 // bw - 1 3716*c0909341SAndroid Build Coastguard Worker cmp x11, x0 3717*c0909341SAndroid Build Coastguard Worker bic x4, x4, x4, asr #63 // max(-x, 0) 3718*c0909341SAndroid Build Coastguard Worker csel x11, x11, x13, lt // min(x + bw - iw, bw-1) 3719*c0909341SAndroid Build Coastguard Worker cmp x4, x0 3720*c0909341SAndroid Build Coastguard Worker bic x11, x11, x11, asr #63 // max(min(x + bw - iw, bw-1), 0) 3721*c0909341SAndroid Build Coastguard Worker csel x4, x4, x13, lt // min(max(-x, 0), bw - 1) 3722*c0909341SAndroid Build Coastguard Worker 3723*c0909341SAndroid Build Coastguard Worker // center_h = bh - top_ext - bottom_ext 3724*c0909341SAndroid Build Coastguard Worker // dst += top_ext * PXSTRIDE(dst_stride) 3725*c0909341SAndroid Build Coastguard Worker // center_w = bw - left_ext - right_ext 3726*c0909341SAndroid Build Coastguard Worker sub x1, x1, x5 // bh - top_ext 3727*c0909341SAndroid Build Coastguard Worker madd x6, x5, x7, x6 3728*c0909341SAndroid Build Coastguard Worker 
sub x2, x0, x4 // bw - left_ext 3729*c0909341SAndroid Build Coastguard Worker sub x1, x1, x10 // center_h = bh - top_ext - bottom_ext 3730*c0909341SAndroid Build Coastguard Worker sub x2, x2, x11 // center_w = bw - left_ext - right_ext 3731*c0909341SAndroid Build Coastguard Worker 3732*c0909341SAndroid Build Coastguard Worker mov x14, x6 // backup of dst 3733*c0909341SAndroid Build Coastguard Worker 3734*c0909341SAndroid Build Coastguard Worker.macro v_loop need_left, need_right 3735*c0909341SAndroid Build Coastguard Worker0: 3736*c0909341SAndroid Build Coastguard Worker.if \need_left 3737*c0909341SAndroid Build Coastguard Worker ld1r {v0.8h}, [x8] 3738*c0909341SAndroid Build Coastguard Worker mov x12, x6 // out = dst 3739*c0909341SAndroid Build Coastguard Worker mov x3, x4 3740*c0909341SAndroid Build Coastguard Worker mov v1.16b, v0.16b 3741*c0909341SAndroid Build Coastguard Worker1: 3742*c0909341SAndroid Build Coastguard Worker subs x3, x3, #16 3743*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h}, [x12], #32 3744*c0909341SAndroid Build Coastguard Worker b.gt 1b 3745*c0909341SAndroid Build Coastguard Worker.endif 3746*c0909341SAndroid Build Coastguard Worker mov x13, x8 3747*c0909341SAndroid Build Coastguard Worker add x12, x6, x4, lsl #1 // out = dst + left_ext 3748*c0909341SAndroid Build Coastguard Worker mov x3, x2 3749*c0909341SAndroid Build Coastguard Worker1: 3750*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x13], #64 3751*c0909341SAndroid Build Coastguard Worker subs x3, x3, #32 3752*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x12], #64 3753*c0909341SAndroid Build Coastguard Worker b.gt 1b 3754*c0909341SAndroid Build Coastguard Worker.if \need_right 3755*c0909341SAndroid Build Coastguard Worker add x3, x8, x2, lsl #1 // in + center_w 3756*c0909341SAndroid Build Coastguard Worker sub x3, x3, #2 // in + center_w - 1 3757*c0909341SAndroid Build Coastguard Worker add x12, x6, x4, lsl #1 
// dst + left_ext 3758*c0909341SAndroid Build Coastguard Worker ld1r {v0.8h}, [x3] 3759*c0909341SAndroid Build Coastguard Worker add x12, x12, x2, lsl #1 // out = dst + left_ext + center_w 3760*c0909341SAndroid Build Coastguard Worker mov x3, x11 3761*c0909341SAndroid Build Coastguard Worker mov v1.16b, v0.16b 3762*c0909341SAndroid Build Coastguard Worker1: 3763*c0909341SAndroid Build Coastguard Worker subs x3, x3, #16 3764*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h}, [x12], #32 3765*c0909341SAndroid Build Coastguard Worker b.gt 1b 3766*c0909341SAndroid Build Coastguard Worker.endif 3767*c0909341SAndroid Build Coastguard Worker 3768*c0909341SAndroid Build Coastguard Worker subs x1, x1, #1 // center_h-- 3769*c0909341SAndroid Build Coastguard Worker add x6, x6, x7 3770*c0909341SAndroid Build Coastguard Worker add x8, x8, x9 3771*c0909341SAndroid Build Coastguard Worker b.gt 0b 3772*c0909341SAndroid Build Coastguard Worker.endm 3773*c0909341SAndroid Build Coastguard Worker 3774*c0909341SAndroid Build Coastguard Worker cbz x4, 2f 3775*c0909341SAndroid Build Coastguard Worker // need_left 3776*c0909341SAndroid Build Coastguard Worker cbz x11, 3f 3777*c0909341SAndroid Build Coastguard Worker // need_left + need_right 3778*c0909341SAndroid Build Coastguard Worker v_loop 1, 1 3779*c0909341SAndroid Build Coastguard Worker b 5f 3780*c0909341SAndroid Build Coastguard Worker 3781*c0909341SAndroid Build Coastguard Worker2: 3782*c0909341SAndroid Build Coastguard Worker // !need_left 3783*c0909341SAndroid Build Coastguard Worker cbz x11, 4f 3784*c0909341SAndroid Build Coastguard Worker // !need_left + need_right 3785*c0909341SAndroid Build Coastguard Worker v_loop 0, 1 3786*c0909341SAndroid Build Coastguard Worker b 5f 3787*c0909341SAndroid Build Coastguard Worker 3788*c0909341SAndroid Build Coastguard Worker3: 3789*c0909341SAndroid Build Coastguard Worker // need_left + !need_right 3790*c0909341SAndroid Build Coastguard Worker v_loop 1, 0 3791*c0909341SAndroid 
Build Coastguard Worker b 5f 3792*c0909341SAndroid Build Coastguard Worker 3793*c0909341SAndroid Build Coastguard Worker4: 3794*c0909341SAndroid Build Coastguard Worker // !need_left + !need_right 3795*c0909341SAndroid Build Coastguard Worker v_loop 0, 0 3796*c0909341SAndroid Build Coastguard Worker 3797*c0909341SAndroid Build Coastguard Worker5: 3798*c0909341SAndroid Build Coastguard Worker 3799*c0909341SAndroid Build Coastguard Worker cbz x10, 3f 3800*c0909341SAndroid Build Coastguard Worker // need_bottom 3801*c0909341SAndroid Build Coastguard Worker sub x8, x6, x7 // ref = dst - stride 3802*c0909341SAndroid Build Coastguard Worker mov x4, x0 3803*c0909341SAndroid Build Coastguard Worker1: 3804*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x8], #64 3805*c0909341SAndroid Build Coastguard Worker mov x3, x10 3806*c0909341SAndroid Build Coastguard Worker2: 3807*c0909341SAndroid Build Coastguard Worker subs x3, x3, #1 3808*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x7 3809*c0909341SAndroid Build Coastguard Worker b.gt 2b 3810*c0909341SAndroid Build Coastguard Worker msub x6, x7, x10, x6 // dst -= bottom_ext * stride 3811*c0909341SAndroid Build Coastguard Worker subs x4, x4, #32 // bw -= 32 3812*c0909341SAndroid Build Coastguard Worker add x6, x6, #64 // dst += 32 3813*c0909341SAndroid Build Coastguard Worker b.gt 1b 3814*c0909341SAndroid Build Coastguard Worker 3815*c0909341SAndroid Build Coastguard Worker3: 3816*c0909341SAndroid Build Coastguard Worker cbz x5, 3f 3817*c0909341SAndroid Build Coastguard Worker // need_top 3818*c0909341SAndroid Build Coastguard Worker msub x6, x7, x5, x14 // dst = stored_dst - top_ext * stride 3819*c0909341SAndroid Build Coastguard Worker1: 3820*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x14], #64 3821*c0909341SAndroid Build Coastguard Worker mov x3, x5 3822*c0909341SAndroid Build Coastguard Worker2: 3823*c0909341SAndroid Build 
Coastguard Worker subs x3, x3, #1 3824*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x7 3825*c0909341SAndroid Build Coastguard Worker b.gt 2b 3826*c0909341SAndroid Build Coastguard Worker msub x6, x7, x5, x6 // dst -= top_ext * stride 3827*c0909341SAndroid Build Coastguard Worker subs x0, x0, #32 // bw -= 32 3828*c0909341SAndroid Build Coastguard Worker add x6, x6, #64 // dst += 32 3829*c0909341SAndroid Build Coastguard Worker b.gt 1b 3830*c0909341SAndroid Build Coastguard Worker 3831*c0909341SAndroid Build Coastguard Worker3: 3832*c0909341SAndroid Build Coastguard Worker ret 3833*c0909341SAndroid Build Coastguard Workerendfunc 3834