/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Janne Grunau
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

#define PREP_BIAS 8192

// ---------------------------------------------------------------------------
// avg: combine 16 samples from each of the two intermediate buffers.
//
// Computes, per 16 bit lane:
//   (sat(tmp1 + tmp2) clamped below at q12, minus q12) >> (intermediate_bits+1)
//
// In:    r2, r3   = tmp1, tmp2 pointers (loads carry a :128 alignment hint),
//                   both post-incremented by 32 bytes
//        q12      = -2*PREP_BIAS - (1 << intermediate_bits)
//        q13      = -(intermediate_bits+1); negative count => right shift
// Out:   \d0, \d1 = 8 result samples each
// Clobb: q0-q3
// Note:  \d00, \d01, \d10, \d11 are not referenced; they exist so that avg,
//        w_avg and mask share one parameter list for bidir_fn.
// ---------------------------------------------------------------------------
.macro avg d0, d00, d01, d1, d10, d11
        vld1.16         {q0, q1}, [r2, :128]!
        vld1.16         {q2, q3}, [r3, :128]!
        vqadd.s16       q0,  q0,  q2
        vqadd.s16       q1,  q1,  q3
        vmax.s16        q0,  q0,  q12 // -2*PREP_BIAS - 1 << intermediate_bits
        vmax.s16        q1,  q1,  q12 // -2*PREP_BIAS - 1 << intermediate_bits
        vqsub.s16       q0,  q0,  q12 // -2*PREP_BIAS - 1 << intermediate_bits
        vqsub.s16       q1,  q1,  q12 // -2*PREP_BIAS - 1 << intermediate_bits
        vshl.s16        \d0, q0,  q13 // -(intermediate_bits+1)
        vshl.s16        \d1, q1,  q13 // -(intermediate_bits+1)
.endm

// ---------------------------------------------------------------------------
// w_avg: weighted combination of 16 samples from the two intermediate buffers.
//
// Computes, per lane, in 32 bit precision:
//   t = tmp2 + (((tmp2 - tmp1) * q4) >> 4)
// then narrows to 16 bit, applies a rounding shift by q13, adds q12 and
// clamps the result to [q14, q15].
//
// In:    r2, r3   = tmp1, tmp2 pointers, post-incremented by 32 bytes
//        q4       = multiplier in every 32 bit lane (bidir_fn loads the
//                   negated third stack argument here)
//        q12      = PREP_BIAS >> intermediate_bits
//        q13      = -intermediate_bits
//        q14, q15 = 0, bitdepth_max
// Out:   \d0, \d1 = 8 result samples each
// Clobb: q0-q3
// Note:  \d0/\d1 double as 32 bit temporaries for the low four lanes while
//        q0/q1 hold the high four lanes; \d00/\d01 and \d10/\d11 must be the
//        low/high d halves of \d0 and \d1 so the narrowing lands in place.
// ---------------------------------------------------------------------------
.macro w_avg d0, d00, d01, d1, d10, d11
        vld1.16         {q0, q1}, [r2, :128]!
        vld1.16         {q2, q3}, [r3, :128]!
        // This difference requires a 17 bit range, and all bits are
        // significant for the following multiplication.
        vsubl.s16       \d0, d4,  d0
        vsubl.s16       q0,  d5,  d1
        vsubl.s16       \d1, d6,  d2
        vsubl.s16       q1,  d7,  d3
        vmul.s32        \d0, \d0, q4
        vmul.s32        q0,  q0,  q4
        vmul.s32        \d1, \d1, q4
        vmul.s32        q1,  q1,  q4
        vshr.s32        \d0, \d0, #4
        vshr.s32        q0,  q0,  #4
        vshr.s32        \d1, \d1, #4
        vshr.s32        q1,  q1,  #4
        vaddw.s16       \d0, \d0, d4
        vaddw.s16       q0,  q0,  d5
        vaddw.s16       \d1, \d1, d6
        vaddw.s16       q1,  q1,  d7
        vmovn.i32       \d00, \d0
        vmovn.i32       \d01, q0
        vmovn.i32       \d10, \d1
        vmovn.i32       \d11, q1
        vrshl.s16       \d0, \d0, q13 // -intermediate_bits
        vrshl.s16       \d1, \d1, q13 // -intermediate_bits
        vadd.s16        \d0, \d0, q12 // PREP_BIAS >> intermediate_bits
        vadd.s16        \d1, \d1, q12 // PREP_BIAS >> intermediate_bits
        vmin.s16        \d0, \d0, q15 // bitdepth_max
        vmin.s16        \d1, \d1, q15 // bitdepth_max
        vmax.s16        \d0, \d0, q14 // 0
        vmax.s16        \d1, \d1, q14 // 0
.endm

// ---------------------------------------------------------------------------
// mask: per-sample masked combination of 16 samples from the two buffers.
//
// Same arithmetic as w_avg, but the multiplier is read per sample from the
// byte stream at r6 (negated, then sign-extended to 32 bit) and the product
// is shifted right by 6 instead of 4:
//   t = tmp2 + (((tmp2 - tmp1) * -m) >> 6)
//
// In:    r2, r3   = tmp1, tmp2 pointers, post-incremented by 32 bytes
//        r6       = mask pointer (16 bytes consumed, post-incremented)
//        q12      = PREP_BIAS >> intermediate_bits
//        q13      = -intermediate_bits
//        q14, q15 = 0, bitdepth_max
// Out:   \d0, \d1 = 8 result samples each
// Clobb: q0-q7
// Note:  same register-aliasing requirement on \d00/\d01/\d10/\d11 as w_avg.
// ---------------------------------------------------------------------------
.macro mask d0, d00, d01, d1, d10, d11
        vld1.8          {q7},     [r6, :128]!
        vld1.16         {q0, q1}, [r2, :128]!
        vneg.s8         q7,  q7
        vld1.16         {q2, q3}, [r3, :128]!
        vmovl.s8        q6,  d14
        vmovl.s8        q7,  d15
        vmovl.s16       q4,  d12
        vmovl.s16       q5,  d13
        vmovl.s16       q6,  d14
        vmovl.s16       q7,  d15
        vsubl.s16       \d0, d4,  d0
        vsubl.s16       q0,  d5,  d1
        vsubl.s16       \d1, d6,  d2
        vsubl.s16       q1,  d7,  d3
        vmul.s32        \d0, \d0, q4
        vmul.s32        q0,  q0,  q5
        vmul.s32        \d1, \d1, q6
        vmul.s32        q1,  q1,  q7
        vshr.s32        \d0, \d0, #6
        vshr.s32        q0,  q0,  #6
        vshr.s32        \d1, \d1, #6
        vshr.s32        q1,  q1,  #6
        vaddw.s16       \d0, \d0, d4
        vaddw.s16       q0,  q0,  d5
        vaddw.s16       \d1, \d1, d6
        vaddw.s16       q1,  q1,  d7
        vmovn.i32       \d00, \d0
        vmovn.i32       \d01, q0
        vmovn.i32       \d10, \d1
        vmovn.i32       \d11, q1
        vrshl.s16       \d0, \d0, q13 // -intermediate_bits
        vrshl.s16       \d1, \d1, q13 // -intermediate_bits
        vadd.s16        \d0, \d0, q12 // PREP_BIAS >> intermediate_bits
        vadd.s16        \d1, \d1, q12 // PREP_BIAS >> intermediate_bits
        vmin.s16        \d0, \d0, q15 // bitdepth_max
        vmin.s16        \d1, \d1, q15 // bitdepth_max
        vmax.s16        \d0, \d0, q14 // 0
        vmax.s16        \d1, \d1, q14 // 0
.endm

// ---------------------------------------------------------------------------
// bidir_fn type, bdmax
//
// Emits the exported function \type\()_16bpc_neon, where \type is one of the
// kernel macros above (avg, w_avg, mask) and \bdmax names the core register
// that holds bitdepth_max when intermediate_bits is derived.
//
// Register use inside the generated function:
//   r0       = destination pointer
//   r1       = destination stride in bytes (used as store post-increment)
//   r2, r3   = tmp1, tmp2 source pointers (advanced by the kernel macro)
//   r4       = first stack argument (w); only clz(w) - 24 is used, as the
//              index into the jump table (w = 128 -> entry 0 ... w = 4 -> 5)
//   r5       = second stack argument (h); rows remaining
//   r6       = third stack argument: bitdepth_max for avg, the multiplier for
//              w_avg, the mask pointer for mask
//   r7       = fourth stack argument (bitdepth_max) for w_avg/mask, then
//              scratch / second destination pointer
//   r12, lr  = scratch
// The stack arguments sit at sp+20 and up because push {r4-r7,lr} moves sp
// down by 20 bytes; they are read before any vpush changes sp again.
// q4 (w_avg) or q4-q7 (mask) are saved and restored around the body.
//
// The first kernel expansion runs before the table jump, so every width loop
// is entered with 16 finished samples already in q8/q9.
// ---------------------------------------------------------------------------
.macro bidir_fn type, bdmax
function \type\()_16bpc_neon, export=1
        push            {r4-r7,lr}
        ldrd            r4,  r5,  [sp, #20]
        ldr             r6,  [sp, #28]
        clz             r4,  r4
.ifnc \type, avg
        ldr             r7,  [sp, #32]
        vmov.i16        q14, #0
        vdup.16         q15, r7         // bitdepth_max
.endif
.ifc \type, w_avg
        vpush           {q4}
.endif
.ifc \type, mask
        vpush           {q4-q7}
.endif
        clz             r7,  \bdmax
        sub             r7,  r7,  #18   // intermediate_bits = clz(bitdepth_max) - 18
.ifc \type, avg
        mov             lr,  #1
        movw            r12, #2*PREP_BIAS
        lsl             lr,  lr,  r7    // 1 << intermediate_bits
        neg             r12, r12        // -2*PREP_BIAS
        add             r7,  r7,  #1
        sub             r12, r12, lr    // -2*PREP_BIAS - 1 << intermediate_bits
        neg             r7,  r7         // -(intermediate_bits+1)
        vdup.16         q12, r12        // -2*PREP_BIAS - 1 << intermediate_bits
        vdup.16         q13, r7         // -(intermediate_bits+1)
.else
        mov             r12, #PREP_BIAS
        lsr             r12, r12, r7    // PREP_BIAS >> intermediate_bits
        neg             r7,  r7         // -intermediate_bits
        vdup.16         q12, r12        // PREP_BIAS >> intermediate_bits
        vdup.16         q13, r7         // -intermediate_bits
.endif
.ifc \type, w_avg
        vdup.32         q4,  r6
        vneg.s32        q4,  q4
.endif
        adr             r7,  L(\type\()_tbl)
        sub             r4,  r4,  #24
        \type           q8,  d16, d17, q9,  d18, d19
        ldr             r4,  [r7, r4, lsl #2]
        add             r7,  r7,  r4
        bx              r7

        .align 2
L(\type\()_tbl):
        .word 1280f - L(\type\()_tbl) + CONFIG_THUMB
        .word 640f  - L(\type\()_tbl) + CONFIG_THUMB
        .word 320f  - L(\type\()_tbl) + CONFIG_THUMB
        .word 160f  - L(\type\()_tbl) + CONFIG_THUMB
        .word 80f   - L(\type\()_tbl) + CONFIG_THUMB
        .word 40f   - L(\type\()_tbl) + CONFIG_THUMB

        // w == 4: one kernel expansion covers four rows; r0 and r7 walk
        // alternate rows with a doubled stride.
40:
        add             r7,  r0,  r1
        lsl             r1,  r1,  #1
4:
        subs            r5,  r5,  #4
        vst1.16         {d16},  [r0, :64], r1
        vst1.16         {d17},  [r7, :64], r1
        vst1.16         {d18},  [r0, :64], r1
        vst1.16         {d19},  [r7, :64], r1
        ble             0f
        \type           q8,  d16, d17, q9,  d18, d19
        b               4b
        // w == 8: one kernel expansion covers two rows.
80:
        add             r7,  r0,  r1
        lsl             r1,  r1,  #1
8:
        vst1.16         {q8},  [r0, :128], r1
        subs            r5,  r5,  #2
        vst1.16         {q9},  [r7, :128], r1
        ble             0f
        \type           q8,  d16, d17, q9,  d18, d19
        b               8b
        // w == 16: two kernel expansions (q8/q9 and q10/q11) give two rows.
160:
16:
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r1
        subs            r5,  r5,  #2
        vst1.16         {q10, q11}, [r0, :128], r1
        ble             0f
        \type           q8,  d16, d17, q9,  d18, d19
        b               16b
        // w == 32: one row per iteration; r7 addresses the second 32 bytes.
320:
        add             r7,  r0,  #32
32:
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r1
        subs            r5,  r5,  #1
        vst1.16         {q10, q11}, [r7, :128], r1
        ble             0f
        \type           q8,  d16, d17, q9,  d18, d19
        b               32b
        // w == 64: one row per iteration; r12 steps 64 bytes within the row
        // and r1 (stride - 64) moves both pointers on to the next row.
640:
        add             r7,  r0,  #32
        mov             r12, #64
        sub             r1,  r1,  #64
64:
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r12
        \type           q8,  d16, d17, q9,  d18, d19
        vst1.16         {q10, q11}, [r7, :128], r12
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r1
        subs            r5,  r5,  #1
        vst1.16         {q10, q11}, [r7, :128], r1
        ble             0f
        \type           q8,  d16, d17, q9,  d18, d19
        b               64b
        // w == 128: as for 64, with three 64 byte steps inside the row and
        // r1 = stride - 192 for the final step.
1280:
        add             r7,  r0,  #32
        mov             r12, #64
        sub             r1,  r1,  #192
128:
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r12
        \type           q8,  d16, d17, q9,  d18, d19
        vst1.16         {q10, q11}, [r7, :128], r12
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r12
        \type           q8,  d16, d17, q9,  d18, d19
        vst1.16         {q10, q11}, [r7, :128], r12
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r12
        \type           q8,  d16, d17, q9,  d18, d19
        vst1.16         {q10, q11}, [r7, :128], r12
        \type           q10, d20, d21, q11, d22, d23
        vst1.16         {q8,  q9},  [r0, :128], r1
        subs            r5,  r5,  #1
        vst1.16         {q10, q11}, [r7, :128], r1
        ble             0f
        \type           q8,  d16, d17, q9,  d18, d19
        b               128b
        // Common exit: restore whatever the prologue pushed, in reverse.
0:
.ifc \type, mask
        vpop            {q4-q7}
.endif
.ifc \type, w_avg
        vpop            {q4}
.endif
        pop             {r4-r7,pc}
endfunc
.endm

bidir_fn avg, r6
bidir_fn w_avg, r7
bidir_fn mask, r7


// ---------------------------------------------------------------------------
// w_mask_fn type          (type = 444, 422 or 420)
//
// Emits the exported function w_mask_\type\()_16bpc_neon. It blends tmp1 and
// tmp2 with a weight derived from abs(tmp1 - tmp2) and also writes the
// derived mask, subsampled according to \type.
//
// Register use:
//   r0, r12  = destination pointers for even/odd rows (r1 is doubled)
//   r1       = destination stride in bytes
//   r2, r3   = tmp1, tmp2 source pointers
//   r4       = first stack argument (w); clz(w) - 24 indexes the jump table
//   r5       = second stack argument (h); rows remaining
//   r6       = third stack argument; mask output pointer
//   r7       = fourth stack argument (sign); reused as a pointer for w >= 16
//   r8       = fifth stack argument (bitdepth_max), then -sh
//   r10      = PREP_BIAS*64
//   q0       = 27615 in every 16 bit lane
//   q1       = 64 (444), 129 - sign (422, d2 only) or 256 - sign (420)
//   q14      = -sh in every 32 bit lane, q15 = bitdepth_max
// Stack arguments start at sp+96: push {r4-r10,lr} is 32 bytes and
// vpush {q4-q7} another 64.
// ---------------------------------------------------------------------------
.macro w_mask_fn type
function w_mask_\type\()_16bpc_neon, export=1
        push            {r4-r10,lr}
        vpush           {q4-q7}
        ldrd            r4,  r5,  [sp, #96]
        ldrd            r6,  r7,  [sp, #104]
        ldr             r8,  [sp, #112]
        clz             r9,  r4
        adr             lr,  L(w_mask_\type\()_tbl)
        vdup.16         q15, r8         // bitdepth_max
        sub             r9,  r9,  #24
        clz             r8,  r8         // clz(bitdepth_max)
        ldr             r9,  [lr, r9, lsl #2]
        add             r9,  lr,  r9
        sub             r8,  r8,  #12   // sh = intermediate_bits + 6 = clz(bitdepth_max) - 12
        mov             r10, #PREP_BIAS*64
        neg             r8,  r8         // -sh
        movw            r12, #27615     // (64 + 1 - 38)<<mask_sh - 1 - mask_rnd
        vdup.32         q14, r8         // -sh
        vdup.16         q0,  r12
.if \type == 444
        vmov.i8         q1,  #64
.elseif \type == 422
        vdup.8          d4,  r7
        vmov.i8         d2,  #129
        vsub.i16        d2,  d2,  d4
.elseif \type == 420
        vdup.16         q2,  r7
        vmov.i16        q1,  #0x100
        vsub.i16        q1,  q1,  q2
.endif
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r9

        .align 2
L(w_mask_\type\()_tbl):
        .word 1280f - L(w_mask_\type\()_tbl) + CONFIG_THUMB
        .word 640f  - L(w_mask_\type\()_tbl) + CONFIG_THUMB
        .word 320f  - L(w_mask_\type\()_tbl) + CONFIG_THUMB
        .word 160f  - L(w_mask_\type\()_tbl) + CONFIG_THUMB
        .word 8f    - L(w_mask_\type\()_tbl) + CONFIG_THUMB
        .word 4f    - L(w_mask_\type\()_tbl) + CONFIG_THUMB

        // w == 4: four rows per iteration.
4:
        vld1.16         {q2, q3}, [r2, :128]! // tmp1 (four rows at once)
        vld1.16         {q4, q5}, [r3, :128]! // tmp2 (four rows at once)
        subs            r5,  r5,  #4
        vdup.32         q13, r10        // PREP_BIAS*64
        vabd.s16        q6,  q2,  q4    // abs(tmp1 - tmp2)
        vabd.s16        q7,  q3,  q5
        vsubl.s16       q8,  d8,  d4    // tmp2 - tmp1 (requires 17 bit)
        vsubl.s16       q9,  d9,  d5
        vsubl.s16       q10, d10, d6
        vsubl.s16       q11, d11, d7
        vqsub.u16       q6,  q0,  q6    // 27615 - abs()
        vqsub.u16       q7,  q0,  q7
        vshll.s16       q5,  d7,  #6    // tmp1 << 6
        vshll.s16       q4,  d6,  #6
        vshll.s16       q3,  d5,  #6
        vshll.s16       q2,  d4,  #6
        vshr.u16        q6,  q6,  #10   // 64-m = (27615 - abs()) >> mask_sh
        vshr.u16        q7,  q7,  #10
        vadd.i32        q2,  q2,  q13   // += PREP_BIAS*64
        vadd.i32        q3,  q3,  q13
        vadd.i32        q4,  q4,  q13
        vadd.i32        q5,  q5,  q13
        vmovl.u16       q12, d12
        vmovl.u16       q13, d13
        vmla.i32        q2,  q8,  q12   // (tmp2-tmp1)*(64-m)
        vmovl.u16       q12, d14
        vmla.i32        q3,  q9,  q13
        vmovl.u16       q13, d15
        vmla.i32        q4,  q10, q12
        vmla.i32        q5,  q11, q13
        vrshl.s32       q2,  q2,  q14   // (tmp1<<6 + (tmp2-tmp1)*(64-m) + (1 << (sh-1)) + PREP_BIAS*64) >> sh
        vrshl.s32       q3,  q3,  q14
        vrshl.s32       q4,  q4,  q14
        vrshl.s32       q5,  q5,  q14
        vqmovun.s32     d4,  q2         // iclip_pixel
        vqmovun.s32     d5,  q3
        vqmovun.s32     d6,  q4
        vqmovun.s32     d7,  q5
        vmin.u16        q2,  q2,  q15   // iclip_pixel
        vmin.u16        q3,  q3,  q15   // iclip_pixel
.if \type == 444
        vmovn.i16       d12, q6         // 64 - m
        vmovn.i16       d13, q7
        vsub.i16        q6,  q1,  q6    // m
        vst1.8          {q6}, [r6, :128]!
.elseif \type == 422
        vpadd.i16       d12, d12, d13   // (64 - m) + (64 - n) (column wise addition)
        vpadd.i16       d13, d14, d15
        vmovn.i16       d12, q6
        vhsub.u8        d12, d2,  d12   // ((129 - sign) - ((64 - m) + (64 - n)) >> 1
        vst1.8          {d12}, [r6, :64]!
.elseif \type == 420
        vadd.i16        d12, d12, d13   // (64 - my1) + (64 - my2) (row wise addition)
        vadd.i16        d13, d14, d15
        vpadd.i16       d12, d12, d13   // (128 - m) + (128 - n) (column wise addition)
        vsub.i16        d12, d2,  d12   // (256 - sign) - ((128 - m) + (128 - n))
        vrshrn.i16      d12, q6,  #2    // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2
        vst1.32         {d12[0]}, [r6, :32]!
.endif
        vst1.16         {d4},  [r0,  :64], r1
        vst1.16         {d5},  [r12, :64], r1
        vst1.16         {d6},  [r0,  :64], r1
        vst1.16         {d7},  [r12, :64], r1
        bgt             4b
        vpop            {q4-q7}
        pop             {r4-r10,pc}
        // w == 8: two rows per iteration.
8:
        vld1.16         {q2, q3}, [r2, :128]! // tmp1
        vld1.16         {q4, q5}, [r3, :128]! // tmp2
        subs            r5,  r5,  #2
        vdup.32         q13, r10        // PREP_BIAS*64
        vabd.s16        q6,  q2,  q4    // abs(tmp1 - tmp2)
        vabd.s16        q7,  q3,  q5
        vsubl.s16       q8,  d8,  d4    // tmp2 - tmp1 (requires 17 bit)
        vsubl.s16       q9,  d9,  d5
        vsubl.s16       q10, d10, d6
        vsubl.s16       q11, d11, d7
        vqsub.u16       q6,  q0,  q6    // 27615 - abs()
        vqsub.u16       q7,  q0,  q7
        vshll.s16       q5,  d7,  #6    // tmp1 << 6
        vshll.s16       q4,  d6,  #6
        vshll.s16       q3,  d5,  #6
        vshll.s16       q2,  d4,  #6
        vshr.u16        q6,  q6,  #10   // 64-m = (27615 - abs()) >> mask_sh
        vshr.u16        q7,  q7,  #10
        vadd.i32        q2,  q2,  q13   // += PREP_BIAS*64
        vadd.i32        q3,  q3,  q13
        vadd.i32        q4,  q4,  q13
        vadd.i32        q5,  q5,  q13
        vmovl.u16       q12, d12
        vmovl.u16       q13, d13
        vmla.i32        q2,  q8,  q12   // (tmp2-tmp1)*(64-m)
        vmovl.u16       q12, d14
        vmla.i32        q3,  q9,  q13
        vmovl.u16       q13, d15
        vmla.i32        q4,  q10, q12
        vmla.i32        q5,  q11, q13
        vrshl.s32       q2,  q2,  q14   // (tmp1<<6 + (tmp2-tmp1)*(64-m) + (1 << (sh-1)) + PREP_BIAS*64) >> sh
        vrshl.s32       q3,  q3,  q14
        vrshl.s32       q4,  q4,  q14
        vrshl.s32       q5,  q5,  q14
        vqmovun.s32     d4,  q2         // iclip_pixel
        vqmovun.s32     d5,  q3
        vqmovun.s32     d6,  q4
        vqmovun.s32     d7,  q5
        vmin.u16        q2,  q2,  q15   // iclip_pixel
        vmin.u16        q3,  q3,  q15   // iclip_pixel
.if \type == 444
        vmovn.i16       d12, q6         // 64 - m
        vmovn.i16       d13, q7
        vsub.i16        q6,  q1,  q6    // m
        vst1.8          {q6}, [r6, :128]!
.elseif \type == 422
        vpadd.i16       d12, d12, d13   // (64 - m) + (64 - n) (column wise addition)
        vpadd.i16       d13, d14, d15
        vmovn.i16       d12, q6
        vhsub.u8        d12, d2,  d12   // ((129 - sign) - ((64 - m) + (64 - n)) >> 1
        vst1.8          {d12}, [r6, :64]!
.elseif \type == 420
        vadd.i16        q6,  q6,  q7    // (64 - my1) + (64 - my2) (row wise addition)
        vpadd.i16       d12, d12, d13   // (128 - m) + (128 - n) (column wise addition)
        vsub.i16        d12, d2,  d12   // (256 - sign) - ((128 - m) + (128 - n))
        vrshrn.i16      d12, q6,  #2    // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2
        vst1.32         {d12[0]}, [r6, :32]!
.endif
        vst1.16         {q2}, [r0,  :128], r1
        vst1.16         {q3}, [r12, :128], r1
        bgt             8b
        vpop            {q4-q7}
        pop             {r4-r10,pc}
        // w >= 16: shared path. Each pass handles 8 samples from each of
        // two rows: r2/r3 read the first row, r7/r9 the row 2*w bytes later.
        // r1 becomes (2*stride - 2*w); lr is the mask pointer for the second
        // row (444: r6 + w, 422: r6 + w/2); r8 counts samples left in a row.
1280:
640:
320:
160:
        sub             r1,  r1,  r4,  lsl #1
.if \type == 444
        add             lr,  r6,  r4
.elseif \type == 422
        add             lr,  r6,  r4,  lsr #1
.endif
        add             r7,  r2,  r4,  lsl #1
        add             r9,  r3,  r4,  lsl #1
161:
        mov             r8,  r4
16:
        vld1.16         {q2}, [r2, :128]! // tmp1
        vld1.16         {q4}, [r3, :128]! // tmp2
        vld1.16         {q3}, [r7, :128]!
        vld1.16         {q5}, [r9, :128]!
        subs            r8,  r8,  #8
        vdup.32         q13, r10        // PREP_BIAS*64
        vabd.s16        q6,  q2,  q4    // abs(tmp1 - tmp2)
        vabd.s16        q7,  q3,  q5
        vsubl.s16       q8,  d8,  d4    // tmp2 - tmp1 (requires 17 bit)
        vsubl.s16       q9,  d9,  d5
        vsubl.s16       q10, d10, d6
        vsubl.s16       q11, d11, d7
        vqsub.u16       q6,  q0,  q6    // 27615 - abs()
        vqsub.u16       q7,  q0,  q7
        vshll.s16       q5,  d7,  #6    // tmp1 << 6
        vshll.s16       q4,  d6,  #6
        vshll.s16       q3,  d5,  #6
        vshll.s16       q2,  d4,  #6
        vshr.u16        q6,  q6,  #10   // 64-m = (27615 - abs()) >> mask_sh
        vshr.u16        q7,  q7,  #10
        vadd.i32        q2,  q2,  q13   // += PREP_BIAS*64
        vadd.i32        q3,  q3,  q13
        vadd.i32        q4,  q4,  q13
        vadd.i32        q5,  q5,  q13
        vmovl.u16       q12, d12
        vmovl.u16       q13, d13
        vmla.i32        q2,  q8,  q12   // (tmp2-tmp1)*(64-m)
        vmovl.u16       q12, d14
        vmla.i32        q3,  q9,  q13
        vmovl.u16       q13, d15
        vmla.i32        q4,  q10, q12
        vmla.i32        q5,  q11, q13
        vrshl.s32       q2,  q2,  q14   // (tmp1<<6 + (tmp2-tmp1)*(64-m) + (1 << (sh-1)) + PREP_BIAS*64) >> sh
        vrshl.s32       q3,  q3,  q14
        vrshl.s32       q4,  q4,  q14
        vrshl.s32       q5,  q5,  q14
        vqmovun.s32     d4,  q2         // iclip_pixel
        vqmovun.s32     d5,  q3
        vqmovun.s32     d6,  q4
        vqmovun.s32     d7,  q5
        vmin.u16        q2,  q2,  q15   // iclip_pixel
        vmin.u16        q3,  q3,  q15   // iclip_pixel
.if \type == 444
        vmovn.i16       d12, q6         // 64 - m
        vmovn.i16       d13, q7
        vsub.i16        q6,  q1,  q6    // m
        vst1.8          {d12}, [r6, :64]!
        vst1.8          {d13}, [lr, :64]!
.elseif \type == 422
        vpadd.i16       d12, d12, d13   // (64 - m) + (64 - n) (column wise addition)
        vpadd.i16       d13, d14, d15
        vmovn.i16       d12, q6
        vhsub.u8        d12, d2,  d12   // ((129 - sign) - ((64 - m) + (64 - n)) >> 1
        vst1.32         {d12[0]}, [r6, :32]!
        vst1.32         {d12[1]}, [lr, :32]!
521*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 522*c0909341SAndroid Build Coastguard Worker vadd.i16 q6, q6, q7 // (64 - my1) + (64 - my2) (row wise addition) 523*c0909341SAndroid Build Coastguard Worker vpadd.i16 d12, d12, d13 // (128 - m) + (128 - n) (column wise addition) 524*c0909341SAndroid Build Coastguard Worker vsub.i16 d12, d2, d12 // (256 - sign) - ((128 - m) + (128 - n)) 525*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d12, q6, #2 // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2 526*c0909341SAndroid Build Coastguard Worker vst1.32 {d12[0]}, [r6, :32]! 527*c0909341SAndroid Build Coastguard Worker.endif 528*c0909341SAndroid Build Coastguard Worker vst1.16 {q2}, [r0, :128]! 529*c0909341SAndroid Build Coastguard Worker vst1.16 {q3}, [r12, :128]! 530*c0909341SAndroid Build Coastguard Worker bgt 16b 531*c0909341SAndroid Build Coastguard Worker subs r5, r5, #2 532*c0909341SAndroid Build Coastguard Worker add r2, r2, r4, lsl #1 533*c0909341SAndroid Build Coastguard Worker add r3, r3, r4, lsl #1 534*c0909341SAndroid Build Coastguard Worker add r7, r7, r4, lsl #1 535*c0909341SAndroid Build Coastguard Worker add r9, r9, r4, lsl #1 536*c0909341SAndroid Build Coastguard Worker.if \type == 444 537*c0909341SAndroid Build Coastguard Worker add r6, r6, r4 538*c0909341SAndroid Build Coastguard Worker add lr, lr, r4 539*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 540*c0909341SAndroid Build Coastguard Worker add r6, r6, r4, lsr #1 541*c0909341SAndroid Build Coastguard Worker add lr, lr, r4, lsr #1 542*c0909341SAndroid Build Coastguard Worker.endif 543*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 544*c0909341SAndroid Build Coastguard Worker add r12, r12, r1 545*c0909341SAndroid Build Coastguard Worker bgt 161b 546*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 547*c0909341SAndroid Build Coastguard Worker pop {r4-r10,pc} 548*c0909341SAndroid Build Coastguard Workerendfunc 549*c0909341SAndroid Build Coastguard 
.endm

w_mask_fn 444
w_mask_fn 422
w_mask_fn 420

// blend_16bpc_neon: blend tmp into dst in place, one mask byte per pixel.
//
// Register roles (argument names inferred from how the registers are used):
//   r0      = dst, read and written in place
//   r1      = dst stride in bytes
//   r2      = tmp, 16-bit samples read sequentially (rows packed back to back)
//   r3      = w, only used to pick the loop: index = clz(w) - 26
//             selects the 32/16/8/4 pixel wide loop
//   [sp]    = h   (loaded into r4, row counter)
//   [sp+4]  = mask (loaded into r5, bytes read sequentially)
//
// Per pixel, with a = dst, b = tmp, m = mask:
//   dst = a + (((a - b) * -m + 32) >> 6)
// vqrdmulh.s16 computes (2*x*y + 0x8000) >> 16, so multiplying (a - b) by
// (-m << 9) yields exactly that rounded >> 6 product.
//
// Only q0-q3 and q8-q15 are used, so no NEON registers need to be saved.
function blend_16bpc_neon, export=1
        push            {r4-r5,lr}
        ldrd            r4,  r5,  [sp, #12]       // skip the 3 pushed words
        clz             lr,  r3
        adr             r3,  L(blend_tbl)
        sub             lr,  lr,  #26
        ldr             lr,  [r3, lr, lsl #2]
        add             r3,  r3,  lr
        bx              r3

        .align 2
        // Offsets relative to the table itself; CONFIG_THUMB is added to
        // each entry (presumably the Thumb bit for the bx above; it is
        // defined outside this file section).
L(blend_tbl):
        .word 320f - L(blend_tbl) + CONFIG_THUMB
        .word 160f - L(blend_tbl) + CONFIG_THUMB
        .word 80f  - L(blend_tbl) + CONFIG_THUMB
        .word 40f  - L(blend_tbl) + CONFIG_THUMB

40:
        // Two rows per iteration: r0 = even row, r12 = odd row, r1 = 2*stride.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
4:
        vld1.8          {d4},     [r5, :64]!
        vld1.16         {q1},     [r2, :128]!
        vld1.16         {d0},     [r0, :64]
        vneg.s8         d4,  d4                   // -m
        subs            r4,  r4,  #2
        vld1.16         {d1},     [r12, :64]
        vmovl.s8        q2,  d4
        vshl.i16        q2,  q2,  #9              // -m << 9
        vsub.i16        q1,  q0,  q1              // a - b
        vqrdmulh.s16    q1,  q1,  q2              // ((a-b)*-m + 32) >> 6
        vadd.i16        q0,  q0,  q1
        vst1.16         {d0},     [r0,  :64], r1
        vst1.16         {d1},     [r12, :64], r1
        bgt             4b
        pop             {r4-r5,pc}
80:
        // Two rows per iteration, as above.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
8:
        vld1.8          {q8},     [r5, :128]!
        vld1.16         {q2,  q3},  [r2, :128]!
        vneg.s8         q9,  q8                   // -m
        vld1.16         {q0},     [r0, :128]
        vld1.16         {q1},     [r12, :128]
        vmovl.s8        q8,  d18
        vmovl.s8        q9,  d19
        vshl.i16        q8,  q8,  #9              // -m << 9
        vshl.i16        q9,  q9,  #9
        vsub.i16        q2,  q0,  q2              // a - b
        vsub.i16        q3,  q1,  q3
        subs            r4,  r4,  #2
        vqrdmulh.s16    q2,  q2,  q8              // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q3,  q3,  q9
        vadd.i16        q0,  q0,  q2
        vadd.i16        q1,  q1,  q3
        vst1.16         {q0},     [r0,  :128], r1
        vst1.16         {q1},     [r12, :128], r1
        bgt             8b
        pop             {r4-r5,pc}
160:
        // Two rows per iteration, as above.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
16:
        vld1.8          {q12, q13}, [r5, :128]!
        vld1.16         {q8,  q9},  [r2, :128]!
        subs            r4,  r4,  #2
        vneg.s8         q14, q12                  // -m
        vld1.16         {q0,  q1},  [r0, :128]
        vneg.s8         q15, q13
        vld1.16         {q10, q11}, [r2, :128]!
        vmovl.s8        q12, d28
        vmovl.s8        q13, d29
        vmovl.s8        q14, d30
        vmovl.s8        q15, d31
        vld1.16         {q2,  q3},  [r12, :128]
        vshl.i16        q12, q12, #9              // -m << 9
        vshl.i16        q13, q13, #9
        vshl.i16        q14, q14, #9
        vshl.i16        q15, q15, #9
        vsub.i16        q8,  q0,  q8              // a - b
        vsub.i16        q9,  q1,  q9
        vsub.i16        q10, q2,  q10
        vsub.i16        q11, q3,  q11
        vqrdmulh.s16    q8,  q8,  q12             // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q9,  q9,  q13
        vqrdmulh.s16    q10, q10, q14
        vqrdmulh.s16    q11, q11, q15
        vadd.i16        q0,  q0,  q8
        vadd.i16        q1,  q1,  q9
        vadd.i16        q2,  q2,  q10
        vst1.16         {q0,  q1},  [r0,  :128], r1
        vadd.i16        q3,  q3,  q11
        vst1.16         {q2,  q3},  [r12, :128], r1
        bgt             16b
        pop             {r4-r5,pc}
320:
        // One row per iteration: r0 = left 16 pixels, r12 = right 16 pixels
        // (32 bytes further on); both advance by a single stride.
        add             r12, r0,  #32
32:
        vld1.8          {q12, q13}, [r5, :128]!
        vld1.16         {q8,  q9},  [r2, :128]!
        subs            r4,  r4,  #1
        vneg.s8         q14, q12                  // -m
        vld1.16         {q0,  q1},  [r0, :128]
        vneg.s8         q15, q13
        vld1.16         {q10, q11}, [r2, :128]!
        vmovl.s8        q12, d28
        vmovl.s8        q13, d29
        vmovl.s8        q14, d30
        vmovl.s8        q15, d31
        vld1.16         {q2,  q3},  [r12, :128]
        vshl.i16        q12, q12, #9              // -m << 9
        vshl.i16        q13, q13, #9
        vshl.i16        q14, q14, #9
        vshl.i16        q15, q15, #9
        vsub.i16        q8,  q0,  q8              // a - b
        vsub.i16        q9,  q1,  q9
        vsub.i16        q10, q2,  q10
        vsub.i16        q11, q3,  q11
        vqrdmulh.s16    q8,  q8,  q12             // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q9,  q9,  q13
        vqrdmulh.s16    q10, q10, q14
        vqrdmulh.s16    q11, q11, q15
        vadd.i16        q0,  q0,  q8
        vadd.i16        q1,  q1,  q9
        vadd.i16        q2,  q2,  q10
        vst1.16         {q0,  q1},  [r0,  :128], r1
        vadd.i16        q3,  q3,  q11
        vst1.16         {q2,  q3},  [r12, :128], r1
        bgt             32b
        pop             {r4-r5,pc}
endfunc

// blend_h_16bpc_neon: blend tmp into dst in place, one mask byte per row.
//
// Register roles (argument names inferred from how the registers are used):
//   r0      = dst, read and written in place
//   r1      = dst stride in bytes
//   r2      = tmp, 16-bit samples read sequentially (rows packed back to back)
//   r3      = w; index = clz(w) - 24 selects the 128/64/32/16/8/4/2 entry,
//             where 128, 64 and 32 share one generic loop that also uses r3
//             as the row width
//   [sp]    = h
//
// The mask comes from the obmc_masks table starting at byte offset h, and
// only h - (h >> 2) rows are processed. The per-pixel formula and the
// vqrdmulh trick are the same as in blend_16bpc_neon:
//   dst = a + (((a - b) * -m + 32) >> 6)
function blend_h_16bpc_neon, export=1
        push            {r4-r5,lr}
        ldr             r4,  [sp, #12]            // skip the 3 pushed words
        movrel          r5,  X(obmc_masks)
        add             r5,  r5,  r4              // &obmc_masks[h]
        sub             r4,  r4,  r4,  lsr #2     // rows to process: h - h/4
        clz             lr,  r3
        adr             r12, L(blend_h_tbl)
        sub             lr,  lr,  #24
        ldr             lr,  [r12, lr, lsl #2]
        add             r12, r12, lr
        bx              r12

        .align 2
L(blend_h_tbl):
        .word 1280f - L(blend_h_tbl) + CONFIG_THUMB
        .word 640f  - L(blend_h_tbl) + CONFIG_THUMB
        .word 320f  - L(blend_h_tbl) + CONFIG_THUMB
        .word 160f  - L(blend_h_tbl) + CONFIG_THUMB
        .word 80f   - L(blend_h_tbl) + CONFIG_THUMB
        .word 40f   - L(blend_h_tbl) + CONFIG_THUMB
        .word 20f   - L(blend_h_tbl) + CONFIG_THUMB

20:
        // Two rows per iteration: r0 = even row, r12 = odd row, r1 = 2*stride.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
2:
        // d4 = mask of the even row in every byte, d5 = mask of the odd row.
        vld2.8          {d4[], d5[]},  [r5, :16]!
        vld1.16         {d2},     [r2, :64]!
        vext.8          d4,  d4,  d5,  #6         // 2 bytes row 0, then row 1
        subs            r4,  r4,  #2
        vneg.s8         d4,  d4                   // -m
        vld1.32         {d0[]},   [r0, :32]
        vld1.32         {d0[1]},  [r12, :32]
        vmovl.s8        q2,  d4
        vshl.i16        d4,  d4,  #9              // -m << 9
        vsub.i16        d2,  d0,  d2              // a - b
        vqrdmulh.s16    d2,  d2,  d4              // ((a-b)*-m + 32) >> 6
        vadd.i16        d0,  d0,  d2
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[1]},  [r12, :32], r1
        bgt             2b
        pop             {r4-r5,pc}
40:
        // Two rows per iteration, as above.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
4:
        vld2.8          {d4[], d5[]},  [r5, :16]!
        vld1.16         {q1},     [r2, :128]!
        vext.8          d4,  d4,  d5,  #4         // 4 bytes row 0, then row 1
        subs            r4,  r4,  #2
        vneg.s8         d4,  d4                   // -m
        vld1.16         {d0},     [r0, :64]
        vld1.16         {d1},     [r12, :64]
        vmovl.s8        q2,  d4
        vshl.i16        q2,  q2,  #9              // -m << 9
        vsub.i16        q1,  q0,  q1              // a - b
        vqrdmulh.s16    q1,  q1,  q2              // ((a-b)*-m + 32) >> 6
        vadd.i16        q0,  q0,  q1
        vst1.16         {d0},     [r0,  :64], r1
        vst1.16         {d1},     [r12, :64], r1
        bgt             4b
        pop             {r4-r5,pc}
80:
        // Two rows per iteration, as above.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
8:
        vld2.8          {d16[], d17[]}, [r5, :16]!
        vld1.16         {q2,  q3},  [r2, :128]!
        vneg.s8         q9,  q8                   // -m
        vld1.16         {q0},     [r0, :128]
        subs            r4,  r4,  #2
        vmovl.s8        q8,  d18                  // row 0 mask
        vmovl.s8        q9,  d19                  // row 1 mask
        vld1.16         {q1},     [r12, :128]
        vshl.i16        q8,  q8,  #9              // -m << 9
        vshl.i16        q9,  q9,  #9
        vsub.i16        q2,  q0,  q2              // a - b
        vsub.i16        q3,  q1,  q3
        vqrdmulh.s16    q2,  q2,  q8              // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q3,  q3,  q9
        vadd.i16        q0,  q0,  q2
        vadd.i16        q1,  q1,  q3
        vst1.16         {q0},     [r0,  :128], r1
        vst1.16         {q1},     [r12, :128], r1
        bgt             8b
        pop             {r4-r5,pc}
160:
        // Two rows per iteration, as above.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
16:
        vld2.8          {d24[], d25[]}, [r5, :16]!
        vld1.16         {q8,  q9},  [r2, :128]!
        subs            r4,  r4,  #2
        vneg.s8         q13, q12                  // -m
        vld1.16         {q0,  q1},  [r0, :128]
        vmovl.s8        q12, d26                  // row 0 mask
        vld1.16         {q10, q11}, [r2, :128]!
        vmovl.s8        q13, d27                  // row 1 mask
        vld1.16         {q2,  q3},  [r12, :128]
        vshl.i16        q12, q12, #9              // -m << 9
        vshl.i16        q13, q13, #9
        vsub.i16        q8,  q0,  q8              // a - b
        vsub.i16        q9,  q1,  q9
        vsub.i16        q10, q2,  q10
        vsub.i16        q11, q3,  q11
        vqrdmulh.s16    q8,  q8,  q12             // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q9,  q9,  q12
        vqrdmulh.s16    q10, q10, q13
        vqrdmulh.s16    q11, q11, q13
        vadd.i16        q0,  q0,  q8
        vadd.i16        q1,  q1,  q9
        vadd.i16        q2,  q2,  q10
        vadd.i16        q3,  q3,  q11
        vst1.16         {q0,  q1},  [r0,  :128], r1
        vst1.16         {q2,  q3},  [r12, :128], r1
        bgt             16b
        pop             {r4-r5,pc}
1280:
640:
320:
        // Generic wide path: one row per outer iteration, 32 pixels per
        // inner iteration. r0 walks across the row, so r1 is reduced by
        // the row width in bytes (w * 2).
        sub             r1,  r1,  r3,  lsl #1
321:
        vld1.8          {d24[]},  [r5]!           // one mask byte for the row
        mov             r12, r3                   // pixels left in this row
        vneg.s8         d24, d24                  // -m
        vmovl.s8        q12, d24
        vshl.i16        q12, q12, #9              // -m << 9
32:
        vld1.16         {q8,  q9},  [r2, :128]!
        vld1.16         {q0,  q1},  [r0, :128]!
        subs            r12, r12, #32
        vld1.16         {q10, q11}, [r2, :128]!
        vld1.16         {q2,  q3},  [r0, :128]
        vsub.i16        q8,  q0,  q8              // a - b
        vsub.i16        q9,  q1,  q9
        vsub.i16        q10, q2,  q10
        vsub.i16        q11, q3,  q11
        sub             r0,  r0,  #32             // back to the start of the 32 pixels
        vqrdmulh.s16    q8,  q8,  q12             // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q9,  q9,  q12
        vqrdmulh.s16    q10, q10, q12
        vqrdmulh.s16    q11, q11, q12
        vadd.i16        q0,  q0,  q8
        vadd.i16        q1,  q1,  q9
        vadd.i16        q2,  q2,  q10
        vst1.16         {q0,  q1},  [r0, :128]!
        vadd.i16        q3,  q3,  q11
        vst1.16         {q2,  q3},  [r0, :128]!
        bgt             32b
        subs            r4,  r4,  #1
        add             r0,  r0,  r1
        bgt             321b
        pop             {r4-r5,pc}
endfunc

// blend_v_16bpc_neon: blend tmp into dst in place, one mask byte per column.
//
// Register roles (argument names inferred from how the registers are used):
//   r0      = dst, read and written in place
//   r1      = dst stride in bytes
//   r2      = tmp, 16-bit samples; advanced by a full row of w samples per row
//   r3      = w; index = clz(w) - 26 selects the 32/16/8/4/2 entry
//   [sp]    = h   (loaded into r4, row counter)
//
// The mask comes from the obmc_masks table starting at byte offset w and is
// loaded once, before the row loop. Only the leftmost columns are written
// back: 1 of 2, 3 of 4, 6 of 8, 12 of 16 and 24 of 32 pixels per row.
// The per-pixel formula and the vqrdmulh trick are the same as in
// blend_16bpc_neon:
//   dst = a + (((a - b) * -m + 32) >> 6)
function blend_v_16bpc_neon, export=1
        push            {r4,lr}
        ldr             r4,  [sp, #8]             // skip the 2 pushed words
        movrel          lr,  X(obmc_masks)
        add             lr,  lr,  r3              // &obmc_masks[w]
        clz             r12, r3
        adr             r3,  L(blend_v_tbl)
        sub             r12, r12, #26
        ldr             r12, [r3, r12, lsl #2]
        add             r3,  r3,  r12
        bx              r3

        .align 2
L(blend_v_tbl):
        .word 320f - L(blend_v_tbl) + CONFIG_THUMB
        .word 160f - L(blend_v_tbl) + CONFIG_THUMB
        .word 80f  - L(blend_v_tbl) + CONFIG_THUMB
        .word 40f  - L(blend_v_tbl) + CONFIG_THUMB
        .word 20f  - L(blend_v_tbl) + CONFIG_THUMB

20:
        // Two rows per iteration; only the first pixel of each row is
        // blended and stored.
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        vld1.8          {d4[]},   [lr]
        vneg.s8         d4,  d4                   // -m
        vmovl.s8        q2,  d4
        vshl.i16        d4,  d4,  #9              // -m << 9
2:
        vld1.32         {d2[]},   [r2, :32]!      // tmp row 0 (2 pixels)
        vld1.16         {d0[]},   [r0, :16]
        subs            r4,  r4,  #2
        vld1.16         {d2[1]},  [r2, :16]       // first pixel of tmp row 1
        vld1.16         {d0[1]},  [r12, :16]
        add             r2,  r2,  #4              // skip tmp row 1
        vsub.i16        d2,  d0,  d2              // a - b
        vqrdmulh.s16    d2,  d2,  d4              // ((a-b)*-m + 32) >> 6
        vadd.i16        d0,  d0,  d2
        vst1.16         {d0[0]},  [r0,  :16], r1
        vst1.16         {d0[1]},  [r12, :16], r1
        bgt             2b
        pop             {r4,pc}
40:
        // Two rows per iteration; 3 pixels stored per row (2 + 1), so the
        // final post-increment is 2*stride minus the 4 bytes already added.
        vld1.32         {d4[]},   [lr, :32]
        add             r12, r0,  r1
        vneg.s8         d4,  d4                   // -m
        lsl             r1,  r1,  #1
        vmovl.s8        q2,  d4
        sub             r1,  r1,  #4
        vshl.i16        q2,  q2,  #9              // -m << 9
4:
        vld1.16         {q1},     [r2, :128]!
        vld1.16         {d0},     [r0, :64]
        vld1.16         {d1},     [r12, :64]
        subs            r4,  r4,  #2
        vsub.i16        q1,  q0,  q1              // a - b
        vqrdmulh.s16    q1,  q1,  q2              // ((a-b)*-m + 32) >> 6
        vadd.i16        q0,  q0,  q1
        vst1.32         {d0[0]},  [r0,  :32]!
        vst1.32         {d1[0]},  [r12, :32]!
        vst1.16         {d0[2]},  [r0,  :16], r1
        vst1.16         {d1[2]},  [r12, :16], r1
        bgt             4b
        pop             {r4,pc}
80:
        // Two rows per iteration; 6 pixels stored per row (4 + 2), so the
        // final post-increment is 2*stride minus the 8 bytes already added.
        vld1.8          {d16},    [lr, :64]
        add             r12, r0,  r1
        vneg.s8         d16, d16                  // -m
        lsl             r1,  r1,  #1
        vmovl.s8        q8,  d16
        sub             r1,  r1,  #8
        vshl.i16        q8,  q8,  #9              // -m << 9
8:
        vld1.16         {q2,  q3},  [r2, :128]!
        vld1.16         {q0},     [r0, :128]
        vld1.16         {q1},     [r12, :128]
        subs            r4,  r4,  #2
        vsub.i16        q2,  q0,  q2              // a - b
        vsub.i16        q3,  q1,  q3
        vqrdmulh.s16    q2,  q2,  q8              // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q3,  q3,  q8
        vadd.i16        q0,  q0,  q2
        vadd.i16        q1,  q1,  q3
        vst1.16         {d0},     [r0,  :64]!
        vst1.16         {d2},     [r12, :64]!
        vst1.32         {d1[0]},  [r0,  :32], r1
        vst1.32         {d3[0]},  [r12, :32], r1
        bgt             8b
        pop             {r4,pc}
160:
        // Two rows per iteration; 12 pixels (three d registers) are loaded,
        // blended and stored per row.
        vld1.8          {q12},    [lr, :128]
        add             r12, r0,  r1
        vneg.s8         q13, q12                  // -m
        lsl             r1,  r1,  #1
        vmovl.s8        q12, d26
        vmovl.s8        q13, d27
        vshl.i16        q12, q12, #9              // -m << 9
        vshl.i16        d26, d26, #9
16:
        vld1.16         {q8,  q9},  [r2, :128]!
        vld1.16         {d0,  d1,  d2},  [r0, :64]
        subs            r4,  r4,  #2
        vld1.16         {q10, q11}, [r2, :128]!
        vsub.i16        q8,  q0,  q8              // a - b
        vld1.16         {d4,  d5,  d6},  [r12, :64]
        vsub.i16        d18, d2,  d18
        vsub.i16        q10, q2,  q10
        vsub.i16        d22, d6,  d22
        vqrdmulh.s16    q8,  q8,  q12             // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    d18, d18, d26
        vqrdmulh.s16    q10, q10, q12
        vqrdmulh.s16    d22, d22, d26
        vadd.i16        q0,  q0,  q8
        vadd.i16        d2,  d2,  d18
        vadd.i16        q2,  q2,  q10
        vst1.16         {d0,  d1,  d2},  [r0,  :64], r1
        vadd.i16        d6,  d6,  d22
        vst1.16         {d4,  d5,  d6},  [r12, :64], r1
        bgt             16b
        pop             {r4,pc}
320:
        // One row per iteration; 24 pixels are blended and stored per row.
        // The first store post-increments r0 by 32 bytes, so the final
        // post-increment is stride - 32.
        vld1.8          {d24, d25, d26}, [lr, :64]
        vneg.s8         q14, q12                  // -m
        vneg.s8         d30, d26
        vmovl.s8        q12, d28
        vmovl.s8        q13, d29
        vmovl.s8        q14, d30
        sub             r1,  r1,  #32
        vshl.i16        q12, q12, #9              // -m << 9
        vshl.i16        q13, q13, #9
        vshl.i16        q14, q14, #9
32:
        vld1.16         {q8,  q9},  [r2, :128]!
        vld1.16         {q0,  q1},  [r0, :128]!
        subs            r4,  r4,  #1
        vld1.16         {q10},    [r2, :128]
        vsub.i16        q8,  q0,  q8              // a - b
        vld1.16         {q2},     [r0, :128]
        sub             r0,  r0,  #32             // back to the start of the row
        vsub.i16        q9,  q1,  q9
        vsub.i16        q10, q2,  q10
        vqrdmulh.s16    q8,  q8,  q12             // ((a-b)*-m + 32) >> 6
        vqrdmulh.s16    q9,  q9,  q13
        vqrdmulh.s16    q10, q10, q14
        vadd.i16        q0,  q0,  q8
        vadd.i16        q1,  q1,  q9
        vadd.i16        q2,  q2,  q10
        vst1.16         {q0,  q1},  [r0, :128]!
        add             r2,  r2,  #32             // skip the rest of the tmp row
        vst1.16         {q2},     [r0, :128], r1
        bgt             32b
        pop             {r4,pc}
endfunc

// This has got the same signature as the put_8tap functions,
// and assumes that r9 is set to (clz(w)-24).
// Unfiltered block copy of 16 bit pixels (2 bytes each), dispatched on width.
//
// Register use, as established by the code below:
//   r0      destination pointer; stores carry :32/:64/:128 alignment hints
//   r1      destination stride in bytes
//   r2      source pointer; loads carry no alignment hint
//   r3      source stride in bytes
//   r5      rows left; the w=2/4/8 paths copy two rows per iteration, the
//           wider paths one row per iteration
//   r9      table index on entry (0 selects w=128 ... 6 selects w=2)
//   r8-r10  scratch
// Every path leaves with pop {r4-r11,pc}; presumably this matches the
// push {r4-r11,lr} done by the exported entry point that branched here.
function put_neon
        // Jump through a table of offsets that are relative to the table.
        adr             r10, L(put_tbl)
        ldr             r9,  [r10, r9, lsl #2]
        add             r10, r10, r9
        bx              r10

        .align 2
L(put_tbl):
        .word 1280f - L(put_tbl) + CONFIG_THUMB
        .word 640f  - L(put_tbl) + CONFIG_THUMB
        .word 320f  - L(put_tbl) + CONFIG_THUMB
        .word 16f   - L(put_tbl) + CONFIG_THUMB
        .word 80f   - L(put_tbl) + CONFIG_THUMB
        .word 4f    - L(put_tbl) + CONFIG_THUMB
        .word 2f    - L(put_tbl) + CONFIG_THUMB

2:      // w == 2: 4 bytes per row, two rows per iteration.
        // The loads replicate the 32 bit value into every lane, so storing
        // lane 1 of d1 writes the same data as lane 0 would.
        vld1.32         {d0[]},  [r2], r3
        vld1.32         {d1[]},  [r2], r3
        subs            r5,  r5,  #2
        vst1.32         {d0[0]}, [r0, :32], r1
        vst1.32         {d1[1]}, [r0, :32], r1
        bgt             2b
        pop             {r4-r11,pc}
4:      // w == 4: 8 bytes per row, two rows per iteration.
        vld1.16         {d0}, [r2], r3
        vld1.16         {d1}, [r2], r3
        subs            r5,  r5,  #2
        vst1.16         {d0}, [r0, :64], r1
        vst1.16         {d1}, [r0, :64], r1
        bgt             4b
        pop             {r4-r11,pc}
80:     // w == 8: r8/r9 point at the second row of dst/src and both strides
        // are doubled, so each pointer pair walks every other row.
        add             r8,  r0,  r1
        lsl             r1,  r1,  #1
        add             r9,  r2,  r3
        lsl             r3,  r3,  #1
8:
        vld1.16         {q0}, [r2], r3
        vld1.16         {q1}, [r9], r3
        subs            r5,  r5,  #2
        vst1.16         {q0}, [r0, :128], r1
        vst1.16         {q1}, [r8, :128], r1
        bgt             8b
        pop             {r4-r11,pc}
16:     // w == 16: 32 bytes per row, one row per iteration.
        vld1.16         {q0,  q1},  [r2], r3
        subs            r5,  r5,  #1
        vst1.16         {q0,  q1},  [r0, :128], r1
        bgt             16b
        pop             {r4-r11,pc}
320:    // w == 32: 64 bytes per row. The first 32 bytes are covered by a
        // writeback access, so take them off both strides up front.
        sub             r1,  r1,  #32
        sub             r3,  r3,  #32
32:
        vld1.16         {q0,  q1},  [r2]!
        vst1.16         {q0,  q1},  [r0, :128]!
        vld1.16         {q2,  q3},  [r2], r3
        subs            r5,  r5,  #1
        vst1.16         {q2,  q3},  [r0, :128], r1
        bgt             32b
        pop             {r4-r11,pc}
640:    // w == 64: 128 bytes per row, 96 of them covered by writeback.
        sub             r1,  r1,  #96
        sub             r3,  r3,  #96
64:
        vld1.16         {q8,  q9},  [r2]!
        vst1.16         {q8,  q9},  [r0, :128]!
        vld1.16         {q10, q11}, [r2]!
        vst1.16         {q10, q11}, [r0, :128]!
        vld1.16         {q12, q13}, [r2]!
        vst1.16         {q12, q13}, [r0, :128]!
        vld1.16         {q14, q15}, [r2], r3
        subs            r5,  r5,  #1
        vst1.16         {q14, q15}, [r0, :128], r1
        bgt             64b
        pop             {r4-r11,pc}
1280:   // w == 128: 256 bytes per row, 224 of them covered by writeback.
        sub             r1,  r1,  #224
        sub             r3,  r3,  #224
128:
        vld1.16         {q8,  q9},  [r2]!
        vst1.16         {q8,  q9},  [r0, :128]!
        vld1.16         {q10, q11}, [r2]!
        vst1.16         {q10, q11}, [r0, :128]!
        vld1.16         {q12, q13}, [r2]!
        vst1.16         {q12, q13}, [r0, :128]!
        vld1.16         {q14, q15}, [r2]!
        vst1.16         {q14, q15}, [r0, :128]!
        vld1.16         {q8,  q9},  [r2]!
        vst1.16         {q8,  q9},  [r0, :128]!
        vld1.16         {q10, q11}, [r2]!
        vst1.16         {q10, q11}, [r0, :128]!
        vld1.16         {q12, q13}, [r2]!
        vst1.16         {q12, q13}, [r0, :128]!
        vld1.16         {q14, q15}, [r2], r3
        subs            r5,  r5,  #1
        vst1.16         {q14, q15}, [r0, :128], r1
        bgt             128b
        pop             {r4-r11,pc}
endfunc

// This has got the same signature as the prep_8tap functions,
// and assumes that r9 is set to (clz(w)-24), r7 to intermediate_bits and
// r8 to w*2.
// Unfiltered conversion of 16 bit source pixels into the intermediate
// buffer: out = (px << intermediate_bits) - PREP_BIAS.
//
// Register use, as established by the code below:
//   r0   output pointer, written contiguously with :128 aligned stores
//   r1   source pointer; loads carry no alignment hint
//   r2   source stride in bytes
//   r4   rows left; the w=4/8/16 paths handle two rows per iteration, the
//        wider paths one row per iteration
//   r7   shift amount, broadcast into q15
//   r9   table index on entry (0 selects w=128 ... 5 selects w=4); reused
//        as the second row pointer in the w=4/8 paths
//   q14  PREP_BIAS in every 16 bit lane
// Every path leaves with pop {r4-r11,pc}; presumably this matches the
// push {r4-r11,lr} done by the exported entry point that branched here.
function prep_neon
        adr             r10, L(prep_tbl)
        ldr             r9,  [r10, r9, lsl #2]
        vdup.16         q15, r7   // intermediate_bits
        vmov.i16        q14, #PREP_BIAS
        add             r10, r10, r9
        bx              r10

        .align 2
L(prep_tbl):
        .word 1280f - L(prep_tbl) + CONFIG_THUMB
        .word 640f  - L(prep_tbl) + CONFIG_THUMB
        .word 320f  - L(prep_tbl) + CONFIG_THUMB
        .word 16f   - L(prep_tbl) + CONFIG_THUMB
        .word 80f   - L(prep_tbl) + CONFIG_THUMB
        .word 40f   - L(prep_tbl) + CONFIG_THUMB

40:     // w == 4: r9 points at the second source row, stride is doubled.
        add             r9,  r1,  r2
        lsl             r2,  r2,  #1
4:
        vld1.16         {d0}, [r1], r2
        vld1.16         {d1}, [r9], r2
        subs            r4,  r4,  #2
        vshl.s16        q0,  q0,  q15
        vsub.i16        q0,  q0,  q14
        vst1.16         {q0}, [r0, :128]!
        bgt             4b
        pop             {r4-r11,pc}
80:     // w == 8: same two-pointer scheme, one q register per row.
        add             r9,  r1,  r2
        lsl             r2,  r2,  #1
8:
        vld1.16         {q0}, [r1], r2
        vld1.16         {q1}, [r9], r2
        subs            r4,  r4,  #2
        vshl.s16        q0,  q0,  q15
        vshl.s16        q1,  q1,  q15
        vsub.i16        q0,  q0,  q14
        vsub.i16        q1,  q1,  q14
        vst1.16         {q0, q1}, [r0, :128]!
        bgt             8b
        pop             {r4-r11,pc}
16:     // w == 16: 32 bytes per row, two rows per iteration.
        vld1.16         {q0,  q1},  [r1], r2
        vshl.s16        q0,  q0,  q15
        vld1.16         {q2,  q3},  [r1], r2
        subs            r4,  r4,  #2
        vshl.s16        q1,  q1,  q15
        vshl.s16        q2,  q2,  q15
        vshl.s16        q3,  q3,  q15
        vsub.i16        q0,  q0,  q14
        vsub.i16        q1,  q1,  q14
        vsub.i16        q2,  q2,  q14
        vst1.16         {q0,  q1},  [r0, :128]!
        vsub.i16        q3,  q3,  q14
        vst1.16         {q2,  q3},  [r0, :128]!
        bgt             16b
        pop             {r4-r11,pc}
320:    // w == 32: the first 32 bytes of a row are read with writeback, so
        // take them off the stride.
        sub             r2,  r2,  #32
32:
        vld1.16         {q0,  q1},  [r1]!
        subs            r4,  r4,  #1
        vshl.s16        q0,  q0,  q15
        vld1.16         {q2,  q3},  [r1], r2
        vshl.s16        q1,  q1,  q15
        vshl.s16        q2,  q2,  q15
        vshl.s16        q3,  q3,  q15
        vsub.i16        q0,  q0,  q14
        vsub.i16        q1,  q1,  q14
        vsub.i16        q2,  q2,  q14
        vst1.16         {q0,  q1},  [r0, :128]!
        vsub.i16        q3,  q3,  q14
        vst1.16         {q2,  q3},  [r0, :128]!
        bgt             32b
        pop             {r4-r11,pc}
640:    // w == 64: 96 of the 128 row bytes are read with writeback.
        sub             r2,  r2,  #96
64:
        vld1.16         {q0,  q1},  [r1]!
        subs            r4,  r4,  #1
        vshl.s16        q0,  q0,  q15
        vld1.16         {q2,  q3},  [r1]!
        vshl.s16        q1,  q1,  q15
        vld1.16         {q8,  q9},  [r1]!
        vshl.s16        q2,  q2,  q15
        vld1.16         {q10, q11}, [r1], r2
        vshl.s16        q3,  q3,  q15
        vshl.s16        q8,  q8,  q15
        vshl.s16        q9,  q9,  q15
        vshl.s16        q10, q10, q15
        vshl.s16        q11, q11, q15
        vsub.i16        q0,  q0,  q14
        vsub.i16        q1,  q1,  q14
        vsub.i16        q2,  q2,  q14
        vsub.i16        q3,  q3,  q14
        vsub.i16        q8,  q8,  q14
        vst1.16         {q0,  q1},  [r0, :128]!
        vsub.i16        q9,  q9,  q14
        vst1.16         {q2,  q3},  [r0, :128]!
        vsub.i16        q10, q10, q14
        vst1.16         {q8,  q9},  [r0, :128]!
        vsub.i16        q11, q11, q14
        vst1.16         {q10, q11}, [r0, :128]!
        bgt             64b
        pop             {r4-r11,pc}
1280:   // w == 128: 224 of the 256 row bytes are read with writeback. The row
        // is processed as two 128 byte halves; loads of the second half are
        // interleaved with the stores of the first.
        sub             r2,  r2,  #224
128:
        vld1.16         {q0,  q1},  [r1]!
        subs            r4,  r4,  #1
        vshl.s16        q0,  q0,  q15
        vld1.16         {q2,  q3},  [r1]!
        vshl.s16        q1,  q1,  q15
        vld1.16         {q8,  q9},  [r1]!
        vshl.s16        q2,  q2,  q15
        vld1.16         {q10, q11}, [r1]!
        vshl.s16        q3,  q3,  q15
        vshl.s16        q8,  q8,  q15
        vshl.s16        q9,  q9,  q15
        vshl.s16        q10, q10, q15
        vshl.s16        q11, q11, q15
        vsub.i16        q0,  q0,  q14
        vsub.i16        q1,  q1,  q14
        vsub.i16        q2,  q2,  q14
        vsub.i16        q3,  q3,  q14
        vsub.i16        q8,  q8,  q14
        vst1.16         {q0,  q1},  [r0, :128]!
        vld1.16         {q0,  q1},  [r1]!
        vsub.i16        q9,  q9,  q14
        vsub.i16        q10, q10, q14
        vst1.16         {q2,  q3},  [r0, :128]!
        vld1.16         {q2,  q3},  [r1]!
        vsub.i16        q11, q11, q14
        vshl.s16        q0,  q0,  q15
        vst1.16         {q8,  q9},  [r0, :128]!
        vld1.16         {q8,  q9},  [r1]!
        vshl.s16        q1,  q1,  q15
        vshl.s16        q2,  q2,  q15
        vst1.16         {q10, q11}, [r0, :128]!
        vld1.16         {q10, q11}, [r1], r2
        vshl.s16        q3,  q3,  q15
        vshl.s16        q8,  q8,  q15
        vshl.s16        q9,  q9,  q15
        vshl.s16        q10, q10, q15
        vshl.s16        q11, q11, q15
        vsub.i16        q0,  q0,  q14
        vsub.i16        q1,  q1,  q14
        vsub.i16        q2,  q2,  q14
        vsub.i16        q3,  q3,  q14
        vsub.i16        q8,  q8,  q14
        vst1.16         {q0,  q1},  [r0, :128]!
        vsub.i16        q9,  q9,  q14
        vst1.16         {q2,  q3},  [r0, :128]!
        vsub.i16        q10, q10, q14
        vst1.16         {q8,  q9},  [r0, :128]!
        vsub.i16        q11, q11, q14
        vst1.16         {q10, q11}, [r0, :128]!
        bgt             128b
        pop             {r4-r11,pc}
endfunc

// Load one element of wd bits into all lanes of each destination register,
// reading alternately through pointers s0 and s1 and advancing the pointer
// by strd after each load. d0 and d1 are mandatory; d2/d3 are optional as a
// pair; d4, d5 and d6 are individually optional.
.macro load_slice s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
        vld1.\wd        {\d0[]}, [\s0], \strd
        vld1.\wd        {\d1[]}, [\s1], \strd
.ifnb \d2
        vld1.\wd        {\d2[]}, [\s0], \strd
        vld1.\wd        {\d3[]}, [\s1], \strd
.endif
.ifnb \d4
        vld1.\wd        {\d4[]}, [\s0], \strd
.endif
.ifnb \d5
        vld1.\wd        {\d5[]}, [\s1], \strd
.endif
.ifnb \d6
        vld1.\wd        {\d6[]}, [\s0], \strd
.endif
.endm
// Same alternating s0/s1 pattern as load_slice, but each load fills a whole
// register.
.macro load_reg s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        vld1.16         {\d0}, [\s0], \strd
        vld1.16         {\d1}, [\s1], \strd
.ifnb \d2
        vld1.16         {\d2}, [\s0], \strd
        vld1.16         {\d3}, [\s1], \strd
.endif
.ifnb \d4
        vld1.16         {\d4}, [\s0], \strd
.endif
.ifnb \d5
        vld1.16         {\d5}, [\s1], \strd
.endif
.ifnb \d6
        vld1.16         {\d6}, [\s0], \strd
.endif
.endm
// Load up to three register pairs: d0/d1 through s0, d2/d3 through s1 and
// d4/d5 through s0 again. The second and third pair are optional.
.macro load_regpair s0, s1, strd, d0, d1, d2, d3, d4, d5
        vld1.16         {\d0, \d1}, [\s0], \strd
.ifnb \d2
        vld1.16         {\d2, \d3}, [\s1], \strd
.endif
.ifnb \d4
        vld1.16         {\d4, \d5}, [\s0], \strd
.endif
.endm
// load_slice with 32 bit elements.
.macro load_32 s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, 32, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
// Thin alias for load_regpair.
.macro load_16s16 s0, s1, strd, d0, d1, d2, d3, d4, d5
        load_regpair    \s0, \s1, \strd, \d0, \d1, \d2, \d3, \d4, \d5
.endm
// Drop the lowest 4 bytes of each register and append the lowest 4 bytes of
// the following register. r3/r4 are optional as a pair.
.macro interleave_1_32 r0, r1, r2, r3, r4
        vext.8          \r0, \r0, \r1, #4
        vext.8          \r1, \r1, \r2, #4
.ifnb \r3
        vext.8          \r2, \r2, \r3, #4
        vext.8          \r3, \r3, \r4, #4
.endif
.endm
// Unsigned 16 bit minimum of each of r0..r3 against c, in place. r1 is
// optional; r2/r3 are optional as a pair.
.macro vmin_u16 c, r0, r1, r2, r3
        vmin.u16        \r0, \r0, \c
.ifnb \r1
        vmin.u16        \r1, \r1, \c
.endif
.ifnb \r2
        vmin.u16        \r2, \r2, \c
        vmin.u16        \r3, \r3, \c
.endif
.endm
// Subtract c from each of r0..r3 in 16 bit lanes, in place. r1 is optional;
// r2/r3 are optional as a pair.
.macro vsub_i16 c, r0, r1, r2, r3
        vsub.i16        \r0, \r0, \c
.ifnb \r1
        vsub.i16        \r1, \r1, \c
.endif
.ifnb \r2
        vsub.i16        \r2, \r2, \c
        vsub.i16        \r3, \r3, \c
.endif
.endm
// Widening signed 4 tap multiply-accumulate: d = s0*c0 + ... + s3*c3, with
// the 16 bit coefficients taken from the lanes of d0.
.macro vmull_vmlal_4 d, s0, s1, s2, s3
        vmull.s16       \d,  \s0, d0[0]
        vmlal.s16       \d,  \s1, d0[1]
        vmlal.s16       \d,  \s2, d0[2]
        vmlal.s16       \d,  \s3, d0[3]
.endm
// Widening signed 8 tap multiply-accumulate, with the 16 bit coefficients
// taken from the lanes of d0 (taps 0-3) and d1 (taps 4-7).
.macro vmull_vmlal_8 d, s0, s1, s2, s3, s4, s5, s6, s7
        vmull.s16       \d,  \s0, d0[0]
        vmlal.s16       \d,  \s1, d0[1]
        vmlal.s16       \d,  \s2, d0[2]
        vmlal.s16       \d,  \s3, d0[3]
        vmlal.s16       \d,  \s4, d1[0]
        vmlal.s16       \d,  \s5, d1[1]
        vmlal.s16       \d,  \s6, d1[2]
        vmlal.s16       \d,  \s7, d1[3]
.endm
// Rounding right shift by the immediate shift, narrowing each signed 32 bit
// q register into an unsigned saturated 16 bit d register. The second
// (q, d) argument pair is optional; the third and fourth are optional
// together.
.macro vqrshrun_s32 shift, q0, d0, q1, d1, q2, d2, q3, d3
        vqrshrun.s32    \d0, \q0, #\shift
.ifnb \q1
        vqrshrun.s32    \d1, \q1, #\shift
.endif
.ifnb \q2
        vqrshrun.s32    \d2, \q2, #\shift
        vqrshrun.s32    \d3, \q3, #\shift
.endif
.endm
// Narrow each 32 bit q register into a 16 bit d register by keeping the low
// half of every lane. Optional arguments as for vqrshrun_s32.
.macro vmovn_i32 q0, d0, q1, d1, q2, d2, q3, d3
        vmovn.i32       \d0, \q0
.ifnb \q1
        vmovn.i32       \d1, \q1
.endif
.ifnb \q2
        vmovn.i32       \d2, \q2
        vmovn.i32       \d3, \q3
.endif
.endm
// Rounding shift of r0..r3 by the per-lane signed counts held in the shift
// register, in place. r2/r3 are optional as a pair.
.macro vrshl_s32 shift, r0, r1, r2, r3
        vrshl.s32       \r0, \r0, \shift
        vrshl.s32       \r1, \r1, \shift
.ifnb \r2
        vrshl.s32       \r2, \r2, \shift
        vrshl.s32       \r3, \r3, \shift
.endif
.endm
// Store 32 bit lane 0 of each register through r0 and lane 1 through r9,
// advancing both pointers by strd. Note that the output pointers r0 and r9
// are hard coded; the macro arguments named r0/r1 are the data registers.
.macro vst1_32 strd, r0, r1
        vst1.32         {\r0[0]}, [r0, :32], \strd
        vst1.32         {\r0[1]}, [r9, :32], \strd
.ifnb \r1
        vst1.32         {\r1[0]}, [r0, :32], \strd
        vst1.32         {\r1[1]}, [r9, :32], \strd
.endif
.endm
// Store whole registers alternately through the hard coded pointers r0 and
// r9 with the given alignment hint, advancing by strd. Takes 2, 4 or 8 data
// registers.
.macro vst1_reg strd, align, r0, r1, r2, r3, r4, r5, r6, r7
        vst1.16         {\r0}, [r0, \align], \strd
        vst1.16         {\r1}, [r9, \align], \strd
.ifnb \r2
        vst1.16         {\r2}, [r0, \align], \strd
        vst1.16         {\r3}, [r9, \align], \strd
.endif
.ifnb \r4
        vst1.16         {\r4}, [r0, \align], \strd
        vst1.16         {\r5}, [r9, \align], \strd
        vst1.16         {\r6}, [r0, \align], \strd
        vst1.16         {\r7}, [r9, \align], \strd
.endif
.endm
// Turn 32 bit accumulators into 16 bit results.
//   put:       rounding shift right by 6 with unsigned saturation, then
//              clamp against q15.
//   otherwise: rounding shift by q14, truncating narrow, subtract q15.
// The 16 bit steps operate on q0/q1 after the narrowing wrote d0..d3, so
// callers presumably pass d registers that alias those q registers.
.macro finalize type, q0, q1, d0, d1, q2, q3, d2, d3
.ifc \type, put
        vqrshrun_s32    6,   \q0, \d0, \q1, \d1, \q2, \d2, \q3, \d3
        vmin_u16        q15, \q0, \q1
.else
        vrshl_s32       q14, \q0, \q1, \q2, \q3 // -(6-intermediate_bits)
        vmovn_i32       \q0, \d0, \q1, \d1, \q2, \d2, \q3, \d3
        vsub_i16        q15, \q0, \q1           // PREP_BIAS
.endif
.endm
// finalize, then store the d registers alternately through r0/r9.
.macro shift_store_4 type, strd, q0, q1, d0, d1, q2, q3, d2, d3
        finalize        \type, \q0, \q1, \d0, \d1, \q2, \q3, \d2, \d3
        vst1_reg        \strd, :64, \d0, \d1, \d2, \d3
.endm
// finalize, then store q0 through r0 and q1 through r9.
.macro shift_store_8 type, strd, q0, q1, d0, d1, q2, q3, d2, d3
        finalize        \type, \q0, \q1, \d0, \d1, \q2, \q3, \d2, \d3
        vst1_reg        \strd, :128, \q0, \q1
.endm
// finalize, then store q0 and q1 as one 32 byte run through r0.
.macro shift_store_16 type, strd, q0, q1, d0, d1, q2, q3, d2, d3
        finalize        \type, \q0, \q1, \d0, \d1, \q2, \q3, \d2, \d3
        vst1.16         {\q0, \q1}, [r0, :128], \strd
.endm

// Emit one exported entry point per filter combination. The entry saves
// r4-r11 and lr, puts the horizontal filter type constant in r9 and the
// vertical one in r10, and branches to the shared implementation.
.macro make_8tap_fn op, type, type_h, type_v
function \op\()_8tap_\type\()_16bpc_neon, export=1
        push            {r4-r11,lr}
        movw            r9,  \type_h
        movw            r10, \type_v
        b               \op\()_8tap_neon
endfunc
.endm

// No spaces in these expressions, due to gas-preprocessor.
#define REGULAR ((0*15<<7)|3*15)
#define SMOOTH  ((1*15<<7)|4*15)
#define SHARP   ((2*15<<7)|3*15)

.macro filter_fn type, dst, d_strd, src, s_strd, w, h, mx, my, bdmax, ds2, sr2
make_8tap_fn \type, regular,        REGULAR, REGULAR
make_8tap_fn \type, regular_smooth, REGULAR, SMOOTH
make_8tap_fn \type, regular_sharp,  REGULAR, SHARP
make_8tap_fn \type, smooth,         SMOOTH,  SMOOTH
make_8tap_fn \type, smooth_regular, SMOOTH,  REGULAR
make_8tap_fn \type, smooth_sharp,   SMOOTH,  SHARP
make_8tap_fn \type, sharp,          SHARP,   SHARP
make_8tap_fn \type, sharp_regular,  SHARP,   REGULAR
make_8tap_fn \type, sharp_smooth,   SHARP,   SMOOTH

function \type\()_8tap_neon
        ldrd            r4,  r5,  [sp, #36]
        ldrd            r6,  r7,  [sp, #44]
.ifc \bdmax, r8
1462*c0909341SAndroid Build Coastguard Worker ldr r8, [sp, #52] 1463*c0909341SAndroid Build Coastguard Worker.endif 1464*c0909341SAndroid Build Coastguard Worker movw r11, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0) 1465*c0909341SAndroid Build Coastguard Worker mul \mx, \mx, r11 1466*c0909341SAndroid Build Coastguard Worker mul \my, \my, r11 1467*c0909341SAndroid Build Coastguard Worker add \mx, \mx, r9 // mx, 8tap_h, 4tap_h 1468*c0909341SAndroid Build Coastguard Worker add \my, \my, r10 // my, 8tap_v, 4tap_v 1469*c0909341SAndroid Build Coastguard Worker 1470*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 1471*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \w, #1 1472*c0909341SAndroid Build Coastguard Worker.endif 1473*c0909341SAndroid Build Coastguard Worker 1474*c0909341SAndroid Build Coastguard Worker vdup.16 q15, \bdmax // bitdepth_max 1475*c0909341SAndroid Build Coastguard Worker clz \bdmax, \bdmax 1476*c0909341SAndroid Build Coastguard Worker clz r9, \w 1477*c0909341SAndroid Build Coastguard Worker sub \bdmax, \bdmax, #18 // intermediate_bits = clz(bitdepth_max) - 18 1478*c0909341SAndroid Build Coastguard Worker tst \mx, #(0x7f << 14) 1479*c0909341SAndroid Build Coastguard Worker sub r9, r9, #24 1480*c0909341SAndroid Build Coastguard Worker add lr, \bdmax, #6 // 6 + intermediate_bits 1481*c0909341SAndroid Build Coastguard Worker rsb r12, \bdmax, #6 // 6 - intermediate_bits 1482*c0909341SAndroid Build Coastguard Worker movrel r11, X(mc_subpel_filters), -8 1483*c0909341SAndroid Build Coastguard Worker bne L(\type\()_8tap_h) 1484*c0909341SAndroid Build Coastguard Worker tst \my, #(0x7f << 14) 1485*c0909341SAndroid Build Coastguard Worker bne L(\type\()_8tap_v) 1486*c0909341SAndroid Build Coastguard Worker b \type\()_neon 1487*c0909341SAndroid Build Coastguard Worker 1488*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_h): 1489*c0909341SAndroid Build Coastguard Worker cmp \w, #4 1490*c0909341SAndroid Build Coastguard Worker ubfx r10, \mx, 
#7, #7 1491*c0909341SAndroid Build Coastguard Worker and \mx, \mx, #0x7f 1492*c0909341SAndroid Build Coastguard Worker it gt 1493*c0909341SAndroid Build Coastguard Worker movgt \mx, r10 1494*c0909341SAndroid Build Coastguard Worker tst \my, #(0x7f << 14) 1495*c0909341SAndroid Build Coastguard Worker add \mx, r11, \mx, lsl #3 1496*c0909341SAndroid Build Coastguard Worker bne L(\type\()_8tap_hv) 1497*c0909341SAndroid Build Coastguard Worker 1498*c0909341SAndroid Build Coastguard Worker adr r10, L(\type\()_8tap_h_tbl) 1499*c0909341SAndroid Build Coastguard Worker vdup.32 q14, r12 // 6 - intermediate_bits 1500*c0909341SAndroid Build Coastguard Worker ldr r9, [r10, r9, lsl #2] 1501*c0909341SAndroid Build Coastguard Worker vneg.s32 q14, q14 // -(6-intermediate_bits) 1502*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1503*c0909341SAndroid Build Coastguard Worker vdup.16 q13, \bdmax // intermediate_bits 1504*c0909341SAndroid Build Coastguard Worker.else 1505*c0909341SAndroid Build Coastguard Worker vmov.i16 q13, #PREP_BIAS 1506*c0909341SAndroid Build Coastguard Worker.endif 1507*c0909341SAndroid Build Coastguard Worker add r10, r10, r9 1508*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1509*c0909341SAndroid Build Coastguard Worker vneg.s16 q13, q13 // -intermediate_bits 1510*c0909341SAndroid Build Coastguard Worker.endif 1511*c0909341SAndroid Build Coastguard Worker bx r10 1512*c0909341SAndroid Build Coastguard Worker 1513*c0909341SAndroid Build Coastguard Worker .align 2 1514*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_h_tbl): 1515*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1516*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1517*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1518*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1519*c0909341SAndroid Build 
Coastguard Worker .word 80f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1520*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1521*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1522*c0909341SAndroid Build Coastguard Worker 1523*c0909341SAndroid Build Coastguard Worker20: // 2xN h 1524*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1525*c0909341SAndroid Build Coastguard Worker add \mx, \mx, #2 1526*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [\mx] 1527*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 1528*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1529*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1530*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1531*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1532*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1533*c0909341SAndroid Build Coastguard Worker2: 1534*c0909341SAndroid Build Coastguard Worker vld1.16 {q2}, [\src], \s_strd 1535*c0909341SAndroid Build Coastguard Worker vld1.16 {q3}, [\sr2], \s_strd 1536*c0909341SAndroid Build Coastguard Worker vext.8 d5, d4, d5, #2 1537*c0909341SAndroid Build Coastguard Worker vext.8 d7, d6, d7, #2 1538*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1539*c0909341SAndroid Build Coastguard Worker vtrn.32 d4, d6 1540*c0909341SAndroid Build Coastguard Worker vtrn.32 d5, d7 1541*c0909341SAndroid Build Coastguard Worker vmull.s16 q1, d4, d0[0] 1542*c0909341SAndroid Build Coastguard Worker vmlal.s16 q1, d5, d0[1] 1543*c0909341SAndroid Build Coastguard Worker vmlal.s16 q1, d6, d0[2] 1544*c0909341SAndroid Build Coastguard Worker vmlal.s16 q1, d7, d0[3] 1545*c0909341SAndroid Build Coastguard Worker vrshl.s32 q1, q1, q14 // -(6-intermediate_bits) 1546*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d2, q1 1547*c0909341SAndroid Build Coastguard Worker vrshl.s16 d2, d2, d26 // 
-intermediate_bits 1548*c0909341SAndroid Build Coastguard Worker vmin.u16 d2, d2, d30 1549*c0909341SAndroid Build Coastguard Worker vst1.32 {d2[0]}, [\dst, :32], \d_strd 1550*c0909341SAndroid Build Coastguard Worker vst1.32 {d2[1]}, [\ds2, :32], \d_strd 1551*c0909341SAndroid Build Coastguard Worker bgt 2b 1552*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1553*c0909341SAndroid Build Coastguard Worker.endif 1554*c0909341SAndroid Build Coastguard Worker 1555*c0909341SAndroid Build Coastguard Worker40: // 4xN h 1556*c0909341SAndroid Build Coastguard Worker add \mx, \mx, #2 1557*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [\mx] 1558*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 1559*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1560*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1561*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1562*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1563*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1564*c0909341SAndroid Build Coastguard Worker4: 1565*c0909341SAndroid Build Coastguard Worker vld1.16 {q8}, [\src], \s_strd 1566*c0909341SAndroid Build Coastguard Worker vld1.16 {q11}, [\sr2], \s_strd 1567*c0909341SAndroid Build Coastguard Worker vext.8 d18, d16, d17, #2 1568*c0909341SAndroid Build Coastguard Worker vext.8 d19, d16, d17, #4 1569*c0909341SAndroid Build Coastguard Worker vext.8 d20, d16, d17, #6 1570*c0909341SAndroid Build Coastguard Worker vext.8 d24, d22, d23, #2 1571*c0909341SAndroid Build Coastguard Worker vext.8 d25, d22, d23, #4 1572*c0909341SAndroid Build Coastguard Worker vext.8 d21, d22, d23, #6 1573*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1574*c0909341SAndroid Build Coastguard Worker vmull.s16 q2, d16, d0[0] 1575*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d18, d0[1] 1576*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d19, d0[2] 1577*c0909341SAndroid Build 
Coastguard Worker vmlal.s16 q2, d20, d0[3] 1578*c0909341SAndroid Build Coastguard Worker vmull.s16 q3, d22, d0[0] 1579*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d24, d0[1] 1580*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d25, d0[2] 1581*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d21, d0[3] 1582*c0909341SAndroid Build Coastguard Worker vrshl.s32 q2, q2, q14 // -(6-intermediate_bits) 1583*c0909341SAndroid Build Coastguard Worker vrshl.s32 q3, q3, q14 // -(6-intermediate_bits) 1584*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1585*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d4, q2 1586*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d5, q3 1587*c0909341SAndroid Build Coastguard Worker vrshl.s16 q2, q2, q13 // -intermediate_bits 1588*c0909341SAndroid Build Coastguard Worker vmin.u16 q2, q2, q15 1589*c0909341SAndroid Build Coastguard Worker.else 1590*c0909341SAndroid Build Coastguard Worker vmovn.s32 d4, q2 1591*c0909341SAndroid Build Coastguard Worker vmovn.s32 d5, q3 1592*c0909341SAndroid Build Coastguard Worker vsub.i16 q2, q2, q13 // PREP_BIAS 1593*c0909341SAndroid Build Coastguard Worker.endif 1594*c0909341SAndroid Build Coastguard Worker vst1.16 {d4}, [\dst, :64], \d_strd 1595*c0909341SAndroid Build Coastguard Worker vst1.16 {d5}, [\ds2, :64], \d_strd 1596*c0909341SAndroid Build Coastguard Worker bgt 4b 1597*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1598*c0909341SAndroid Build Coastguard Worker 1599*c0909341SAndroid Build Coastguard Worker80: 1600*c0909341SAndroid Build Coastguard Worker160: 1601*c0909341SAndroid Build Coastguard Worker320: 1602*c0909341SAndroid Build Coastguard Worker640: 1603*c0909341SAndroid Build Coastguard Worker1280: // 8xN, 16xN, 32xN, ... 
h 1604*c0909341SAndroid Build Coastguard Worker vpush {q4-q5} 1605*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [\mx, :64] 1606*c0909341SAndroid Build Coastguard Worker sub \src, \src, #6 1607*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1608*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1609*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1610*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1611*c0909341SAndroid Build Coastguard Worker 1612*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, \w, lsl #1 1613*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, #16 1614*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1615*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1616*c0909341SAndroid Build Coastguard Worker sub \d_strd, \d_strd, \w, lsl #1 1617*c0909341SAndroid Build Coastguard Worker.endif 1618*c0909341SAndroid Build Coastguard Worker81: 1619*c0909341SAndroid Build Coastguard Worker vld1.16 {q8, q9}, [\src]! 1620*c0909341SAndroid Build Coastguard Worker vld1.16 {q10, q11}, [\sr2]! 
1621*c0909341SAndroid Build Coastguard Worker mov \mx, \w 1622*c0909341SAndroid Build Coastguard Worker 1623*c0909341SAndroid Build Coastguard Worker8: 1624*c0909341SAndroid Build Coastguard Worker vmull.s16 q1, d16, d0[0] 1625*c0909341SAndroid Build Coastguard Worker vmull.s16 q2, d17, d0[0] 1626*c0909341SAndroid Build Coastguard Worker vmull.s16 q3, d20, d0[0] 1627*c0909341SAndroid Build Coastguard Worker vmull.s16 q4, d21, d0[0] 1628*c0909341SAndroid Build Coastguard Worker.irpc i, 1234567 1629*c0909341SAndroid Build Coastguard Worker vext.8 q12, q8, q9, #(2*\i) 1630*c0909341SAndroid Build Coastguard Worker vext.8 q5, q10, q11, #(2*\i) 1631*c0909341SAndroid Build Coastguard Worker.if \i < 4 1632*c0909341SAndroid Build Coastguard Worker vmlal.s16 q1, d24, d0[\i] 1633*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d25, d0[\i] 1634*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d10, d0[\i] 1635*c0909341SAndroid Build Coastguard Worker vmlal.s16 q4, d11, d0[\i] 1636*c0909341SAndroid Build Coastguard Worker.else 1637*c0909341SAndroid Build Coastguard Worker vmlal.s16 q1, d24, d1[\i-4] 1638*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d25, d1[\i-4] 1639*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d10, d1[\i-4] 1640*c0909341SAndroid Build Coastguard Worker vmlal.s16 q4, d11, d1[\i-4] 1641*c0909341SAndroid Build Coastguard Worker.endif 1642*c0909341SAndroid Build Coastguard Worker.endr 1643*c0909341SAndroid Build Coastguard Worker subs \mx, \mx, #8 1644*c0909341SAndroid Build Coastguard Worker vrshl.s32 q1, q1, q14 // -(6-intermediate_bits) 1645*c0909341SAndroid Build Coastguard Worker vrshl.s32 q2, q2, q14 // -(6-intermediate_bits) 1646*c0909341SAndroid Build Coastguard Worker vrshl.s32 q3, q3, q14 // -(6-intermediate_bits) 1647*c0909341SAndroid Build Coastguard Worker vrshl.s32 q4, q4, q14 // -(6-intermediate_bits) 1648*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1649*c0909341SAndroid Build Coastguard Worker vqmovun.s32 
d2, q1 1650*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d3, q2 1651*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d4, q3 1652*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d5, q4 1653*c0909341SAndroid Build Coastguard Worker vrshl.s16 q1, q1, q13 // -intermediate_bits 1654*c0909341SAndroid Build Coastguard Worker vrshl.s16 q2, q2, q13 // -intermediate_bits 1655*c0909341SAndroid Build Coastguard Worker vmin.u16 q1, q1, q15 1656*c0909341SAndroid Build Coastguard Worker vmin.u16 q2, q2, q15 1657*c0909341SAndroid Build Coastguard Worker.else 1658*c0909341SAndroid Build Coastguard Worker vmovn.s32 d2, q1 1659*c0909341SAndroid Build Coastguard Worker vmovn.s32 d3, q2 1660*c0909341SAndroid Build Coastguard Worker vmovn.s32 d4, q3 1661*c0909341SAndroid Build Coastguard Worker vmovn.s32 d5, q4 1662*c0909341SAndroid Build Coastguard Worker vsub.i16 q1, q1, q13 // PREP_BIAS 1663*c0909341SAndroid Build Coastguard Worker vsub.i16 q2, q2, q13 // PREP_BIAS 1664*c0909341SAndroid Build Coastguard Worker.endif 1665*c0909341SAndroid Build Coastguard Worker vst1.16 {q1}, [\dst, :128]! 1666*c0909341SAndroid Build Coastguard Worker vst1.16 {q2}, [\ds2, :128]! 1667*c0909341SAndroid Build Coastguard Worker ble 9f 1668*c0909341SAndroid Build Coastguard Worker 1669*c0909341SAndroid Build Coastguard Worker vmov q8, q9 1670*c0909341SAndroid Build Coastguard Worker vmov q10, q11 1671*c0909341SAndroid Build Coastguard Worker vld1.16 {q9}, [\src]! 1672*c0909341SAndroid Build Coastguard Worker vld1.16 {q11}, [\sr2]! 
1673*c0909341SAndroid Build Coastguard Worker b 8b 1674*c0909341SAndroid Build Coastguard Worker 1675*c0909341SAndroid Build Coastguard Worker9: 1676*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd 1677*c0909341SAndroid Build Coastguard Worker add \ds2, \ds2, \d_strd 1678*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd 1679*c0909341SAndroid Build Coastguard Worker add \sr2, \sr2, \s_strd 1680*c0909341SAndroid Build Coastguard Worker 1681*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1682*c0909341SAndroid Build Coastguard Worker bgt 81b 1683*c0909341SAndroid Build Coastguard Worker vpop {q4-q5} 1684*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1685*c0909341SAndroid Build Coastguard Worker 1686*c0909341SAndroid Build Coastguard Worker 1687*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_v): 1688*c0909341SAndroid Build Coastguard Worker cmp \h, #4 1689*c0909341SAndroid Build Coastguard Worker ubfx r10, \my, #7, #7 1690*c0909341SAndroid Build Coastguard Worker and \my, \my, #0x7f 1691*c0909341SAndroid Build Coastguard Worker it gt 1692*c0909341SAndroid Build Coastguard Worker movgt \my, r10 1693*c0909341SAndroid Build Coastguard Worker add \my, r11, \my, lsl #3 1694*c0909341SAndroid Build Coastguard Worker 1695*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 1696*c0909341SAndroid Build Coastguard Worker vdup.32 q14, r12 // 6 - intermediate_bits 1697*c0909341SAndroid Build Coastguard Worker vmov.i16 q15, #PREP_BIAS 1698*c0909341SAndroid Build Coastguard Worker.endif 1699*c0909341SAndroid Build Coastguard Worker adr r10, L(\type\()_8tap_v_tbl) 1700*c0909341SAndroid Build Coastguard Worker ldr r9, [r10, r9, lsl #2] 1701*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 1702*c0909341SAndroid Build Coastguard Worker vneg.s32 q14, q14 // -(6-intermediate_bits) 1703*c0909341SAndroid Build Coastguard Worker.endif 1704*c0909341SAndroid Build Coastguard Worker add r10, r10, r9 1705*c0909341SAndroid 
Build Coastguard Worker bx r10 1706*c0909341SAndroid Build Coastguard Worker 1707*c0909341SAndroid Build Coastguard Worker .align 2 1708*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_v_tbl): 1709*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB 1710*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB 1711*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB 1712*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB 1713*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB 1714*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB 1715*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB 1716*c0909341SAndroid Build Coastguard Worker 1717*c0909341SAndroid Build Coastguard Worker20: // 2xN v 1718*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1719*c0909341SAndroid Build Coastguard Worker bgt 28f 1720*c0909341SAndroid Build Coastguard Worker 1721*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1722*c0909341SAndroid Build Coastguard Worker add \my, \my, #2 1723*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [\my] 1724*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1725*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1726*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1727*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1728*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1729*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1730*c0909341SAndroid Build Coastguard Worker 1731*c0909341SAndroid Build Coastguard Worker // 2x2 v 1732*c0909341SAndroid Build Coastguard Worker load_32 \src, \sr2, \s_strd, d1, d2, d3, d4, d5 1733*c0909341SAndroid Build Coastguard Worker 
interleave_1_32 d1, d2, d3, d4, d5 1734*c0909341SAndroid Build Coastguard Worker bgt 24f 1735*c0909341SAndroid Build Coastguard Worker vmull_vmlal_4 q8, d1, d2, d3, d4 1736*c0909341SAndroid Build Coastguard Worker vqrshrun_s32 6, q8, d16 1737*c0909341SAndroid Build Coastguard Worker vmin_u16 d30, d16 1738*c0909341SAndroid Build Coastguard Worker vst1_32 \d_strd, d16 1739*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1740*c0909341SAndroid Build Coastguard Worker 1741*c0909341SAndroid Build Coastguard Worker24: // 2x4 v 1742*c0909341SAndroid Build Coastguard Worker load_32 \sr2, \src, \s_strd, d6, d7 1743*c0909341SAndroid Build Coastguard Worker interleave_1_32 d5, d6, d7 1744*c0909341SAndroid Build Coastguard Worker vmull_vmlal_4 q8, d1, d2, d3, d4 1745*c0909341SAndroid Build Coastguard Worker vmull_vmlal_4 q9, d3, d4, d5, d6 1746*c0909341SAndroid Build Coastguard Worker vqrshrun_s32 6, q8, d16, q9, d17 1747*c0909341SAndroid Build Coastguard Worker vmin_u16 q15, q8 1748*c0909341SAndroid Build Coastguard Worker vst1_32 \d_strd, d16, d17 1749*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1750*c0909341SAndroid Build Coastguard Worker 1751*c0909341SAndroid Build Coastguard Worker28: // 2x6, 2x8, 2x12, 2x16 v 1752*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [\my, :64] 1753*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 1754*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1755*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 1756*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1757*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1758*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1759*c0909341SAndroid Build Coastguard Worker 1760*c0909341SAndroid Build Coastguard Worker load_32 \src, \sr2, \s_strd, d2, d3, d4, d5, d6, d7, d16 1761*c0909341SAndroid Build Coastguard Worker interleave_1_32 d2, d3, d4, d5, d6 1762*c0909341SAndroid Build 
Coastguard Worker interleave_1_32 d6, d7, d16 1763*c0909341SAndroid Build Coastguard Worker216: 1764*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1765*c0909341SAndroid Build Coastguard Worker load_32 \sr2, \src, \s_strd, d17, d18, d19, d20 1766*c0909341SAndroid Build Coastguard Worker interleave_1_32 d16, d17, d18, d19, d20 1767*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q13, d2, d3, d4, d5, d6, d7, d16, d17 1768*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q1, d4, d5, d6, d7, d16, d17, d18, d19 1769*c0909341SAndroid Build Coastguard Worker vqrshrun_s32 6, q13, d26, q1, d27 1770*c0909341SAndroid Build Coastguard Worker vmin_u16 q15, q13 1771*c0909341SAndroid Build Coastguard Worker vst1_32 \d_strd, d26, d27 1772*c0909341SAndroid Build Coastguard Worker ble 0f 1773*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1774*c0909341SAndroid Build Coastguard Worker vmov q1, q3 1775*c0909341SAndroid Build Coastguard Worker vmov q2, q8 1776*c0909341SAndroid Build Coastguard Worker vmov q3, q9 1777*c0909341SAndroid Build Coastguard Worker vmov d16, d20 1778*c0909341SAndroid Build Coastguard Worker beq 26f 1779*c0909341SAndroid Build Coastguard Worker b 216b 1780*c0909341SAndroid Build Coastguard Worker26: 1781*c0909341SAndroid Build Coastguard Worker load_32 \sr2, \src, \s_strd, d17, d18 1782*c0909341SAndroid Build Coastguard Worker interleave_1_32 d16, d17, d18 1783*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q13, d2, d3, d4, d5, d6, d7, d16, d17 1784*c0909341SAndroid Build Coastguard Worker vqrshrun_s32 6, q13, d26 1785*c0909341SAndroid Build Coastguard Worker vmin_u16 d30, d26 1786*c0909341SAndroid Build Coastguard Worker vst1_32 \d_strd, d26 1787*c0909341SAndroid Build Coastguard Worker0: 1788*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1789*c0909341SAndroid Build Coastguard Worker.endif 1790*c0909341SAndroid Build Coastguard Worker 1791*c0909341SAndroid Build Coastguard Worker40: 1792*c0909341SAndroid Build 
Coastguard Worker bgt 480f 1793*c0909341SAndroid Build Coastguard Worker 1794*c0909341SAndroid Build Coastguard Worker // 4x2, 4x4 v 1795*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1796*c0909341SAndroid Build Coastguard Worker add \my, \my, #2 1797*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [\my] 1798*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1799*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1800*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1801*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1802*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1803*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1804*c0909341SAndroid Build Coastguard Worker 1805*c0909341SAndroid Build Coastguard Worker load_reg \src, \sr2, \s_strd, d1, d2, d3, d4, d5 1806*c0909341SAndroid Build Coastguard Worker vmull_vmlal_4 q8, d1, d2, d3, d4 1807*c0909341SAndroid Build Coastguard Worker vmull_vmlal_4 q9, d2, d3, d4, d5 1808*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, q8, q9, d16, d17 1809*c0909341SAndroid Build Coastguard Worker ble 0f 1810*c0909341SAndroid Build Coastguard Worker load_reg \sr2, \src, \s_strd, d6, d7 1811*c0909341SAndroid Build Coastguard Worker vmull_vmlal_4 q8, d3, d4, d5, d6 1812*c0909341SAndroid Build Coastguard Worker vmull_vmlal_4 q9, d4, d5, d6, d7 1813*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, q8, q9, d16, d17 1814*c0909341SAndroid Build Coastguard Worker0: 1815*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1816*c0909341SAndroid Build Coastguard Worker 1817*c0909341SAndroid Build Coastguard Worker480: // 4x6, 4x8, 4x12, 4x16 v 1818*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [\my, :64] 1819*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 1820*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1821*c0909341SAndroid Build 
Coastguard Worker sub \src, \sr2, \s_strd 1822*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1823*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1824*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1825*c0909341SAndroid Build Coastguard Worker 1826*c0909341SAndroid Build Coastguard Worker load_reg \src, \sr2, \s_strd, d16, d17, d18, d19, d20, d21, d22 1827*c0909341SAndroid Build Coastguard Worker 1828*c0909341SAndroid Build Coastguard Worker48: 1829*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1830*c0909341SAndroid Build Coastguard Worker load_reg \sr2, \src, \s_strd, d23, d24, d25, d26 1831*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q1, d16, d17, d18, d19, d20, d21, d22, d23 1832*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q2, d17, d18, d19, d20, d21, d22, d23, d24 1833*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q3, d18, d19, d20, d21, d22, d23, d24, d25 1834*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q8, d19, d20, d21, d22, d23, d24, d25, d26 1835*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, q1, q2, d2, d3, q3, q8, d4, d5 1836*c0909341SAndroid Build Coastguard Worker ble 0f 1837*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1838*c0909341SAndroid Build Coastguard Worker vmov q8, q10 1839*c0909341SAndroid Build Coastguard Worker vmov q9, q11 1840*c0909341SAndroid Build Coastguard Worker vmov q10, q12 1841*c0909341SAndroid Build Coastguard Worker vmov d22, d26 1842*c0909341SAndroid Build Coastguard Worker beq 46f 1843*c0909341SAndroid Build Coastguard Worker b 48b 1844*c0909341SAndroid Build Coastguard Worker46: 1845*c0909341SAndroid Build Coastguard Worker load_reg \sr2, \src, \s_strd, d23, d24 1846*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q1, d16, d17, d18, d19, d20, d21, d22, d23 1847*c0909341SAndroid Build Coastguard Worker vmull_vmlal_8 q2, d17, d18, d19, d20, d21, d22, d23, d24 1848*c0909341SAndroid Build 
// Vertical-only 8-tap filtering, 8 and 16 pixels wide (16 bpc).
// The code-browser gutter text that had been fused into these lines is
// removed and the statements are back on separate lines; the instructions
// themselves are unchanged.

// Tail of the preceding narrower vertical path: store and return.
        shift_store_4   \type, \d_strd, q1, q2, d2, d3
0:
        pop             {r4-r11,pc}

// 8 pixels wide. Entered with the condition flags of an earlier comparison
// still live (that comparison is above this chunk).
80:
        bgt             880f

        // 8x2, 8x4 v
        cmp             \h, #2                  // consumed by "ble 0f" below
        add             \my, \my, #2            // use bytes 2..5 of the 8-byte filter: 4 taps
        vld1.32         {d0[]}, [\my]
        sub             \src, \src, \s_strd     // start one row above the block
        add             \ds2, \dst, \d_strd     // second destination row
        add             \sr2, \src, \s_strd     // second source row
        lsl             \s_strd, \s_strd, #1    // both pointer pairs step two rows at a time
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0                  // widen the coefficients to s16

        // load_reg / vmull_vmlal_4 / shift_store_8 are macros defined
        // elsewhere in the file. Five rows go to q1, q2, q3, q8, q9; each
        // vmull_vmlal_4 gets four consecutive rows, low halves then high
        // halves, for two output rows.
        load_reg        \src, \sr2, \s_strd, q1, q2, q3, q8, q9
        vmull_vmlal_4   q10, d2, d4, d6, d16
        vmull_vmlal_4   q11, d3, d5, d7, d17
        vmull_vmlal_4   q12, d4, d6, d16, d18
        vmull_vmlal_4   q13, d5, d7, d17, d19
        shift_store_8   \type, \d_strd, q10, q11, d20, d21, q12, q13, d22, d23
        // h == 2: done. Relies on the flags from "cmp \h, #2" surviving the
        // macros above (NOTE(review): macros not visible here - confirm).
        ble             0f
        // h == 4: two more input rows, two more output rows.
        load_reg        \sr2, \src, \s_strd, q10, q11
        vmull_vmlal_4   q1, d6, d16, d18, d20
        vmull_vmlal_4   q2, d7, d17, d19, d21
        vmull_vmlal_4   q12, d16, d18, d20, d22
        vmull_vmlal_4   q13, d17, d19, d21, d23
        shift_store_8   \type, \d_strd, q1, q2, d2, d3, q12, q13, d4, d5
0:
        pop             {r4-r11,pc}

// Taller / wider blocks: full 8-tap vertical filter, processed in columns
// of 8 pixels.
880:    // 8x6, 8x8, 8x16, 8x32 v
1680:   // 16x8, 16x16, ...
320:    // 32x8, 32x16, ...
640:
1280:
        vpush           {q4-q7}                 // d8-d15 are callee-saved; restored at 0: below
        vld1.8          {d0}, [\my, :64]        // all 8 coefficients
        sub             \src, \src, \s_strd
        sub             \src, \src, \s_strd, lsl #1     // src -= 3 rows in total
        vmovl.s8        q0, d0
        mov             \my, \h                 // my is free now: keep the full height for the column rewind at 9:
168:
        // Per-column setup.
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1

        // Prime the window with 7 rows (q5..q11).
        load_reg        \src, \sr2, \s_strd, q5, q6, q7, q8, q9, q10, q11

88:
        // Each pass produces up to 4 output rows, in two steps of 2.
        subs            \h, \h, #2
        load_reg        \sr2, \src, \s_strd, q12, q13
        vmull_vmlal_8   q1, d10, d12, d14, d16, d18, d20, d22, d24
        vmull_vmlal_8   q2, d11, d13, d15, d17, d19, d21, d23, d25
        vmull_vmlal_8   q3, d12, d14, d16, d18, d20, d22, d24, d26
        vmull_vmlal_8   q4, d13, d15, d17, d19, d21, d23, d25, d27
        shift_store_8   \type, \d_strd, q1, q2, d2, d3, q3, q4, d4, d5
        ble             9f                      // flags from the subs above
        subs            \h, \h, #2
        load_reg        \sr2, \src, \s_strd, q1, q2
        vmull_vmlal_8   q3, d14, d16, d18, d20, d22, d24, d26, d2
        vmull_vmlal_8   q4, d15, d17, d19, d21, d23, d25, d27, d3
        vmull_vmlal_8   q5, d16, d18, d20, d22, d24, d26, d2, d4
        vmull_vmlal_8   q6, d17, d19, d21, d23, d25, d27, d3, d5
        shift_store_8   \type, \d_strd, q3, q4, d6, d7, q5, q6, d8, d9
        ble             9f
        // Slide the 7-row window down by the 4 rows just consumed.
        vmov            q5, q9
        vmov            q6, q10
        vmov            q7, q11
        vmov            q8, q12
        vmov            q9, q13
        vmov            q10, q1
        vmov            q11, q2
        b               88b
9:
        // Advance to the next column of 8 pixels, if any.
        subs            \w, \w, #8
        ble             0f
        asr             \s_strd, \s_strd, #1    // back to single-row strides
        asr             \d_strd, \d_strd, #1
        mls             \src, \s_strd, \my, \src        // src -= s_strd * h
        mls             \dst, \d_strd, \my, \dst        // dst -= d_strd * h
        // A further 8 rows back for the rows read to fill the window.
        // NOTE(review): the exact count depends on load_reg - confirm.
        sub             \src, \src, \s_strd, lsl #3
        mov             \h, \my                 // restore the row counter
        add             \src, \src, #16         // 8 pixels * 2 bytes
        add             \dst, \dst, #16
        b               168b
0:
        vpop            {q4-q7}
        pop             {r4-r11,pc}

// 16 pixels wide. Flags from the earlier comparison are still live.
160:
        bgt             1680b

        // 16x2, 16x4 v
        vpush           {q6-q7}
        add             \my, \my, #2            // 4 taps from bytes 2..5, as in 80: above
        vld1.32         {d0[]}, [\my]
        sub             \src, \src, \s_strd     // start one row above the block
        vmovl.s8        q0, d0

        // Three rows of 16 pixels, two q registers per row.
        load_16s16      \src, \src, \s_strd, q6, q7, q8, q9, q10, q11
16:
        // One output row per iteration.
        load_16s16      \src, \src, \s_strd, q12, q13
        subs            \h, \h, #1
        vmull_vmlal_4   q1, d12, d16, d20, d24
        vmull_vmlal_4   q2, d13, d17, d21, d25
        vmull_vmlal_4   q3, d14, d18, d22, d26
        vmull_vmlal_4   q6, d15, d19, d23, d27
        shift_store_16  \type, \d_strd, q1, q2, d2, d3, q3, q6, d4, d5
        ble             0f
        // Start of the window shift (oldest row dropped).
        vmov            q6, q8
// Remainder of the window shift at the bottom of loop 16 (16-wide vertical
// filter), then loop. The code-browser gutter text that had been fused into
// these lines is removed and the statements are back on separate lines.
        vmov            q7, q9
        vmov            q8, q10
        vmov            q9, q11
        vmov            q10, q12
        vmov            q11, q13
        b               16b
0:
        vpop            {q6-q7}
        pop             {r4-r11,pc}


// Horizontal + vertical filtering: select the vertical filter, set up the
// shift / bias vectors and dispatch on block width.
//
// Register contents on entry, as far as the existing comments state them:
//   r12 = 6 - intermediate_bits, lr = 6 + intermediate_bits (put only).
//   r9 indexes the jump table below; r11 is the base that \my is added to.
//   NOTE(review): r9 and r11 are set up outside this chunk - confirm.
L(\type\()_8tap_hv):
        // None of the instructions up to "bx r10" is a flag-setting form,
        // so these flags are what the first "bgt" of each width handler tests.
        cmp             \h, #4
        ubfx            r10, \my, #7, #7        // r10 = my[13:7]
        and             \my, \my, #0x7f         // my  = my[6:0]
        it              gt
        movgt           \my, r10                // h > 4: take the index from the upper field
4:
        add             \my, r11, \my, lsl #3   // my = r11 + index * 8 (8 bytes per filter)

        adr             r10, L(\type\()_8tap_hv_tbl)
        neg             r12, r12                // -(6-intermediate_bits)
        ldr             r9, [r10, r9, lsl #2]   // table entry = handler offset from the table
        vdup.32         q14, r12                // -(6-intermediate_bits)
.ifc \type, put
        neg             r8, lr                  // -(6+intermediate_bits)
.else
        vmov.i16        q13, #PREP_BIAS
.endif
        add             r10, r10, r9
.ifc \type, put
        vdup.32         q13, r8                 // -(6+intermediate_bits)
.endif
        bx              r10

        // Offsets are relative to the table; widest block first.
        // NOTE(review): CONFIG_THUMB presumably supplies the interworking
        // bit for "bx" in Thumb builds - it is defined elsewhere.
        .align 2
L(\type\()_8tap_hv_tbl):
        .word 1280f - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 640f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 320f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 160f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 80f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 40f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 20f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB

// 2 pixels wide: only assembled for "put". For other types the label falls
// straight through to the code following the matching .endif.
20:
.ifc \type, put
        add             \mx, \mx, #2            // 4 horizontal taps from bytes 2..5 of the filter
        vld1.32         {d0[]}, [\mx]
        bgt             280f                    // h > 4 (flags from the dispatcher)
        add             \my, \my, #2            // 4 vertical taps as well
        vld1.32         {d2[]}, [\my]

        // 2x2, 2x4 hv
        sub             \sr2, \src, #2          // one 16-bit pixel to the left
        sub             \src, \sr2, \s_strd     // src = row above, sr2 = current row
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1    // both pointer pairs step two rows at a time
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0                  // widen coefficients to s16
        vmovl.s8        q1, d2
// 2-pixel-wide horizontal + vertical paths (assembled for "put" only; the
// enclosing .ifc is opened just above this point and closed after
// _8tap_filter_2 below). The code-browser gutter text that had been fused
// into these lines is removed and the statements are back on separate lines.

        // First row on its own: multiply pixels 0..3 and 1..4 by the 4 taps
        // element-wise, then reduce with pairwise adds to 2 filtered pixels.
        vld1.16         {q11}, [\src], \s_strd
        vext.8          d24, d22, d23, #2
        vmull.s16       q11, d22, d0
        vmull.s16       q12, d24, d0
        vpadd.s32       d22, d22, d23
        vpadd.s32       d23, d24, d25
        vpadd.s32       d22, d22, d23
        vrshl.s32       d16, d22, d28           // -(6-intermediate_bits)
        vmovn.i32       d16, q8                 // lanes 0-1 hold row 0; lanes 2-3 are replaced below
        bl              L(\type\()_8tap_filter_2)

        // d24 = {row 1, row 2}. Build d16 = {row 0, row 1}, d17 = {row 1, row 2}.
        vext.8          d16, d16, d16, #4
        vext.8          d16, d16, d24, #4
        vmov            d17, d24

2:
        bl              L(\type\()_8tap_filter_2)       // d24 = next two rows

        vext.8          d18, d17, d24, #4       // the row pair in between d17 and d24
        vmull.s16       q2, d16, d2[0]
        vmlal.s16       q2, d17, d2[1]
        vmlal.s16       q2, d18, d2[2]
        vmlal.s16       q2, d24, d2[3]

        vrshl.s32       q2, q2, q13             // -(6+intermediate_bits)
        vqmovun.s32     d4, q2
        vmin.u16        d4, d4, d30             // clamp against d30 (low half of q15, loaded outside this chunk)
        subs            \h, \h, #2
        vst1.32         {d4[0]}, [\dst, :32], \d_strd
        vst1.32         {d4[1]}, [\ds2, :32], \d_strd
        ble             0f
        vmov            d16, d18                // slide the window down two rows
        vmov            d17, d24
        b               2b

280:    // 2x8, 2x16, 2x32 hv
        vld1.8          {d2}, [\my, :64]        // all 8 vertical coefficients
        sub             \src, \src, #2          // one 16-bit pixel to the left
        sub             \sr2, \src, \s_strd, lsl #1     // sr2 = 2 rows above
        sub             \src, \sr2, \s_strd             // src = 3 rows above
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2

        // First row on its own, as in the 2x2 / 2x4 case.
        vld1.16         {q11}, [\src], \s_strd
        vext.8          d24, d22, d23, #2
        vmull.s16       q11, d22, d0
        vmull.s16       q12, d24, d0
        vpadd.s32       d22, d22, d23
        vpadd.s32       d23, d24, d25
        vpadd.s32       d22, d22, d23
        vrshl.s32       d16, d22, d28           // -(6-intermediate_bits)
        vmovn.i32       d16, q8

        bl              L(\type\()_8tap_filter_2)

        // Prime the window: d16..d21 each hold two consecutive rows,
        // overlapping by one row with their neighbours.
        vext.8          d16, d16, d16, #4
        vext.8          d16, d16, d24, #4
        vmov            d17, d24
        bl              L(\type\()_8tap_filter_2)
        vext.8          d18, d17, d24, #4
        vmov            d19, d24
        bl              L(\type\()_8tap_filter_2)
        vext.8          d20, d19, d24, #4
        vmov            d21, d24

28:
        bl              L(\type\()_8tap_filter_2)
        vext.8          d22, d21, d24, #4
        vmull.s16       q3, d16, d2[0]
        vmlal.s16       q3, d17, d2[1]
        vmlal.s16       q3, d18, d2[2]
        vmlal.s16       q3, d19, d2[3]
        vmlal.s16       q3, d20, d3[0]
        vmlal.s16       q3, d21, d3[1]
        vmlal.s16       q3, d22, d3[2]
        vmlal.s16       q3, d24, d3[3]

        vrshl.s32       q3, q3, q13             // -(6+intermediate_bits)
        vqmovun.s32     d6, q3
        vmin.u16        d6, d6, d30
        subs            \h, \h, #2
        vst1.32         {d6[0]}, [\dst, :32], \d_strd
        vst1.32         {d6[1]}, [\ds2, :32], \d_strd
        ble             0f
        vmov            q8, q9                  // slide the window down two rows
        vmov            q9, q10
        vmov            d20, d22
        vmov            d21, d24
        b               28b
0:
        pop             {r4-r11,pc}

// Horizontal 4-tap filter of two rows, 2 pixels each.
//   In:   \sr2, \src = the two row pointers (both post-incremented by
//         \s_strd), d0 = coefficients (s16), q14 = rounding shift.
//   Out:  d24 = {sr2 row px0, px1, src row px0, px1} as s16.
//   Clobbers q3, q11, q12.
L(\type\()_8tap_filter_2):
        vld1.16         {q11}, [\sr2], \s_strd
        vld1.16         {q12}, [\src], \s_strd
        vext.8          d23, d22, d23, #2       // each row shifted left by one pixel
        vext.8          d25, d24, d25, #2
        vtrn.32         q11, q12                // interleave so each d reg holds one tap position for both rows
        vmull.s16       q3, d22, d0[0]
        vmlal.s16       q3, d23, d0[1]
        vmlal.s16       q3, d24, d0[2]
        vmlal.s16       q3, d25, d0[3]
        vrshl.s32       q3, q3, q14             // -(6-intermediate_bits)
        vmovn.i32       d24, q3
        bx              lr
.endif

// 4 pixels wide. Flags from the dispatcher's height comparison are still live.
40:
        add             \mx, \mx, #2            // 4 horizontal taps from bytes 2..5 of the filter
        vld1.32         {d0[]}, [\mx]
        bgt             480f
        add             \my, \my, #2            // 4 vertical taps as well
        vld1.32         {d2[]}, [\my]
        sub             \sr2, \src, #2          // one 16-bit pixel to the left
        sub             \src, \sr2, \s_strd     // src = row above, sr2 = current row
        add             \ds2, \dst, \d_strd
// 4-pixel-wide horizontal + vertical paths (continuation of the setup at
// label 40). The code-browser gutter text that had been fused into these
// lines is removed and the statements are back on separate lines.
        lsl             \s_strd, \s_strd, #1    // both pointer pairs step two rows at a time
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0                  // widen coefficients to s16
        vmovl.s8        q1, d2

        // 4x2, 4x4 hv
        // First row on its own: 4-tap horizontal filter of 4 pixels.
        vld1.16         {q11}, [\src], \s_strd
        vext.8          d24, d22, d23, #2
        vext.8          d25, d22, d23, #4
        vext.8          d23, d22, d23, #6
        vmull.s16       q10, d22, d0[0]
        vmlal.s16       q10, d24, d0[1]
        vmlal.s16       q10, d25, d0[2]
        vmlal.s16       q10, d23, d0[3]
        vrshl.s32       q10, q10, q14           // -(6-intermediate_bits)
        vmovn.i32       d17, q10                // row 0

        bl              L(\type\()_8tap_filter_4)
        vmov            q9, q12                 // rows 1, 2

4:
        bl              L(\type\()_8tap_filter_4)       // d24, d25 = next two rows
        // Two output rows from the window d17, d18, d19, d24, d25.
        vmull.s16       q2, d17, d2[0]
        vmlal.s16       q2, d18, d2[1]
        vmlal.s16       q2, d19, d2[2]
        vmlal.s16       q2, d24, d2[3]
        vmull.s16       q3, d18, d2[0]
        vmlal.s16       q3, d19, d2[1]
        vmlal.s16       q3, d24, d2[2]
        vmlal.s16       q3, d25, d2[3]
.ifc \type, put
        vrshl.s32       q2, q2, q13             // -(6+intermediate_bits)
        vrshl.s32       q3, q3, q13             // -(6+intermediate_bits)
        vqmovun.s32     d4, q2
        vqmovun.s32     d5, q3
        vmin.u16        q2, q2, q15             // clamp against q15 (loaded outside this chunk)
.else
        vrshrn.i32      d4, q2, #6
        vrshrn.i32      d5, q3, #6
        vsub.i16        q2, q2, q13             // PREP_BIAS
.endif
        subs            \h, \h, #2

        vst1.16         {d4}, [\dst, :64], \d_strd
        vst1.16         {d5}, [\ds2, :64], \d_strd
        ble             0f
        vmov            d17, d19                // slide the window down two rows
        vmov            q9, q12
        b               4b
0:
        pop             {r4-r11,pc}

480:    // 4x8, 4x16, 4x32 hv
        vpush           {d13-d15}               // callee-saved; restored at 0: below
        vld1.8          {d2}, [\my, :64]        // all 8 vertical coefficients
        sub             \src, \src, #2          // one 16-bit pixel to the left
        sub             \sr2, \src, \s_strd, lsl #1     // sr2 = 2 rows above
        sub             \src, \sr2, \s_strd             // src = 3 rows above
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2

        // First row on its own, as in the 4x2 / 4x4 case.
        vld1.16         {q11}, [\src], \s_strd
        vext.8          d24, d22, d23, #2
        vext.8          d25, d22, d23, #4
        vext.8          d23, d22, d23, #6
        vmull.s16       q10, d22, d0[0]
        vmlal.s16       q10, d24, d0[1]
        vmlal.s16       q10, d25, d0[2]
        vmlal.s16       q10, d23, d0[3]
        vrshl.s32       q10, q10, q14           // -(6-intermediate_bits)
        vmovn.i32       d13, q10                // row 0

        // Prime the window: rows 1..6 in d14..d19.
        bl              L(\type\()_8tap_filter_4)
        vmov            q7, q12
        bl              L(\type\()_8tap_filter_4)
        vmov            q8, q12
        bl              L(\type\()_8tap_filter_4)
        vmov            q9, q12

48:
        bl              L(\type\()_8tap_filter_4)       // d24, d25 = next two rows
        vmull.s16       q2, d13, d2[0]
        vmlal.s16       q2, d14, d2[1]
        vmlal.s16       q2, d15, d2[2]
        vmlal.s16       q2, d16, d2[3]
        vmlal.s16       q2, d17, d3[0]
        vmlal.s16       q2, d18, d3[1]
        vmlal.s16       q2, d19, d3[2]
        vmlal.s16       q2, d24, d3[3]
        vmull.s16       q3, d14, d2[0]
        vmlal.s16       q3, d15, d2[1]
        vmlal.s16       q3, d16, d2[2]
        vmlal.s16       q3, d17, d2[3]
        vmlal.s16       q3, d18, d3[0]
        vmlal.s16       q3, d19, d3[1]
        vmlal.s16       q3, d24, d3[2]
        vmlal.s16       q3, d25, d3[3]
.ifc \type, put
        vrshl.s32       q2, q2, q13             // -(6+intermediate_bits)
        vrshl.s32       q3, q3, q13             // -(6+intermediate_bits)
        vqmovun.s32     d4, q2
        vqmovun.s32     d5, q3
        vmin.u16        q2, q2, q15
.else
        vrshrn.i32      d4, q2, #6
        vrshrn.i32      d5, q3, #6
        vsub.i16        q2, q2, q13             // PREP_BIAS
.endif
        subs            \h, \h, #2
        vst1.16         {d4}, [\dst, :64], \d_strd
        vst1.16         {d5}, [\ds2, :64], \d_strd
        ble             0f
        vmov            d13, d15                // slide the window down two rows
        vmov            q7, q8
        vmov            q8, q9
        vmov            q9, q12
        b               48b
0:
        vpop            {d13-d15}
        pop             {r4-r11,pc}

// Horizontal 4-tap filter of two rows, 4 pixels each.
//   In:   \sr2, \src = the two row pointers (both post-incremented by
//         \s_strd), d0 = coefficients (s16), q14 = rounding shift.
//   Out:  d24 = filtered \sr2 row, d25 = filtered \src row (s16).
//   Clobbers q3, q10, q11, q12.
L(\type\()_8tap_filter_4):
        vld1.16         {q10}, [\sr2], \s_strd
        vld1.16         {q11}, [\src], \s_strd
        vext.8          d24, d20, d21, #2       // \sr2 row shifted by 1, 2, 3 pixels
        vext.8          d25, d20, d21, #4
        vext.8          d21, d20, d21, #6
        vmull.s16       q3, d20, d0[0]
        vmlal.s16       q3, d24, d0[1]
        vmlal.s16       q3, d25, d0[2]
        vmlal.s16       q3, d21, d0[3]
        vext.8          d24, d22, d23, #2       // same for the \src row
        vext.8          d25, d22, d23, #4
        vext.8          d23, d22, d23, #6
        vmull.s16       q10, d22, d0[0]
        vmlal.s16       q10, d24, d0[1]
        vmlal.s16       q10, d25, d0[2]
        vmlal.s16       q10, d23, d0[3]
        vrshl.s32       q3, q3, q14             // -(6-intermediate_bits)
        vrshl.s32       q10, q10, q14           // -(6-intermediate_bits)
        vmovn.i32       d24, q3
        vmovn.i32       d25, q10
        bx              lr

80:
160:
320:
        // 8 or more pixels wide, horizontal + vertical. These labels share
        // the handler with the "80:" label directly above. The code-browser
        // gutter text that had been fused into these lines is removed and
        // the statements are back on separate lines.
        bgt             880f                    // h > 4 (flags from the dispatcher)
        add             \my, \my, #2            // 4 vertical taps from bytes 2..5 of the filter
        vld1.8          {d0}, [\mx, :64]        // all 8 horizontal coefficients
        vld1.32         {d2[]}, [\my]
        sub             \src, \src, #6          // three 16-bit pixels to the left
        sub             \src, \src, \s_strd     // one row above
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2
        mov             \my, \h                 // keep the full height for the column rewind at 9:

164:    // 8x2, 8x4, 16x2, 16x4, 32x2, 32x4 hv
        // Per-column setup.
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \d_strd, \d_strd, #1
        lsl             \s_strd, \s_strd, #1

        // First row on its own: 8-tap horizontal filter of 8 pixels.
        vld1.16         {q11, q12}, [\src], \s_strd
        vmull.s16       q2, d22, d0[0]
        vmull.s16       q3, d23, d0[0]
        // Re-splat the shift: q14 is used as an accumulator in loop 8 below.
        vdup.32         q14, r12                // -(6-intermediate_bits)
.irpc i, 1234567
        vext.8          q10, q11, q12, #(2*\i)
.if \i < 4
        vmlal.s16       q2, d20, d0[\i]
        vmlal.s16       q3, d21, d0[\i]
.else
        vmlal.s16       q2, d20, d1[\i - 4]
        vmlal.s16       q3, d21, d1[\i - 4]
.endif
.endr
        vrshl.s32       q2, q2, q14             // -(6-intermediate_bits)
        vrshl.s32       q3, q3, q14             // -(6-intermediate_bits)
        vmovn.i32       d16, q2
        vmovn.i32       d17, q3                 // q8 = row 0

        // _8tap_filter_8 is defined outside this chunk; as used here it
        // leaves two filtered rows in q11 and q12.
        bl              L(\type\()_8tap_filter_8)
        vmov            q9, q11                 // rows 1, 2
        vmov            q10, q12

8:
        bl              L(\type\()_8tap_filter_8)       // q11, q12 = next two rows
        // Two output rows (q2:q3 and q13:q14) from the window q8..q12.
        vmull.s16       q2, d16, d2[0]
        vmull.s16       q3, d17, d2[0]
        vmull.s16       q13, d18, d2[0]
        vmull.s16       q14, d19, d2[0]
.ifc \type, put
        // q8 is no longer needed as a row; reuse it for the final shift
        // (q13, which held it on entry, is an accumulator here).
        vdup.32         q8, r8                  // -(6+intermediate_bits)
.endif
        vmlal.s16       q2, d18, d2[1]
        vmlal.s16       q3, d19, d2[1]
        vmlal.s16       q13, d20, d2[1]
        vmlal.s16       q14, d21, d2[1]
        vmlal.s16       q2, d20, d2[2]
        vmlal.s16       q3, d21, d2[2]
        vmlal.s16       q13, d22, d2[2]
        vmlal.s16       q14, d23, d2[2]
        vmlal.s16       q2, d22, d2[3]
        vmlal.s16       q3, d23, d2[3]
        vmlal.s16       q13, d24, d2[3]
        vmlal.s16       q14, d25, d2[3]
.ifc \type, put
        // NOTE(review): q9 is written here but the clamp below uses q15,
        // and q9 is overwritten by "vmov q9, q11" before any visible read -
        // looks like a dead store; confirm against _8tap_filter_8.
        vdup.16         q9, \bdmax              // bitdepth_max
        vrshl.s32       q2, q2, q8              // -(6+intermediate_bits)
        vrshl.s32       q3, q3, q8              // -(6+intermediate_bits)
        vrshl.s32       q13, q13, q8            // -(6+intermediate_bits)
        vrshl.s32       q14, q14, q8            // -(6+intermediate_bits)
        vqmovun.s32     d4, q2
        vqmovun.s32     d5, q3
        vqmovun.s32     d6, q13
        vqmovun.s32     d7, q14
        vmin.u16        q2, q2, q15
        vmin.u16        q3, q3, q15
.else
        vmov.i16        q9, #PREP_BIAS          // q13 was reused as an accumulator above
        vrshrn.i32      d4, q2, #6
        vrshrn.i32      d5, q3, #6
        vrshrn.i32      d6, q13, #6
        vrshrn.i32      d7, q14, #6
        vsub.i16        q2, q2, q9              // PREP_BIAS
        vsub.i16        q3, q3, q9              // PREP_BIAS
.endif
        subs            \h, \h, #2
        vst1.16         {q2}, [\dst, :128], \d_strd
        vst1.16         {q3}, [\ds2, :128], \d_strd
        ble             9f
        vmov            q8, q10                 // slide the window down two rows
        vmov            q9, q11
        vmov            q10, q12
        b               8b
9:
        // Advance to the next column of 8 pixels, if any.
        subs            \w, \w, #8
        ble             0f
        asr             \s_strd, \s_strd, #1    // back to single-row strides
        asr             \d_strd, \d_strd, #1
        mls             \src, \s_strd, \my, \src        // src -= s_strd * h
        mls             \dst, \d_strd, \my, \dst        // dst -= d_strd * h
        // A further 4 rows back for the rows read to fill the window.
        // NOTE(review): the exact count depends on _8tap_filter_8 - confirm.
        sub             \src, \src, \s_strd, lsl #2
        mov             \h, \my                 // restore the row counter
        add             \src, \src, #16         // 8 pixels * 2 bytes
        add             \dst, \dst, #16
        b               164b
0:
        pop             {r4-r11,pc}

// Taller blocks: 8 horizontal and 8 vertical taps, in columns of 8 pixels.
880:    // 8x8, 8x16, ..., 16x8, ..., 32x8, ... hv
640:
1280:
        vpush           {q4-q7}                 // d8-d15 are callee-saved
        vld1.8          {d0}, [\mx, :64]        // all 8 horizontal coefficients
        vld1.8          {d2}, [\my, :64]        // all 8 vertical coefficients
        sub             \src, \src, #6          // three 16-bit pixels to the left
        sub             \src, \src, \s_strd
        sub             \src, \src, \s_strd, lsl #1     // src -= 3 rows in total
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2
        mov             \my, \h                 // keep the full height in my

168:
        // Per-column setup.
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \d_strd, \d_strd, #1
        lsl             \s_strd, \s_strd, #1

        // First row on its own: 8-tap horizontal filter of 8 pixels.
        vld1.16         {q11, q12}, [\src], \s_strd
        vmull.s16       q2, d22, d0[0]
        vmull.s16       q3, d23, d0[0]
        vdup.32         q14, r12                // -(6-intermediate_bits)
.irpc i, 1234567
        vext.8          q10, q11, q12, #(2*\i)
.if \i < 4
        vmlal.s16       q2, d20, d0[\i]
        vmlal.s16       q3, d21, d0[\i]
.else
        vmlal.s16       q2, d20, d1[\i - 4]
        vmlal.s16       q3, d21, d1[\i - 4]
Build Coastguard Worker.endif 2430*c0909341SAndroid Build Coastguard Worker.endr 2431*c0909341SAndroid Build Coastguard Worker vrshl.s32 q2, q2, q14 // -(6-intermediate_bits) 2432*c0909341SAndroid Build Coastguard Worker vrshl.s32 q3, q3, q14 // -(6-intermediate_bits) 2433*c0909341SAndroid Build Coastguard Worker vmovn.i32 d8, q2 2434*c0909341SAndroid Build Coastguard Worker vmovn.i32 d9, q3 2435*c0909341SAndroid Build Coastguard Worker 2436*c0909341SAndroid Build Coastguard Worker bl L(\type\()_8tap_filter_8) 2437*c0909341SAndroid Build Coastguard Worker vmov q5, q11 2438*c0909341SAndroid Build Coastguard Worker vmov q6, q12 2439*c0909341SAndroid Build Coastguard Worker bl L(\type\()_8tap_filter_8) 2440*c0909341SAndroid Build Coastguard Worker vmov q7, q11 2441*c0909341SAndroid Build Coastguard Worker vmov q8, q12 2442*c0909341SAndroid Build Coastguard Worker bl L(\type\()_8tap_filter_8) 2443*c0909341SAndroid Build Coastguard Worker vmov q9, q11 2444*c0909341SAndroid Build Coastguard Worker vmov q10, q12 2445*c0909341SAndroid Build Coastguard Worker 2446*c0909341SAndroid Build Coastguard Worker88: 2447*c0909341SAndroid Build Coastguard Worker bl L(\type\()_8tap_filter_8) 2448*c0909341SAndroid Build Coastguard Worker vmull.s16 q2, d8, d2[0] 2449*c0909341SAndroid Build Coastguard Worker vmull.s16 q3, d9, d2[0] 2450*c0909341SAndroid Build Coastguard Worker vmull.s16 q13, d10, d2[0] 2451*c0909341SAndroid Build Coastguard Worker vmull.s16 q14, d11, d2[0] 2452*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2453*c0909341SAndroid Build Coastguard Worker vdup.32 q4, r8 // -(6+intermediate_bits) 2454*c0909341SAndroid Build Coastguard Worker.endif 2455*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d10, d2[1] 2456*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d11, d2[1] 2457*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d12, d2[1] 2458*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d13, d2[1] 2459*c0909341SAndroid Build Coastguard 
Worker vmlal.s16 q2, d12, d2[2] 2460*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d13, d2[2] 2461*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d14, d2[2] 2462*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d15, d2[2] 2463*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d14, d2[3] 2464*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d15, d2[3] 2465*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d16, d2[3] 2466*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d17, d2[3] 2467*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d16, d3[0] 2468*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d17, d3[0] 2469*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d18, d3[0] 2470*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d19, d3[0] 2471*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d18, d3[1] 2472*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d19, d3[1] 2473*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d20, d3[1] 2474*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d21, d3[1] 2475*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d20, d3[2] 2476*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d21, d3[2] 2477*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d22, d3[2] 2478*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d23, d3[2] 2479*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d22, d3[3] 2480*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d23, d3[3] 2481*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d24, d3[3] 2482*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d25, d3[3] 2483*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2484*c0909341SAndroid Build Coastguard Worker vrshl.s32 q2, q2, q4 // -(6+intermediate_bits) 2485*c0909341SAndroid Build Coastguard Worker vrshl.s32 q3, q3, q4 // -(6+intermediate_bits) 2486*c0909341SAndroid Build Coastguard Worker vrshl.s32 q13, q13, q4 // 
-(6+intermediate_bits) 2487*c0909341SAndroid Build Coastguard Worker vrshl.s32 q14, q14, q4 // -(6+intermediate_bits) 2488*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d4, q2 2489*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d5, q3 2490*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d6, q13 2491*c0909341SAndroid Build Coastguard Worker vqmovun.s32 d7, q14 2492*c0909341SAndroid Build Coastguard Worker vmin.u16 q2, q2, q15 2493*c0909341SAndroid Build Coastguard Worker vmin.u16 q3, q3, q15 2494*c0909341SAndroid Build Coastguard Worker.else 2495*c0909341SAndroid Build Coastguard Worker vmov.i16 q5, #PREP_BIAS 2496*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d4, q2, #6 2497*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d5, q3, #6 2498*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d6, q13, #6 2499*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d7, q14, #6 2500*c0909341SAndroid Build Coastguard Worker vsub.i16 q2, q2, q5 // PREP_BIAS 2501*c0909341SAndroid Build Coastguard Worker vsub.i16 q3, q3, q5 // PREP_BIAS 2502*c0909341SAndroid Build Coastguard Worker.endif 2503*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2504*c0909341SAndroid Build Coastguard Worker vst1.16 {q2}, [\dst, :128], \d_strd 2505*c0909341SAndroid Build Coastguard Worker vst1.16 {q3}, [\ds2, :128], \d_strd 2506*c0909341SAndroid Build Coastguard Worker ble 9f 2507*c0909341SAndroid Build Coastguard Worker vmov q4, q6 2508*c0909341SAndroid Build Coastguard Worker vmov q5, q7 2509*c0909341SAndroid Build Coastguard Worker vmov q6, q8 2510*c0909341SAndroid Build Coastguard Worker vmov q7, q9 2511*c0909341SAndroid Build Coastguard Worker vmov q8, q10 2512*c0909341SAndroid Build Coastguard Worker vmov q9, q11 2513*c0909341SAndroid Build Coastguard Worker vmov q10, q12 2514*c0909341SAndroid Build Coastguard Worker b 88b 2515*c0909341SAndroid Build Coastguard Worker9: 2516*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 2517*c0909341SAndroid Build 
Coastguard Worker ble 0f 2518*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2519*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2520*c0909341SAndroid Build Coastguard Worker mls \src, \s_strd, \my, \src 2521*c0909341SAndroid Build Coastguard Worker mls \dst, \d_strd, \my, \dst 2522*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #3 2523*c0909341SAndroid Build Coastguard Worker mov \h, \my 2524*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 2525*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2526*c0909341SAndroid Build Coastguard Worker b 168b 2527*c0909341SAndroid Build Coastguard Worker0: 2528*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 2529*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2530*c0909341SAndroid Build Coastguard Worker 2531*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_filter_8): 2532*c0909341SAndroid Build Coastguard Worker vld1.16 {q13, q14}, [\sr2], \s_strd 2533*c0909341SAndroid Build Coastguard Worker vmull.s16 q2, d26, d0[0] 2534*c0909341SAndroid Build Coastguard Worker vmull.s16 q3, d27, d0[0] 2535*c0909341SAndroid Build Coastguard Worker.irpc i, 1234567 2536*c0909341SAndroid Build Coastguard Worker vext.8 q12, q13, q14, #(2*\i) 2537*c0909341SAndroid Build Coastguard Worker.if \i < 4 2538*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d24, d0[\i] 2539*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d25, d0[\i] 2540*c0909341SAndroid Build Coastguard Worker.else 2541*c0909341SAndroid Build Coastguard Worker vmlal.s16 q2, d24, d1[\i - 4] 2542*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d25, d1[\i - 4] 2543*c0909341SAndroid Build Coastguard Worker.endif 2544*c0909341SAndroid Build Coastguard Worker.endr 2545*c0909341SAndroid Build Coastguard Worker vdup.32 q12, r12 // -(6-intermediate_bits) 2546*c0909341SAndroid Build Coastguard Worker vld1.16 {q13, q14}, [\src], \s_strd 2547*c0909341SAndroid Build 
Coastguard Worker vrshl.s32 q2, q2, q12 // -(6-intermediate_bits) 2548*c0909341SAndroid Build Coastguard Worker vrshl.s32 q3, q3, q12 // -(6-intermediate_bits) 2549*c0909341SAndroid Build Coastguard Worker vmovn.i32 d4, q2 2550*c0909341SAndroid Build Coastguard Worker vmovn.i32 d5, q3 2551*c0909341SAndroid Build Coastguard Worker 2552*c0909341SAndroid Build Coastguard Worker vmull.s16 q3, d26, d0[0] 2553*c0909341SAndroid Build Coastguard Worker vmull.s16 q11, d27, d0[0] 2554*c0909341SAndroid Build Coastguard Worker.irpc i, 1234567 2555*c0909341SAndroid Build Coastguard Worker vext.8 q12, q13, q14, #(2*\i) 2556*c0909341SAndroid Build Coastguard Worker.if \i < 4 2557*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d24, d0[\i] 2558*c0909341SAndroid Build Coastguard Worker vmlal.s16 q11, d25, d0[\i] 2559*c0909341SAndroid Build Coastguard Worker.else 2560*c0909341SAndroid Build Coastguard Worker vmlal.s16 q3, d24, d1[\i - 4] 2561*c0909341SAndroid Build Coastguard Worker vmlal.s16 q11, d25, d1[\i - 4] 2562*c0909341SAndroid Build Coastguard Worker.endif 2563*c0909341SAndroid Build Coastguard Worker.endr 2564*c0909341SAndroid Build Coastguard Worker vdup.32 q13, r12 // -(6-intermediate_bits) 2565*c0909341SAndroid Build Coastguard Worker vrshl.s32 q3, q3, q13 // -(6-intermediate_bits) 2566*c0909341SAndroid Build Coastguard Worker vrshl.s32 q11, q11, q13 // -(6-intermediate_bits) 2567*c0909341SAndroid Build Coastguard Worker 2568*c0909341SAndroid Build Coastguard Worker vmovn.i32 d24, q3 2569*c0909341SAndroid Build Coastguard Worker vmovn.i32 d25, q11 2570*c0909341SAndroid Build Coastguard Worker vmov q11, q2 2571*c0909341SAndroid Build Coastguard Worker bx lr 2572*c0909341SAndroid Build Coastguard Workerendfunc 2573*c0909341SAndroid Build Coastguard Worker 2574*c0909341SAndroid Build Coastguard Workerfunction \type\()_bilin_16bpc_neon, export=1 2575*c0909341SAndroid Build Coastguard Worker push {r4-r11,lr} 2576*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, 
[sp, #36] 2577*c0909341SAndroid Build Coastguard Worker ldrd r6, r7, [sp, #44] 2578*c0909341SAndroid Build Coastguard Worker.ifc \bdmax, r8 2579*c0909341SAndroid Build Coastguard Worker ldr r8, [sp, #52] 2580*c0909341SAndroid Build Coastguard Worker.endif 2581*c0909341SAndroid Build Coastguard Worker vdup.16 q1, \mx 2582*c0909341SAndroid Build Coastguard Worker vdup.16 q3, \my 2583*c0909341SAndroid Build Coastguard Worker rsb r9, \mx, #16 2584*c0909341SAndroid Build Coastguard Worker rsb r10, \my, #16 2585*c0909341SAndroid Build Coastguard Worker vdup.16 q0, r9 2586*c0909341SAndroid Build Coastguard Worker vdup.16 q2, r10 2587*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 2588*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \w, #1 2589*c0909341SAndroid Build Coastguard Worker.endif 2590*c0909341SAndroid Build Coastguard Worker clz \bdmax, \bdmax // bitdepth_max 2591*c0909341SAndroid Build Coastguard Worker clz r9, \w 2592*c0909341SAndroid Build Coastguard Worker sub \bdmax, \bdmax, #18 // intermediate_bits = clz(bitdepth_max) - 18 2593*c0909341SAndroid Build Coastguard Worker cmp \mx, #0 2594*c0909341SAndroid Build Coastguard Worker sub r9, r9, #24 2595*c0909341SAndroid Build Coastguard Worker rsb r11, \bdmax, #4 // 4 - intermediate_bits 2596*c0909341SAndroid Build Coastguard Worker add r12, \bdmax, #4 // 4 + intermediate_bits 2597*c0909341SAndroid Build Coastguard Worker bne L(\type\()_bilin_h) 2598*c0909341SAndroid Build Coastguard Worker cmp \my, #0 2599*c0909341SAndroid Build Coastguard Worker bne L(\type\()_bilin_v) 2600*c0909341SAndroid Build Coastguard Worker b \type\()_neon 2601*c0909341SAndroid Build Coastguard Worker 2602*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_h): 2603*c0909341SAndroid Build Coastguard Worker cmp \my, #0 2604*c0909341SAndroid Build Coastguard Worker bne L(\type\()_bilin_hv) 2605*c0909341SAndroid Build Coastguard Worker 2606*c0909341SAndroid Build Coastguard Worker adr r10, L(\type\()_bilin_h_tbl) 
2607*c0909341SAndroid Build Coastguard Worker vdup.16 q15, r11 // 4 - intermediate_bits 2608*c0909341SAndroid Build Coastguard Worker ldr r9, [r10, r9, lsl #2] 2609*c0909341SAndroid Build Coastguard Worker vneg.s16 q15, q15 // -(4-intermediate_bits) 2610*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2611*c0909341SAndroid Build Coastguard Worker vdup.16 q14, \bdmax // intermediate_bits 2612*c0909341SAndroid Build Coastguard Worker.else 2613*c0909341SAndroid Build Coastguard Worker vmov.i16 q14, #PREP_BIAS 2614*c0909341SAndroid Build Coastguard Worker.endif 2615*c0909341SAndroid Build Coastguard Worker add r10, r10, r9 2616*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2617*c0909341SAndroid Build Coastguard Worker vneg.s16 q14, q14 // -intermediate_bits 2618*c0909341SAndroid Build Coastguard Worker.endif 2619*c0909341SAndroid Build Coastguard Worker bx r10 2620*c0909341SAndroid Build Coastguard Worker 2621*c0909341SAndroid Build Coastguard Worker .align 2 2622*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_h_tbl): 2623*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2624*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2625*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2626*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2627*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2628*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2629*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2630*c0909341SAndroid Build Coastguard Worker 2631*c0909341SAndroid Build Coastguard Worker20: // 2xN h 2632*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2633*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2634*c0909341SAndroid 
Build Coastguard Worker add \sr2, \src, \s_strd 2635*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2636*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2637*c0909341SAndroid Build Coastguard Worker2: 2638*c0909341SAndroid Build Coastguard Worker vld1.16 {d16}, [\src], \s_strd 2639*c0909341SAndroid Build Coastguard Worker vld1.16 {d18}, [\sr2], \s_strd 2640*c0909341SAndroid Build Coastguard Worker vext.8 d17, d16, d16, #2 2641*c0909341SAndroid Build Coastguard Worker vext.8 d19, d18, d18, #2 2642*c0909341SAndroid Build Coastguard Worker vtrn.32 d16, d18 2643*c0909341SAndroid Build Coastguard Worker vtrn.32 d17, d19 2644*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2645*c0909341SAndroid Build Coastguard Worker vmul.i16 d16, d16, d0 2646*c0909341SAndroid Build Coastguard Worker vmla.i16 d16, d17, d2 2647*c0909341SAndroid Build Coastguard Worker vrshl.u16 d16, d16, d30 2648*c0909341SAndroid Build Coastguard Worker vrshl.u16 d16, d16, d28 2649*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[0]}, [\dst, :32], \d_strd 2650*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[1]}, [\ds2, :32], \d_strd 2651*c0909341SAndroid Build Coastguard Worker bgt 2b 2652*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2653*c0909341SAndroid Build Coastguard Worker.endif 2654*c0909341SAndroid Build Coastguard Worker 2655*c0909341SAndroid Build Coastguard Worker40: // 4xN h 2656*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2657*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2658*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2659*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2660*c0909341SAndroid Build Coastguard Worker4: 2661*c0909341SAndroid Build Coastguard Worker vld1.16 {q8}, [\src], \s_strd 2662*c0909341SAndroid Build Coastguard Worker vld1.16 {q10}, [\sr2], \s_strd 2663*c0909341SAndroid Build Coastguard Worker vext.8 q9, q8, q8, #2 
2664*c0909341SAndroid Build Coastguard Worker vext.8 q11, q10, q10, #2 2665*c0909341SAndroid Build Coastguard Worker vmov d17, d20 2666*c0909341SAndroid Build Coastguard Worker vmov d19, d22 2667*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2668*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q8, q0 2669*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q9, q1 2670*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q15 2671*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2672*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q14 2673*c0909341SAndroid Build Coastguard Worker.else 2674*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q14 2675*c0909341SAndroid Build Coastguard Worker.endif 2676*c0909341SAndroid Build Coastguard Worker vst1.16 {d16}, [\dst, :64], \d_strd 2677*c0909341SAndroid Build Coastguard Worker vst1.16 {d17}, [\ds2, :64], \d_strd 2678*c0909341SAndroid Build Coastguard Worker bgt 4b 2679*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2680*c0909341SAndroid Build Coastguard Worker 2681*c0909341SAndroid Build Coastguard Worker80: // 8xN h 2682*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2683*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2684*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2685*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2686*c0909341SAndroid Build Coastguard Worker8: 2687*c0909341SAndroid Build Coastguard Worker vld1.16 {d16, d17, d18}, [\src], \s_strd 2688*c0909341SAndroid Build Coastguard Worker vld1.16 {d20, d21, d22}, [\sr2], \s_strd 2689*c0909341SAndroid Build Coastguard Worker vext.8 q9, q8, q9, #2 2690*c0909341SAndroid Build Coastguard Worker vext.8 q11, q10, q11, #2 2691*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2692*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q8, q0 2693*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q9, q1 2694*c0909341SAndroid Build 
Coastguard Worker vmul.i16 q10, q10, q0 2695*c0909341SAndroid Build Coastguard Worker vmla.i16 q10, q11, q1 2696*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q15 2697*c0909341SAndroid Build Coastguard Worker vrshl.u16 q10, q10, q15 2698*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2699*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q14 2700*c0909341SAndroid Build Coastguard Worker vrshl.u16 q10, q10, q14 2701*c0909341SAndroid Build Coastguard Worker.else 2702*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q14 2703*c0909341SAndroid Build Coastguard Worker vsub.i16 q10, q10, q14 2704*c0909341SAndroid Build Coastguard Worker.endif 2705*c0909341SAndroid Build Coastguard Worker vst1.16 {q8}, [\dst, :128], \d_strd 2706*c0909341SAndroid Build Coastguard Worker vst1.16 {q10}, [\ds2, :128], \d_strd 2707*c0909341SAndroid Build Coastguard Worker bgt 8b 2708*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2709*c0909341SAndroid Build Coastguard Worker160: 2710*c0909341SAndroid Build Coastguard Worker320: 2711*c0909341SAndroid Build Coastguard Worker640: 2712*c0909341SAndroid Build Coastguard Worker1280: // 16xN, 32xN, ... 
h 2713*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 2714*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2715*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2716*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2717*c0909341SAndroid Build Coastguard Worker 2718*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, \w, lsl #1 2719*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, #16 2720*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2721*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2722*c0909341SAndroid Build Coastguard Worker sub \d_strd, \d_strd, \w, lsl #1 2723*c0909341SAndroid Build Coastguard Worker.endif 2724*c0909341SAndroid Build Coastguard Worker161: 2725*c0909341SAndroid Build Coastguard Worker vld1.16 {q4}, [\src]! 2726*c0909341SAndroid Build Coastguard Worker vld1.16 {q9}, [\sr2]! 2727*c0909341SAndroid Build Coastguard Worker mov \mx, \w 2728*c0909341SAndroid Build Coastguard Worker 2729*c0909341SAndroid Build Coastguard Worker16: 2730*c0909341SAndroid Build Coastguard Worker vld1.16 {q5, q6}, [\src]! 2731*c0909341SAndroid Build Coastguard Worker vld1.16 {q10, q11}, [\sr2]! 
2732*c0909341SAndroid Build Coastguard Worker vext.8 q7, q4, q5, #2 2733*c0909341SAndroid Build Coastguard Worker vext.8 q8, q5, q6, #2 2734*c0909341SAndroid Build Coastguard Worker vext.8 q12, q9, q10, #2 2735*c0909341SAndroid Build Coastguard Worker vext.8 q13, q10, q11, #2 2736*c0909341SAndroid Build Coastguard Worker vmul.i16 q4, q4, q0 2737*c0909341SAndroid Build Coastguard Worker vmla.i16 q4, q7, q1 2738*c0909341SAndroid Build Coastguard Worker vmul.i16 q5, q5, q0 2739*c0909341SAndroid Build Coastguard Worker vmla.i16 q5, q8, q1 2740*c0909341SAndroid Build Coastguard Worker vmul.i16 q9, q9, q0 2741*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q12, q1 2742*c0909341SAndroid Build Coastguard Worker vmul.i16 q10, q10, q0 2743*c0909341SAndroid Build Coastguard Worker vmla.i16 q10, q13, q1 2744*c0909341SAndroid Build Coastguard Worker vrshl.u16 q4, q4, q15 2745*c0909341SAndroid Build Coastguard Worker vrshl.u16 q5, q5, q15 2746*c0909341SAndroid Build Coastguard Worker vrshl.u16 q9, q9, q15 2747*c0909341SAndroid Build Coastguard Worker vrshl.u16 q10, q10, q15 2748*c0909341SAndroid Build Coastguard Worker subs \mx, \mx, #16 2749*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2750*c0909341SAndroid Build Coastguard Worker vrshl.u16 q4, q4, q14 2751*c0909341SAndroid Build Coastguard Worker vrshl.u16 q5, q5, q14 2752*c0909341SAndroid Build Coastguard Worker vrshl.u16 q9, q9, q14 2753*c0909341SAndroid Build Coastguard Worker vrshl.u16 q10, q10, q14 2754*c0909341SAndroid Build Coastguard Worker.else 2755*c0909341SAndroid Build Coastguard Worker vsub.i16 q4, q4, q14 2756*c0909341SAndroid Build Coastguard Worker vsub.i16 q5, q5, q14 2757*c0909341SAndroid Build Coastguard Worker vsub.i16 q9, q9, q14 2758*c0909341SAndroid Build Coastguard Worker vsub.i16 q10, q10, q14 2759*c0909341SAndroid Build Coastguard Worker.endif 2760*c0909341SAndroid Build Coastguard Worker vst1.16 {q4, q5}, [\dst, :128]! 
2761*c0909341SAndroid Build Coastguard Worker vst1.16 {q9, q10}, [\ds2, :128]! 2762*c0909341SAndroid Build Coastguard Worker ble 9f 2763*c0909341SAndroid Build Coastguard Worker 2764*c0909341SAndroid Build Coastguard Worker vmov q4, q6 2765*c0909341SAndroid Build Coastguard Worker vmov q9, q11 2766*c0909341SAndroid Build Coastguard Worker b 16b 2767*c0909341SAndroid Build Coastguard Worker 2768*c0909341SAndroid Build Coastguard Worker9: 2769*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd 2770*c0909341SAndroid Build Coastguard Worker add \ds2, \ds2, \d_strd 2771*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd 2772*c0909341SAndroid Build Coastguard Worker add \sr2, \sr2, \s_strd 2773*c0909341SAndroid Build Coastguard Worker 2774*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2775*c0909341SAndroid Build Coastguard Worker bgt 161b 2776*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 2777*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2778*c0909341SAndroid Build Coastguard Worker 2779*c0909341SAndroid Build Coastguard Worker 2780*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_v): 2781*c0909341SAndroid Build Coastguard Worker cmp \h, #4 2782*c0909341SAndroid Build Coastguard Worker adr r10, L(\type\()_bilin_v_tbl) 2783*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 2784*c0909341SAndroid Build Coastguard Worker vdup.16 q15, r11 // 4 - intermediate_bits 2785*c0909341SAndroid Build Coastguard Worker.endif 2786*c0909341SAndroid Build Coastguard Worker ldr r9, [r10, r9, lsl #2] 2787*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 2788*c0909341SAndroid Build Coastguard Worker vmov.i16 q14, #PREP_BIAS 2789*c0909341SAndroid Build Coastguard Worker vneg.s16 q15, q15 // -(4-intermediate_bits) 2790*c0909341SAndroid Build Coastguard Worker.endif 2791*c0909341SAndroid Build Coastguard Worker add r10, r10, r9 2792*c0909341SAndroid Build Coastguard Worker bx r10 2793*c0909341SAndroid Build 
Coastguard Worker 2794*c0909341SAndroid Build Coastguard Worker .align 2 2795*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_v_tbl): 2796*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2797*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2798*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2799*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2800*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2801*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2802*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2803*c0909341SAndroid Build Coastguard Worker 2804*c0909341SAndroid Build Coastguard Worker20: // 2xN v 2805*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2806*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2807*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2808*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2809*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2810*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2811*c0909341SAndroid Build Coastguard Worker 2812*c0909341SAndroid Build Coastguard Worker // 2x2 v 2813*c0909341SAndroid Build Coastguard Worker vld1.32 {d16[]}, [\src], \s_strd 2814*c0909341SAndroid Build Coastguard Worker bgt 24f 2815*c0909341SAndroid Build Coastguard Worker22: 2816*c0909341SAndroid Build Coastguard Worker vld1.32 {d17[]}, [\sr2], \s_strd 2817*c0909341SAndroid Build Coastguard Worker vld1.32 {d18[]}, [\src], \s_strd 2818*c0909341SAndroid Build Coastguard Worker vext.8 d16, d16, d17, #4 2819*c0909341SAndroid Build Coastguard Worker vext.8 d17, d17, d18, #4 2820*c0909341SAndroid Build Coastguard Worker vmul.i16 d16, d16, d4 
2821*c0909341SAndroid Build Coastguard Worker vmla.i16 d16, d17, d6 2822*c0909341SAndroid Build Coastguard Worker vrshr.u16 d16, d16, #4 2823*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[0]}, [\dst, :32] 2824*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[1]}, [\ds2, :32] 2825*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2826*c0909341SAndroid Build Coastguard Worker24: // 2x4, 2x6, 2x8, ... v 2827*c0909341SAndroid Build Coastguard Worker vld1.32 {d17[]}, [\sr2], \s_strd 2828*c0909341SAndroid Build Coastguard Worker vld1.32 {d18[]}, [\src], \s_strd 2829*c0909341SAndroid Build Coastguard Worker vld1.32 {d19[]}, [\sr2], \s_strd 2830*c0909341SAndroid Build Coastguard Worker vld1.32 {d20[]}, [\src], \s_strd 2831*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 2832*c0909341SAndroid Build Coastguard Worker vext.8 d16, d16, d17, #4 2833*c0909341SAndroid Build Coastguard Worker vext.8 d17, d17, d18, #4 2834*c0909341SAndroid Build Coastguard Worker vext.8 d18, d18, d19, #4 2835*c0909341SAndroid Build Coastguard Worker vext.8 d19, d19, d20, #4 2836*c0909341SAndroid Build Coastguard Worker vswp d17, d18 2837*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q8, q2 2838*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q9, q3 2839*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2840*c0909341SAndroid Build Coastguard Worker vrshr.u16 q8, q8, #4 2841*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[0]}, [\dst, :32], \d_strd 2842*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[1]}, [\ds2, :32], \d_strd 2843*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[0]}, [\dst, :32], \d_strd 2844*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[1]}, [\ds2, :32], \d_strd 2845*c0909341SAndroid Build Coastguard Worker blt 0f 2846*c0909341SAndroid Build Coastguard Worker vmov d16, d20 2847*c0909341SAndroid Build Coastguard Worker beq 22b 2848*c0909341SAndroid Build Coastguard Worker b 24b 2849*c0909341SAndroid Build Coastguard 
Worker0: 2850*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2851*c0909341SAndroid Build Coastguard Worker.endif 2852*c0909341SAndroid Build Coastguard Worker 2853*c0909341SAndroid Build Coastguard Worker40: // 4xN v 2854*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2855*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2856*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2857*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2858*c0909341SAndroid Build Coastguard Worker vld1.16 {d16}, [\src], \s_strd 2859*c0909341SAndroid Build Coastguard Worker4: 2860*c0909341SAndroid Build Coastguard Worker vld1.16 {d17}, [\sr2], \s_strd 2861*c0909341SAndroid Build Coastguard Worker vld1.16 {d19}, [\src], \s_strd 2862*c0909341SAndroid Build Coastguard Worker vmov d18, d17 2863*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q8, q2 2864*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q9, q3 2865*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2866*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2867*c0909341SAndroid Build Coastguard Worker vrshr.u16 q8, q8, #4 2868*c0909341SAndroid Build Coastguard Worker.else 2869*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q15 2870*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q14 2871*c0909341SAndroid Build Coastguard Worker.endif 2872*c0909341SAndroid Build Coastguard Worker vst1.16 {d16}, [\dst, :64], \d_strd 2873*c0909341SAndroid Build Coastguard Worker vst1.16 {d17}, [\ds2, :64], \d_strd 2874*c0909341SAndroid Build Coastguard Worker ble 0f 2875*c0909341SAndroid Build Coastguard Worker vmov d16, d19 2876*c0909341SAndroid Build Coastguard Worker b 4b 2877*c0909341SAndroid Build Coastguard Worker0: 2878*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2879*c0909341SAndroid Build Coastguard Worker 2880*c0909341SAndroid Build Coastguard Worker80: // 8xN v 2881*c0909341SAndroid Build Coastguard Worker add 
\ds2, \dst, \d_strd 2882*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2883*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2884*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2885*c0909341SAndroid Build Coastguard Worker vld1.16 {q8}, [\src], \s_strd 2886*c0909341SAndroid Build Coastguard Worker8: 2887*c0909341SAndroid Build Coastguard Worker vld1.16 {q9}, [\sr2], \s_strd 2888*c0909341SAndroid Build Coastguard Worker vld1.16 {q10}, [\src], \s_strd 2889*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q8, q2 2890*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q9, q3 2891*c0909341SAndroid Build Coastguard Worker vmul.i16 q9, q9, q2 2892*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q10, q3 2893*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2894*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2895*c0909341SAndroid Build Coastguard Worker vrshr.u16 q8, q8, #4 2896*c0909341SAndroid Build Coastguard Worker vrshr.u16 q9, q9, #4 2897*c0909341SAndroid Build Coastguard Worker.else 2898*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q15 2899*c0909341SAndroid Build Coastguard Worker vrshl.u16 q9, q9, q15 2900*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q14 2901*c0909341SAndroid Build Coastguard Worker vsub.i16 q9, q9, q14 2902*c0909341SAndroid Build Coastguard Worker.endif 2903*c0909341SAndroid Build Coastguard Worker vst1.16 {q8}, [\dst, :128], \d_strd 2904*c0909341SAndroid Build Coastguard Worker vst1.16 {q9}, [\ds2, :128], \d_strd 2905*c0909341SAndroid Build Coastguard Worker ble 0f 2906*c0909341SAndroid Build Coastguard Worker vmov q8, q10 2907*c0909341SAndroid Build Coastguard Worker b 8b 2908*c0909341SAndroid Build Coastguard Worker0: 2909*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2910*c0909341SAndroid Build Coastguard Worker 2911*c0909341SAndroid Build Coastguard Worker160: // 16xN, 32xN, ... 
2912*c0909341SAndroid Build Coastguard Worker320: 2913*c0909341SAndroid Build Coastguard Worker640: 2914*c0909341SAndroid Build Coastguard Worker1280: 2915*c0909341SAndroid Build Coastguard Worker mov \my, \h 2916*c0909341SAndroid Build Coastguard Worker1: 2917*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2918*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2919*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2920*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2921*c0909341SAndroid Build Coastguard Worker 2922*c0909341SAndroid Build Coastguard Worker vld1.16 {q8, q9}, [\src], \s_strd 2923*c0909341SAndroid Build Coastguard Worker2: 2924*c0909341SAndroid Build Coastguard Worker vld1.16 {q10, q11}, [\sr2], \s_strd 2925*c0909341SAndroid Build Coastguard Worker vld1.16 {q12, q13}, [\src], \s_strd 2926*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q8, q2 2927*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q10, q3 2928*c0909341SAndroid Build Coastguard Worker vmul.i16 q9, q9, q2 2929*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q11, q3 2930*c0909341SAndroid Build Coastguard Worker vmul.i16 q10, q10, q2 2931*c0909341SAndroid Build Coastguard Worker vmla.i16 q10, q12, q3 2932*c0909341SAndroid Build Coastguard Worker vmul.i16 q11, q11, q2 2933*c0909341SAndroid Build Coastguard Worker vmla.i16 q11, q13, q3 2934*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2935*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2936*c0909341SAndroid Build Coastguard Worker vrshr.u16 q8, q8, #4 2937*c0909341SAndroid Build Coastguard Worker vrshr.u16 q9, q9, #4 2938*c0909341SAndroid Build Coastguard Worker vrshr.u16 q10, q10, #4 2939*c0909341SAndroid Build Coastguard Worker vrshr.u16 q11, q11, #4 2940*c0909341SAndroid Build Coastguard Worker.else 2941*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q15 2942*c0909341SAndroid Build Coastguard Worker vrshl.u16 q9, q9, q15 
2943*c0909341SAndroid Build Coastguard Worker vrshl.u16 q10, q10, q15 2944*c0909341SAndroid Build Coastguard Worker vrshl.u16 q11, q11, q15 2945*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q14 2946*c0909341SAndroid Build Coastguard Worker vsub.i16 q9, q9, q14 2947*c0909341SAndroid Build Coastguard Worker vsub.i16 q10, q10, q14 2948*c0909341SAndroid Build Coastguard Worker vsub.i16 q11, q11, q14 2949*c0909341SAndroid Build Coastguard Worker.endif 2950*c0909341SAndroid Build Coastguard Worker vst1.16 {q8, q9}, [\dst, :128], \d_strd 2951*c0909341SAndroid Build Coastguard Worker vst1.16 {q10, q11}, [\ds2, :128], \d_strd 2952*c0909341SAndroid Build Coastguard Worker ble 9f 2953*c0909341SAndroid Build Coastguard Worker vmov q8, q12 2954*c0909341SAndroid Build Coastguard Worker vmov q9, q13 2955*c0909341SAndroid Build Coastguard Worker b 2b 2956*c0909341SAndroid Build Coastguard Worker9: 2957*c0909341SAndroid Build Coastguard Worker subs \w, \w, #16 2958*c0909341SAndroid Build Coastguard Worker ble 0f 2959*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2960*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2961*c0909341SAndroid Build Coastguard Worker mls \src, \s_strd, \my, \src 2962*c0909341SAndroid Build Coastguard Worker mls \dst, \d_strd, \my, \dst 2963*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2964*c0909341SAndroid Build Coastguard Worker mov \h, \my 2965*c0909341SAndroid Build Coastguard Worker add \src, \src, #32 2966*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #32 2967*c0909341SAndroid Build Coastguard Worker b 1b 2968*c0909341SAndroid Build Coastguard Worker0: 2969*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2970*c0909341SAndroid Build Coastguard Worker 2971*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_hv): 2972*c0909341SAndroid Build Coastguard Worker adr r10, L(\type\()_bilin_hv_tbl) 2973*c0909341SAndroid Build Coastguard Worker vdup.16 q15, 
r11 // 4 - intermediate_bits 2974*c0909341SAndroid Build Coastguard Worker ldr r9, [r10, r9, lsl #2] 2975*c0909341SAndroid Build Coastguard Worker vneg.s16 q15, q15 // -(4-intermediate_bits) 2976*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2977*c0909341SAndroid Build Coastguard Worker vdup.32 q14, r12 // 4 + intermediate_bits 2978*c0909341SAndroid Build Coastguard Worker.else 2979*c0909341SAndroid Build Coastguard Worker vmov.i16 q14, #PREP_BIAS 2980*c0909341SAndroid Build Coastguard Worker.endif 2981*c0909341SAndroid Build Coastguard Worker add r10, r10, r9 2982*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2983*c0909341SAndroid Build Coastguard Worker vneg.s32 q14, q14 // -(4+intermediate_bits) 2984*c0909341SAndroid Build Coastguard Worker.endif 2985*c0909341SAndroid Build Coastguard Worker bx r10 2986*c0909341SAndroid Build Coastguard Worker 2987*c0909341SAndroid Build Coastguard Worker .align 2 2988*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_hv_tbl): 2989*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2990*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2991*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2992*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2993*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2994*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2995*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2996*c0909341SAndroid Build Coastguard Worker 2997*c0909341SAndroid Build Coastguard Worker20: // 2xN hv 2998*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2999*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 3000*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, 
\d_strd 3001*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 3002*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 3003*c0909341SAndroid Build Coastguard Worker 3004*c0909341SAndroid Build Coastguard Worker vld1.16 {d20}, [\src], \s_strd 3005*c0909341SAndroid Build Coastguard Worker vext.8 d21, d20, d20, #2 3006*c0909341SAndroid Build Coastguard Worker vmul.i16 d16, d20, d0 3007*c0909341SAndroid Build Coastguard Worker vmla.i16 d16, d21, d2 3008*c0909341SAndroid Build Coastguard Worker vrshl.u16 d16, d16, d30 3009*c0909341SAndroid Build Coastguard Worker vext.8 d16, d16, d16, #4 3010*c0909341SAndroid Build Coastguard Worker 3011*c0909341SAndroid Build Coastguard Worker2: 3012*c0909341SAndroid Build Coastguard Worker vld1.16 {d20}, [\sr2], \s_strd 3013*c0909341SAndroid Build Coastguard Worker vld1.16 {d22}, [\src], \s_strd 3014*c0909341SAndroid Build Coastguard Worker vext.8 d21, d20, d20, #2 3015*c0909341SAndroid Build Coastguard Worker vext.8 d23, d22, d22, #2 3016*c0909341SAndroid Build Coastguard Worker vtrn.32 d20, d22 3017*c0909341SAndroid Build Coastguard Worker vtrn.32 d21, d23 3018*c0909341SAndroid Build Coastguard Worker vmul.i16 d18, d20, d0 3019*c0909341SAndroid Build Coastguard Worker vmla.i16 d18, d21, d2 3020*c0909341SAndroid Build Coastguard Worker vrshl.u16 d18, d18, d30 3021*c0909341SAndroid Build Coastguard Worker 3022*c0909341SAndroid Build Coastguard Worker vext.8 d16, d16, d18, #4 3023*c0909341SAndroid Build Coastguard Worker 3024*c0909341SAndroid Build Coastguard Worker vmull.u16 q8, d16, d4 3025*c0909341SAndroid Build Coastguard Worker vmlal.u16 q8, d18, d6 3026*c0909341SAndroid Build Coastguard Worker vrshl.u32 q8, q8, q14 3027*c0909341SAndroid Build Coastguard Worker vmovn.i32 d16, q8 3028*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 3029*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[0]}, [\dst, :32], \d_strd 3030*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[1]}, [\ds2, :32], 
\d_strd 3031*c0909341SAndroid Build Coastguard Worker ble 0f 3032*c0909341SAndroid Build Coastguard Worker vmov d16, d18 3033*c0909341SAndroid Build Coastguard Worker b 2b 3034*c0909341SAndroid Build Coastguard Worker0: 3035*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 3036*c0909341SAndroid Build Coastguard Worker.endif 3037*c0909341SAndroid Build Coastguard Worker 3038*c0909341SAndroid Build Coastguard Worker40: // 4xN hv 3039*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 3040*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 3041*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 3042*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 3043*c0909341SAndroid Build Coastguard Worker 3044*c0909341SAndroid Build Coastguard Worker vld1.16 {q10}, [\src], \s_strd 3045*c0909341SAndroid Build Coastguard Worker vext.8 d21, d20, d21, #2 3046*c0909341SAndroid Build Coastguard Worker vmul.i16 d16, d20, d0 3047*c0909341SAndroid Build Coastguard Worker vmla.i16 d16, d21, d2 3048*c0909341SAndroid Build Coastguard Worker vrshl.u16 d16, d16, d30 3049*c0909341SAndroid Build Coastguard Worker 3050*c0909341SAndroid Build Coastguard Worker4: 3051*c0909341SAndroid Build Coastguard Worker vld1.16 {q10}, [\sr2], \s_strd 3052*c0909341SAndroid Build Coastguard Worker vld1.16 {q11}, [\src], \s_strd 3053*c0909341SAndroid Build Coastguard Worker vext.8 d21, d20, d21, #2 3054*c0909341SAndroid Build Coastguard Worker vext.8 d23, d22, d23, #2 3055*c0909341SAndroid Build Coastguard Worker vswp d21, d22 3056*c0909341SAndroid Build Coastguard Worker vmul.i16 q9, q10, q0 3057*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q11, q1 3058*c0909341SAndroid Build Coastguard Worker vrshl.u16 q9, q9, q15 3059*c0909341SAndroid Build Coastguard Worker 3060*c0909341SAndroid Build Coastguard Worker vmull.u16 q10, d16, d4 3061*c0909341SAndroid Build Coastguard Worker vmlal.u16 q10, d18, d6 3062*c0909341SAndroid Build Coastguard 
Worker vmull.u16 q11, d18, d4 3063*c0909341SAndroid Build Coastguard Worker vmlal.u16 q11, d19, d6 3064*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3065*c0909341SAndroid Build Coastguard Worker vrshl.u32 q10, q10, q14 3066*c0909341SAndroid Build Coastguard Worker vrshl.u32 q11, q11, q14 3067*c0909341SAndroid Build Coastguard Worker vmovn.i32 d20, q10 3068*c0909341SAndroid Build Coastguard Worker vmovn.i32 d21, q11 3069*c0909341SAndroid Build Coastguard Worker.else 3070*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d20, q10, #4 3071*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d21, q11, #4 3072*c0909341SAndroid Build Coastguard Worker vsub.i16 q10, q10, q14 3073*c0909341SAndroid Build Coastguard Worker.endif 3074*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 3075*c0909341SAndroid Build Coastguard Worker vst1.16 {d20}, [\dst, :64], \d_strd 3076*c0909341SAndroid Build Coastguard Worker vst1.16 {d21}, [\ds2, :64], \d_strd 3077*c0909341SAndroid Build Coastguard Worker ble 0f 3078*c0909341SAndroid Build Coastguard Worker vmov d16, d19 3079*c0909341SAndroid Build Coastguard Worker b 4b 3080*c0909341SAndroid Build Coastguard Worker0: 3081*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 3082*c0909341SAndroid Build Coastguard Worker 3083*c0909341SAndroid Build Coastguard Worker80: // 8xN, 16xN, ... 
hv 3084*c0909341SAndroid Build Coastguard Worker160: 3085*c0909341SAndroid Build Coastguard Worker320: 3086*c0909341SAndroid Build Coastguard Worker640: 3087*c0909341SAndroid Build Coastguard Worker1280: 3088*c0909341SAndroid Build Coastguard Worker mov \my, \h 3089*c0909341SAndroid Build Coastguard Worker 3090*c0909341SAndroid Build Coastguard Worker1: 3091*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 3092*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 3093*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 3094*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 3095*c0909341SAndroid Build Coastguard Worker 3096*c0909341SAndroid Build Coastguard Worker vld1.16 {d20, d21, d22}, [\src], \s_strd 3097*c0909341SAndroid Build Coastguard Worker vext.8 q11, q10, q11, #2 3098*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q10, q0 3099*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q11, q1 3100*c0909341SAndroid Build Coastguard Worker vrshl.u16 q8, q8, q15 3101*c0909341SAndroid Build Coastguard Worker 3102*c0909341SAndroid Build Coastguard Worker2: 3103*c0909341SAndroid Build Coastguard Worker vld1.16 {d20, d21, d22}, [\sr2], \s_strd 3104*c0909341SAndroid Build Coastguard Worker vld1.16 {d24, d25, d26}, [\src], \s_strd 3105*c0909341SAndroid Build Coastguard Worker vext.8 q11, q10, q11, #2 3106*c0909341SAndroid Build Coastguard Worker vext.8 q13, q12, q13, #2 3107*c0909341SAndroid Build Coastguard Worker vmul.i16 q9, q10, q0 3108*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q11, q1 3109*c0909341SAndroid Build Coastguard Worker vmul.i16 q10, q12, q0 3110*c0909341SAndroid Build Coastguard Worker vmla.i16 q10, q13, q1 3111*c0909341SAndroid Build Coastguard Worker vrshl.u16 q9, q9, q15 3112*c0909341SAndroid Build Coastguard Worker vrshl.u16 q10, q10, q15 3113*c0909341SAndroid Build Coastguard Worker 3114*c0909341SAndroid Build Coastguard Worker vmull.u16 q11, d16, d4 3115*c0909341SAndroid 
Build Coastguard Worker vmlal.u16 q11, d18, d6 3116*c0909341SAndroid Build Coastguard Worker vmull.u16 q12, d17, d4 3117*c0909341SAndroid Build Coastguard Worker vmlal.u16 q12, d19, d6 3118*c0909341SAndroid Build Coastguard Worker vmull.u16 q8, d18, d4 3119*c0909341SAndroid Build Coastguard Worker vmlal.u16 q8, d20, d6 3120*c0909341SAndroid Build Coastguard Worker vmull.u16 q9, d19, d4 3121*c0909341SAndroid Build Coastguard Worker vmlal.u16 q9, d21, d6 3122*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3123*c0909341SAndroid Build Coastguard Worker vrshl.u32 q11, q11, q14 3124*c0909341SAndroid Build Coastguard Worker vrshl.u32 q12, q12, q14 3125*c0909341SAndroid Build Coastguard Worker vrshl.u32 q8, q8, q14 3126*c0909341SAndroid Build Coastguard Worker vrshl.u32 q9, q9, q14 3127*c0909341SAndroid Build Coastguard Worker vmovn.i32 d22, q11 3128*c0909341SAndroid Build Coastguard Worker vmovn.i32 d23, q12 3129*c0909341SAndroid Build Coastguard Worker vmovn.i32 d16, q8 3130*c0909341SAndroid Build Coastguard Worker vmovn.i32 d17, q9 3131*c0909341SAndroid Build Coastguard Worker.else 3132*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d22, q11, #4 3133*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d23, q12, #4 3134*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d16, q8, #4 3135*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d17, q9, #4 3136*c0909341SAndroid Build Coastguard Worker vsub.i16 q11, q11, q14 3137*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q14 3138*c0909341SAndroid Build Coastguard Worker.endif 3139*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 3140*c0909341SAndroid Build Coastguard Worker vst1.16 {q11}, [\dst, :128], \d_strd 3141*c0909341SAndroid Build Coastguard Worker vst1.16 {q8}, [\ds2, :128], \d_strd 3142*c0909341SAndroid Build Coastguard Worker ble 9f 3143*c0909341SAndroid Build Coastguard Worker vmov q8, q10 3144*c0909341SAndroid Build Coastguard Worker b 2b 3145*c0909341SAndroid Build Coastguard 
9:      // Current 8-pixel-wide column is done; move on to the next one, if any.
        subs            \w,  \w,  #8
        ble             0f
        // The strides were doubled at 1: (two rows per iteration); undo that.
        asr             \s_strd,  \s_strd,  #1
        asr             \d_strd,  \d_strd,  #1
        // Rewind both pointers by the rows walked (my holds the saved height).
        mls             \src,  \s_strd,  \my,  \src
        mls             \dst,  \d_strd,  \my,  \dst
        // src was advanced two rows further than dst (initial row + lookahead).
        sub             \src,  \src,  \s_strd,  lsl #1
        mov             \h,  \my
        // Step 16 bytes to the right, i.e. 8 pixels of 16 bits each.
        add             \src,  \src,  #16
        add             \dst,  \dst,  #16
        b               1b
0:
        pop             {r4-r11,pc}
endfunc
.endm

// Instantiate the put and prep variants. The meaning of each register
// argument is given by the filter_fn macro header (outside this excerpt).
filter_fn put,  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10
filter_fn prep, r0, r8, r1, r2, r3, r4, r5, r6, r7, r9, r10

// load_filter_ptr src
//   r12 = r11 + ((src >> 10) << 3)
//   Computes the address of one 8-byte filter row. r11 must hold the filter
//   table base (the warp entry point in this file sets it to
//   mc_warp_filter + 64*8). The shift is arithmetic, so negative positions
//   index backwards from that base.
//   Clobbers: r12.
.macro load_filter_ptr src
        asr             r12, \src, #10
        add             r12, r11, r12, lsl #3
.endm

// load_filter_coef dst, src, inc
//   Advances the filter position (src += inc) and loads the eight 8-bit
//   coefficients at the 64-bit aligned address already held in r12 into the
//   d register dst. r12 must have been set up by load_filter_ptr *before* the
//   position is advanced here; the two halves are separate so callers can
//   interleave other instructions between them.
.macro load_filter_coef dst, src, inc
        add             \src, \src, \inc
        vld1.8          {\dst}, [r12, :64]
.endm

// load_filter_row dst, src, inc
//   Back-to-back load_filter_ptr + load_filter_coef: fetch the filter row for
//   the current position src into dst, then step src by inc.
//   Clobbers: r12.
.macro load_filter_row dst, src, inc
        load_filter_ptr \src
        load_filter_coef \dst, \src, \inc
.endm

// warp_filter_horz_neon
//   Horizontally filters one row of 8 output pixels with a per-pixel 8-tap
//   filter. Local helper, reached with bl and returning with bx lr.
//
//   In:    r2   = source row pointer (16 pixels of 16 bits are read)
//          r3   = value added to r2 after the load (source stride)
//          r5   = filter position for pixel 0 of this row
//          r7   = filter position increment per output pixel
//          r8   = filter position increment per row
//          r11  = filter table base, see load_filter_ptr
//          [sp] = 32-bit shift amount for vrshl, -(7 - intermediate_bits)
//   Out:   q4   = output pixels 0-3, 32 bit each, after the rounding shift
//          q5   = output pixels 4-7, likewise
//          r2  += r3
//          r5  += r8   (the eight per-pixel steps are undone first)
//   Clobb: q0-q3, q6, q7, r12
//
//   Output pixel i multiplies source pixels i..i+7 (vext by 2*i bytes) by
//   filter row i and reduces the 8 products with a vpadd tree. Filter loads,
//   multiplies and reductions of neighbouring pixels are deliberately
//   interleaved; keep the instruction order when editing.
function warp_filter_horz_neon
        load_filter_ptr r5                  // filter 0
        vld1.16         {q6,q7}, [r2], r3

        load_filter_coef d0, r5,  r7        // filter 0
        load_filter_row d2,  r5,  r7        // filter 1
        vmovl.s8        q0,  d0             // filter 0
        vext.8          q3,  q6,  q7,  #2*1 // filter 1 pixels
        vmovl.s8        q1,  d2             // filter 1

        vmull.s16       q4,  d12, d0        // filter 0 output (0-3)
        vmull.s16       q5,  d13, d1        // filter 0 output (4-7)

        load_filter_ptr r5                  // filter 2

        vmull.s16       q2,  d6,  d2        // filter 1 output (0-3)
        vmull.s16       q3,  d7,  d3        // filter 1 output (4-7)

        load_filter_coef d0, r5,  r7        // filter 2

        vpadd.i32       d8,  d8,  d9        // half pixel 0 (2x32)
        vpadd.i32       d9,  d10, d11       // half pixel 0 (2x32)

        load_filter_ptr r5                  // filter 3

        vpadd.i32       d4,  d4,  d5        // half pixel 1 (2x32)
        vpadd.i32       d5,  d6,  d7        // half pixel 1 (2x32)

        vmovl.s8        q0,  d0             // filter 2
        vext.8          q3,  q6,  q7,  #2*2 // filter 2 pixels

        vpadd.i32       d8,  d8,  d9        // pixel 0 (2x32)
        vpadd.i32       d9,  d4,  d5        // pixel 1 (2x32)

        load_filter_coef d2, r5,  r7        // filter 3

        vmull.s16       q2,  d6,  d0        // filter 2 output (0-3)
        vmull.s16       q3,  d7,  d1        // filter 2 output (4-7)

        load_filter_ptr r5                  // filter 4

        vpadd.i32       d8,  d8,  d9        // pixel 0,1

        vpadd.i32       d9,  d4,  d5        // half pixel 2 (2x32)
        vpadd.i32       d10, d6,  d7        // half pixel 2 (2x32)

        vmovl.s8        q1,  d2             // filter 3
        vext.8          q3,  q6,  q7,  #2*3 // filter 3 pixels

        load_filter_coef d0, r5,  r7        // filter 4

        vpadd.i32       d9,  d9,  d10       // pixel 2 (2x32)

        vmull.s16       q2,  d6,  d2        // filter 3 output (0-3)
        vmull.s16       q3,  d7,  d3        // filter 3 output (4-7)

        vmovl.s8        q0,  d0             // filter 4
        load_filter_ptr r5                  // filter 5

        vpadd.i32       d10, d4,  d5        // half pixel 3 (2x32)
        vpadd.i32       d11, d6,  d7        // half pixel 3 (2x32)

        vext.8          q3,  q6,  q7,  #2*4 // filter 4 pixels
        load_filter_coef d2, r5,  r7        // filter 5

        vpadd.i32       d10, d10, d11       // pixel 3 (2x32)

        vpadd.i32       d9,  d9,  d10       // pixel 2,3

        vmull.s16       q2,  d6,  d0        // filter 4 output (0-3)
        vmull.s16       q3,  d7,  d1        // filter 4 output (4-7)

        vmovl.s8        q1,  d2             // filter 5
        load_filter_ptr r5                  // filter 6

        vpadd.i32       d10, d4,  d5        // half pixel 4 (2x32)
        vpadd.i32       d11, d6,  d7        // half pixel 4 (2x32)

        vext.8          q3,  q6,  q7,  #2*5 // filter 5 pixels
        load_filter_coef d0, r5,  r7        // filter 6

        vpadd.i32       d10, d10, d11       // pixel 4 (2x32)

        vmull.s16       q2,  d6,  d2        // filter 5 output (0-3)
        vmull.s16       q3,  d7,  d3        // filter 5 output (4-7)

        vmovl.s8        q0,  d0             // filter 6
        load_filter_ptr r5                  // filter 7

        vpadd.i32       d4,  d4,  d5        // half pixel 5 (2x32)
        vpadd.i32       d5,  d6,  d7        // half pixel 5 (2x32)

        vext.8          q3,  q6,  q7,  #2*6 // filter 6 pixels
        load_filter_coef d2, r5,  r7        // filter 7

        vpadd.i32       d11, d4,  d5        // pixel 5 (2x32)

        vmull.s16       q2,  d6,  d0        // filter 6 output (0-3)
        vmull.s16       q3,  d7,  d1        // filter 6 output (4-7)

        vmovl.s8        q1,  d2             // filter 7

        vpadd.i32       d10, d10, d11       // pixel 4,5

        vpadd.i32       d4,  d4,  d5        // half pixel 6 (2x32)
        vpadd.i32       d5,  d6,  d7        // half pixel 6 (2x32)

        vext.8          q3,  q6,  q7,  #2*7 // filter 7 pixels

        vpadd.i32       d11, d4,  d5        // pixel 6 (2x32)

        vmull.s16       q2,  d6,  d2        // filter 7 output (0-3)
        vmull.s16       q3,  d7,  d3        // filter 7 output (4-7)

        // The source pixels in q6/q7 are no longer needed after the last vext
        // above, so q7 is reused to hold the shift amount in every lane.
        vld1.32         {d14[],d15[]}, [sp] // -(7 - intermediate_bits)

        vpadd.i32       d4,  d4,  d5        // half pixel 7 (2x32)
        vpadd.i32       d5,  d6,  d7        // half pixel 7 (2x32)

        sub             r5,  r5,  r7, lsl #3 // undo the 8 per-pixel steps

        vpadd.i32       d4,  d4,  d5        // pixel 7 (2x32)

        add             r5,  r5,  r8        // step to the next row

        vpadd.i32       d11, d11, d4        // pixel 6,7

        vrshl.s32       q4,  q4,  q7        // -(7 - intermediate_bits)
        vrshl.s32       q5,  q5,  q7        // -(7 - intermediate_bits)

        bx              lr
endfunc

// void dav1d_warp_affine_8x8_16bpc_neon(
//         pixel *dst, const ptrdiff_t dst_stride,
//         const pixel *src, const ptrdiff_t src_stride,
//         const int16_t *const abcd, int mx, int my,
//         const int bitdepth_max)
//
// The macro below is expanded twice (see the invocations after .endm):
//   warp      (t blank) -> warp_affine_8x8_16bpc_neon
//   warp t    (t set)   -> warp_affine_8x8t_16bpc_neon
//
// Both variants write 8 rows of 8 16-bit values to [r0], stepping r0 by r1
// after every row. Each output row is an 8-tap vertical filter over a
// rolling window of 8 horizontally filtered rows kept in q8-q15.
// They differ only in the final scaling:
//   t blank: rounding shift right by 7 + intermediate_bits, saturating
//            narrow to unsigned 16 bit, then clamp to bitdepth_max.
//   t set:   rounding shift right by 7, then subtract PREP_BIAS. r1 is
//            doubled on entry (presumably the stride is passed in int16_t
//            units for this variant - confirm against the C prototype, the
//            head of which is outside this view).
//
// Register use inside the function (after the setup code):
//   r0  = output pointer            r1  = output row step in bytes
//   r2  = r2 - 3*r3 - 6 (three rows up and 6 bytes left of the value passed
//         in; presumably src/src_stride - confirm)
//   r4  = high half of the second 32-bit word at [abcd]; added to r6 once
//         per output row
//   r9  = low half of that word; r6 is rewound by 8*r9 after the eight
//         load_filter_row calls
//   r6  = vertical filter position, biased by 512
//   r10 = remaining output rows (8 .. 1)
//   r5, r7, r8, r11, [sp] are prepared here but not read again in this
//         macro; presumably they are inputs of warp_filter_horz_neon and
//         load_filter_row, which are defined elsewhere - confirm there.
//   [sp, #4] = -(7 + intermediate_bits), only for the t-blank variant.
.macro warp t
function warp_affine_8x8\t\()_16bpc_neon, export=1
        push            {r4-r11,lr}
        vpush           {q4-q7}
        // 36 bytes of core registers plus 64 bytes of NEON registers were
        // just pushed, so the first stack argument is at [sp, #100].
        ldrd            r4,  r5,  [sp, #100]
        ldrd            r6,  r7,  [sp, #108]    // r7 = bitdepth_max
        sub             sp,  sp,  #8            // scratch words [sp] and [sp, #4]

        clz             r7,  r7
                                      // intermediate_bits = clz(bitdepth_max) - 18
.ifb \t
        sub             r8,  r7,  #11 // 7 + intermediate_bits = clz(bitdepth_max) - 18 + 7
.endif
        sub             r7,  r7,  #25 // -(7 - intermediate_bits)
.ifb \t
        neg             r8,  r8       // -(7 + intermediate_bits)
.endif
        str             r7,  [sp]     // spill -(7 - intermediate_bits) on stack
.ifb \t
        str             r8,  [sp, #4] // spill -(7 + intermediate_bits) on stack
.endif

        // Unpack the four signed 16-bit values stored at [r4].
        ldrd            r8,  r9,  [r4]
        sxth            r7,  r8                 // value 0
        asr             r8,  r8,  #16           // value 1
        asr             r4,  r9,  #16           // value 3
        sxth            r9,  r9                 // value 2
        mov             r10, #8                 // output row counter
        sub             r2,  r2,  r3, lsl #1
        sub             r2,  r2,  r3            // r2 -= 3 * r3
        sub             r2,  r2,  #6            // r2 -= 3 16-bit elements
        movrel          r11, X(mc_warp_filter), 64*8
.ifnb \t
        lsl             r1,  r1,  #1
.endif
        add             r5,  r5,  #512
        add             r6,  r6,  #512

        // Prime the window: seven horizontally filtered rows go to q8-q14.
        // Each call leaves its result in q4/q5 as 32-bit lanes, which are
        // narrowed to 16 bit here.
        bl              warp_filter_horz_neon
        vmovn.i32       d16, q4
        vmovn.i32       d17, q5
        bl              warp_filter_horz_neon
        vmovn.i32       d18, q4
        vmovn.i32       d19, q5
        bl              warp_filter_horz_neon
        vmovn.i32       d20, q4
        vmovn.i32       d21, q5
        bl              warp_filter_horz_neon
        vmovn.i32       d22, q4
        vmovn.i32       d23, q5
        bl              warp_filter_horz_neon
        vmovn.i32       d24, q4
        vmovn.i32       d25, q5
        bl              warp_filter_horz_neon
        vmovn.i32       d26, q4
        vmovn.i32       d27, q5
        bl              warp_filter_horz_neon
        vmovn.i32       d28, q4
        vmovn.i32       d29, q5

1:
        // Eighth (newest) row of the window goes to q15.
        bl              warp_filter_horz_neon
        vmovn.i32       d30, q4
        vmovn.i32       d31, q5

        // Fetch one 8-byte filter row per output column, then transpose so
        // that d8+n holds tap n for all eight columns.
        load_filter_row d8,  r6,  r9
        load_filter_row d9,  r6,  r9
        load_filter_row d10, r6,  r9
        load_filter_row d11, r6,  r9
        load_filter_row d12, r6,  r9
        load_filter_row d13, r6,  r9
        load_filter_row d14, r6,  r9
        load_filter_row d15, r6,  r9
        transpose_8x8b  q4,  q5,  q6,  q7,  d8,  d9,  d10, d11, d12, d13, d14, d15
        // Widen taps 0-5 to 16 bit. The order matters: every source
        // d register is consumed before the q register aliasing it is
        // overwritten (q4 = d8/d9, q5 = d10/d11, q6 = d12/d13).
        vmovl.s8        q1,  d8
        vmovl.s8        q2,  d9
        vmovl.s8        q3,  d10
        vmovl.s8        q4,  d11
        vmovl.s8        q5,  d12
        vmovl.s8        q6,  d13

        sub             r6,  r6,  r9, lsl #3    // rewind r6 by 8 * r9

        // This ordering of vmull/vmlal is highly beneficial for
        // Cortex A8/A9/A53 here, but harmful for Cortex A7.
        vmull.s16       q0,  d16, d2
        vmlal.s16       q0,  d18, d4
        vmlal.s16       q0,  d20, d6
        vmlal.s16       q0,  d22, d8
        vmlal.s16       q0,  d24, d10
        vmlal.s16       q0,  d26, d12
        vmull.s16       q1,  d17, d3
        vmlal.s16       q1,  d19, d5
        vmlal.s16       q1,  d21, d7
        vmlal.s16       q1,  d23, d9
        vmlal.s16       q1,  d25, d11
        vmlal.s16       q1,  d27, d13

        // Taps 6 and 7 reuse q2/q3, which are free by now.
        vmovl.s8        q2,  d14
        vmovl.s8        q3,  d15

        vmlal.s16       q0,  d28, d4
        vmlal.s16       q0,  d30, d6
        vmlal.s16       q1,  d29, d5
        vmlal.s16       q1,  d31, d7

.ifb \t
        ldr             lr,  [sp, #4]   // -(7 + intermediate_bits)
        ldr             r12, [sp, #120] // bitdepth_max
        vdup.32         q2,  lr         // -(7 + intermediate_bits)
        vdup.16         q3,  r12        // bitdepth_max
.endif

        // Slide the row window down by one (q8 <- q9 ... q14 <- q15); the
        // moves are interleaved with the output scaling.
        vmov            q8,  q9
        vmov            q9,  q10
.ifb \t
        vrshl.s32       q0,  q0,  q2    // -(7 + intermediate_bits)
        vrshl.s32       q1,  q1,  q2    // -(7 + intermediate_bits)
.else
        vrshrn.s32      d0,  q0,  #7
        vrshrn.s32      d1,  q1,  #7
        vmov.i16        q3,  #PREP_BIAS
.endif
        vmov            q10, q11
.ifb \t
        vqmovun.s32     d0,  q0
        vqmovun.s32     d1,  q1
.else
        vsub.i16        q0,  q0,  q3    // PREP_BIAS
.endif
        vmov            q11, q12
        vmov            q12, q13
.ifb \t
        vmin.u16        q0,  q0,  q3    // bitdepth_max
.endif
        vmov            q13, q14
        vmov            q14, q15
        subs            r10, r10, #1
        vst1.16         {q0}, [r0, :128], r1    // one output row; needs 16-byte alignment

        add             r6,  r6,  r4            // filter position for the next row
        bgt             1b

        add             sp,  sp,  #8
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
.endm

warp
warp t

// void dav1d_emu_edge_16bpc_neon(
//         const intptr_t bw, const intptr_t bh,
//         const intptr_t iw, const intptr_t ih,
//         const intptr_t x, const intptr_t y,
//         pixel *dst, const ptrdiff_t dst_stride,
//         const pixel *ref, const ptrdiff_t ref_stride)
//
// Builds a bw x bh block at dst from the iw x ih image at ref, replicating
// the nearest edge pixel wherever the block reaches outside the image.
//
// Arguments on entry:
//   r0 = bw, r1 = bh, r2 = iw, r3 = ih
//   stack: x, y, dst, dst_stride, ref, ref_stride (loaded to r4-r9)
//
// Register roles once the extents have been computed:
//   r0  = dst of the first center row (bw is recomputed at label 5)
//   r1  = center_h        r2  = center_w
//   r4  = left_ext        r11 = right_ext
//   r5  = top_ext         r10 = bottom_ext
//   r6  = current dst row r7  = dst_stride (bytes)
//   r8  = current ref row r9  = ref_stride (bytes)
//   r3, r12, lr = scratch
//
// The loops work in whole chunks (16 pixels for the left/right fill,
// 32 pixels for the center copy and the top/bottom fill) and keep going
// while the remaining count is > 0, so widths are effectively rounded up to
// the chunk size. The :128 qualifiers require 16-byte aligned dst rows.
function emu_edge_16bpc_neon, export=1
        push            {r4-r11,lr}
        ldrd            r4,  r5,  [sp, #36]     // x, y
        ldrd            r6,  r7,  [sp, #44]     // dst, dst_stride
        ldrd            r8,  r9,  [sp, #52]     // ref, ref_stride

        // ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride)
        // ref += iclip(x, 0, iw - 1)
        sub             r12, r3,  #1           // ih - 1
        cmp             r5,  r3
        sub             lr,  r2,  #1           // iw - 1
        it              lt
        movlt           r12, r5                // min(y, ih - 1)
        cmp             r4,  r2
        bic             r12, r12, r12, asr #31 // max(min(y, ih - 1), 0)
        it              lt
        movlt           lr,  r4                // min(x, iw - 1)
        bic             lr,  lr,  lr,  asr #31 // max(min(x, iw - 1), 0)
        mla             r8,  r12, r9,  r8      // ref += iclip() * stride
        add             r8,  r8,  lr,  lsl #1  // ref += iclip()

        // bottom_ext = iclip(y + bh - ih, 0, bh - 1)
        // top_ext = iclip(-y, 0, bh - 1)
        add             r10, r5,  r1           // y + bh
        neg             r5,  r5                // -y
        sub             r10, r10, r3           // y + bh - ih
        sub             r12, r1,  #1           // bh - 1
        cmp             r10, r1
        bic             r5,  r5,  r5,  asr #31 // max(-y, 0)
        it              ge
        movge           r10, r12               // min(y + bh - ih, bh-1)
        cmp             r5,  r1
        bic             r10, r10, r10, asr #31 // max(min(y + bh - ih, bh-1), 0)
        it              ge
        movge           r5,  r12               // min(max(-y, 0), bh-1)

        // right_ext = iclip(x + bw - iw, 0, bw - 1)
        // left_ext = iclip(-x, 0, bw - 1)
        add             r11, r4,  r0           // x + bw
        neg             r4,  r4                // -x
        sub             r11, r11, r2           // x + bw - iw
        sub             lr,  r0,  #1           // bw - 1
        cmp             r11, r0
        bic             r4,  r4,  r4,  asr #31 // max(-x, 0)
        it              ge
        movge           r11, lr                // min(x + bw - iw, bw-1)
        cmp             r4,  r0
        bic             r11, r11, r11, asr #31 // max(min(x + bw - iw, bw-1), 0)
        it              ge
        movge           r4,  lr                // min(max(-x, 0), bw - 1)

        // center_h = bh - top_ext - bottom_ext
        // dst += top_ext * PXSTRIDE(dst_stride)
        // center_w = bw - left_ext - right_ext
        sub             r1,  r1,  r5           // bh - top_ext
        mla             r6,  r5,  r7,  r6
        sub             r2,  r0,  r4           // bw - left_ext
        sub             r1,  r1,  r10          // center_h = bh - top_ext - bottom_ext
        sub             r2,  r2,  r11          // center_w = bw - left_ext - right_ext

        mov             r0,  r6                // backup of dst

// One pass over the center_h rows. For every row: optionally fill left_ext
// pixels with the first ref pixel, copy center_w pixels from ref, and
// optionally fill right_ext pixels with the last copied ref pixel.
// need_left / need_right are assemble-time 0/1 flags; the four
// specialisations are selected at run time by the dispatch code below.
.macro v_loop need_left, need_right
0:
.if \need_left
        vld1.16         {d0[], d1[]}, [r8]     // broadcast the leftmost ref pixel
        mov             r12, r6                // out = dst
        mov             r3,  r4
        vmov            q1,  q0
1:
        subs            r3,  r3,  #16
        vst1.16         {q0, q1}, [r12, :128]!
        bgt             1b
.endif
        mov             lr,  r8
        add             r12, r6,  r4, lsl #1   // out = dst + left_ext
        mov             r3,  r2
1:
        vld1.16         {q0, q1}, [lr]!
        subs            r3,  r3,  #32
        vld1.16         {q2, q3}, [lr]!
.if \need_left
        // dst + left_ext has no alignment guarantee, so no :128 here.
        vst1.16         {q0, q1}, [r12]!
        vst1.16         {q2, q3}, [r12]!
.else
        vst1.16         {q0, q1}, [r12, :128]!
        vst1.16         {q2, q3}, [r12, :128]!
.endif
        bgt             1b
.if \need_right
        add             r3,  r8,  r2, lsl #1   // in + center_w
        sub             r3,  r3,  #2           // in + center_w - 1
        add             r12, r6,  r4, lsl #1   // dst + left_ext
        vld1.16         {d0[], d1[]}, [r3]     // broadcast the rightmost ref pixel
        add             r12, r12, r2, lsl #1   // out = dst + left_ext + center_w
        mov             r3,  r11
        vmov            q1,  q0
1:
        subs            r3,  r3,  #16
        vst1.16         {q0, q1}, [r12]!
        bgt             1b
.endif

        subs            r1,  r1,  #1           // center_h--
        add             r6,  r6,  r7
        add             r8,  r8,  r9
        bgt             0b
.endm

        cmp             r4,  #0
        beq             2f
        // need_left
        cmp             r11, #0
        beq             3f
        // need_left + need_right
        v_loop          1,   1
        b               5f

2:
        // !need_left
        cmp             r11, #0
        beq             4f
        // !need_left + need_right
        v_loop          0,   1
        b               5f

3:
        // need_left + !need_right
        v_loop          1,   0
        b               5f

4:
        // !need_left + !need_right
        v_loop          0,   0

5:
        cmp             r10, #0                // flags are consumed by the beq below;
                                               // the two adds do not set flags
        // Storing the original dst in r0 overwrote bw, recalculate it here
        add             r2,  r2,  r4           // center_w + left_ext
        add             r2,  r2,  r11          // bw = center_w + left_ext + right_ext

        beq             3f
        // need_bottom
        // r6 points just past the last center row; replicate that row
        // downwards, one 32-pixel wide column strip at a time.
        sub             r8,  r6,  r7           // ref = dst - stride
        mov             r4,  r2
        sub             r12, r7,  #32          // row step left after the first 32-byte store
1:
        vld1.16         {q0, q1}, [r8, :128]!
        mov             r3,  r10
        vld1.16         {q2, q3}, [r8, :128]!
2:
        vst1.16         {q0, q1}, [r6, :128]!
        subs            r3,  r3,  #1
        vst1.16         {q2, q3}, [r6, :128], r12
        bgt             2b
        mls             r6,  r7,  r10, r6      // dst -= bottom_ext * stride
        subs            r4,  r4,  #32          // bw -= 32
        add             r6,  r6,  #64          // dst += 32
        bgt             1b

3:
        cmp             r5,  #0
        beq             3f
        // need_top
        // Replicate the first center row (saved in r0) upwards, again in
        // 32-pixel wide column strips.
        mls             r6,  r7,  r5,  r0      // dst = stored_dst - top_ext * stride
        sub             r12, r7,  #32
1:
        vld1.16         {q0, q1}, [r0, :128]!
        mov             r3,  r5
        vld1.16         {q2, q3}, [r0, :128]!
2:
        vst1.16         {q0, q1}, [r6, :128]!
        subs            r3,  r3,  #1
        vst1.16         {q2, q3}, [r6, :128], r12
        bgt             2b
        mls             r6,  r7,  r5,  r6      // dst -= top_ext * stride
        subs            r2,  r2,  #32          // bw -= 32
        add             r6,  r6,  #64          // dst += 32
        bgt             1b

3:
        pop             {r4-r11,pc}
endfunc