1*c0909341SAndroid Build Coastguard Worker/* 2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors 3*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Janne Grunau 4*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Martin Storsjo 5*c0909341SAndroid Build Coastguard Worker * All rights reserved. 6*c0909341SAndroid Build Coastguard Worker * 7*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without 8*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met: 9*c0909341SAndroid Build Coastguard Worker * 10*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this 11*c0909341SAndroid Build Coastguard Worker * list of conditions and the following disclaimer. 12*c0909341SAndroid Build Coastguard Worker * 13*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice, 14*c0909341SAndroid Build Coastguard Worker * this list of conditions and the following disclaimer in the documentation 15*c0909341SAndroid Build Coastguard Worker * and/or other materials provided with the distribution. 16*c0909341SAndroid Build Coastguard Worker * 17*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27*c0909341SAndroid Build Coastguard Worker */ 28*c0909341SAndroid Build Coastguard Worker 29*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S" 30*c0909341SAndroid Build Coastguard Worker#include "util.S" 31*c0909341SAndroid Build Coastguard Worker 32*c0909341SAndroid Build Coastguard Worker.macro avg dst0, dst1, t0, t1, t2, t3 33*c0909341SAndroid Build Coastguard Worker vld1.16 {\t0,\t1}, [r2, :128]! 34*c0909341SAndroid Build Coastguard Worker vld1.16 {\t2,\t3}, [r3, :128]! 35*c0909341SAndroid Build Coastguard Worker vadd.i16 \t0, \t0, \t2 36*c0909341SAndroid Build Coastguard Worker vadd.i16 \t1, \t1, \t3 37*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 \dst0, \t0, #5 38*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 \dst1, \t1, #5 39*c0909341SAndroid Build Coastguard Worker.endm 40*c0909341SAndroid Build Coastguard Worker 41*c0909341SAndroid Build Coastguard Worker.macro w_avg dst0, dst1, t0, t1, t2, t3 42*c0909341SAndroid Build Coastguard Worker vld1.16 {\t0,\t1}, [r2, :128]! 43*c0909341SAndroid Build Coastguard Worker vld1.16 {\t2,\t3}, [r3, :128]! 
44*c0909341SAndroid Build Coastguard Worker vsub.i16 \t0, \t2, \t0 45*c0909341SAndroid Build Coastguard Worker vsub.i16 \t1, \t3, \t1 46*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 \t0, \t0, q15 47*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 \t1, \t1, q15 48*c0909341SAndroid Build Coastguard Worker vadd.i16 \t0, \t2, \t0 49*c0909341SAndroid Build Coastguard Worker vadd.i16 \t1, \t3, \t1 50*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 \dst0, \t0, #4 51*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 \dst1, \t1, #4 52*c0909341SAndroid Build Coastguard Worker.endm 53*c0909341SAndroid Build Coastguard Worker 54*c0909341SAndroid Build Coastguard Worker.macro mask dst0, dst1, t0, t1, t2, t3 55*c0909341SAndroid Build Coastguard Worker vld1.8 {q14}, [lr, :128]! 56*c0909341SAndroid Build Coastguard Worker vld1.16 {\t0,\t1}, [r2, :128]! 57*c0909341SAndroid Build Coastguard Worker vmul.i8 q14, q14, q15 58*c0909341SAndroid Build Coastguard Worker vld1.16 {\t2,\t3}, [r3, :128]! 
59*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d28, #8 60*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d29, #8 61*c0909341SAndroid Build Coastguard Worker vsub.i16 \t0, \t2, \t0 62*c0909341SAndroid Build Coastguard Worker vsub.i16 \t1, \t3, \t1 63*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 \t0, \t0, q13 64*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 \t1, \t1, q14 65*c0909341SAndroid Build Coastguard Worker vadd.i16 \t0, \t2, \t0 66*c0909341SAndroid Build Coastguard Worker vadd.i16 \t1, \t3, \t1 67*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 \dst0, \t0, #4 68*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 \dst1, \t1, #4 69*c0909341SAndroid Build Coastguard Worker.endm 70*c0909341SAndroid Build Coastguard Worker 71*c0909341SAndroid Build Coastguard Worker.macro bidir_fn type 72*c0909341SAndroid Build Coastguard Workerfunction \type\()_8bpc_neon, export=1 73*c0909341SAndroid Build Coastguard Worker push {r4-r6,lr} 74*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #16] 75*c0909341SAndroid Build Coastguard Worker clz r4, r4 76*c0909341SAndroid Build Coastguard Worker.ifnc \type, avg 77*c0909341SAndroid Build Coastguard Worker ldr lr, [sp, #24] 78*c0909341SAndroid Build Coastguard Worker.endif 79*c0909341SAndroid Build Coastguard Worker.ifc \type, w_avg 80*c0909341SAndroid Build Coastguard Worker vdup.s16 q15, lr 81*c0909341SAndroid Build Coastguard Worker vneg.s16 q15, q15 82*c0909341SAndroid Build Coastguard Worker vshl.i16 q15, q15, #11 83*c0909341SAndroid Build Coastguard Worker.endif 84*c0909341SAndroid Build Coastguard Worker.ifc \type, mask 85*c0909341SAndroid Build Coastguard Worker vmov.i8 q15, #256-2 86*c0909341SAndroid Build Coastguard Worker.endif 87*c0909341SAndroid Build Coastguard Worker adr r12, L(\type\()_tbl) 88*c0909341SAndroid Build Coastguard Worker sub r4, r4, #24 89*c0909341SAndroid Build Coastguard Worker ldr r4, [r12, r4, lsl #2] 90*c0909341SAndroid Build Coastguard Worker \type 
d16, d17, q0, q1, q2, q3 91*c0909341SAndroid Build Coastguard Worker add r12, r12, r4 92*c0909341SAndroid Build Coastguard Worker bx r12 93*c0909341SAndroid Build Coastguard Worker 94*c0909341SAndroid Build Coastguard Worker .align 2 95*c0909341SAndroid Build Coastguard WorkerL(\type\()_tbl): 96*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_tbl) + CONFIG_THUMB 97*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_tbl) + CONFIG_THUMB 98*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_tbl) + CONFIG_THUMB 99*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_tbl) + CONFIG_THUMB 100*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_tbl) + CONFIG_THUMB 101*c0909341SAndroid Build Coastguard Worker .word 4f - L(\type\()_tbl) + CONFIG_THUMB 102*c0909341SAndroid Build Coastguard Worker 103*c0909341SAndroid Build Coastguard Worker4: 104*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 105*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 106*c0909341SAndroid Build Coastguard Worker cmp r5, #4 107*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[0]}, [r0, :32], r1 108*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[1]}, [r6, :32], r1 109*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[0]}, [r0, :32], r1 110*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[1]}, [r6, :32], r1 111*c0909341SAndroid Build Coastguard Worker beq 0f 112*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 113*c0909341SAndroid Build Coastguard Worker cmp r5, #8 114*c0909341SAndroid Build Coastguard Worker vst1.32 {d18[0]}, [r0, :32], r1 115*c0909341SAndroid Build Coastguard Worker vst1.32 {d18[1]}, [r6, :32], r1 116*c0909341SAndroid Build Coastguard Worker vst1.32 {d19[0]}, [r0, :32], r1 117*c0909341SAndroid Build Coastguard Worker vst1.32 {d19[1]}, [r6, :32], r1 118*c0909341SAndroid Build Coastguard Worker beq 0f 119*c0909341SAndroid Build Coastguard Worker \type d16, 
d17, q0, q1, q2, q3 120*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[0]}, [r0, :32], r1 121*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[1]}, [r6, :32], r1 122*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 123*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[0]}, [r0, :32], r1 124*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[1]}, [r6, :32], r1 125*c0909341SAndroid Build Coastguard Worker vst1.32 {d18[0]}, [r0, :32], r1 126*c0909341SAndroid Build Coastguard Worker vst1.32 {d18[1]}, [r6, :32], r1 127*c0909341SAndroid Build Coastguard Worker vst1.32 {d19[0]}, [r0, :32], r1 128*c0909341SAndroid Build Coastguard Worker vst1.32 {d19[1]}, [r6, :32], r1 129*c0909341SAndroid Build Coastguard Worker pop {r4-r6,pc} 130*c0909341SAndroid Build Coastguard Worker80: 131*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 132*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 133*c0909341SAndroid Build Coastguard Worker8: 134*c0909341SAndroid Build Coastguard Worker vst1.8 {d16}, [r0, :64], r1 135*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 136*c0909341SAndroid Build Coastguard Worker vst1.8 {d17}, [r6, :64], r1 137*c0909341SAndroid Build Coastguard Worker vst1.8 {d18}, [r0, :64], r1 138*c0909341SAndroid Build Coastguard Worker subs r5, r5, #4 139*c0909341SAndroid Build Coastguard Worker vst1.8 {d19}, [r6, :64], r1 140*c0909341SAndroid Build Coastguard Worker ble 0f 141*c0909341SAndroid Build Coastguard Worker \type d16, d17, q0, q1, q2, q3 142*c0909341SAndroid Build Coastguard Worker b 8b 143*c0909341SAndroid Build Coastguard Worker160: 144*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 145*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 146*c0909341SAndroid Build Coastguard Worker16: 147*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 148*c0909341SAndroid Build Coastguard Worker vst1.8 {q8}, [r0, :128], r1 149*c0909341SAndroid Build Coastguard 
Worker \type d20, d21, q0, q1, q2, q3 150*c0909341SAndroid Build Coastguard Worker vst1.8 {q9}, [r6, :128], r1 151*c0909341SAndroid Build Coastguard Worker \type d22, d23, q0, q1, q2, q3 152*c0909341SAndroid Build Coastguard Worker vst1.8 {q10}, [r0, :128], r1 153*c0909341SAndroid Build Coastguard Worker subs r5, r5, #4 154*c0909341SAndroid Build Coastguard Worker vst1.8 {q11}, [r6, :128], r1 155*c0909341SAndroid Build Coastguard Worker ble 0f 156*c0909341SAndroid Build Coastguard Worker \type d16, d17, q0, q1, q2, q3 157*c0909341SAndroid Build Coastguard Worker b 16b 158*c0909341SAndroid Build Coastguard Worker320: 159*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 160*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 161*c0909341SAndroid Build Coastguard Worker32: 162*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 163*c0909341SAndroid Build Coastguard Worker \type d20, d21, q0, q1, q2, q3 164*c0909341SAndroid Build Coastguard Worker vst1.8 {q8, q9}, [r0, :128], r1 165*c0909341SAndroid Build Coastguard Worker \type d22, d23, q0, q1, q2, q3 166*c0909341SAndroid Build Coastguard Worker subs r5, r5, #2 167*c0909341SAndroid Build Coastguard Worker vst1.8 {q10, q11}, [r6, :128], r1 168*c0909341SAndroid Build Coastguard Worker ble 0f 169*c0909341SAndroid Build Coastguard Worker \type d16, d17, q0, q1, q2, q3 170*c0909341SAndroid Build Coastguard Worker b 32b 171*c0909341SAndroid Build Coastguard Worker640: 172*c0909341SAndroid Build Coastguard Worker add r6, r0, #32 173*c0909341SAndroid Build Coastguard Worker64: 174*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 175*c0909341SAndroid Build Coastguard Worker \type d20, d21, q0, q1, q2, q3 176*c0909341SAndroid Build Coastguard Worker \type d22, d23, q0, q1, q2, q3 177*c0909341SAndroid Build Coastguard Worker vst1.8 {q8, q9}, [r0, :128], r1 178*c0909341SAndroid Build Coastguard Worker \type d16, d17, q0, q1, q2, q3 179*c0909341SAndroid Build Coastguard Worker 
vst1.8 {q10, q11}, [r6, :128], r1 180*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 181*c0909341SAndroid Build Coastguard Worker \type d20, d21, q0, q1, q2, q3 182*c0909341SAndroid Build Coastguard Worker vst1.8 {q8, q9}, [r0, :128], r1 183*c0909341SAndroid Build Coastguard Worker \type d22, d23, q0, q1, q2, q3 184*c0909341SAndroid Build Coastguard Worker subs r5, r5, #2 185*c0909341SAndroid Build Coastguard Worker vst1.8 {q10, q11}, [r6, :128], r1 186*c0909341SAndroid Build Coastguard Worker ble 0f 187*c0909341SAndroid Build Coastguard Worker \type d16, d17, q0, q1, q2, q3 188*c0909341SAndroid Build Coastguard Worker b 64b 189*c0909341SAndroid Build Coastguard Worker1280: 190*c0909341SAndroid Build Coastguard Worker sub r1, r1, #32 191*c0909341SAndroid Build Coastguard Worker add r6, r0, #64 192*c0909341SAndroid Build Coastguard Worker128: 193*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 194*c0909341SAndroid Build Coastguard Worker \type d20, d21, q0, q1, q2, q3 195*c0909341SAndroid Build Coastguard Worker \type d22, d23, q0, q1, q2, q3 196*c0909341SAndroid Build Coastguard Worker vst1.8 {q8, q9}, [r0, :128]! 197*c0909341SAndroid Build Coastguard Worker \type d16, d17, q0, q1, q2, q3 198*c0909341SAndroid Build Coastguard Worker vst1.8 {q10, q11}, [r0, :128], r1 199*c0909341SAndroid Build Coastguard Worker \type d18, d19, q0, q1, q2, q3 200*c0909341SAndroid Build Coastguard Worker \type d20, d21, q0, q1, q2, q3 201*c0909341SAndroid Build Coastguard Worker vst1.8 {q8, q9}, [r6, :128]! 
202*c0909341SAndroid Build Coastguard Worker \type d22, d23, q0, q1, q2, q3 203*c0909341SAndroid Build Coastguard Worker subs r5, r5, #1 204*c0909341SAndroid Build Coastguard Worker vst1.8 {q10, q11}, [r6, :128], r1 205*c0909341SAndroid Build Coastguard Worker ble 0f 206*c0909341SAndroid Build Coastguard Worker \type d16, d17, q0, q1, q2, q3 207*c0909341SAndroid Build Coastguard Worker b 128b 208*c0909341SAndroid Build Coastguard Worker 209*c0909341SAndroid Build Coastguard Worker0: 210*c0909341SAndroid Build Coastguard Worker pop {r4-r6,pc} 211*c0909341SAndroid Build Coastguard Workerendfunc 212*c0909341SAndroid Build Coastguard Worker.endm 213*c0909341SAndroid Build Coastguard Worker 214*c0909341SAndroid Build Coastguard Workerbidir_fn avg 215*c0909341SAndroid Build Coastguard Workerbidir_fn w_avg 216*c0909341SAndroid Build Coastguard Workerbidir_fn mask 217*c0909341SAndroid Build Coastguard Worker 218*c0909341SAndroid Build Coastguard Worker 219*c0909341SAndroid Build Coastguard Worker.macro w_mask_fn type 220*c0909341SAndroid Build Coastguard Workerfunction w_mask_\type\()_8bpc_neon, export=1 221*c0909341SAndroid Build Coastguard Worker push {r4-r9,lr} 222*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #28] 223*c0909341SAndroid Build Coastguard Worker ldrd r6, r7, [sp, #36] 224*c0909341SAndroid Build Coastguard Worker clz r8, r4 225*c0909341SAndroid Build Coastguard Worker adr r9, L(w_mask_\type\()_tbl) 226*c0909341SAndroid Build Coastguard Worker sub r8, r8, #24 227*c0909341SAndroid Build Coastguard Worker ldr r8, [r9, r8, lsl #2] 228*c0909341SAndroid Build Coastguard Worker add r9, r9, r8 229*c0909341SAndroid Build Coastguard Worker movw r12, #6903 230*c0909341SAndroid Build Coastguard Worker vdup.16 q14, r12 231*c0909341SAndroid Build Coastguard Worker.if \type == 444 232*c0909341SAndroid Build Coastguard Worker vmov.i8 q15, #64 233*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 234*c0909341SAndroid Build Coastguard Worker vdup.8 
d0, r7 // d0[] <- sign 235*c0909341SAndroid Build Coastguard Worker vmov.i8 d30, #129 236*c0909341SAndroid Build Coastguard Worker vsub.i8 d30, d30, d0 // 129 - sign 237*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 238*c0909341SAndroid Build Coastguard Worker vdup.16 q0, r7 // d0[] <- sign 239*c0909341SAndroid Build Coastguard Worker vmov.i16 q15, #256 240*c0909341SAndroid Build Coastguard Worker vsub.i16 q15, q15, q0 // 256 - sign 241*c0909341SAndroid Build Coastguard Worker.endif 242*c0909341SAndroid Build Coastguard Worker add r12, r0, r1 243*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 244*c0909341SAndroid Build Coastguard Worker bx r9 245*c0909341SAndroid Build Coastguard Worker 246*c0909341SAndroid Build Coastguard Worker .align 2 247*c0909341SAndroid Build Coastguard WorkerL(w_mask_\type\()_tbl): 248*c0909341SAndroid Build Coastguard Worker .word 1280f - L(w_mask_\type\()_tbl) + CONFIG_THUMB 249*c0909341SAndroid Build Coastguard Worker .word 640f - L(w_mask_\type\()_tbl) + CONFIG_THUMB 250*c0909341SAndroid Build Coastguard Worker .word 320f - L(w_mask_\type\()_tbl) + CONFIG_THUMB 251*c0909341SAndroid Build Coastguard Worker .word 160f - L(w_mask_\type\()_tbl) + CONFIG_THUMB 252*c0909341SAndroid Build Coastguard Worker .word 8f - L(w_mask_\type\()_tbl) + CONFIG_THUMB 253*c0909341SAndroid Build Coastguard Worker .word 4f - L(w_mask_\type\()_tbl) + CONFIG_THUMB 254*c0909341SAndroid Build Coastguard Worker 255*c0909341SAndroid Build Coastguard Worker4: 256*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! // tmp1 (four rows at once) 257*c0909341SAndroid Build Coastguard Worker vld1.16 {d4, d5, d6, d7}, [r3, :128]! 
// tmp2 (four rows at once) 258*c0909341SAndroid Build Coastguard Worker subs r5, r5, #4 259*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q2, q0 // tmp2-tmp1 260*c0909341SAndroid Build Coastguard Worker vsub.i16 q9, q3, q1 261*c0909341SAndroid Build Coastguard Worker vabd.s16 q10, q0, q2 // (abs(tmp1[x] - tmp2[x])) 262*c0909341SAndroid Build Coastguard Worker vabd.s16 q11, q1, q3 263*c0909341SAndroid Build Coastguard Worker vqsub.u16 q10, q14, q10 // 6903 - abs () 264*c0909341SAndroid Build Coastguard Worker vqsub.u16 q11, q14, q11 265*c0909341SAndroid Build Coastguard Worker vshr.s16 q10, q10, #8 // 64-m = (6903 - abs()) >> 8 266*c0909341SAndroid Build Coastguard Worker vshr.s16 q11, q11, #8 267*c0909341SAndroid Build Coastguard Worker vshl.s16 q12, q10, #9 // (64-m)<<9 268*c0909341SAndroid Build Coastguard Worker vshl.s16 q13, q11, #9 269*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q12, q12, q8 // ((tmp2-tmp1)*(64-m)<<9)>>15 270*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q13, q13, q9 271*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q0 // (((tmp2-tmp1)*(64-m)<<9)>>15) + tmp1 272*c0909341SAndroid Build Coastguard Worker vadd.i16 q13, q13, q1 273*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d24, q12, #4 // (((((tmp2-tmp1)*(64-m)<<9)>>15) + tmp1) + 8) >> 4 274*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d25, q13, #4 275*c0909341SAndroid Build Coastguard Worker.if \type == 444 276*c0909341SAndroid Build Coastguard Worker vmovn.u16 d20, q10 // 64 - m 277*c0909341SAndroid Build Coastguard Worker vmovn.u16 d21, q11 278*c0909341SAndroid Build Coastguard Worker vsub.i8 q10, q15, q10 // m 279*c0909341SAndroid Build Coastguard Worker vst1.8 {d20, d21}, [r6, :128]! 
280*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 281*c0909341SAndroid Build Coastguard Worker vpadd.s16 d20, d20, d21 // (64 - m) + (64 - n) (column wise addition) 282*c0909341SAndroid Build Coastguard Worker vpadd.s16 d21, d22, d23 283*c0909341SAndroid Build Coastguard Worker vmovn.s16 d6, q10 284*c0909341SAndroid Build Coastguard Worker vhsub.u8 d6, d30, d6 // ((129 - sign) - ((64 - m) + (64 - n))) >> 1 285*c0909341SAndroid Build Coastguard Worker vst1.8 {d6}, [r6, :64]! 286*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 287*c0909341SAndroid Build Coastguard Worker vadd.s16 d20, d20, d21 // (64 - my1) + (64 - my2) (row wise addition) 288*c0909341SAndroid Build Coastguard Worker vadd.s16 d21, d22, d23 289*c0909341SAndroid Build Coastguard Worker vpadd.s16 d20, d20, d21 // (128 - m) + (128 - n) (column wise addition) 290*c0909341SAndroid Build Coastguard Worker vsub.s16 d20, d30, d20 // (256 - sign) - ((128 - m) + (128 - n)) 291*c0909341SAndroid Build Coastguard Worker vrshrn.u16 d20, q10, #2 // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2 292*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[0]}, [r6, :32]! 293*c0909341SAndroid Build Coastguard Worker.endif 294*c0909341SAndroid Build Coastguard Worker vst1.32 {d24[0]}, [r0, :32], r1 295*c0909341SAndroid Build Coastguard Worker vst1.32 {d24[1]}, [r12, :32], r1 296*c0909341SAndroid Build Coastguard Worker vst1.32 {d25[0]}, [r0, :32], r1 297*c0909341SAndroid Build Coastguard Worker vst1.32 {d25[1]}, [r12, :32], r1 298*c0909341SAndroid Build Coastguard Worker bgt 4b 299*c0909341SAndroid Build Coastguard Worker pop {r4-r9,pc} 300*c0909341SAndroid Build Coastguard Worker8: 301*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! // tmp1y1, tmp1y2 302*c0909341SAndroid Build Coastguard Worker vld1.16 {d4, d5, d6, d7}, [r3, :128]! 
// tmp2y1, tmp2y2 303*c0909341SAndroid Build Coastguard Worker subs r5, r5, #2 304*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q2, q0 // tmp2y1 - tmp1y1 305*c0909341SAndroid Build Coastguard Worker vsub.i16 q9, q3, q1 // tmp2y2 - tmp1y2 306*c0909341SAndroid Build Coastguard Worker vabd.s16 q10, q0, q2 // abs(tmp1y1 - tmp2y1) 307*c0909341SAndroid Build Coastguard Worker vabd.s16 q11, q1, q3 // abs(tmp1y2 - tmp2y2) 308*c0909341SAndroid Build Coastguard Worker vqsub.u16 q10, q14, q10 // 6903 - abs(tmp1y1 - tmp2y1) 309*c0909341SAndroid Build Coastguard Worker vqsub.u16 q11, q14, q11 // 6903 - abs(tmp1y2 - tmp2y2) 310*c0909341SAndroid Build Coastguard Worker vshr.s16 q10, q10, #8 // 64 - my1 = 6903 - abs(tmp1y1 - tmp2y1) >> 8 311*c0909341SAndroid Build Coastguard Worker vshr.s16 q11, q11, #8 // 64 - my2 = 6903 - abs(tmp1y2 - tmp2y2) >> 8 312*c0909341SAndroid Build Coastguard Worker vshl.s16 q12, q10, #9 // (64 - my1) << 9 313*c0909341SAndroid Build Coastguard Worker vshl.s16 q13, q11, #9 // (64 - my2) << 9 314*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q12, q12, q8 // ((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15 315*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q13, q13, q9 // ((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15 316*c0909341SAndroid Build Coastguard Worker vadd.s16 q12, q12, q0 // (((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15) + tmp1y1 317*c0909341SAndroid Build Coastguard Worker vadd.s16 q13, q13, q1 // (((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15) + tmp1y2 318*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d24, q12, #4 // (((((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15) + tmp1y1) + 8) >> 4 319*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d25, q13, #4 // (((((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15) + tmp1y2) + 8) >> 4 320*c0909341SAndroid Build Coastguard Worker.if \type == 444 321*c0909341SAndroid Build Coastguard Worker vmovn.u16 d20, q10 // 64 - m 322*c0909341SAndroid Build Coastguard Worker vmovn.u16 d21, q11 
323*c0909341SAndroid Build Coastguard Worker vsub.i8 q10, q15, q10 // m 324*c0909341SAndroid Build Coastguard Worker vst1.8 {d20, d21}, [r6, :128]! 325*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 326*c0909341SAndroid Build Coastguard Worker vpadd.s16 d20, d20, d21 // (64 - my1) + (64 - ny1) (column wise addition) 327*c0909341SAndroid Build Coastguard Worker vpadd.s16 d21, d22, d23 // (64 - my2) + (64 - ny2) 328*c0909341SAndroid Build Coastguard Worker vmovn.s16 d20, q10 329*c0909341SAndroid Build Coastguard Worker vhsub.u8 d20, d30, d20 // ((129 - sign) - ((64 - my1/y2) + (64 - ny1/y2))) >> 1 330*c0909341SAndroid Build Coastguard Worker vst1.8 {d20}, [r6, :64]! 331*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 332*c0909341SAndroid Build Coastguard Worker vadd.s16 q10, q10, q11 // (64 - my1) + (64 - my2) (row wise addition) 333*c0909341SAndroid Build Coastguard Worker vpadd.s16 d20, d20, d21 // (128 - m) + (128 - n) (column wise addition) 334*c0909341SAndroid Build Coastguard Worker vsub.s16 d20, d30, d20 // (256 - sign) - ((128 - m) + (128 - n)) 335*c0909341SAndroid Build Coastguard Worker vrshrn.u16 d20, q10, #2 // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2 336*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[0]}, [r6, :32]! 
337*c0909341SAndroid Build Coastguard Worker.endif 338*c0909341SAndroid Build Coastguard Worker vst1.16 {d24}, [r0, :64], r1 339*c0909341SAndroid Build Coastguard Worker vst1.16 {d25}, [r12, :64], r1 340*c0909341SAndroid Build Coastguard Worker bgt 8b 341*c0909341SAndroid Build Coastguard Worker pop {r4-r9,pc} 342*c0909341SAndroid Build Coastguard Worker1280: 343*c0909341SAndroid Build Coastguard Worker640: 344*c0909341SAndroid Build Coastguard Worker320: 345*c0909341SAndroid Build Coastguard Worker160: 346*c0909341SAndroid Build Coastguard Worker sub r1, r1, r4 347*c0909341SAndroid Build Coastguard Worker.if \type == 444 348*c0909341SAndroid Build Coastguard Worker add lr, r6, r4 349*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 350*c0909341SAndroid Build Coastguard Worker add lr, r6, r4, lsr #1 351*c0909341SAndroid Build Coastguard Worker.endif 352*c0909341SAndroid Build Coastguard Worker add r9, r3, r4, lsl #1 353*c0909341SAndroid Build Coastguard Worker add r7, r2, r4, lsl #1 354*c0909341SAndroid Build Coastguard Worker161: 355*c0909341SAndroid Build Coastguard Worker mov r8, r4 356*c0909341SAndroid Build Coastguard Worker16: 357*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! // tmp1y1 358*c0909341SAndroid Build Coastguard Worker vld1.16 {d4, d5, d6, d7}, [r3, :128]! // tmp2y1 359*c0909341SAndroid Build Coastguard Worker vld1.16 {d16, d17, d18, d19}, [r7, :128]! 
// tmp1y2 360*c0909341SAndroid Build Coastguard Worker subs r8, r8, #16 361*c0909341SAndroid Build Coastguard Worker vsub.i16 q2, q2, q0 // tmp2y1 - tmp1y1 362*c0909341SAndroid Build Coastguard Worker vsub.i16 q3, q3, q1 363*c0909341SAndroid Build Coastguard Worker vabs.s16 q10, q2 // abs(tm2y1 - tmp1y1) 364*c0909341SAndroid Build Coastguard Worker vabs.s16 q11, q3 365*c0909341SAndroid Build Coastguard Worker vqsub.u16 q10, q14, q10 // 6903 - abs(tmp1y1 - tmp2y1) 366*c0909341SAndroid Build Coastguard Worker vqsub.u16 q11, q14, q11 367*c0909341SAndroid Build Coastguard Worker vshr.s16 q10, q10, #8 // 64 - my1 = 6903 - abs(tmp1y1 - tmp2y1) >> 8 368*c0909341SAndroid Build Coastguard Worker vshr.s16 q11, q11, #8 369*c0909341SAndroid Build Coastguard Worker vshl.s16 q12, q10, #9 // (64 - my1) << 9 370*c0909341SAndroid Build Coastguard Worker vshl.s16 q13, q11, #9 371*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q12, q12, q2 // ((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15 372*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q13, q13, q3 373*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q0 // (((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15) + tmp1y1 374*c0909341SAndroid Build Coastguard Worker vadd.i16 q13, q13, q1 375*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r9, :128]! // tmp2h2 376*c0909341SAndroid Build Coastguard Worker.if \type == 444 377*c0909341SAndroid Build Coastguard Worker vmovn.u16 d20, q10 // 64 - my1 378*c0909341SAndroid Build Coastguard Worker vmovn.u16 d21, q11 379*c0909341SAndroid Build Coastguard Worker vsub.i8 q10, q15, q10 // my1 380*c0909341SAndroid Build Coastguard Worker vst1.8 {d20, d21}, [r6, :128]! 
381*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 382*c0909341SAndroid Build Coastguard Worker vpadd.s16 d20, d20, d21 // (64 - my1) + (64 - ny1) (column wise addition) 383*c0909341SAndroid Build Coastguard Worker vpadd.s16 d21, d22, d23 384*c0909341SAndroid Build Coastguard Worker vmovn.s16 d20, q10 385*c0909341SAndroid Build Coastguard Worker vhsub.u8 d20, d30, d20 // ((129 - sign) - ((64 - my1) + (64 - ny1))) >> 1 386*c0909341SAndroid Build Coastguard Worker vst1.8 {d20}, [r6, :64]! 387*c0909341SAndroid Build Coastguard Worker.endif 388*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d24, q12, #4 // (((((tmp2y1 - tmp1y1)*(64 - my1) << 9) >> 15) + tmp1y1) + 8) >> 4 389*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d25, q13, #4 390*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q8 // tmp2y2 - tmp1y2 391*c0909341SAndroid Build Coastguard Worker vsub.i16 q1, q1, q9 392*c0909341SAndroid Build Coastguard Worker vst1.16 {d24, d25}, [r0, :128]! // store dsty1 393*c0909341SAndroid Build Coastguard Worker vabs.s16 q2, q0 // abs(tmp2y2 - tmp1y2) 394*c0909341SAndroid Build Coastguard Worker vabs.s16 q3, q1 395*c0909341SAndroid Build Coastguard Worker vqsub.u16 q2, q14, q2 // 6903 - abs(tmp2y2 - tmp1y2) 396*c0909341SAndroid Build Coastguard Worker vqsub.u16 q3, q14, q3 397*c0909341SAndroid Build Coastguard Worker vshr.s16 q2, q2, #8 // (6903 - abs(tmp2y2 - tmp1y2)) >> 8 398*c0909341SAndroid Build Coastguard Worker vshr.s16 q3, q3, #8 399*c0909341SAndroid Build Coastguard Worker vshl.s16 q12, q2, #9 // (64 - my2) << 9 400*c0909341SAndroid Build Coastguard Worker vshl.s16 q13, q3, #9 401*c0909341SAndroid Build Coastguard Worker.if \type == 444 402*c0909341SAndroid Build Coastguard Worker vmovn.u16 d4, q2 // 64 - my2 403*c0909341SAndroid Build Coastguard Worker vmovn.u16 d5, q3 404*c0909341SAndroid Build Coastguard Worker vsub.i8 q2, q15, q2 // my2 405*c0909341SAndroid Build Coastguard Worker vst1.8 {d4, d5}, [lr, :128]! 
406*c0909341SAndroid Build Coastguard Worker.elseif \type == 422 407*c0909341SAndroid Build Coastguard Worker vpadd.s16 d4, d4, d5 // (64 - my2) + (64 - ny2) (column wise addition) 408*c0909341SAndroid Build Coastguard Worker vpadd.s16 d5, d6, d7 409*c0909341SAndroid Build Coastguard Worker vmovn.s16 d4, q2 410*c0909341SAndroid Build Coastguard Worker vhsub.u8 d4, d30, d4 // ((129 - sign) - ((64 - my2) + (64 - ny2))) >> 1 411*c0909341SAndroid Build Coastguard Worker vst1.8 {d4}, [lr, :64]! 412*c0909341SAndroid Build Coastguard Worker.elseif \type == 420 413*c0909341SAndroid Build Coastguard Worker vadd.s16 q10, q10, q2 // (64 - my1) + (64 - my2) (row wise addition) 414*c0909341SAndroid Build Coastguard Worker vadd.s16 q11, q11, q3 415*c0909341SAndroid Build Coastguard Worker vpadd.s16 d20, d20, d21 // (128 - m) + (128 - n) (column wise addition) 416*c0909341SAndroid Build Coastguard Worker vpadd.s16 d21, d22, d23 417*c0909341SAndroid Build Coastguard Worker vsub.s16 q10, q15, q10 // (256 - sign) - ((128 - m) + (128 - n)) 418*c0909341SAndroid Build Coastguard Worker vrshrn.u16 d20, q10, #2 // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2 419*c0909341SAndroid Build Coastguard Worker vst1.8 {d20}, [r6, :64]! 420*c0909341SAndroid Build Coastguard Worker.endif 421*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q12, q12, q0 // ((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15 422*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 q13, q13, q1 423*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q8 // (((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15) + tmp1y2 424*c0909341SAndroid Build Coastguard Worker vadd.i16 q13, q13, q9 425*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d24, q12, #4 // (((((tmp2y2 - tmp1y2)*(64 - my2) << 9) >> 15) + tmp1y2) + 8) >> 4 426*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d25, q13, #4 427*c0909341SAndroid Build Coastguard Worker vst1.16 {d24, d25}, [r12, :128]! 
// store dsty2
        bgt             16b
        subs            r5, r5, #2
        add             r2, r2, r4, lsl #1
        add             r3, r3, r4, lsl #1
        add             r7, r7, r4, lsl #1
        add             r9, r9, r4, lsl #1
.if \type == 444
        add             r6, r6, r4
        add             lr, lr, r4
.elseif \type == 422
        add             r6, r6, r4, lsr #1
        add             lr, lr, r4, lsr #1
.endif
        add             r0, r0, r1
        add             r12, r12, r1
        bgt             161b
        pop             {r4-r9,pc}
endfunc
.endm

w_mask_fn 444
w_mask_fn 422
w_mask_fn 420


// blend_8bpc_neon: blend a second source buffer into dst with a per-pixel
// 8-bit mask m:
//     dst = (src2 * m + dst * (64 - m) + 32) >> 6
// (vmull/vmlal form the 16-bit sum, vrshrn #6 does the rounding narrow).
//
// Registers as used below:
//   r0  dst pointer (read and written back in place)
//   r1  dst stride in bytes
//   r2  second source pointer, read contiguously with post-increment
//   r3  width (presumably w); only used to select the loop, then reused
//       as the jump-table base
//   r4  row counter (presumably h), first stack argument
//   r5  mask pointer, second stack argument, read contiguously
//   r12 pointer to the second dst row in the two-rows-per-iteration paths
// The :32/:64/:128 qualifiers are alignment hints on the given accesses.
function blend_8bpc_neon, export=1
        push            {r4-r5,lr}
        ldrd            r4, r5, [sp, #12]       // 3 regs pushed = 12 bytes; fetch the two stack args
        clz             lr, r3
        adr             r3, L(blend_tbl)
        sub             lr, lr, #26             // clz(32) == 26, so w=32 -> index 0 ... w=4 -> index 3
        ldr             lr, [r3, lr, lsl #2]
        add             r3, r3, lr              // table entries are offsets relative to L(blend_tbl)
        bx              r3

        .align 2
L(blend_tbl):
        .word 320f - L(blend_tbl) + CONFIG_THUMB
        .word 160f - L(blend_tbl) + CONFIG_THUMB
        .word 80f - L(blend_tbl) + CONFIG_THUMB
        .word 40f - L(blend_tbl) + CONFIG_THUMB

// Width 4: two rows per iteration, both rows packed into one d register.
40:
        vmov.i8         d22, #64
        add             r12, r0, r1             // r12 = second row
        lsl             r1, r1, #1              // step two rows at a time
4:
        vld1.u8         {d2}, [r5, :64]!        // 8 mask bytes (2 rows x 4)
        vld1.u8         {d1}, [r2, :64]!        // 8 source bytes
        vld1.32         {d0[]}, [r0, :32]
        subs            r4, r4, #2
        vld1.32         {d0[1]}, [r12, :32]     // d0 = dst row 0 | dst row 1
        vsub.i8         d3, d22, d2             // 64 - m
        vmull.u8        q8, d1, d2
        vmlal.u8        q8, d0, d3
        vrshrn.i16      d20, q8, #6
        vst1.32         {d20[0]}, [r0, :32], r1
        vst1.32         {d20[1]}, [r12, :32], r1
        bgt             4b
        pop             {r4-r5,pc}
// Width 8: two rows per iteration, one d register per row.
80:
        vmov.i8         d16, #64
        add             r12, r0, r1
        lsl             r1, r1, #1
8:
        vld1.u8         {q1}, [r5, :128]!       // masks: d2 = row 0, d3 = row 1
        vld1.u8         {q2}, [r2, :128]!       // source: d4 = row 0, d5 = row 1
        vld1.u8         {d0}, [r0, :64]
        vsub.i8         d17, d16, d2
        vld1.u8         {d1}, [r12, :64]
        subs            r4, r4, #2
        vsub.i8         d18, d16, d3
        vmull.u8        q3, d2, d4
        vmlal.u8        q3, d0, d17
        vmull.u8        q10, d3, d5
        vmlal.u8        q10, d1, d18
        vrshrn.i16      d22, q3, #6
        vrshrn.i16      d23, q10, #6
        vst1.u8         {d22}, [r0, :64], r1
        vst1.u8         {d23}, [r12, :64], r1
        bgt             8b
        pop             {r4-r5,pc}
// Width 16: two rows per iteration, one q register per row.
160:
        vmov.i8         q12, #64
        add             r12, r0, r1
        lsl             r1, r1, #1
16:
        vld1.u8         {q1, q2}, [r5, :128]!   // masks: q1 = row 0, q2 = row 1
        vld1.u8         {q8, q9}, [r2, :128]!   // source: q8 = row 0, q9 = row 1
        vld1.u8         {q0}, [r0, :128]
        subs            r4, r4, #2
        vsub.i8         q15, q12, q1            // 64 - m for row 0
        vld1.u8         {q13}, [r12, :128]
        vmull.u8        q3, d16, d2
        vmlal.u8        q3, d0, d30
        vmull.u8        q14, d17, d3
        vmlal.u8        q14, d1, d31
        vsub.i8         q15, q12, q2            // q15 reused: 64 - m for row 1
        vrshrn.i16      d20, q3, #6
        vrshrn.i16      d21, q14, #6
        vmull.u8        q3, d18, d4
        vmlal.u8        q3, d26, d30
        vmull.u8        q14, d19, d5
        vmlal.u8        q14, d27, d31
        vrshrn.i16      d22, q3, #6
        vrshrn.i16      d23, q14, #6
        vst1.u8         {q10}, [r0, :128], r1
        vst1.u8         {q11}, [r12, :128], r1
        bgt             16b
        pop             {r4-r5,pc}
// Width 32: one row per iteration.
320:
        vmov.i8         q10, #64
32:
        vld1.u8         {q2, q3}, [r5, :128]!
        vld1.u8         {q8, q9}, [r2, :128]!
        vld1.u8         {q0, q1}, [r0, :128]
        subs            r4, r4, #1
        vsub.i8         q11, q10, q2            // 64 - m, first 16 pixels
        vmull.u8        q15, d16, d4
        vmlal.u8        q15, d0, d22
        vmull.u8        q14, d17, d5
        vmlal.u8        q14, d1, d23
        vsub.i8         q11, q10, q3            // q11 reused: 64 - m, last 16 pixels
        vrshrn.i16      d24, q15, #6
        vrshrn.i16      d25, q14, #6
        vmull.u8        q15, d18, d6
        vmlal.u8        q15, d2, d22
        vmull.u8        q14, d19, d7
        vmlal.u8        q14, d3, d23
        vrshrn.i16      d26, q15, #6
        vrshrn.i16      d27, q14, #6
        vst1.u8         {q12, q13}, [r0, :128], r1
        bgt             32b
        pop             {r4-r5,pc}
endfunc

// blend_h_8bpc_neon: same blend formula as above, but with one mask value
// per row, read from the obmc_masks table starting at offset h.
// Only h - (h >> 2) rows are processed; the remaining rows of dst are not
// touched.
//
// Registers as used below:
//   r0  dst pointer, r1 dst stride, r2 second source pointer (contiguous)
//   r3  width (presumably w); selects the loop and is the inner-loop
//       column count in the width >= 32 path
//   r4  row counter, loaded from the first stack argument (presumably h)
//   r5  pointer into obmc_masks, advanced one byte per row
function blend_h_8bpc_neon, export=1
        push            {r4-r5,lr}
        ldr             r4, [sp, #12]
        movrel          r5, X(obmc_masks)
        add             r5, r5, r4              // mask row pointer = obmc_masks + h
        sub             r4, r4, r4, lsr #2      // rows to process = h - h/4
        clz             lr, r3
        adr             r12, L(blend_h_tbl)
        sub             lr, lr, #24             // clz(128) == 24, so w=128 -> index 0 ... w=2 -> index 6
        ldr             lr, [r12, lr, lsl #2]
        add             r12, r12, lr
        bx              r12

        .align 2
L(blend_h_tbl):
        .word 1280f - L(blend_h_tbl) + CONFIG_THUMB
        .word 640f - L(blend_h_tbl) + CONFIG_THUMB
        .word 320f - L(blend_h_tbl) + CONFIG_THUMB
        .word 160f - L(blend_h_tbl) + CONFIG_THUMB
        .word 80f - L(blend_h_tbl) + CONFIG_THUMB
        .word 40f - L(blend_h_tbl) + CONFIG_THUMB
        .word 20f - L(blend_h_tbl) + CONFIG_THUMB

// Width 2: two rows per iteration.
20:
        vmov.i8         d22, #64
        add             r12, r0, r1
        lsl             r1, r1, #1
2:
        vld1.16         {d2[], d3[]}, [r5, :16]!        // two row masks (m0, m1), replicated
        vld1.32         {d1[]}, [r2, :32]!              // 2 rows x 2 source pixels
        subs            r4, r4, #2
        vld1.16         {d0[]}, [r0, :16]
        vzip.8          d2, d3                          // d2 = m0, m0, m1, m1, ...
        vsub.i8         d4, d22, d2
        vld1.16         {d0[1]}, [r12, :16]
        vmull.u8        q8, d1, d2
        vmlal.u8        q8, d0, d4
        vrshrn.i16      d20, q8, #6
        vst1.16         {d20[0]}, [r0, :16], r1
        vst1.16         {d20[1]}, [r12, :16], r1
        bgt             2b
        pop             {r4-r5,pc}
// Width 4: two rows per iteration.
40:
        vmov.i8         d22, #64
        add             r12, r0, r1
        lsl             r1, r1, #1
4:
        vld2.u8         {d2[], d3[]}, [r5, :16]!        // d2 = m0 replicated, d3 = m1 replicated
        vld1.u8         {d1}, [r2, :64]!
        subs            r4, r4, #2
        vext.u8         d2, d2, d3, #4                  // d2 = m0 x4 | m1 x4
        vld1.32         {d0[]}, [r0, :32]
        vsub.i8         d6, d22, d2
        vld1.32         {d0[1]}, [r12, :32]
        vmull.u8        q8, d1, d2
        vmlal.u8        q8, d0, d6
        vrshrn.i16      d20, q8, #6
        vst1.32         {d20[0]}, [r0, :32], r1
        vst1.32         {d20[1]}, [r12, :32], r1
        bgt             4b
        pop             {r4-r5,pc}
// Width 8: two rows per iteration.
80:
        vmov.i8         q8, #64
        add             r12, r0, r1
        lsl             r1, r1, #1
8:
        vld2.u8         {d2[], d3[]}, [r5, :16]!        // d2 = row 0 mask, d3 = row 1 mask
        vld1.u8         {d4, d5}, [r2, :128]!
        vld1.u8         {d0}, [r0, :64]
        vsub.i8         q9, q8, q1
        vld1.u8         {d1}, [r12, :64]
        subs            r4, r4, #2
        vmull.u8        q3, d2, d4
        vmlal.u8        q3, d0, d18
        vmull.u8        q10, d3, d5
        vmlal.u8        q10, d1, d19
        vrshrn.i16      d22, q3, #6
        vrshrn.i16      d23, q10, #6
        vst1.u8         {d22}, [r0, :64], r1
        vst1.u8         {d23}, [r12, :64], r1
        bgt             8b
        pop             {r4-r5,pc}
// Width 16: two rows per iteration.
160:
        vmov.i8         q12, #64
        add             r12, r0, r1
        lsl             r1, r1, #1
16:
        vld2.u8         {d28[], d29[]}, [r5, :16]!      // d28 = row 0 mask, d29 = row 1 mask
        vld1.u8         {d2, d3, d4, d5}, [r2, :128]!   // d2-d3 = source row 0, d4-d5 = source row 1
        vsub.i8         q15, q12, q14
        vld1.u8         {q0}, [r0, :128]
        subs            r4, r4, #2
        vld1.u8         {q13}, [r12, :128]
        vmull.u8        q3, d2, d28
        vmlal.u8        q3, d0, d30
        vmull.u8        q8, d3, d28
        vmlal.u8        q8, d1, d30
        vrshrn.i16      d18, q3, #6
        vrshrn.i16      d19, q8, #6
        vmull.u8        q3, d4, d29
        vmlal.u8        q3, d26, d31
        vmull.u8        q8, d5, d29
        vmlal.u8        q8, d27, d31
        vrshrn.i16      d20, q3, #6
        vrshrn.i16      d21, q8, #6
        vst1.u8         {q9}, [r0, :128], r1
        vst1.u8         {q10}, [r12, :128], r1
        bgt             16b
        pop             {r4-r5,pc}
// Widths 32, 64 and 128 share one path: one row per outer iteration,
// 32 pixels per inner iteration.
320:
640:
1280:
        vmov.i8         d20, #64
        sub             r1, r1, r3              // inner loop advances r0 by w; keep only the remainder
321:
        vld1.u8         {d6[]}, [r5]!           // mask for this row
        vsub.i8         d7, d20, d6
        mov             r12, r3                 // r12 = columns left in this row
32:
        vld1.u8         {q8, q9}, [r2, :128]!
        vld1.u8         {q0, q1}, [r0, :128]
        vmull.u8        q15, d16, d6
        vmlal.u8        q15, d0, d7
        vmull.u8        q14, d17, d6
        vmlal.u8        q14, d1, d7
        vrshrn.i16      d0, q15, #6
        vrshrn.i16      d1, q14, #6
        vmull.u8        q15, d18, d6
        vmlal.u8        q15, d2, d7
        vmull.u8        q14, d19, d6
        vmlal.u8        q14, d3, d7
        vrshrn.i16      d2, q15, #6
        vrshrn.i16      d3, q14, #6
        subs            r12, r12, #32
        vst1.u8         {q0, q1}, [r0, :128]!
        bgt             32b
        add             r0, r0, r1
        subs            r4, r4, #1
        bgt             321b
        pop             {r4-r5,pc}
endfunc

// blend_v_8bpc_neon: same blend formula as above, but with one mask value
// per column, read once from the obmc_masks table starting at offset w.
// Every path stores only the leftmost 3/4 of each row (1 of 2, 3 of 4,
// 6 of 8, 12 of 16, 24 of 32 bytes); the second source is still consumed
// at the full row width.
//
// Registers as used below:
//   r0  dst pointer, r1 dst stride, r2 second source pointer (contiguous)
//   r3  width (presumably w); selects the loop, then reused as table base
//   r4  row counter, loaded from the first stack argument (presumably h)
//   lr  pointer into obmc_masks
function blend_v_8bpc_neon, export=1
        push            {r4,lr}
        ldr             r4, [sp, #8]
        movrel          lr, X(obmc_masks)
        add             lr, lr, r3              // column masks = obmc_masks + w
        clz             r12, r3
        adr             r3, L(blend_v_tbl)
        sub             r12, r12, #26           // clz(32) == 26, so w=32 -> index 0 ... w=2 -> index 4
        ldr             r12, [r3, r12, lsl #2]
        add             r3, r3, r12
        bx              r3

        .align 2
L(blend_v_tbl):
        .word 320f - L(blend_v_tbl) + CONFIG_THUMB
        .word 160f - L(blend_v_tbl) + CONFIG_THUMB
        .word 80f - L(blend_v_tbl) + CONFIG_THUMB
        .word 40f - L(blend_v_tbl) + CONFIG_THUMB
        .word 20f - L(blend_v_tbl) + CONFIG_THUMB

// Width 2: only column 0 is blended and stored; two rows per iteration.
20:
        vmov.i8         d22, #64
        vld1.8          {d2[]}, [lr]
        add             r12, r0, r1
        lsl             r1, r1, #1
        vsub.i8         d3, d22, d2
2:
        vld1.16         {d1[0]}, [r2, :16]!     // source row 0 (2 bytes)
        vld1.8          {d0[]}, [r0]
        subs            r4, r4, #2
        vld1.8          {d1[1]}, [r2]           // overwrite byte 1 with source row 1, column 0
        vld1.8          {d0[1]}, [r12]
        vmull.u8        q2, d1, d2
        vmlal.u8        q2, d0, d3
        vrshrn.i16      d6, q2, #6
        add             r2, r2, #2              // skip the rest of source row 1
        vst1.8          {d6[0]}, [r0], r1
        vst1.8          {d6[1]}, [r12], r1
        bgt             2b
        pop             {r4,pc}
// Width 4: 3 bytes stored per row (16-bit + 8-bit store).
40:
        vmov.i8         d22, #64
        vld1.32         {d4[]}, [lr, :32]
        add             r12, r0, r1
        lsl             r1, r1, #1
        vsub.i8         d5, d22, d4
        sub             r1, r1, #2              // first store post-increments by 2
4:
        vld1.u8         {d2}, [r2, :64]!
        vld1.32         {d0[]}, [r0, :32]
        vld1.32         {d0[1]}, [r12, :32]
        subs            r4, r4, #2
        vmull.u8        q3, d2, d4
        vmlal.u8        q3, d0, d5
        vrshrn.i16      d20, q3, #6
        vst1.16         {d20[0]}, [r0, :16]!
        vst1.16         {d20[2]}, [r12, :16]!
        vst1.8          {d20[2]}, [r0], r1
        vst1.8          {d20[6]}, [r12], r1
        bgt             4b
        pop             {r4,pc}
// Width 8: 6 bytes stored per row (32-bit + 16-bit store).
80:
        vmov.i8         d16, #64
        vld1.u8         {d2}, [lr, :64]
        add             r12, r0, r1
        lsl             r1, r1, #1
        vsub.i8         d17, d16, d2
        sub             r1, r1, #4              // first store post-increments by 4
8:
        vld1.u8         {d4, d5}, [r2, :128]!
        vld1.u8         {d0}, [r0, :64]
        vld1.u8         {d1}, [r12, :64]
        subs            r4, r4, #2
        vmull.u8        q3, d2, d4
        vmlal.u8        q3, d0, d17
        vmull.u8        q10, d2, d5
        vmlal.u8        q10, d1, d17
        vrshrn.i16      d22, q3, #6
        vrshrn.i16      d23, q10, #6
        vst1.32         {d22[0]}, [r0, :32]!
        vst1.32         {d23[0]}, [r12, :32]!
        vst1.16         {d22[2]}, [r0, :16], r1
        vst1.16         {d23[2]}, [r12, :16], r1
        bgt             8b
        pop             {r4,pc}
// Width 16: 12 bytes stored per row (64-bit + 32-bit store).
160:
        vmov.i8         q12, #64
        vld1.u8         {q14}, [lr, :128]
        add             r12, r0, r1
        lsl             r1, r1, #1
        vsub.i8         q11, q12, q14
        sub             r1, r1, #8              // first store post-increments by 8
16:
        vld1.u8         {q1, q2}, [r2, :128]!
        vld1.u8         {q0}, [r0, :128]
        subs            r4, r4, #2
        vld1.u8         {q13}, [r12, :128]
        vmull.u8        q3, d2, d28
        vmlal.u8        q3, d0, d22
        vmull.u8        q8, d3, d29
        vmlal.u8        q8, d1, d23
        vrshrn.i16      d18, q3, #6
        vrshrn.i16      d19, q8, #6
        vmull.u8        q3, d4, d28
        vmlal.u8        q3, d26, d22
        vmull.u8        q8, d5, d29
        vmlal.u8        q8, d27, d23
        vrshrn.i16      d20, q3, #6
        vrshrn.i16      d21, q8, #6
        vst1.u8         {d18}, [r0, :64]!
        vst1.u8         {d20}, [r12, :64]!
        vst1.32         {d19[0]}, [r0, :32], r1
        vst1.32         {d21[0]}, [r12, :32], r1
        bgt             16b
        pop             {r4,pc}
// Width 32: one row per iteration, 24 bytes loaded from and stored to dst.
320:
        vmov.i8         q10, #64
        vld1.u8         {q2, q3}, [lr, :128]
        vsub.i8         q11, q10, q2
        vsub.i8         d24, d20, d6            // only the first 24 masks are needed
32:
        vld1.u8         {q8, q9}, [r2, :128]!
        vld1.u8         {d0, d1, d2}, [r0, :64]
        subs            r4, r4, #1
        vmull.u8        q15, d16, d4
        vmlal.u8        q15, d0, d22
        vmull.u8        q14, d17, d5
        vmlal.u8        q14, d1, d23
        vrshrn.i16      d0, q15, #6
        vrshrn.i16      d1, q14, #6
        vmull.u8        q15, d18, d6
        vmlal.u8        q15, d2, d24
        vrshrn.i16      d2, q15, #6
        vst1.u8         {d0, d1, d2}, [r0, :64], r1
        bgt             32b
        pop             {r4,pc}
endfunc


// This has got the same signature as the put_8tap functions,
// assumes that the caller has loaded the h argument into r5,
// and assumes that r8 is set to (clz(w)-24).
//
// Plain block copy, with no filtering or arithmetic on the pixels.
// Registers as used below:
//   r0  dst pointer, r1 dst stride (stores carry alignment hints)
//   r2  src pointer, r3 src stride (loads carry no alignment hint)
//   r5  row counter (h)
//   r8  jump-table index on entry; reused as the second dst row pointer
//       in the width 16 path
//   r9  jump-table base; reused as the second src row pointer in the
//       width 16 path
// There is no push in this function: every path returns with
// pop {r4-r11,pc}, so the matching push is presumably done by the caller
// before branching here (verify at the call sites).
function put_neon
        adr             r9, L(put_tbl)
        ldr             r8, [r9, r8, lsl #2]    // index 0 = w 128 ... index 6 = w 2
        add             r9, r9, r8              // table entries are offsets relative to L(put_tbl)
        bx              r9

        .align 2
L(put_tbl):
        .word 1280f - L(put_tbl) + CONFIG_THUMB
        .word 640f - L(put_tbl) + CONFIG_THUMB
        .word 32f - L(put_tbl) + CONFIG_THUMB
        .word 160f - L(put_tbl) + CONFIG_THUMB
        .word 8f - L(put_tbl) + CONFIG_THUMB
        .word 4f - L(put_tbl) + CONFIG_THUMB
        .word 2f - L(put_tbl) + CONFIG_THUMB

// Width 2: two rows per iteration, 16 bits per row.
2:
        vld1.16         {d0[]}, [r2], r3
        vld1.16         {d1[]}, [r2], r3
        subs            r5, r5, #2
        vst1.16         {d0[0]}, [r0, :16], r1
        vst1.16         {d1[0]}, [r0, :16], r1
        bgt             2b
        pop             {r4-r11,pc}
// Width 4: two rows per iteration, 32 bits per row.
4:
        vld1.32         {d0[]}, [r2], r3
        vld1.32         {d1[]}, [r2], r3
        subs            r5, r5, #2
        vst1.32         {d0[0]}, [r0, :32], r1
        vst1.32         {d1[0]}, [r0, :32], r1
        bgt             4b
        pop             {r4-r11,pc}
// Width 8: two rows per iteration, one d register per row.
8:
        vld1.8          {d0}, [r2], r3
        vld1.8          {d1}, [r2], r3
        subs            r5, r5, #2
        vst1.8          {d0}, [r0, :64], r1
        vst1.8          {d1}, [r0, :64], r1
        bgt             8b
        pop             {r4-r11,pc}
// Width 16: two rows per iteration through two independent pointer pairs
// (r2/r0 for even rows, r9/r8 for odd rows), each stepping two strides.
160:
        add             r8, r0, r1
        lsl             r1, r1, #1
        add             r9, r2, r3
        lsl             r3, r3, #1
16:
        vld1.8          {q0}, [r2], r3
        vld1.8          {q1}, [r9], r3
        subs            r5, r5, #2
        vst1.8          {q0}, [r0, :128], r1
        vst1.8          {q1}, [r8, :128], r1
        bgt             16b
        pop             {r4-r11,pc}
// Width 32: one row per iteration.
32:
        vld1.8          {q0, q1}, [r2], r3
        subs            r5, r5, #1
        vst1.8          {q0, q1}, [r0, :128], r1
        bgt             32b
        pop             {r4-r11,pc}
// Width 64: one row per iteration as two 32-byte halves. The first half
// post-increments the pointers by 32, so the strides are reduced by 32.
640:
        sub             r1, r1, #32
        sub             r3, r3, #32
64:
        vld1.8          {q0, q1}, [r2]!
        vst1.8          {q0, q1}, [r0, :128]!
        vld1.8          {q2, q3}, [r2], r3
        subs            r5, r5, #1
        vst1.8          {q2, q3}, [r0, :128], r1
        bgt             64b
        pop             {r4-r11,pc}
// Width 128: one row per iteration as four 32-byte pieces. The first three
// post-increment the pointers by 96 in total, so the strides are reduced
// by 96.
1280:
        sub             r1, r1, #96
        sub             r3, r3, #96
128:
        vld1.8          {q8, q9}, [r2]!
        vst1.8          {q8, q9}, [r0, :128]!
        vld1.8          {q10, q11}, [r2]!
        vst1.8          {q10, q11}, [r0, :128]!
        vld1.8          {q12, q13}, [r2]!
        vst1.8          {q12, q13}, [r0, :128]!
        vld1.8          {q14, q15}, [r2], r3
        subs            r5, r5, #1
        vst1.8          {q14, q15}, [r0, :128], r1
        bgt             128b
        pop             {r4-r11,pc}
endfunc


// This has got the same signature as the put_8tap functions,
// assumes that the caller has loaded the h argument into r4,
// and assumes that r8 is set to (clz(w)-24), and r7 to w*2.
// prep_neon: unfiltered prep path. Every 8-bit source pixel is widened to
// 16 bit with a left shift by 4 (vshll.u8 #4) and the rows are stored back
// to back in the buffer at r0.
//
// Register contract, as relied upon by the code below:
//   r0  destination pointer (16-bit elements, :64/:128 alignment hints used)
//   r1  source pointer (8-bit pixels)
//   r2  source stride in bytes
//   r3  added to r0 in the w == 32 case to reach the second half of a row;
//       presumably w itself -- TODO confirm against the caller
//   r4  h, the number of rows (loop counter)
//   r7  w*2, the size of one destination row in bytes
//   r8  clz(w)-24, index into the jump table (0 => w = 128 ... 5 => w = 4)
// Modifies r2, r6, r7, r8, r9, q0-q3 and q8-q15; q4-q7 are not touched.
// Every path returns with pop {r4-r11,pc}, so the function that branches
// here must already have pushed {r4-r11,lr}.
function prep_neon
        // Table entries are offsets relative to the table itself, so the
        // dispatch is position independent. CONFIG_THUMB is added to each
        // entry of the table that bx jumps through.
        adr             r9,  L(prep_tbl)
        ldr             r8,  [r9, r8, lsl #2]
        add             r9,  r9,  r8
        bx              r9

        .align 2
L(prep_tbl):
        .word 1280f - L(prep_tbl) + CONFIG_THUMB
        .word 640f  - L(prep_tbl) + CONFIG_THUMB
        .word 320f  - L(prep_tbl) + CONFIG_THUMB
        .word 160f  - L(prep_tbl) + CONFIG_THUMB
        .word 8f    - L(prep_tbl) + CONFIG_THUMB
        .word 4f    - L(prep_tbl) + CONFIG_THUMB

4:      // w == 4, two rows per iteration
        // Each load replicates the 4 pixels into both 32-bit lanes, so after
        // widening, d1 holds row 0 and d2 holds row 1; storing {d1, d2}
        // therefore writes the two rows contiguously.
        vld1.32         {d0[]}, [r1], r2
        vld1.32         {d2[]}, [r1], r2
        subs            r4,  r4,  #2
        vshll.u8        q0,  d0,  #4
        vshll.u8        q1,  d2,  #4
        vst1.16         {d1, d2}, [r0, :64]!
        bgt             4b
        pop             {r4-r11,pc}
8:      // w == 8, two rows per iteration
        vld1.8          {d0}, [r1], r2
        vld1.8          {d2}, [r1], r2
        subs            r4,  r4,  #2
        vshll.u8        q0,  d0,  #4
        vshll.u8        q1,  d2,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        bgt             8b
        pop             {r4-r11,pc}
160:    // w == 16 setup: r9/r8 address the odd rows, strides are doubled
        add             r9,  r1,  r2
        lsl             r2,  r2,  #1
        add             r8,  r0,  r7
        lsl             r7,  r7,  #1
16:     // two rows per iteration, even row via r1/r0, odd row via r9/r8
        vld1.8          {q2}, [r1], r2
        vld1.8          {q3}, [r9], r2
        subs            r4,  r4,  #2
        vshll.u8        q0,  d4,  #4
        vshll.u8        q1,  d5,  #4
        vshll.u8        q2,  d6,  #4
        vshll.u8        q3,  d7,  #4
        vst1.16         {q0, q1}, [r0, :128], r7
        vst1.16         {q2, q3}, [r8, :128], r7
        bgt             16b
        pop             {r4-r11,pc}
320:    // w == 32 setup: r8 = r0 + r3 addresses the second half of each row
        add             r8,  r0,  r3
32:     // two rows per iteration; r0 and r8 both advance by r7 per row
        vld1.8          {q0,  q1},  [r1], r2
        subs            r4,  r4,  #2
        vshll.u8        q8,  d0,  #4
        vshll.u8        q9,  d1,  #4
        vld1.8          {q2,  q3},  [r1], r2
        vshll.u8        q10, d2,  #4
        vshll.u8        q11, d3,  #4
        vshll.u8        q12, d4,  #4
        vst1.16         {q8,  q9},  [r0, :128], r7
        vshll.u8        q13, d5,  #4
        vst1.16         {q10, q11}, [r8, :128], r7
        vshll.u8        q14, d6,  #4
        vst1.16         {q12, q13}, [r0, :128], r7
        vshll.u8        q15, d7,  #4
        vst1.16         {q14, q15}, [r8, :128], r7
        bgt             32b
        pop             {r4-r11,pc}
640:    // w == 64 setup: the first load post-increments r1 by 32, so the
        // stride is reduced by 32. r0 and r8 = r0 + 32 interleave 32-byte
        // stores, each pointer stepping by r6 = 64.
        sub             r2,  r2,  #32
        add             r8,  r0,  #32
        mov             r6,  #64
64:     // one row per iteration: 64 input bytes, 128 output bytes
        vld1.8          {q0,  q1},  [r1]!
        subs            r4,  r4,  #1
        vshll.u8        q8,  d0,  #4
        vshll.u8        q9,  d1,  #4
        vld1.8          {q2,  q3},  [r1], r2
        vshll.u8        q10, d2,  #4
        vshll.u8        q11, d3,  #4
        vshll.u8        q12, d4,  #4
        vst1.16         {q8,  q9},  [r0, :128], r6
        vshll.u8        q13, d5,  #4
        vshll.u8        q14, d6,  #4
        vst1.16         {q10, q11}, [r8, :128], r6
        vshll.u8        q15, d7,  #4
        vst1.16         {q12, q13}, [r0, :128], r6
        vst1.16         {q14, q15}, [r8, :128], r6
        bgt             64b
        pop             {r4-r11,pc}
1280:   // w == 128 setup: three post-incrementing loads advance r1 by 96, so
        // the stride is reduced by 96. Store pointers as in the w == 64 case.
        sub             r2,  r2,  #96
        add             r8,  r0,  #32
        mov             r6,  #64
128:    // one row per iteration: 128 input bytes, 256 output bytes.
        // Registers are reused as soon as their previous contents have been
        // stored or consumed, so the statement order below matters.
        vld1.8          {q0,  q1},  [r1]!
        vld1.8          {q2,  q3},  [r1]!
        vshll.u8        q10, d0,  #4
        vshll.u8        q11, d1,  #4
        vshll.u8        q12, d2,  #4
        vshll.u8        q13, d3,  #4
        vshll.u8        q14, d4,  #4
        vshll.u8        q15, d5,  #4
        vld1.8          {q8,  q9},  [r1]!
        vst1.16         {q10, q11}, [r0, :128], r6
        vst1.16         {q12, q13}, [r8, :128], r6
        vshll.u8        q0,  d6,  #4
        vshll.u8        q1,  d7,  #4
        vshll.u8        q2,  d16, #4
        vshll.u8        q3,  d17, #4
        vshll.u8        q8,  d18, #4
        vshll.u8        q9,  d19, #4
        vld1.8          {q10, q11}, [r1], r2
        vst1.16         {q14, q15}, [r0, :128], r6
        vst1.16         {q0,  q1},  [r8, :128], r6
        vshll.u8        q12, d20, #4
        vshll.u8        q13, d21, #4
        vshll.u8        q14, d22, #4
        vshll.u8        q15, d23, #4
        subs            r4,  r4,  #1
        vst1.16         {q2,  q3},  [r0, :128], r6
        vst1.16         {q8,  q9},  [r8, :128], r6
        vst1.16         {q12, q13}, [r0, :128], r6
        vst1.16         {q14, q15}, [r8, :128], r6
        bgt             128b
        pop             {r4-r11,pc}
endfunc


// load_slice: load one element of width wd (in bits) per destination register,
// replicated to all lanes, alternating between the pointers s0 and s1 and
// post-incrementing each by strd. Two registers are always loaded; d2/d3 are
// loaded as a pair, and d4, d5, d6 individually, when the argument is given.
.macro load_slice s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
        vld1.\wd        {\d0[]}, [\s0], \strd
        vld1.\wd        {\d1[]}, [\s1], \strd
.ifnb \d2
        vld1.\wd        {\d2[]}, [\s0], \strd
        vld1.\wd        {\d3[]}, [\s1], \strd
.endif
.ifnb \d4
        vld1.\wd        {\d4[]}, [\s0], \strd
.endif
.ifnb \d5
        vld1.\wd        {\d5[]}, [\s1], \strd
.endif
.ifnb \d6
        vld1.\wd        {\d6[]}, [\s0], \strd
.endif
.endm
// load_reg: same alternating s0/s1 pattern as load_slice, but each load fills
// a whole register (or register list) instead of replicating one element.
.macro load_reg s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        vld1.8          {\d0}, [\s0], \strd
        vld1.8          {\d1}, [\s1], \strd
.ifnb \d2
        vld1.8          {\d2}, [\s0], \strd
        vld1.8          {\d3}, [\s1], \strd
.endif
.ifnb \d4
        vld1.8          {\d4}, [\s0], \strd
.endif
.ifnb \d5
        vld1.8          {\d5}, [\s1], \strd
.endif
.ifnb \d6
        vld1.8          {\d6}, [\s0], \strd
.endif
.endm
// load_16 / load_32: load_slice with a fixed element width of 16 or 32 bits.
.macro load_16 s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, 16, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
.macro load_32 s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, 32, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
// interleave_1_16: each register drops its low 6 bytes and is refilled at the
// top with the low 6 bytes of the next register in the list (vext.8 #6).
// The second pair (r2, r3, consuming r4) is only processed when r3 is given.
.macro interleave_1_16 r0, r1, r2, r3, r4
        vext.8          \r0, \r0, \r1, #6
        vext.8          \r1, \r1, \r2, #6
.ifnb \r3
        vext.8          \r2, \r2, \r3, #6
        vext.8          \r3, \r3, \r4, #6
.endif
.endm
// interleave_1_32: as interleave_1_16, with a byte offset of 4 instead of 6.
.macro interleave_1_32 r0, r1, r2, r3, r4
        vext.8          \r0, \r0, \r1, #4
        vext.8          \r1, \r1, \r2, #4
.ifnb \r3
        vext.8          \r2, \r2, \r3, #4
        vext.8          \r3, \r3, \r4, #4
.endif
.endm
// vmovl_u8: zero-extend up to seven (q, d) register pairs from 8 to 16 bit.
// Pairs 0 and 1 are mandatory, pairs 2 and 3 are converted together, and
// pairs 4, 5 and 6 are each optional.
.macro vmovl_u8 q0, d0, q1, d1, q2, d2, q3, d3, q4, d4, q5, d5, q6, d6
        vmovl.u8        \q0, \d0
        vmovl.u8        \q1, \d1
.ifnb \q2
        vmovl.u8        \q2, \d2
        vmovl.u8        \q3, \d3
.endif
.ifnb \q4
        vmovl.u8        \q4, \d4
.endif
.ifnb \q5
        vmovl.u8        \q5, \d5
.endif
.ifnb \q6
        vmovl.u8        \q6, \d6
.endif
.endm
// mul_mla_4: 4-tap multiply-accumulate, d = s0*c0 + s1*c1 + s2*c2 + s3*c3,
// where c0..c3 are the 16-bit lanes of NEON register d0.
.macro mul_mla_4 d, s0, s1, s2, s3
        vmul.s16        \d,  \s0, d0[0]
        vmla.s16        \d,  \s1, d0[1]
        vmla.s16        \d,  \s2, d0[2]
        vmla.s16        \d,  \s3, d0[3]
.endm
// mul_mla_8_0: 8-tap multiply-accumulate of s0..s7 with the coefficients in
// the lanes of NEON registers d0 and d1. Note that the macro parameter named
// d0 is only referenced with a backslash prefix; the unprefixed d0/d1 are the
// actual coefficient registers.
.macro mul_mla_8_0 d0, s0, s1, s2, s3, s4, s5, s6, s7
        vmul.s16        \d0, \s0, d0[0]
        vmla.s16        \d0, \s1, d0[1]
        vmla.s16        \d0, \s2, d0[2]
        vmla.s16        \d0, \s3, d0[3]
        vmla.s16        \d0, \s4, d1[0]
        vmla.s16        \d0, \s5, d1[1]
        vmla.s16        \d0, \s6, d1[2]
        vmla.s16        \d0, \s7, d1[3]
.endm
// mul_mla_8_1: two 8-tap results from a sliding window over s0..s8; the
// first output uses s0..s7, the second uses s1..s8 (window moved by one).
.macro mul_mla_8_1 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8
        vmul.s16        \d0, \s0, d0[0]
        vmla.s16        \d0, \s1, d0[1]
        vmla.s16        \d0, \s2, d0[2]
        vmla.s16        \d0, \s3, d0[3]
        vmla.s16        \d0, \s4, d1[0]
        vmla.s16        \d0, \s5, d1[1]
        vmla.s16        \d0, \s6, d1[2]
        vmla.s16        \d0, \s7, d1[3]
        vmul.s16        \d1, \s1, d0[0]
        vmla.s16        \d1, \s2, d0[1]
        vmla.s16        \d1, \s3, d0[2]
        vmla.s16        \d1, \s4, d0[3]
        vmla.s16        \d1, \s5, d1[0]
        vmla.s16        \d1, \s6, d1[1]
        vmla.s16        \d1, \s7, d1[2]
        vmla.s16        \d1, \s8, d1[3]
.endm
// mul_mla_8_2: as mul_mla_8_1, but the second window is moved by two inputs;
// the first output uses s0..s7, the second uses s2..s9.
.macro mul_mla_8_2 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9
        vmul.s16        \d0, \s0, d0[0]
        vmla.s16        \d0, \s1, d0[1]
        vmla.s16        \d0, \s2, d0[2]
        vmla.s16        \d0, \s3, d0[3]
        vmla.s16        \d0, \s4, d1[0]
        vmla.s16        \d0, \s5, d1[1]
        vmla.s16        \d0, \s6, d1[2]
        vmla.s16        \d0, \s7, d1[3]
        vmul.s16        \d1, \s2, d0[0]
        vmla.s16        \d1, \s3, d0[1]
        vmla.s16        \d1, \s4, d0[2]
        vmla.s16        \d1, \s5, d0[3]
        vmla.s16        \d1, \s6, d1[0]
        vmla.s16        \d1, \s7, d1[1]
        vmla.s16        \d1, \s8, d1[2]
        vmla.s16        \d1, \s9, d1[3]
.endm
// vqrshrun_s16: rounding right shift by the given amount with saturating
// narrowing from signed 16 bit to unsigned 8 bit, for one, two or four
// (q source, d destination) pairs.
.macro vqrshrun_s16 shift, q0, d0, q1, d1, q2, d2, q3, d3
        vqrshrun.s16    \d0, \q0, #\shift
.ifnb \q1
        vqrshrun.s16    \d1, \q1, #\shift
.endif
.ifnb \q2
        vqrshrun.s16    \d2, \q2, #\shift
        vqrshrun.s16    \d3, \q3, #\shift
.endif
.endm
// vrshr_s16: in-place rounding right shift of signed 16-bit lanes, for one,
// two or four registers.
.macro vrshr_s16 shift, r0, r1, r2, r3
        vrshr.s16       \r0, \r0, #\shift
.ifnb \r1
        vrshr.s16       \r1, \r1, #\shift
.endif
.ifnb \r2
        vrshr.s16       \r2, \r2, #\shift
        vrshr.s16       \r3, \r3, #\shift
.endif
.endm
// st_16: store 16-bit lanes of reg, alternating between the destination
// pointers in ARM registers r0 and r8 and post-incrementing each by strd.
// Lanes 0 and 1 are always stored; lanes 2 and 3 only when lanes > 2.
.macro st_16 strd, reg, lanes
        vst1.16         {\reg[0]}, [r0, :16], \strd
        vst1.16         {\reg[1]}, [r8, :16], \strd
.if \lanes > 2
        vst1.16         {\reg[2]}, [r0, :16], \strd
        vst1.16         {\reg[3]}, [r8, :16], \strd
.endif
.endm
// st_32: store the two 32-bit lanes of one or two d registers, alternating
// between the pointers in ARM registers r0 and r8. The macro parameters named
// r0/r1 are only referenced with a backslash prefix; the unprefixed r0 and r8
// are the ARM core registers.
.macro st_32 strd, r0, r1
        vst1.32         {\r0[0]}, [r0, :32], \strd
        vst1.32         {\r0[1]}, [r8, :32], \strd
.ifnb \r1
        vst1.32         {\r1[0]}, [r0, :32], \strd
        vst1.32         {\r1[1]}, [r8, :32], \strd
.endif
.endm
// st_reg: store two, four or eight whole registers with the given alignment
// hint, alternating between the pointers in ARM registers r0 and r8 (the
// same parameter versus register naming caveat as for st_32 applies).
.macro st_reg strd, align, r0, r1, r2, r3, r4, r5, r6, r7
        vst1.8          {\r0}, [r0, \align], \strd
        vst1.8          {\r1}, [r8, \align], \strd
.ifnb \r2
        vst1.8          {\r2}, [r0, \align], \strd
        vst1.8          {\r3}, [r8, \align], \strd
.endif
.ifnb \r4
        vst1.8          {\r4}, [r0, \align], \strd
        vst1.8          {\r5}, [r8, \align], \strd
        vst1.8          {\r6}, [r0, \align], \strd
        vst1.8          {\r7}, [r8, \align], \strd
.endif
.endm
// shift_store_4: final rounding and store for 4-pixel wide rows.
//   type == put: narrow q0/q1 to 8 bit with a rounding shift by 6 and store
//                four 32-bit lanes via st_32.
//   otherwise:   rounding shift by 2, keep 16 bit, store d0..d3 via st_reg.
.macro shift_store_4 type, strd, q0, d0, d1, q1, d2, d3
.ifc \type, put
        vqrshrun_s16    6,     \q0, \d0, \q1, \d2
        st_32           \strd, \d0, \d2
.else
        vrshr_s16       2,          \q0, \q1
        st_reg          \strd, :64, \d0, \d1, \d2, \d3
.endif
.endm
// shift_store_8: final rounding and store for 8-pixel wide rows.
//   type == put: narrow q0..q3 to 8 bit (rounding shift by 6), store d0..d3.
//   otherwise:   rounding shift by 2, store q0..q3 as 16 bit.
.macro shift_store_8 type, strd, q0, d0, q1, d1, q2, d2, q3, d3
.ifc \type, put
        vqrshrun_s16    6,          \q0, \d0, \q1, \d1, \q2, \d2, \q3, \d3
        st_reg          \strd, :64, \d0, \d1, \d2, \d3
.else
        vrshr_s16       2,          \q0, \q1, \q2, \q3
        st_reg          \strd, :128,\q0, \q1, \q2, \q3
.endif
.endm
// shift_store_16: final rounding and store for 16-pixel wide rows.
//   type == put: narrow q0..q3 to 8 bit (rounding shift by 6) into d0, d1,
//                d4, d5 and store the two q registers they form (q0, q2).
//   otherwise:   rounding shift by 2, store {q0, q1} at r0 and {q2, q3} at r8.
.macro shift_store_16 type, strd, q0, d0, d1, q1, q2, d4, d5, q3
.ifc \type, put
        vqrshrun.s16    \d0, \q0, #6
        vqrshrun.s16    \d1, \q1, #6
        vqrshrun.s16    \d4, \q2, #6
        vqrshrun.s16    \d5, \q3, #6
        st_reg          \strd, :128, \q0, \q2
.else
        vrshr_s16       2,     \q0, \q1, \q2, \q3
        vst1.16         {\q0, \q1}, [r0, :128], \strd
        vst1.16         {\q2, \q3}, [r8, :128], \strd
.endif
.endm
1276*c0909341SAndroid Build Coastguard Worker 1277*c0909341SAndroid Build Coastguard Worker.macro make_8tap_fn op, type, type_h, type_v 1278*c0909341SAndroid Build Coastguard Workerfunction \op\()_8tap_\type\()_8bpc_neon, export=1 1279*c0909341SAndroid Build Coastguard Worker push {r4-r11,lr} 1280*c0909341SAndroid Build Coastguard Worker movw r8, \type_h 1281*c0909341SAndroid Build Coastguard Worker movw r9, \type_v 1282*c0909341SAndroid Build Coastguard Worker b \op\()_8tap_neon 1283*c0909341SAndroid Build Coastguard Workerendfunc 1284*c0909341SAndroid Build Coastguard Worker.endm 1285*c0909341SAndroid Build Coastguard Worker 1286*c0909341SAndroid Build Coastguard Worker// No spaces in these expressions, due to gas-preprocessor. 1287*c0909341SAndroid Build Coastguard Worker#define REGULAR ((0*15<<7)|3*15) 1288*c0909341SAndroid Build Coastguard Worker#define SMOOTH ((1*15<<7)|4*15) 1289*c0909341SAndroid Build Coastguard Worker#define SHARP ((2*15<<7)|3*15) 1290*c0909341SAndroid Build Coastguard Worker 1291*c0909341SAndroid Build Coastguard Worker.macro filter_fn type, dst, d_strd, src, s_strd, w, h, mx, my, ds2, sr2, shift_hv 1292*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular, REGULAR, REGULAR 1293*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular_smooth, REGULAR, SMOOTH 1294*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular_sharp, REGULAR, SHARP 1295*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth, SMOOTH, SMOOTH 1296*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth_regular, SMOOTH, REGULAR 1297*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth_sharp, SMOOTH, SHARP 1298*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp, SHARP, SHARP 1299*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp_regular, SHARP, REGULAR 1300*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp_smooth, SHARP, SMOOTH 1301*c0909341SAndroid 
Build Coastguard Worker 1302*c0909341SAndroid Build Coastguard Workerfunction \type\()_8tap_neon 1303*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #36] 1304*c0909341SAndroid Build Coastguard Worker ldrd r6, r7, [sp, #44] 1305*c0909341SAndroid Build Coastguard Worker movw r10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0) 1306*c0909341SAndroid Build Coastguard Worker mul \mx, \mx, r10 1307*c0909341SAndroid Build Coastguard Worker mul \my, \my, r10 1308*c0909341SAndroid Build Coastguard Worker add \mx, \mx, r8 // mx, 8tap_h, 4tap_h 1309*c0909341SAndroid Build Coastguard Worker add \my, \my, r9 // my, 8tap_v, 4tap_v 1310*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 1311*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \w, #1 1312*c0909341SAndroid Build Coastguard Worker.endif 1313*c0909341SAndroid Build Coastguard Worker 1314*c0909341SAndroid Build Coastguard Worker clz r8, \w 1315*c0909341SAndroid Build Coastguard Worker tst \mx, #(0x7f << 14) 1316*c0909341SAndroid Build Coastguard Worker sub r8, r8, #24 1317*c0909341SAndroid Build Coastguard Worker movrel r10, X(mc_subpel_filters), -8 1318*c0909341SAndroid Build Coastguard Worker bne L(\type\()_8tap_h) 1319*c0909341SAndroid Build Coastguard Worker tst \my, #(0x7f << 14) 1320*c0909341SAndroid Build Coastguard Worker bne L(\type\()_8tap_v) 1321*c0909341SAndroid Build Coastguard Worker b \type\()_neon 1322*c0909341SAndroid Build Coastguard Worker 1323*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_h): 1324*c0909341SAndroid Build Coastguard Worker cmp \w, #4 1325*c0909341SAndroid Build Coastguard Worker ubfx r9, \mx, #7, #7 1326*c0909341SAndroid Build Coastguard Worker and \mx, \mx, #0x7f 1327*c0909341SAndroid Build Coastguard Worker it gt 1328*c0909341SAndroid Build Coastguard Worker movgt \mx, r9 1329*c0909341SAndroid Build Coastguard Worker tst \my, #(0x7f << 14) 1330*c0909341SAndroid Build Coastguard Worker add \mx, r10, \mx, lsl #3 1331*c0909341SAndroid Build Coastguard Worker 
bne L(\type\()_8tap_hv) 1332*c0909341SAndroid Build Coastguard Worker 1333*c0909341SAndroid Build Coastguard Worker adr r9, L(\type\()_8tap_h_tbl) 1334*c0909341SAndroid Build Coastguard Worker ldr r8, [r9, r8, lsl #2] 1335*c0909341SAndroid Build Coastguard Worker add r9, r9, r8 1336*c0909341SAndroid Build Coastguard Worker bx r9 1337*c0909341SAndroid Build Coastguard Worker 1338*c0909341SAndroid Build Coastguard Worker .align 2 1339*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_h_tbl): 1340*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1341*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1342*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1343*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1344*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1345*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1346*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB 1347*c0909341SAndroid Build Coastguard Worker 1348*c0909341SAndroid Build Coastguard Worker20: // 2xN h 1349*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1350*c0909341SAndroid Build Coastguard Worker add \mx, \mx, #2 1351*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [\mx] 1352*c0909341SAndroid Build Coastguard Worker sub \src, \src, #1 1353*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1354*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1355*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1356*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1357*c0909341SAndroid Build Coastguard Worker vmovl.s8 q0, d0 1358*c0909341SAndroid Build Coastguard Worker2: 1359*c0909341SAndroid Build Coastguard Worker vld1.8 {d4}, [\src], 
\s_strd
        // NOTE(review): the bare operand above appears to finish a statement
        // that starts before this chunk. It and the loop tail below (closed
        // by an .endif whose opening is not visible here) are kept unchanged.
        vld1.8          {d6},  [\sr2], \s_strd
        vmovl.u8        q2,  d4
        vmovl.u8        q3,  d6
        vext.8          d5,  d4,  d5,  #2
        vext.8          d7,  d6,  d7,  #2
        subs            \h,  \h,  #2
        vtrn.32         d4,  d6
        vtrn.32         d5,  d7
        vmul.s16        d2,  d4,  d0[0]
        vmla.s16        d2,  d5,  d0[1]
        vmla.s16        d2,  d6,  d0[2]
        vmla.s16        d2,  d7,  d0[3]
        vrshr.s16       d2,  d2,  #2
        vqrshrun.s16    d2,  q1,  #4
        vst1.16         {d2[0]}, [\dst, :16], \d_strd
        vst1.16         {d2[1]}, [\ds2, :16], \d_strd
        bgt             2b
        pop             {r4-r11,pc}
.endif

40:     // 4xN h
        // Horizontal filter producing 4 pixels per row, two rows per
        // iteration. Only 4 coefficient bytes are read, starting at \mx + 2.
        add             \mx,  \mx,  #2
        vld1.32         {d0[]}, [\mx]
        sub             \src,  \src,  #1          // first tap sits one pixel to the left
        add             \ds2,  \dst,  \d_strd     // \ds2 / \sr2 address the second row
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1    // both strides now advance two rows
        lsl             \s_strd,  \s_strd,  #1
        vmovl.s8        q0,  d0                   // coefficients widened to s16
4:
        vld1.8          {d16}, [\src], \s_strd    // 8 source bytes of row 0
        vld1.8          {d24}, [\sr2], \s_strd    // 8 source bytes of row 1
        vmovl.u8        q8,  d16                  // pixels widened to 16 bit
        vmovl.u8        q12, d24
        // Windows of row 0 shifted by 1, 2 and 3 pixels (2 bytes each).
        vext.8          d18, d16, d17, #2
        vext.8          d20, d16, d17, #4
        vext.8          d22, d16, d17, #6
        // Same for row 1.
        vext.8          d26, d24, d25, #2
        vext.8          d28, d24, d25, #4
        vext.8          d30, d24, d25, #6
        subs            \h,  \h,  #2              // flags consumed by bgt 4b below
        vmul.s16        d4,  d16, d0[0]
        vmla.s16        d4,  d18, d0[1]
        vmla.s16        d4,  d20, d0[2]
        vmla.s16        d4,  d22, d0[3]
        vmul.s16        d5,  d24, d0[0]
        vmla.s16        d5,  d26, d0[1]
        vmla.s16        d5,  d28, d0[2]
        vmla.s16        d5,  d30, d0[3]
        vrshr.s16       q2,  q2,  #2              // rounding shift right by 2
.ifc \type, put
        // put: a further rounding shift by 4, saturated to unsigned 8 bit,
        // then 4 bytes stored per row.
        vqrshrun.s16    d4,  q2,  #4
        vst1.32         {d4[0]}, [\dst, :32], \d_strd
        vst1.32         {d4[1]}, [\ds2, :32], \d_strd
.else
        // other \type: the 16-bit values are stored as they are.
        vst1.16         {d4}, [\dst, :64], \d_strd
        vst1.16         {d5}, [\ds2, :64], \d_strd
.endif
        bgt             4b
        pop             {r4-r11,pc}

80:     // 8xN h
        // Horizontal filter producing 8 pixels per row, two rows per
        // iteration, using all 8 coefficient bytes at \mx.
        vld1.8          {d0}, [\mx, :64]
        sub             \src,  \src,  #3          // first tap sits three pixels to the left
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1    // both strides now advance two rows
        lsl             \s_strd,  \s_strd,  #1
        vmovl.s8        q0,  d0                   // taps 0-3 in d0, taps 4-7 in d1 (s16)
8:
        vld1.8          {q8},  [\src], \s_strd    // 16 source bytes of row 0
        vld1.8          {q12}, [\sr2], \s_strd    // 16 source bytes of row 1
        // Widen the high half first: q8 / q12 are overwritten by the
        // second widening of each pair.
        vmovl.u8        q9,  d17
        vmovl.u8        q8,  d16
        vmovl.u8        q13, d25
        vmovl.u8        q12, d24

        vmul.s16        q10, q8,  d0[0]           // tap 0
        vmul.s16        q14, q12, d0[0]
        // Taps 1..7: shift the window by \i pixels and accumulate.
.irpc i, 1234567
        vext.8          q11, q8,  q9,  #(2*\i)
        vext.8          q15, q12, q13, #(2*\i)
.if \i < 4
        vmla.s16        q10, q11, d0[\i]
        vmla.s16        q14, q15, d0[\i]
.else
        vmla.s16        q10, q11, d1[\i-4]
        vmla.s16        q14, q15, d1[\i-4]
.endif
.endr
        subs            \h,  \h,  #2              // flags consumed by bgt 8b below
        vrshr.s16       q10, q10, #2              // rounding shift right by 2
        vrshr.s16       q14, q14, #2
.ifc \type, put
        // put: further rounding shift by 4, saturate to u8, 8 bytes per row.
        vqrshrun.s16    d20, q10, #4
        vqrshrun.s16    d28, q14, #4
        vst1.8          {d20}, [\dst, :64], \d_strd
        vst1.8          {d28}, [\ds2, :64], \d_strd
.else
        // other \type: store the 8 x 16-bit values of each row.
        vst1.16         {q10}, [\dst, :128], \d_strd
        vst1.16         {q14}, [\ds2, :128], \d_strd
.endif
        bgt             8b
        pop             {r4-r11,pc}

160:
320:
640:
1280:   // 16xN, 32xN, ... h
        // This could be done without touching q4-q6, by using only
        // one temporary for vext in the loop. That's slower on A7 and A53,
        // (but surprisingly, marginally faster on A8 and A73).
        vpush           {q4-q6}                   // restored by the vpop before returning
        vld1.8          {d0}, [\mx, :64]
        sub             \src,  \src,  #3          // first tap sits three pixels to the left
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1
        vmovl.s8        q0,  d0                   // taps 0-3 in d0, taps 4-7 in d1 (s16)

        // The loads below post-increment the source pointers by 24 bytes
        // for the first 16 columns and 16 bytes for each further 16, i.e.
        // by \w + 8 per row; take that off the two-row stride.
        sub             \s_strd,  \s_strd,  \w
        sub             \s_strd,  \s_strd,  #8
.ifc \type, put
        // put stores 16 bytes per 16 columns, i.e. \w bytes per row.
        lsl             \d_strd,  \d_strd,  #1
        sub             \d_strd,  \d_strd,  \w
.endif
        // NOTE(review): for other \type values \d_strd is used unmodified
        // here; presumably it is prepared before this point - confirm.
161:    // start of a pair of rows
        vld1.8          {d16, d17, d18},  [\src]!
        vld1.8          {d24, d25, d26},  [\sr2]!
        mov             \mx,  \w                  // \mx is reused as the column counter
        // Widen from the top down so that no source half is overwritten
        // before it has been read.
        vmovl.u8        q10, d18
        vmovl.u8        q9,  d17
        vmovl.u8        q8,  d16
        vmovl.u8        q14, d26
        vmovl.u8        q13, d25
        vmovl.u8        q12, d24

16:     // 16 columns of both rows
        vmul.s16        q1,  q8,  d0[0]           // row 0, columns 0-7
        vmul.s16        q2,  q9,  d0[0]           // row 0, columns 8-15
        vmul.s16        q3,  q12, d0[0]           // row 1, columns 0-7
        vmul.s16        q4,  q13, d0[0]           // row 1, columns 8-15
        // Taps 1..7: four shifted windows per tap, one per accumulator.
.irpc i, 1234567
        vext.8          q5,  q8,  q9,  #(2*\i)
        vext.8          q6,  q9,  q10, #(2*\i)
        vext.8          q11, q12, q13, #(2*\i)
        vext.8          q15, q13, q14, #(2*\i)
.if \i < 4
        vmla.s16        q1,  q5,  d0[\i]
        vmla.s16        q2,  q6,  d0[\i]
        vmla.s16        q3,  q11, d0[\i]
        vmla.s16        q4,  q15, d0[\i]
.else
        vmla.s16        q1,  q5,  d1[\i-4]
        vmla.s16        q2,  q6,  d1[\i-4]
        vmla.s16        q3,  q11, d1[\i-4]
        vmla.s16        q4,  q15, d1[\i-4]
.endif
.endr
        vrshr.s16       q1,  q1,  #2              // rounding shift right by 2
        vrshr.s16       q2,  q2,  #2
        vrshr.s16       q3,  q3,  #2
        vrshr.s16       q4,  q4,  #2
        subs            \mx,  \mx,  #16           // flags consumed by ble 9f below
.ifc \type, put
        // put: narrow each row to 16 unsigned bytes (q1 = row 0, q2 = row 1).
        vqrshrun.s16    d2,  q1,  #4
        vqrshrun.s16    d3,  q2,  #4
        vqrshrun.s16    d4,  q3,  #4
        vqrshrun.s16    d5,  q4,  #4
        vst1.8          {q1}, [\dst, :128]!
        vst1.8          {q2}, [\ds2, :128]!
.else
        // other \type: store 16 x 16-bit values per row.
        vst1.16         {q1, q2}, [\dst, :128]!
        vst1.16         {q3, q4}, [\ds2, :128]!
.endif
        ble             9f

        // More columns in this row pair: the last 8 widened pixels become
        // the first 8 of the next window, then 16 fresh bytes are loaded.
        vmov            q8,  q10
        vmov            q12, q14
        vld1.8          {d18, d19}, [\src]!
        vld1.8          {d26, d27}, [\sr2]!
        vmovl.u8        q10, d19
        vmovl.u8        q9,  d18
        vmovl.u8        q14, d27
        vmovl.u8        q13, d26
        b               16b

9:      // row pair finished: step all four pointers to the next pair
        add             \dst,  \dst,  \d_strd
        add             \ds2,  \ds2,  \d_strd
        add             \src,  \src,  \s_strd
        add             \sr2,  \sr2,  \s_strd

        subs            \h,  \h,  #2
        bgt             161b
        vpop            {q4-q6}
        pop             {r4-r11,pc}

L(\type\()_8tap_v):
        // Vertical-only entry. The coefficient index is taken from \my:
        // bits 7-13 when \h > 4, otherwise bits 0-6. \my then becomes the
        // address r10 + index * 8.
        // NOTE(review): r10 is presumably the coefficient table base and r8
        // the jump-table index; both are set up outside this chunk.
        cmp             \h,  #4
        ubfx            r9,  \my, #7, #7
        and             \my, \my, #0x7f
        it              gt
        movgt           \my, r9
        add             \my, r10, \my, lsl #3

        // Indirect jump through the table of offsets below. Nothing after
        // the cmp above sets flags, so the targets can still branch on
        // "\h > 4" with bgt.
        adr             r9,  L(\type\()_8tap_v_tbl)
        ldr             r8,  [r9, r8, lsl #2]
        add             r9,  r9,  r8
        bx              r9

        .align 2
        // Offsets relative to the table itself; entry 0 is the 1280 label,
        // entry 6 the 20 label.
L(\type\()_8tap_v_tbl):
        .word 1280f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
        .word 640f  - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
        .word 320f  - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
        .word 160f  - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
        .word 80f   - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
        .word 40f   - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
        .word 20f   - L(\type\()_8tap_v_tbl) + CONFIG_THUMB

        // In the vertical paths below, load_16 / load_32 / load_reg,
        // interleave_1_*, vmovl_u8, mul_mla_*, vqrshrun_s16, st_16 and
        // shift_store_* are helper macros defined outside this chunk.

20:     // 2xN v
.ifc \type, put
        bgt             28f                       // \h > 4: use all 8 taps

        cmp             \h,  #2                   // flags consumed by bgt 24f below
        add             \my, \my, #2              // 4 coefficient bytes at offset 2
        vld1.32         {d0[]}, [\my]
        sub             \src,  \src,  \s_strd     // start one row above
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1    // both strides now advance two rows
        lsl             \d_strd,  \d_strd,  #1
        vmovl.s8        q0,  d0

        // 2x2 v
        load_16         \src, \sr2, \s_strd, d1, d2, d3, d4, d5
        interleave_1_16 d1, d2, d3, d4, d5
        bgt             24f
        vmovl_u8        q8, d1, q9, d2, q10, d3, q11, d4
        mul_mla_4       d6, d16, d18, d20, d22
        vqrshrun_s16    6,   q3,  d6
        st_16           \d_strd,  d6,  2
        pop             {r4-r11,pc}

24:     // 2x4 v
        load_16         \sr2, \src, \s_strd, d6, d7
        interleave_1_16 d5, d6, d7
        vmovl_u8        q8, d1, q9, d2, q10, d3, q11, d4, q12, d5, q13, d6
        // Fill the upper halves of q8-q11 from the registers two steps
        // further on, presumably so that one q-wide mul_mla_4 covers all
        // four output rows.
        vmov            d17, d20
        vmov            d19, d22
        vmov            d21, d24
        vmov            d23, d26
        mul_mla_4       q3, q8, q9, q10, q11
        vqrshrun_s16    6,   q3,  d6
        st_16           \d_strd,  d6,  4
        pop             {r4-r11,pc}

28:     // 2x6, 2x8, 2x12, 2x16 v
        vpush           {q4-q7}                   // restored by the vpop at 0: below
        vld1.8          {d0}, [\my, :64]          // all 8 coefficient bytes
        sub             \sr2,  \src,  \s_strd, lsl #1
        add             \ds2,  \dst,  \d_strd
        sub             \src,  \sr2,  \s_strd     // \src starts three rows above
        lsl             \d_strd,  \d_strd,  #1
        lsl             \s_strd,  \s_strd,  #1
        vmovl.s8        q0,  d0

        load_16         \src, \sr2, \s_strd, d2,  d4,  d6,  d8,  d10, d12, d14
        interleave_1_16 d2,  d4,  d6,  d8,  d10
        interleave_1_16 d10, d12, d14
        vmovl_u8        q1,  d2,  q2,  d4,  q3,  d6,  q4,  d8,  q5,  d10, q6,  d12
        vmov            d3,  d6
        vmov            d5,  d8
        vmov            d7,  d10
        vmov            d9,  d12
216:    // four output rows per iteration
        subs            \h,  \h,  #4
        load_16         \sr2, \src, \s_strd, d16, d18, d20, d22
        interleave_1_16 d14, d16, d18, d20, d22
        vmovl_u8        q7,  d14, q8,  d16, q9,  d18, q10, d20
        vmov            d11, d14
        vmov            d13, d16
        vmov            d15, d18
        vmov            d17, d20
        mul_mla_8_0     q1,  q1,  q2,  q3,  q4,  q5,  q6,  q7,  q8
        vqrshrun_s16    6,   q1,  d2
        st_16           \d_strd,  d2,  4
        ble             0f
        cmp             \h,  #2                   // exactly two rows left?
        // Slide the row history down by four rows for the next pass.
        vmov            q1,  q5
        vmov            q2,  q6
        vmov            q3,  q7
        vmov            q4,  q8
        vmov            q5,  q9
        vmov            q6,  q10
        vmov            d14, d22
        beq             26f
        b               216b
26:     // final two rows
        load_16         \sr2, \src, \s_strd, d16, d18
        interleave_1_16 d14, d16, d18
        vmovl_u8        q7,  d14, q8,  d16
        vmov            d11, d14
        vmov            d13, d16
        mul_mla_8_0     d2,  d2,  d4,  d6,  d8,  d10, d12, d14, d16
        vqrshrun_s16    6,   q1,  d2
        st_16           \d_strd,  d2,  2
0:
        vpop            {q4-q7}
        pop             {r4-r11,pc}
.endif

40:
        bgt             480f                      // \h > 4: use all 8 taps

        // 4x2, 4x4 v
        cmp             \h,  #2                   // flags consumed by ble 0f below
        add             \my, \my, #2              // 4 coefficient bytes at offset 2
        vld1.32         {d0[]}, [\my]
        sub             \src, \src, \s_strd       // start one row above
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \s_strd, \s_strd, #1      // both strides now advance two rows
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0,  d0

        load_32         \src, \sr2, \s_strd, d1, d2, d3, d4, d5
        interleave_1_32 d1,  d2,  d3,  d4,  d5
        vmovl_u8        q8,  d1,  q9,  d2,  q10, d3,  q11, d4
        mul_mla_4       q3,  q8,  q9,  q10, q11
        shift_store_4   \type, \d_strd, q3, d6, d7
        ble             0f                        // \h == 2: done
        // 4x4: two more source rows, two more output rows.
        load_32         \sr2, \src, \s_strd, d6, d7
        interleave_1_32 d5,  d6,  d7
        vmovl_u8        q12, d5,  q13, d6
        mul_mla_4       q3,  q10, q11, q12, q13
        shift_store_4   \type, \d_strd, q3, d6, d7
0:
        pop             {r4-r11,pc}

480:    // 4x6, 4x8, 4x12, 4x16 v
        vpush           {q4}                      // restored by the vpop at 0: below
        vld1.8          {d0}, [\my, :64]          // all 8 coefficient bytes
        sub             \sr2, \src, \s_strd, lsl #1
        add             \ds2, \dst, \d_strd
        sub             \src, \sr2, \s_strd       // \src starts three rows above
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0,  d0

        load_32         \src, \sr2, \s_strd, d2,  d4,  d6,  d8,  d16, d18, d20
        interleave_1_32 d2,  d4,  d6
        interleave_1_32 d6,  d8,  d16, d18, d20
        vmovl_u8        q1,  d2,  q2,  d4,  q3,  d6,  q4,  d8,  q8,  d16, q9,  d18

48:     // unrolled: the register roles rotate from step to step instead of
        // the row history being copied
        subs            \h,  \h,  #4
        load_32         \sr2, \src, \s_strd, d22, d24, d26, d28
        interleave_1_32 d20, d22, d24, d26, d28
        vmovl_u8        q10, d20, q11, d22, q12, d24, q13, d26
        mul_mla_8_2     q1,  q2,  q1,  q2,  q3,  q4,  q8,  q9,  q10, q11, q12, q13
        shift_store_4   \type, \d_strd, q1,  d2,  d3,  q2,  d4,  d5
        ble             0f
        load_32         \sr2, \src, \s_strd, d30, d2
        subs            \h,  \h,  #2
        interleave_1_32 d28, d30, d2
        vmovl_u8        q14, d28, q15, d30
        mul_mla_8_0     q8,  q8,  q9,  q10, q11, q12, q13, q14, q15
        shift_store_4   \type, \d_strd, q8,  d16, d17
        ble             0f
        load_32         \sr2, \src, \s_strd, d4,  d6
        subs            \h,  \h,  #2
        interleave_1_32 d2,  d4,  d6
        vmovl_u8        q1,  d2,  q2,  d4
        mul_mla_8_0     q9,  q10, q11, q12, q13, q14, q15, q1,  q2
        shift_store_4   \type, \d_strd, q9,  d18, d19
        ble             0f
        subs            \h,  \h,  #4
        load_32         \sr2, \src, \s_strd, d8,  d16, d18, d20
        interleave_1_32 d6,  d8,  d16, d18, d20
        vmovl_u8        q3,  d6,  q4,  d8,  q8,  d16, q9,  d18
        mul_mla_8_2     q12, q13, q12, q13, q14, q15, q1,  q2,  q3,  q4,  q8,  q9
        shift_store_4   \type, \d_strd, q12, d24, d25, q13, d26, d27
        bgt             48b
0:
        vpop            {q4}
        pop             {r4-r11,pc}

80:
        bgt             880f                      // \h > 4: use all 8 taps

        // 8x2, 8x4 v
        cmp             \h,  #2                   // flags consumed by ble 0f below
        add             \my, \my, #2              // 4 coefficient bytes at offset 2
        vld1.32         {d0[]}, [\my]
        sub             \src, \src, \s_strd       // start one row above
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \s_strd, \s_strd, #1      // both strides now advance two rows
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0,  d0

        load_reg        \src, \sr2, \s_strd, d1, d2, d3, d4, d5
        vmovl_u8        q8,  d1,  q9,  d2,  q10, d3,  q11, d4,  q12, d5
        mul_mla_4       q1,  q8,  q9,  q10, q11
        mul_mla_4       q2,  q9,  q10, q11, q12
        shift_store_8   \type, \d_strd, q1, d2, q2, d4
        ble             0f                        // \h == 2: done
        // 8x4: two more source rows, two more output rows.
        load_reg        \sr2, \src, \s_strd, d6, d7
        vmovl_u8        q13, d6,  q14, d7
        mul_mla_4       q1,  q10, q11, q12, q13
        mul_mla_4       q2,  q11, q12, q13, q14
        shift_store_8   \type, \d_strd, q1, d2, q2, d4
0:
        pop             {r4-r11,pc}

880:    // 8x6, 8x8, 8x16, 8x32 v
1680:   // 16x8, 16x16, ...
320:    // 32x8, 32x16, ...
640:
1280:
        // Shared 8-tap vertical path: the block is processed in strips of
        // 8 columns, each strip running over the full height.
        vpush           {q4}                      // restored by the vpop at 0: below
        vld1.8          {d0}, [\my, :64]          // all 8 coefficient bytes
        sub             \src, \src, \s_strd
        sub             \src, \src, \s_strd, lsl #1   // \src starts three rows above
        vmovl.s8        q0,  d0
        mov             \my, \h                   // keep the height for the later strips
168:    // start of one 8-column strip
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \s_strd, \s_strd, #1      // doubled here, halved again at 9:
        lsl             \d_strd, \d_strd, #1

        load_reg        \src, \sr2, \s_strd, d2,  d4,  d6,  d8,  d16, d18, d20
        vmovl_u8        q1,  d2,  q2,  d4,  q3,  d6,  q4,  d8,  q8,  d16, q9,  d18, q10, d20

88:     // unrolled: the register roles rotate from step to step instead of
        // the row history being copied
        subs            \h,  \h,  #2
        load_reg        \sr2, \src, \s_strd, d22, d24
        vmovl_u8        q11, d22, q12, d24
        mul_mla_8_1     q1,  q2,  q1,  q2,  q3,  q4,  q8,  q9,  q10, q11, q12
        shift_store_8   \type, \d_strd, q1, d2, q2, d4
        ble             9f
        subs            \h,  \h,  #2
        load_reg        \sr2, \src, \s_strd, d26, d28
        vmovl_u8        q13, d26, q14, d28
        mul_mla_8_1     q3,  q4,  q3,  q4,  q8,  q9,  q10, q11, q12, q13, q14
        shift_store_8   \type, \d_strd, q3, d6, q4, d8
        ble             9f
        subs            \h,  \h,  #2
        load_reg        \sr2, \src, \s_strd, d30, d2
        vmovl_u8        q15, d30, q1,  d2
        mul_mla_8_1     q8,  q9,  q8,  q9,  q10, q11, q12, q13, q14, q15, q1
        shift_store_8   \type, \d_strd, q8, d16, q9, d18
        ble             9f
        subs            \h,  \h,  #2
        load_reg        \sr2, \src, \s_strd, d4,  d6
        vmovl_u8        q2,  d4,  q3,  d6
        mul_mla_8_1     q10, q11, q10, q11, q12, q13, q14, q15, q1,  q2,  q3
        shift_store_8   \type, \d_strd, q10, d20, q11, d22
        ble             9f
        subs            \h,  \h,  #4
        load_reg        \sr2, \src, \s_strd, d8,  d16, d18, d20
        vmovl_u8        q4,  d8,  q8,  d16, q9,  d18, q10, d20
        mul_mla_8_1     q12, q13, q12, q13, q14, q15, q1,  q2,  q3,  q4,  q8
        mul_mla_8_1     q14, q15, q14, q15, q1,  q2,  q3,  q4,  q8,  q9,  q10
        shift_store_8   \type, \d_strd, q12, d24, q13, d26, q14, d28, q15, d30
        bgt             88b
9:      // strip finished
        subs            \w,  \w,  #8
        ble             0f                        // no columns left
        asr             \s_strd, \s_strd, #1      // back to single-row strides
        asr             \d_strd, \d_strd, #1
        // Rewind \src by \my + 8 rows and \dst by \my rows, restore the
        // row counter, then move both pointers to the next strip.
        mls             \src, \s_strd, \my, \src
        mls             \dst, \d_strd, \my, \dst
        sub             \src, \src, \s_strd, lsl #3
        mov             \h,  \my
        add             \src, \src, #8
.ifc \type, put
        add             \dst, \dst, #8
.else
        add             \dst, \dst, #16           // presumably 8 columns of 16-bit output
.endif
        b               168b
0:
        vpop            {q4}
        pop             {r4-r11,pc}

160:
        bgt             1680b                     // \h > 4: shared 8-tap path above

        // 16x2, 16x4 v
        add             \my, \my, #2              // 4 coefficient bytes at offset 2
        vld1.32         {d0[]}, [\my]
        sub             \src, \src, \s_strd       // start one row above
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \s_strd, \s_strd, #1      // both strides now advance two rows
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0,  d0

        cmp             \h,  #2                   // flags consumed by ble 0f below
        load_reg        \src, \sr2, \s_strd, q11, q12, q13, q14, q15
        // Low 8 bytes of each loaded register go to q1, q2, q3, q8, q9.
        vmovl.u8        q1,  d22
        vmovl.u8        q2,  d24
        vmovl.u8        q3,  d26
        vmovl.u8        q8,  d28
        vmovl.u8        q9,  d30
        // High 8 bytes are widened in place.
        vmovl.u8        q11, d23
        vmovl.u8        q12, d25
        vmovl.u8        q13, d27
        vmovl.u8        q14, d29
        vmovl.u8        q15, d31
        mul_mla_4       q1,  q1,  q2,  q3,  q8
        mul_mla_4       q10, q2,  q3,  q8,  q9
        mul_mla_4       q2,  q11, q12, q13, q14
        mul_mla_4       q11, q12, q13, q14, q15
        shift_store_16  \type, \d_strd, q1, d2, d3, q2, q10, d20, d21, q11
        ble             0f                        // \h == 2: done
        // 16x4: two more source rows, two more output rows.
        load_reg        \sr2, \src, \s_strd, q10, q11
        vmovl.u8        q1,  d20
        vmovl.u8        q10, d21
        vmovl.u8        q12, d22
        vmovl.u8        q11, d23
        mul_mla_4       q2,  q3,  q8,  q9,  q1
        mul_mla_4       q3,  q13, q14, q15, q10
        mul_mla_4       q13, q8,  q9,  q1,  q12
        mul_mla_4       q14, q14, q15, q10, q11
        shift_store_16  \type, \d_strd, q2, d4, d5, q3, q13, d26, d27, q14
0:
        pop             {r4-r11,pc}

L(\type\()_8tap_hv):
        // Combined horizontal + vertical entry. The vertical coefficient
        // index is taken from \my exactly as in the vertical-only entry:
        // bits 7-13 when \h > 4, otherwise bits 0-6, and \my becomes
        // r10 + index * 8.
        // NOTE(review): r10 and r8 are set up outside this chunk.
        cmp             \h,  #4
        ubfx            r9,  \my, #7, #7
        and             \my, \my, #0x7f
        it              gt
        movgt           \my, r9
        add             \my, r10, \my, lsl #3

        // Indirect jump through the table of offsets below; the flags from
        // the cmp above are not modified before the jump.
        adr             r9,  L(\type\()_8tap_hv_tbl)
        ldr             r8,  [r9, r8, lsl #2]
        add             r9,  r9,  r8
        bx              r9

        .align 2
        // Offsets relative to the table itself; entry 0 is the 1280 label,
        // entry 6 the 20 label.
L(\type\()_8tap_hv_tbl):
        .word 1280f - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 640f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 320f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 160f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 80f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 40f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
        .word 20f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB

// 2-pixel-wide hv cases; only assembled when type is put (see the .ifc below).
// Reached through the hv jump table with the flags of the cmp h, #4 done at the
// hv entry still valid, so bgt selects the 8-tap vertical variant for h > 4.
//
// Register use in both variants:
//   d0       4 horizontal taps (bytes 2..5 of the entry at mx), widened to s16
//   q1       vertical taps as s16 (4 in d2 for h <= 4, 8 in d2/d3 for h > 4)
//   src/sr2  source row pointers, each advanced by two rows per load
//   dst/ds2  destination row pointers, each advanced by two rows per store
//   d16...   horizontally filtered rows, two rows x two pixels per d register
20:
.ifc \type, put
        add             \mx, \mx, #2
        vld1.32         {d0[]}, [\mx]           // 4 taps, replicated into both halves of d0
        bgt             280f
        add             \my, \my, #2
        vld1.32         {d2[]}, [\my]           // 4 vertical taps (bytes 2..5 of the entry)

        // 2x2, 2x4 hv
        sub             \sr2, \src, #1          // one pixel to the left
        sub             \src, \sr2, \s_strd     // and one row up for the first row
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1    // both pointer pairs step two rows
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2


        // First row on its own: filter pixels 0 and 1 with elementwise multiplies
        // and two pairwise adds, giving { p0, p1, p0, p1 } in d16.
        vld1.8          {d26}, [\src], \s_strd
        vmovl.u8        q13, d26
        vext.8          q14, q13, q13, #2       // same row shifted by one pixel
        vmul.s16        d26, d26, d0
        vmul.s16        d28, d28, d0
        vpadd.s16       d26, d26, d28
        vpadd.s16       d26, d26, d26
        vrshr.s16       d16, d26, #2
        bl              L(\type\()_8tap_filter_2)

        // d16 = { row 0, row 1 }, d17 = { row 1, row 2 }
        vext.8          d16, d16, d16, #4
        vmov            d17, d26
        vext.8          d16, d16, d26, #4

2:
        bl              L(\type\()_8tap_filter_2)       // d26 = { row 3, row 4 }

        vext.8          d18, d17, d26, #4       // d18 = { row 2, row 3 }
        vmull.s16       q2, d16, d2[0]
        vmlal.s16       q2, d17, d2[1]
        vmlal.s16       q2, d18, d2[2]
        vmlal.s16       q2, d26, d2[3]

        vqrshrn.s32     d4, q2, #\shift_hv
        vqmovun.s16     d4, q2
        subs            \h, \h, #2
        vst1.16         {d4[0]}, [\dst, :16], \d_strd   // 2 pixels of the first output row
        vst1.16         {d4[1]}, [\ds2, :16], \d_strd   // 2 pixels of the second output row
        ble             0f
        vmov            d16, d18                // slide the row history down by two rows
        vmov            d17, d26
        b               2b

280:    // 2x8, 2x16, 2x32 hv
        vld1.8          {d2}, [\my, :64]        // all 8 vertical taps
        sub             \src, \src, #1
        sub             \sr2, \src, \s_strd, lsl #1     // two rows up
        sub             \src, \sr2, \s_strd             // three rows up for the first row
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2

        // First row on its own, as in the 4-tap variant above.
        vld1.8          {d26}, [\src], \s_strd
        vmovl.u8        q13, d26
        vext.8          q14, q13, q13, #2
        vmul.s16        d26, d26, d0
        vmul.s16        d28, d28, d0
        vpadd.s16       d26, d26, d28
        vpadd.s16       d26, d26, d26
        vrshr.s16       d16, d26, #2

        // Build the history of seven filtered rows in d16-d21, each register holding
        // two consecutive rows and overlapping its neighbour by one row.
        bl              L(\type\()_8tap_filter_2)
        vext.8          d16, d16, d16, #4
        vmov            d17, d26
        vext.8          d16, d16, d26, #4
        bl              L(\type\()_8tap_filter_2)
        vext.8          d18, d17, d26, #4
        vmov            d19, d26
        bl              L(\type\()_8tap_filter_2)
        vext.8          d20, d19, d26, #4
        vmov            d21, d26

28:
        bl              L(\type\()_8tap_filter_2)
        vext.8          d22, d21, d26, #4
        vmull.s16       q2, d16, d2[0]
        vmlal.s16       q2, d17, d2[1]
        vmlal.s16       q2, d18, d2[2]
        vmlal.s16       q2, d19, d2[3]
        vmlal.s16       q2, d20, d3[0]
        vmlal.s16       q2, d21, d3[1]
        vmlal.s16       q2, d22, d3[2]
        vmlal.s16       q2, d26, d3[3]

        vqrshrn.s32     d4, q2, #\shift_hv
        vqmovun.s16     d4, q2
        subs            \h, \h, #2
        vst1.16         {d4[0]}, [\dst, :16], \d_strd
        vst1.16         {d4[1]}, [\ds2, :16], \d_strd
        ble             0f
        vmov            d16, d18                // slide the row history down by two rows
        vmov            d17, d19
        vmov            d18, d20
        vmov            d19, d21
        vmov            d20, d22
        vmov            d21, d26
        b               28b

0:
        pop             {r4-r11,pc}

// Horizontal 4-tap filter of the next two source rows, two pixels per row.
// In:   sr2, src = row pointers, both post-incremented by s_strd (already doubled
//       at every call site); d0[0..3] = horizontal taps as s16
// Out:  d26 = { sr2 row p0, p1, src row p0, p1 }, rounded >> 2
//       d27 = d26 with its two 32-bit halves swapped
// Clobbers q14 and q15.
L(\type\()_8tap_filter_2):
        vld1.8          {d28}, [\sr2], \s_strd
        vld1.8          {d30}, [\src], \s_strd
        vext.8          d29, d28, d28, #1       // both rows shifted by one pixel
        vext.8          d31, d30, d30, #1
        vmovl.u8        q13, d28
        vmovl.u8        q14, d29
        vmov            d27, d28
        vmovl.u8        q14, d30
        vmovl.u8        q15, d31
        vtrn.32         d26, d28                // interleave the two rows so that one
        vtrn.32         d27, d30                // multiply-accumulate chain covers both
        vmul.s16        d26, d26, d0[0]
        vmla.s16        d26, d27, d0[1]
        vmla.s16        d26, d28, d0[2]
        vmla.s16        d26, d30, d0[3]
        vrshr.s16       d26, d26, #2
        vext.8          d27, d26, d26, #4
        bx              lr
.endif

// 4-pixel-wide hv cases. The flags of the cmp h, #4 done at the hv entry are
// still valid here; bgt selects the 8-tap vertical variant for h > 4.
40:
        add             \mx, \mx, #2
        vld1.32         {d0[]}, [\mx]           // 4 horizontal taps (bytes 2..5 of the entry)
        bgt             480f
        add             \my, \my, #2
        vld1.32         {d2[]}, [\my]           // 4 vertical taps
        sub             \sr2, \src, #1          // one pixel to the left
        sub             \src, \sr2, \s_strd     // and one row up for the first row
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1    // both pointer pairs step two rows
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2

        // 4x2, 4x4 hv
        // First row on its own; the tap chain continues after this point.
        vld1.8          {d30}, [\src], \s_strd
        vmovl.u8        q14, d30
        vext.8          d27, d28, d29, #2
        vext.8          d30, d28, d29, #4
        vext.8          d31, d28, d29, #6
        vmul.s16        d26, d28, d0[0]
        vmla.s16        d26, d27, d0[1]
// Remainder of the 4x2/4x4 hv set-up: finish the 4-tap horizontal filter of the
// first source row (taps 0 and 1 were already accumulated into d26 just above).
        vmla.s16        d26, d30, d0[2]
        vmla.s16        d26, d31, d0[3]
        vrshr.s16       d16, d26, #2            // d16 = filtered row 0

        bl              L(\type\()_8tap_filter_4)
        vmov            d17, d26                // d17 = row 1
        vmov            d18, d27                // d18 = row 2

        // Two output rows per iteration: q2 from rows d16, d17, d18, d26 and
        // q3 from rows d17, d18, d26, d27.
4:
        bl              L(\type\()_8tap_filter_4)
        vmull.s16       q2, d16, d2[0]
        vmlal.s16       q2, d17, d2[1]
        vmlal.s16       q2, d18, d2[2]
        vmlal.s16       q2, d26, d2[3]
        vmull.s16       q3, d17, d2[0]
        vmlal.s16       q3, d18, d2[1]
        vmlal.s16       q3, d26, d2[2]
        vmlal.s16       q3, d27, d2[3]
        vqrshrn.s32     d4, q2, #\shift_hv
        vqrshrn.s32     d6, q3, #\shift_hv
        subs            \h, \h, #2
.ifc \type, put
        // put: saturate to u8 and store 4 bytes per row
        vqmovun.s16     d4, q2
        vqmovun.s16     d6, q3
        vst1.32         {d4[0]}, [\dst, :32], \d_strd
        vst1.32         {d6[0]}, [\ds2, :32], \d_strd
.else
        // otherwise: store the four 16-bit intermediates per row
        vst1.16         {d4}, [\dst, :64], \d_strd
        vst1.16         {d6}, [\ds2, :64], \d_strd
.endif
        ble             0f
        vmov            d16, d18                // slide the row history down by two rows
        vmov            d17, d26
        vmov            d18, d27
        b               4b

480:    // 4x8, 4x16, 4x32 hv
        vld1.8          {d2}, [\my, :64]        // all 8 vertical taps
        sub             \src, \src, #1
        sub             \sr2, \src, \s_strd, lsl #1     // two rows up
        sub             \src, \sr2, \s_strd             // three rows up for the first row
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1    // both pointer pairs step two rows
        lsl             \d_strd, \d_strd, #1
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2

        // First row on its own.
        vld1.8          {d30}, [\src], \s_strd
        vmovl.u8        q14, d30
        vext.8          d27, d28, d29, #2
        vext.8          d30, d28, d29, #4
        vext.8          d31, d28, d29, #6
        vmul.s16        d26, d28, d0[0]
        vmla.s16        d26, d27, d0[1]
        vmla.s16        d26, d30, d0[2]
        vmla.s16        d26, d31, d0[3]
        vrshr.s16       d16, d26, #2

        // Build the history of seven filtered rows in d16-d22, one row per register.
        bl              L(\type\()_8tap_filter_4)
        vmov            d17, d26
        vmov            d18, d27
        bl              L(\type\()_8tap_filter_4)
        vmov            d19, d26
        vmov            d20, d27
        bl              L(\type\()_8tap_filter_4)
        vmov            d21, d26
        vmov            d22, d27

48:
        bl              L(\type\()_8tap_filter_4)
        vmull.s16       q2, d16, d2[0]
        vmlal.s16       q2, d17, d2[1]
        vmlal.s16       q2, d18, d2[2]
        vmlal.s16       q2, d19, d2[3]
        vmlal.s16       q2, d20, d3[0]
        vmlal.s16       q2, d21, d3[1]
        vmlal.s16       q2, d22, d3[2]
        vmlal.s16       q2, d26, d3[3]
        vmull.s16       q3, d17, d2[0]
        vmlal.s16       q3, d18, d2[1]
        vmlal.s16       q3, d19, d2[2]
        vmlal.s16       q3, d20, d2[3]
        vmlal.s16       q3, d21, d3[0]
        vmlal.s16       q3, d22, d3[1]
        vmlal.s16       q3, d26, d3[2]
        vmlal.s16       q3, d27, d3[3]
        vqrshrn.s32     d4, q2, #\shift_hv
        vqrshrn.s32     d6, q3, #\shift_hv
        subs            \h, \h, #2
.ifc \type, put
        vqmovun.s16     d4, q2
        vqmovun.s16     d6, q3
        vst1.32         {d4[0]}, [\dst, :32], \d_strd
        vst1.32         {d6[0]}, [\ds2, :32], \d_strd
.else
        vst1.16         {d4}, [\dst, :64], \d_strd
        vst1.16         {d6}, [\ds2, :64], \d_strd
.endif
        ble             0f
        vmov            d16, d18                // slide the row history down by two rows
        vmov            d17, d19
        vmov            d18, d20
        vmov            d19, d21
        vmov            d20, d22
        vmov            d21, d26
        vmov            d22, d27
        b               48b
0:
        pop             {r4-r11,pc}

// Horizontal 4-tap filter of the next two source rows, four pixels per row.
// In:   sr2, src = row pointers, both post-incremented by s_strd (already doubled
//       at every call site); d0[0..3] = horizontal taps as s16
// Out:  d26 = filtered row read through sr2, d27 = filtered row read through src,
//       both rounded >> 2
// Clobbers q14, q15 and d1. d1 is free scratch in the 4-wide cases: they only
// read their taps from d0[0..3].
L(\type\()_8tap_filter_4):
        vld1.8          {d30}, [\sr2], \s_strd
        vld1.8          {d31}, [\src], \s_strd
        vmovl.u8        q14, d30
        vext.8          d27, d28, d29, #2
        vext.8          d30, d28, d29, #4
        vext.8          d1, d28, d29, #6
        vmul.s16        d26, d28, d0[0]
        vmla.s16        d26, d27, d0[1]
        vmla.s16        d26, d30, d0[2]
        vmla.s16        d26, d1, d0[3]

        vmovl.u8        q14, d31
        vext.8          d30, d28, d29, #2
        vext.8          d31, d28, d29, #4
        vext.8          d1, d28, d29, #6
        vmul.s16        d27, d28, d0[0]
        vmla.s16        d27, d30, d0[1]
        vmla.s16        d27, d31, d0[2]
        vmla.s16        d27, d1, d0[3]
        vrshr.s16       d26, d26, #2
        vrshr.s16       d27, d27, #2
        bx              lr

// hv cases for the 80/160/320 jump table entries, processed as 8-pixel-wide
// column strips. The flags of the cmp h, #4 done at the hv entry are still valid;
// bgt selects the 8-tap vertical variant for h > 4.
80:
160:
320:
        bgt             880f
        vpush           {q4-q7}                 // d8-d15 are callee-saved under the AAPCS
        add             \my, \my, #2
        vld1.8          {d0}, [\mx, :64]        // all 8 horizontal taps
        vld1.32         {d2[]}, [\my]           // 4 vertical taps (bytes 2..5 of the entry)
        sub             \src, \src, #3          // three pixels to the left
        sub             \src, \src, \s_strd     // and one row up
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2
        mov             \my, \h                 // my is free now: keep h for the next strip

164:    // 8x2, 8x4, 16x2, 16x4, 32x2, 32x4 hv
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
// Remainder of the per-strip set-up of the 164 loop: double both strides so that
// each pointer pair (src/sr2 and dst/ds2, set up just above) steps two rows.
        lsl             \d_strd, \d_strd, #1
        lsl             \s_strd, \s_strd, #1

        // First row on its own: 8-tap horizontal filter of 8 pixels into q3.
        vld1.8          {q14}, [\src], \s_strd
        vmovl.u8        q12, d28
        vmovl.u8        q13, d29
        vmul.s16        q10, q12, d0[0]
.irpc i, 123
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q10, q14, d0[\i]
.endr
.irpc i, 4567
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q10, q14, d1[\i-4]
.endr
        vrshr.s16       q3, q10, #2             // q3 = filtered row 0

        bl              L(\type\()_8tap_filter_8)
        vmov            q4, q10                 // q4 = row 1
        vmov            q5, q11                 // q5 = row 2

        // Two output rows per iteration with 4 vertical taps:
        //   q12/q13 (low/high 4 pixels) from rows q3, q4, q5, q10
        //   q14/q15 (low/high 4 pixels) from rows q4, q5, q10, q11
8:
        bl              L(\type\()_8tap_filter_8)
        vmull.s16       q12, d6, d2[0]
        vmull.s16       q13, d7, d2[0]
        vmull.s16       q14, d8, d2[0]
        vmull.s16       q15, d9, d2[0]
        vmlal.s16       q12, d8, d2[1]
        vmlal.s16       q13, d9, d2[1]
        vmlal.s16       q14, d10, d2[1]
        vmlal.s16       q15, d11, d2[1]
        vmlal.s16       q12, d10, d2[2]
        vmlal.s16       q13, d11, d2[2]
        vmlal.s16       q14, d20, d2[2]
        vmlal.s16       q15, d21, d2[2]
        vmlal.s16       q12, d20, d2[3]
        vmlal.s16       q13, d21, d2[3]
        vmlal.s16       q14, d22, d2[3]
        vmlal.s16       q15, d23, d2[3]
        vqrshrn.s32     d24, q12, #\shift_hv    // q12 = first output row as 8 x s16
        vqrshrn.s32     d25, q13, #\shift_hv
        vqrshrn.s32     d28, q14, #\shift_hv    // q14 = second output row as 8 x s16
        vqrshrn.s32     d29, q15, #\shift_hv
        subs            \h, \h, #2
.ifc \type, put
        // put: saturate to u8 and store 8 bytes per row
        vqmovun.s16     d24, q12
        vqmovun.s16     d28, q14
        vst1.8          {d24}, [\dst, :64], \d_strd
        vst1.8          {d28}, [\ds2, :64], \d_strd
.else
        // otherwise: store the eight 16-bit intermediates per row
        vst1.16         {q12}, [\dst, :128], \d_strd
        vst1.16         {q14}, [\ds2, :128], \d_strd
.endif
        ble             9f
        vmov            q3, q5                  // slide the row history down by two rows
        vmov            q4, q10
        vmov            q5, q11
        b               8b
9:
        // Strip finished: move on to the next 8 columns, if any are left.
        subs            \w, \w, #8
        ble             0f                      // shared epilogue further down
        asr             \s_strd, \s_strd, #1    // undo the stride doubling
        asr             \d_strd, \d_strd, #1
        mls             \src, \s_strd, \my, \src        // src -= s_strd * h
        mls             \dst, \d_strd, \my, \dst        // dst -= d_strd * h
        sub             \src, \src, \s_strd, lsl #2     // plus the 4 extra rows that were read
        mov             \h, \my                 // restore the row counter
        add             \src, \src, #8
.ifc \type, put
        add             \dst, \dst, #8          // 8 output bytes per strip row
.else
        add             \dst, \dst, #16         // 8 16-bit outputs per strip row
.endif
        b               164b

880:    // 8x8, 8x16, ..., 16x8, ..., 32x8, ... hv
640:
1280:
        vpush           {q4-q7}                 // d8-d15 are callee-saved under the AAPCS
        vld1.8          {d0}, [\mx, :64]        // all 8 horizontal taps
        vld1.8          {d2}, [\my, :64]        // all 8 vertical taps
        sub             \src, \src, #3          // three pixels to the left
        sub             \src, \src, \s_strd     // and three rows up
        sub             \src, \src, \s_strd, lsl #1
        vmovl.s8        q0, d0
        vmovl.s8        q1, d2
        mov             \my, \h                 // my is free now: keep h for the next strip

168:
        add             \ds2, \dst, \d_strd
        add             \sr2, \src, \s_strd
        lsl             \d_strd, \d_strd, #1    // both pointer pairs step two rows
        lsl             \s_strd, \s_strd, #1

        // First row on its own: 8-tap horizontal filter of 8 pixels into q3.
        vld1.8          {q14}, [\src], \s_strd
        vmovl.u8        q12, d28
        vmovl.u8        q13, d29
        vmul.s16        q10, q12, d0[0]
.irpc i, 123
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q10, q14, d0[\i]
.endr
.irpc i, 4567
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q10, q14, d1[\i-4]
.endr
        vrshr.s16       q3, q10, #2

        // Build the history of seven filtered rows in q3-q9, one row per register.
        bl              L(\type\()_8tap_filter_8)
        vmov            q4, q10
        vmov            q5, q11
        bl              L(\type\()_8tap_filter_8)
        vmov            q6, q10
        vmov            q7, q11
        bl              L(\type\()_8tap_filter_8)
        vmov            q8, q10
        vmov            q9, q11

        // Two output rows per iteration with 8 vertical taps:
        //   q12/q13 (low/high 4 pixels) from rows q3-q9 and q10
        //   q14/q15 (low/high 4 pixels) from rows q4-q9, q10 and q11
88:
        bl              L(\type\()_8tap_filter_8)
        vmull.s16       q12, d6, d2[0]
        vmull.s16       q13, d7, d2[0]
        vmull.s16       q14, d8, d2[0]
        vmull.s16       q15, d9, d2[0]
        vmlal.s16       q12, d8, d2[1]
        vmlal.s16       q13, d9, d2[1]
        vmlal.s16       q14, d10, d2[1]
        vmlal.s16       q15, d11, d2[1]
        vmlal.s16       q12, d10, d2[2]
        vmlal.s16       q13, d11, d2[2]
        vmlal.s16       q14, d12, d2[2]
        vmlal.s16       q15, d13, d2[2]
        vmlal.s16       q12, d12, d2[3]
        vmlal.s16       q13, d13, d2[3]
        vmlal.s16       q14, d14, d2[3]
        vmlal.s16       q15, d15, d2[3]
        vmlal.s16       q12, d14, d3[0]
        vmlal.s16       q13, d15, d3[0]
        vmlal.s16       q14, d16, d3[0]
        vmlal.s16       q15, d17, d3[0]
        vmlal.s16       q12, d16, d3[1]
        vmlal.s16       q13, d17, d3[1]
        vmlal.s16       q14, d18, d3[1]
        vmlal.s16       q15, d19, d3[1]
        vmlal.s16       q12, d18, d3[2]
        vmlal.s16       q13, d19, d3[2]
        vmlal.s16       q14, d20, d3[2]
        vmlal.s16       q15, d21, d3[2]
        vmlal.s16       q12, d20, d3[3]
        vmlal.s16       q13, d21, d3[3]
        vmlal.s16       q14, d22, d3[3]
        vmlal.s16       q15, d23, d3[3]
        vqrshrn.s32     d24, q12, #\shift_hv    // q12 = first output row as 8 x s16
        vqrshrn.s32     d25, q13, #\shift_hv
        vqrshrn.s32     d28, q14, #\shift_hv    // q14 = second output row as 8 x s16
        vqrshrn.s32     d29, q15, #\shift_hv
        subs            \h, \h, #2
.ifc \type, put
        vqmovun.s16     d24, q12
        vqmovun.s16     d28, q14
        vst1.8          {d24}, [\dst, :64], \d_strd
        vst1.8          {d28}, [\ds2, :64], \d_strd
.else
        vst1.16         {q12}, [\dst, :128], \d_strd
        vst1.16         {q14}, [\ds2, :128], \d_strd
.endif
        ble             9f
        vmov            q3, q5                  // slide the row history down by two rows
        vmov            q4, q6
        vmov            q5, q7
        vmov            q6, q8
        vmov            q7, q9
        vmov            q8, q10
        vmov            q9, q11
        b               88b
9:
        // Strip finished: move on to the next 8 columns, if any are left.
        subs            \w, \w, #8
        ble             0f
        asr             \s_strd, \s_strd, #1    // undo the stride doubling
        asr             \d_strd, \d_strd, #1
        mls             \src, \s_strd, \my, \src        // src -= s_strd * h
        mls             \dst, \d_strd, \my, \dst        // dst -= d_strd * h
        sub             \src, \src, \s_strd, lsl #3     // plus the 8 extra rows that were read
        mov             \h, \my                 // restore the row counter
        add             \src, \src, #8
.ifc \type, put
        add             \dst, \dst, #8          // 8 output bytes per strip row
.else
        add             \dst, \dst, #16         // 8 16-bit outputs per strip row
.endif
        b               168b
0:
        // Epilogue shared by both 8-wide variants (both pushed q4-q7).
        vpop            {q4-q7}
        pop             {r4-r11,pc}

// Horizontal 8-tap filter of the next two source rows, eight pixels per row.
// In:   sr2, src = row pointers, both post-incremented by s_strd (already doubled
//       at every call site); d0/d1 = the 8 horizontal taps as s16
// Out:  q10 = filtered row read through sr2, q11 = filtered row read through src,
//       both rounded >> 2
// Clobbers q12-q15.
L(\type\()_8tap_filter_8):
        vld1.8          {q14}, [\sr2], \s_strd
        vld1.8          {q15}, [\src], \s_strd
        vmovl.u8        q12, d28
        vmovl.u8        q13, d29
        vmul.s16        q10, q12, d0[0]
.irpc i, 123
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q10, q14, d0[\i]
.endr
.irpc i, 4567
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q10, q14, d1[\i-4]
.endr
        vmovl.u8        q12, d30
        vmovl.u8        q13, d31
        vmul.s16        q11, q12, d0[0]
.irpc i, 123
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q11, q14, d0[\i]
.endr
.irpc i, 4567
        vext.8          q14, q12, q13, #(2*\i)
        vmla.s16        q11, q14, d1[\i-4]
.endr
        vrshr.s16       q10, q10, #2
        vrshr.s16       q11, q11, #2
        bx              lr
endfunc


// Start of the bilinear function. Its body continues beyond this span, so the
// two statements below are reproduced unchanged.
function \type\()_bilin_8bpc_neon, export=1
        push            {r4-r11,lr}
2449*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #36] 2450*c0909341SAndroid Build Coastguard Worker ldrd r6, r7, [sp, #44] 2451*c0909341SAndroid Build Coastguard Worker vdup.8 d1, \mx 2452*c0909341SAndroid Build Coastguard Worker vdup.8 d3, \my 2453*c0909341SAndroid Build Coastguard Worker rsb r8, \mx, #16 2454*c0909341SAndroid Build Coastguard Worker rsb r9, \my, #16 2455*c0909341SAndroid Build Coastguard Worker vdup.8 d0, r8 2456*c0909341SAndroid Build Coastguard Worker vdup.8 d2, r9 2457*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 2458*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \w, #1 2459*c0909341SAndroid Build Coastguard Worker.endif 2460*c0909341SAndroid Build Coastguard Worker clz r8, \w 2461*c0909341SAndroid Build Coastguard Worker cmp \mx, #0 2462*c0909341SAndroid Build Coastguard Worker sub r8, r8, #24 2463*c0909341SAndroid Build Coastguard Worker bne L(\type\()_bilin_h) 2464*c0909341SAndroid Build Coastguard Worker cmp \my, #0 2465*c0909341SAndroid Build Coastguard Worker bne L(\type\()_bilin_v) 2466*c0909341SAndroid Build Coastguard Worker b \type\()_neon 2467*c0909341SAndroid Build Coastguard Worker 2468*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_h): 2469*c0909341SAndroid Build Coastguard Worker cmp \my, #0 2470*c0909341SAndroid Build Coastguard Worker bne L(\type\()_bilin_hv) 2471*c0909341SAndroid Build Coastguard Worker 2472*c0909341SAndroid Build Coastguard Worker adr r9, L(\type\()_bilin_h_tbl) 2473*c0909341SAndroid Build Coastguard Worker ldr r8, [r9, r8, lsl #2] 2474*c0909341SAndroid Build Coastguard Worker add r9, r9, r8 2475*c0909341SAndroid Build Coastguard Worker bx r9 2476*c0909341SAndroid Build Coastguard Worker 2477*c0909341SAndroid Build Coastguard Worker .align 2 2478*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_h_tbl): 2479*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2480*c0909341SAndroid Build Coastguard Worker .word 640f 
- L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2481*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2482*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2483*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2484*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2485*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB 2486*c0909341SAndroid Build Coastguard Worker 2487*c0909341SAndroid Build Coastguard Worker20: // 2xN h 2488*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2489*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2490*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2491*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2492*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2493*c0909341SAndroid Build Coastguard Worker2: 2494*c0909341SAndroid Build Coastguard Worker vld1.32 {d4[]}, [\src], \s_strd 2495*c0909341SAndroid Build Coastguard Worker vld1.32 {d6[]}, [\sr2], \s_strd 2496*c0909341SAndroid Build Coastguard Worker vext.8 d5, d4, d4, #1 2497*c0909341SAndroid Build Coastguard Worker vext.8 d7, d6, d6, #1 2498*c0909341SAndroid Build Coastguard Worker vtrn.16 q2, q3 2499*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2500*c0909341SAndroid Build Coastguard Worker vmull.u8 q3, d4, d0 2501*c0909341SAndroid Build Coastguard Worker vmlal.u8 q3, d5, d1 2502*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d4, q3, #4 2503*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[0]}, [\dst, :16], \d_strd 2504*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[1]}, [\ds2, :16], \d_strd 2505*c0909341SAndroid Build Coastguard Worker bgt 2b 2506*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2507*c0909341SAndroid Build Coastguard Worker.endif 2508*c0909341SAndroid 
Build Coastguard Worker 2509*c0909341SAndroid Build Coastguard Worker40: // 4xN h 2510*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2511*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2512*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2513*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2514*c0909341SAndroid Build Coastguard Worker4: 2515*c0909341SAndroid Build Coastguard Worker vld1.8 {d4}, [\src], \s_strd 2516*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [\sr2], \s_strd 2517*c0909341SAndroid Build Coastguard Worker vext.8 d5, d4, d4, #1 2518*c0909341SAndroid Build Coastguard Worker vext.8 d7, d6, d6, #1 2519*c0909341SAndroid Build Coastguard Worker vtrn.32 q2, q3 2520*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2521*c0909341SAndroid Build Coastguard Worker vmull.u8 q3, d4, d0 2522*c0909341SAndroid Build Coastguard Worker vmlal.u8 q3, d5, d1 2523*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2524*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d4, q3, #4 2525*c0909341SAndroid Build Coastguard Worker vst1.32 {d4[0]}, [\dst, :32], \d_strd 2526*c0909341SAndroid Build Coastguard Worker vst1.32 {d4[1]}, [\ds2, :32], \d_strd 2527*c0909341SAndroid Build Coastguard Worker.else 2528*c0909341SAndroid Build Coastguard Worker vst1.16 {d6}, [\dst, :64], \d_strd 2529*c0909341SAndroid Build Coastguard Worker vst1.16 {d7}, [\ds2, :64], \d_strd 2530*c0909341SAndroid Build Coastguard Worker.endif 2531*c0909341SAndroid Build Coastguard Worker bgt 4b 2532*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2533*c0909341SAndroid Build Coastguard Worker 2534*c0909341SAndroid Build Coastguard Worker80: // 8xN h 2535*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2536*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2537*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2538*c0909341SAndroid Build Coastguard Worker lsl 
\s_strd, \s_strd, #1 2539*c0909341SAndroid Build Coastguard Worker8: 2540*c0909341SAndroid Build Coastguard Worker vld1.8 {q8}, [\src], \s_strd 2541*c0909341SAndroid Build Coastguard Worker vld1.8 {q10}, [\sr2], \s_strd 2542*c0909341SAndroid Build Coastguard Worker vext.8 q9, q8, q8, #1 2543*c0909341SAndroid Build Coastguard Worker vext.8 q11, q10, q10, #1 2544*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2545*c0909341SAndroid Build Coastguard Worker vmull.u8 q8, d16, d0 2546*c0909341SAndroid Build Coastguard Worker vmull.u8 q10, d20, d0 2547*c0909341SAndroid Build Coastguard Worker vmlal.u8 q8, d18, d1 2548*c0909341SAndroid Build Coastguard Worker vmlal.u8 q10, d22, d1 2549*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2550*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d16, q8, #4 2551*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d18, q10, #4 2552*c0909341SAndroid Build Coastguard Worker vst1.8 {d16}, [\dst, :64], \d_strd 2553*c0909341SAndroid Build Coastguard Worker vst1.8 {d18}, [\ds2, :64], \d_strd 2554*c0909341SAndroid Build Coastguard Worker.else 2555*c0909341SAndroid Build Coastguard Worker vst1.16 {q8}, [\dst, :128], \d_strd 2556*c0909341SAndroid Build Coastguard Worker vst1.16 {q10}, [\ds2, :128], \d_strd 2557*c0909341SAndroid Build Coastguard Worker.endif 2558*c0909341SAndroid Build Coastguard Worker bgt 8b 2559*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2560*c0909341SAndroid Build Coastguard Worker160: 2561*c0909341SAndroid Build Coastguard Worker320: 2562*c0909341SAndroid Build Coastguard Worker640: 2563*c0909341SAndroid Build Coastguard Worker1280: // 16xN, 32xN, ... 
h 2564*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2565*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2566*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2567*c0909341SAndroid Build Coastguard Worker 2568*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, \w 2569*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, #8 2570*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2571*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2572*c0909341SAndroid Build Coastguard Worker sub \d_strd, \d_strd, \w 2573*c0909341SAndroid Build Coastguard Worker.endif 2574*c0909341SAndroid Build Coastguard Worker161: 2575*c0909341SAndroid Build Coastguard Worker vld1.8 {d16}, [\src]! 2576*c0909341SAndroid Build Coastguard Worker vld1.8 {d22}, [\sr2]! 2577*c0909341SAndroid Build Coastguard Worker mov \mx, \w 2578*c0909341SAndroid Build Coastguard Worker 2579*c0909341SAndroid Build Coastguard Worker16: 2580*c0909341SAndroid Build Coastguard Worker vld1.8 {d17,d18}, [\src]! 2581*c0909341SAndroid Build Coastguard Worker vld1.8 {d23,d24}, [\sr2]! 
2582*c0909341SAndroid Build Coastguard Worker vext.8 q10, q8, q9, #1 2583*c0909341SAndroid Build Coastguard Worker vext.8 q13, q11, q12, #1 2584*c0909341SAndroid Build Coastguard Worker vmull.u8 q2, d16, d0 2585*c0909341SAndroid Build Coastguard Worker vmull.u8 q3, d17, d0 2586*c0909341SAndroid Build Coastguard Worker vmull.u8 q14, d22, d0 2587*c0909341SAndroid Build Coastguard Worker vmull.u8 q15, d23, d0 2588*c0909341SAndroid Build Coastguard Worker vmlal.u8 q2, d20, d1 2589*c0909341SAndroid Build Coastguard Worker vmlal.u8 q3, d21, d1 2590*c0909341SAndroid Build Coastguard Worker vmlal.u8 q14, d26, d1 2591*c0909341SAndroid Build Coastguard Worker vmlal.u8 q15, d27, d1 2592*c0909341SAndroid Build Coastguard Worker subs \mx, \mx, #16 2593*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2594*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d4, q2, #4 2595*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d5, q3, #4 2596*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d28, q14, #4 2597*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d29, q15, #4 2598*c0909341SAndroid Build Coastguard Worker vst1.8 {q2}, [\dst, :128]! 2599*c0909341SAndroid Build Coastguard Worker vst1.8 {q14}, [\ds2, :128]! 2600*c0909341SAndroid Build Coastguard Worker.else 2601*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [\dst, :128]! 2602*c0909341SAndroid Build Coastguard Worker vst1.16 {q14, q15}, [\ds2, :128]! 
2603*c0909341SAndroid Build Coastguard Worker.endif 2604*c0909341SAndroid Build Coastguard Worker ble 9f 2605*c0909341SAndroid Build Coastguard Worker 2606*c0909341SAndroid Build Coastguard Worker vmov d16, d18 2607*c0909341SAndroid Build Coastguard Worker vmov d22, d24 2608*c0909341SAndroid Build Coastguard Worker b 16b 2609*c0909341SAndroid Build Coastguard Worker 2610*c0909341SAndroid Build Coastguard Worker9: 2611*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd 2612*c0909341SAndroid Build Coastguard Worker add \ds2, \ds2, \d_strd 2613*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd 2614*c0909341SAndroid Build Coastguard Worker add \sr2, \sr2, \s_strd 2615*c0909341SAndroid Build Coastguard Worker 2616*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2617*c0909341SAndroid Build Coastguard Worker bgt 161b 2618*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2619*c0909341SAndroid Build Coastguard Worker 2620*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_v): 2621*c0909341SAndroid Build Coastguard Worker cmp \h, #4 2622*c0909341SAndroid Build Coastguard Worker adr r9, L(\type\()_bilin_v_tbl) 2623*c0909341SAndroid Build Coastguard Worker ldr r8, [r9, r8, lsl #2] 2624*c0909341SAndroid Build Coastguard Worker add r9, r9, r8 2625*c0909341SAndroid Build Coastguard Worker bx r9 2626*c0909341SAndroid Build Coastguard Worker 2627*c0909341SAndroid Build Coastguard Worker .align 2 2628*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_v_tbl): 2629*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2630*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2631*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2632*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2633*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_bilin_v_tbl) 
+ CONFIG_THUMB 2634*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2635*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB 2636*c0909341SAndroid Build Coastguard Worker 2637*c0909341SAndroid Build Coastguard Worker20: // 2xN v 2638*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2639*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2640*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2641*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2642*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2643*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2644*c0909341SAndroid Build Coastguard Worker 2645*c0909341SAndroid Build Coastguard Worker // 2x2 v 2646*c0909341SAndroid Build Coastguard Worker vld1.16 {d16[]}, [\src], \s_strd 2647*c0909341SAndroid Build Coastguard Worker bgt 24f 2648*c0909341SAndroid Build Coastguard Worker22: 2649*c0909341SAndroid Build Coastguard Worker vld1.16 {d17[]}, [\sr2], \s_strd 2650*c0909341SAndroid Build Coastguard Worker vld1.16 {d18[]}, [\src], \s_strd 2651*c0909341SAndroid Build Coastguard Worker vext.8 d16, d16, d17, #6 2652*c0909341SAndroid Build Coastguard Worker vext.8 d17, d17, d18, #6 2653*c0909341SAndroid Build Coastguard Worker vmull.u8 q2, d16, d2 2654*c0909341SAndroid Build Coastguard Worker vmlal.u8 q2, d17, d3 2655*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d4, q2, #4 2656*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[0]}, [\dst, :16] 2657*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[1]}, [\ds2, :16] 2658*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2659*c0909341SAndroid Build Coastguard Worker24: // 2x4, 2x6, 2x8, ... 
v 2660*c0909341SAndroid Build Coastguard Worker vld1.16 {d17[]}, [\sr2], \s_strd 2661*c0909341SAndroid Build Coastguard Worker vld1.16 {d18[]}, [\src], \s_strd 2662*c0909341SAndroid Build Coastguard Worker vld1.16 {d19[]}, [\sr2], \s_strd 2663*c0909341SAndroid Build Coastguard Worker vld1.16 {d20[]}, [\src], \s_strd 2664*c0909341SAndroid Build Coastguard Worker sub \h, \h, #4 2665*c0909341SAndroid Build Coastguard Worker vext.8 d16, d16, d17, #6 2666*c0909341SAndroid Build Coastguard Worker vext.8 d17, d17, d18, #6 2667*c0909341SAndroid Build Coastguard Worker vext.8 d18, d18, d19, #6 2668*c0909341SAndroid Build Coastguard Worker vext.8 d19, d19, d20, #6 2669*c0909341SAndroid Build Coastguard Worker vtrn.32 d16, d18 2670*c0909341SAndroid Build Coastguard Worker vtrn.32 d17, d19 2671*c0909341SAndroid Build Coastguard Worker vmull.u8 q2, d16, d2 2672*c0909341SAndroid Build Coastguard Worker vmlal.u8 q2, d17, d3 2673*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2674*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d4, q2, #4 2675*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[0]}, [\dst, :16], \d_strd 2676*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[1]}, [\ds2, :16], \d_strd 2677*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[2]}, [\dst, :16], \d_strd 2678*c0909341SAndroid Build Coastguard Worker vst1.16 {d4[3]}, [\ds2, :16], \d_strd 2679*c0909341SAndroid Build Coastguard Worker blt 0f 2680*c0909341SAndroid Build Coastguard Worker vmov d16, d20 2681*c0909341SAndroid Build Coastguard Worker beq 22b 2682*c0909341SAndroid Build Coastguard Worker b 24b 2683*c0909341SAndroid Build Coastguard Worker0: 2684*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2685*c0909341SAndroid Build Coastguard Worker.endif 2686*c0909341SAndroid Build Coastguard Worker 2687*c0909341SAndroid Build Coastguard Worker40: // 4xN v 2688*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2689*c0909341SAndroid Build Coastguard Worker add \sr2, 
\src, \s_strd 2690*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2691*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2692*c0909341SAndroid Build Coastguard Worker vld1.32 {d16[]}, [\src], \s_strd 2693*c0909341SAndroid Build Coastguard Worker4: 2694*c0909341SAndroid Build Coastguard Worker vld1.32 {d17[]}, [\sr2], \s_strd 2695*c0909341SAndroid Build Coastguard Worker vld1.32 {d18[]}, [\src], \s_strd 2696*c0909341SAndroid Build Coastguard Worker vext.8 d16, d16, d17, #4 2697*c0909341SAndroid Build Coastguard Worker vext.8 d17, d17, d18, #4 2698*c0909341SAndroid Build Coastguard Worker vmull.u8 q2, d16, d2 2699*c0909341SAndroid Build Coastguard Worker vmlal.u8 q2, d17, d3 2700*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2701*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2702*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d4, q2, #4 2703*c0909341SAndroid Build Coastguard Worker vst1.32 {d4[0]}, [\dst, :32], \d_strd 2704*c0909341SAndroid Build Coastguard Worker vst1.32 {d4[1]}, [\ds2, :32], \d_strd 2705*c0909341SAndroid Build Coastguard Worker.else 2706*c0909341SAndroid Build Coastguard Worker vst1.16 {d4}, [\dst, :64], \d_strd 2707*c0909341SAndroid Build Coastguard Worker vst1.16 {d5}, [\ds2, :64], \d_strd 2708*c0909341SAndroid Build Coastguard Worker.endif 2709*c0909341SAndroid Build Coastguard Worker ble 0f 2710*c0909341SAndroid Build Coastguard Worker vmov d16, d18 2711*c0909341SAndroid Build Coastguard Worker b 4b 2712*c0909341SAndroid Build Coastguard Worker0: 2713*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2714*c0909341SAndroid Build Coastguard Worker 2715*c0909341SAndroid Build Coastguard Worker80: // 8xN v 2716*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2717*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2718*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2719*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, 
#1 2720*c0909341SAndroid Build Coastguard Worker vld1.8 {d16}, [\src], \s_strd 2721*c0909341SAndroid Build Coastguard Worker8: 2722*c0909341SAndroid Build Coastguard Worker vld1.8 {d17}, [\sr2], \s_strd 2723*c0909341SAndroid Build Coastguard Worker vld1.8 {d18}, [\src], \s_strd 2724*c0909341SAndroid Build Coastguard Worker vmull.u8 q2, d16, d2 2725*c0909341SAndroid Build Coastguard Worker vmull.u8 q3, d17, d2 2726*c0909341SAndroid Build Coastguard Worker vmlal.u8 q2, d17, d3 2727*c0909341SAndroid Build Coastguard Worker vmlal.u8 q3, d18, d3 2728*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2729*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2730*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d4, q2, #4 2731*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d6, q3, #4 2732*c0909341SAndroid Build Coastguard Worker vst1.8 {d4}, [\dst, :64], \d_strd 2733*c0909341SAndroid Build Coastguard Worker vst1.8 {d6}, [\ds2, :64], \d_strd 2734*c0909341SAndroid Build Coastguard Worker.else 2735*c0909341SAndroid Build Coastguard Worker vst1.16 {q2}, [\dst, :128], \d_strd 2736*c0909341SAndroid Build Coastguard Worker vst1.16 {q3}, [\ds2, :128], \d_strd 2737*c0909341SAndroid Build Coastguard Worker.endif 2738*c0909341SAndroid Build Coastguard Worker ble 0f 2739*c0909341SAndroid Build Coastguard Worker vmov d16, d18 2740*c0909341SAndroid Build Coastguard Worker b 8b 2741*c0909341SAndroid Build Coastguard Worker0: 2742*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2743*c0909341SAndroid Build Coastguard Worker 2744*c0909341SAndroid Build Coastguard Worker160: // 16xN, 32xN, ... 
2745*c0909341SAndroid Build Coastguard Worker320: 2746*c0909341SAndroid Build Coastguard Worker640: 2747*c0909341SAndroid Build Coastguard Worker1280: 2748*c0909341SAndroid Build Coastguard Worker mov \my, \h 2749*c0909341SAndroid Build Coastguard Worker1: 2750*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2751*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2752*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2753*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2754*c0909341SAndroid Build Coastguard Worker 2755*c0909341SAndroid Build Coastguard Worker vld1.8 {q8}, [\src], \s_strd 2756*c0909341SAndroid Build Coastguard Worker2: 2757*c0909341SAndroid Build Coastguard Worker vld1.8 {q9}, [\sr2], \s_strd 2758*c0909341SAndroid Build Coastguard Worker vld1.8 {q10}, [\src], \s_strd 2759*c0909341SAndroid Build Coastguard Worker vmull.u8 q12, d16, d2 2760*c0909341SAndroid Build Coastguard Worker vmull.u8 q13, d17, d2 2761*c0909341SAndroid Build Coastguard Worker vmull.u8 q14, d18, d2 2762*c0909341SAndroid Build Coastguard Worker vmull.u8 q15, d19, d2 2763*c0909341SAndroid Build Coastguard Worker vmlal.u8 q12, d18, d3 2764*c0909341SAndroid Build Coastguard Worker vmlal.u8 q13, d19, d3 2765*c0909341SAndroid Build Coastguard Worker vmlal.u8 q14, d20, d3 2766*c0909341SAndroid Build Coastguard Worker vmlal.u8 q15, d21, d3 2767*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2768*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2769*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d24, q12, #4 2770*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d25, q13, #4 2771*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d28, q14, #4 2772*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d29, q15, #4 2773*c0909341SAndroid Build Coastguard Worker vst1.8 {q12}, [\dst, :128], \d_strd 2774*c0909341SAndroid Build Coastguard Worker vst1.8 {q14}, [\ds2, :128], \d_strd 2775*c0909341SAndroid 
Build Coastguard Worker.else 2776*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [\dst, :128], \d_strd 2777*c0909341SAndroid Build Coastguard Worker vst1.16 {q14, q15}, [\ds2, :128], \d_strd 2778*c0909341SAndroid Build Coastguard Worker.endif 2779*c0909341SAndroid Build Coastguard Worker ble 9f 2780*c0909341SAndroid Build Coastguard Worker vmov q8, q10 2781*c0909341SAndroid Build Coastguard Worker b 2b 2782*c0909341SAndroid Build Coastguard Worker9: 2783*c0909341SAndroid Build Coastguard Worker subs \w, \w, #16 2784*c0909341SAndroid Build Coastguard Worker ble 0f 2785*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2786*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2787*c0909341SAndroid Build Coastguard Worker mls \src, \s_strd, \my, \src 2788*c0909341SAndroid Build Coastguard Worker mls \dst, \d_strd, \my, \dst 2789*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2790*c0909341SAndroid Build Coastguard Worker mov \h, \my 2791*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 2792*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2793*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2794*c0909341SAndroid Build Coastguard Worker.else 2795*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #32 2796*c0909341SAndroid Build Coastguard Worker.endif 2797*c0909341SAndroid Build Coastguard Worker b 1b 2798*c0909341SAndroid Build Coastguard Worker0: 2799*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2800*c0909341SAndroid Build Coastguard Worker 2801*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_hv): 2802*c0909341SAndroid Build Coastguard Worker vmovl.u8 q2, d2 2803*c0909341SAndroid Build Coastguard Worker vmovl.u8 q3, d3 2804*c0909341SAndroid Build Coastguard Worker adr r9, L(\type\()_bilin_hv_tbl) 2805*c0909341SAndroid Build Coastguard Worker ldr r8, [r9, r8, lsl #2] 2806*c0909341SAndroid Build Coastguard Worker add r9, r9, r8 
2807*c0909341SAndroid Build Coastguard Worker bx r9 2808*c0909341SAndroid Build Coastguard Worker 2809*c0909341SAndroid Build Coastguard Worker .align 2 2810*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_hv_tbl): 2811*c0909341SAndroid Build Coastguard Worker .word 1280f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2812*c0909341SAndroid Build Coastguard Worker .word 640f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2813*c0909341SAndroid Build Coastguard Worker .word 320f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2814*c0909341SAndroid Build Coastguard Worker .word 160f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2815*c0909341SAndroid Build Coastguard Worker .word 80f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2816*c0909341SAndroid Build Coastguard Worker .word 40f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2817*c0909341SAndroid Build Coastguard Worker .word 20f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB 2818*c0909341SAndroid Build Coastguard Worker 2819*c0909341SAndroid Build Coastguard Worker20: // 2xN hv 2820*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2821*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2822*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2823*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2824*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2825*c0909341SAndroid Build Coastguard Worker 2826*c0909341SAndroid Build Coastguard Worker vld1.32 {d28[]}, [\src], \s_strd 2827*c0909341SAndroid Build Coastguard Worker vext.8 d29, d28, d28, #1 2828*c0909341SAndroid Build Coastguard Worker vmull.u8 q8, d28, d0 2829*c0909341SAndroid Build Coastguard Worker vmlal.u8 q8, d29, d1 2830*c0909341SAndroid Build Coastguard Worker 2831*c0909341SAndroid Build Coastguard Worker2: 2832*c0909341SAndroid Build Coastguard Worker vld1.32 {d28[]}, [\sr2], \s_strd 2833*c0909341SAndroid Build Coastguard Worker vld1.32 {d30[]}, [\src], \s_strd 2834*c0909341SAndroid Build Coastguard Worker vext.8 
d29, d28, d28, #1 2835*c0909341SAndroid Build Coastguard Worker vext.8 d31, d30, d30, #1 2836*c0909341SAndroid Build Coastguard Worker vtrn.16 d28, d30 2837*c0909341SAndroid Build Coastguard Worker vtrn.16 d29, d31 2838*c0909341SAndroid Build Coastguard Worker vmull.u8 q9, d28, d0 2839*c0909341SAndroid Build Coastguard Worker vmlal.u8 q9, d29, d1 2840*c0909341SAndroid Build Coastguard Worker 2841*c0909341SAndroid Build Coastguard Worker vtrn.32 d16, d18 2842*c0909341SAndroid Build Coastguard Worker 2843*c0909341SAndroid Build Coastguard Worker vmul.u16 d20, d16, d4 2844*c0909341SAndroid Build Coastguard Worker vmla.u16 d20, d19, d6 2845*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d20, q10, #8 2846*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2847*c0909341SAndroid Build Coastguard Worker vst1.16 {d20[0]}, [\dst, :16], \d_strd 2848*c0909341SAndroid Build Coastguard Worker vst1.16 {d20[1]}, [\ds2, :16], \d_strd 2849*c0909341SAndroid Build Coastguard Worker ble 0f 2850*c0909341SAndroid Build Coastguard Worker vtrn.32 d19, d16 2851*c0909341SAndroid Build Coastguard Worker b 2b 2852*c0909341SAndroid Build Coastguard Worker0: 2853*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2854*c0909341SAndroid Build Coastguard Worker.endif 2855*c0909341SAndroid Build Coastguard Worker 2856*c0909341SAndroid Build Coastguard Worker40: // 4xN hv 2857*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2858*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2859*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2860*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2861*c0909341SAndroid Build Coastguard Worker 2862*c0909341SAndroid Build Coastguard Worker vld1.8 {d28}, [\src], \s_strd 2863*c0909341SAndroid Build Coastguard Worker vext.8 d29, d28, d28, #1 2864*c0909341SAndroid Build Coastguard Worker vmull.u8 q8, d28, d0 2865*c0909341SAndroid Build Coastguard Worker vmlal.u8 q8, d29, d1 
2866*c0909341SAndroid Build Coastguard Worker 2867*c0909341SAndroid Build Coastguard Worker4: 2868*c0909341SAndroid Build Coastguard Worker vld1.8 {d28}, [\sr2], \s_strd 2869*c0909341SAndroid Build Coastguard Worker vld1.8 {d30}, [\src], \s_strd 2870*c0909341SAndroid Build Coastguard Worker vext.8 d29, d28, d28, #1 2871*c0909341SAndroid Build Coastguard Worker vext.8 d31, d30, d30, #1 2872*c0909341SAndroid Build Coastguard Worker vtrn.32 d28, d30 2873*c0909341SAndroid Build Coastguard Worker vtrn.32 d29, d31 2874*c0909341SAndroid Build Coastguard Worker vmull.u8 q9, d28, d0 2875*c0909341SAndroid Build Coastguard Worker vmlal.u8 q9, d29, d1 2876*c0909341SAndroid Build Coastguard Worker 2877*c0909341SAndroid Build Coastguard Worker vmov d17, d18 2878*c0909341SAndroid Build Coastguard Worker 2879*c0909341SAndroid Build Coastguard Worker vmul.u16 q10, q8, q2 2880*c0909341SAndroid Build Coastguard Worker vmla.u16 q10, q9, q3 2881*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2882*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2883*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d20, q10, #8 2884*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[0]}, [\dst, :32], \d_strd 2885*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[1]}, [\ds2, :32], \d_strd 2886*c0909341SAndroid Build Coastguard Worker.else 2887*c0909341SAndroid Build Coastguard Worker vrshr.u16 q10, q10, #4 2888*c0909341SAndroid Build Coastguard Worker vst1.16 {d20}, [\dst, :64], \d_strd 2889*c0909341SAndroid Build Coastguard Worker vst1.16 {d21}, [\ds2, :64], \d_strd 2890*c0909341SAndroid Build Coastguard Worker.endif 2891*c0909341SAndroid Build Coastguard Worker ble 0f 2892*c0909341SAndroid Build Coastguard Worker vmov d16, d19 2893*c0909341SAndroid Build Coastguard Worker b 4b 2894*c0909341SAndroid Build Coastguard Worker0: 2895*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 2896*c0909341SAndroid Build Coastguard Worker 2897*c0909341SAndroid Build Coastguard 
Worker80: // 8xN, 16xN, ... hv 2898*c0909341SAndroid Build Coastguard Worker160: 2899*c0909341SAndroid Build Coastguard Worker320: 2900*c0909341SAndroid Build Coastguard Worker640: 2901*c0909341SAndroid Build Coastguard Worker1280: 2902*c0909341SAndroid Build Coastguard Worker mov \my, \h 2903*c0909341SAndroid Build Coastguard Worker 2904*c0909341SAndroid Build Coastguard Worker1: 2905*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2906*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2907*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2908*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2909*c0909341SAndroid Build Coastguard Worker 2910*c0909341SAndroid Build Coastguard Worker vld1.8 {q12}, [\src], \s_strd 2911*c0909341SAndroid Build Coastguard Worker vext.8 q13, q12, q12, #1 2912*c0909341SAndroid Build Coastguard Worker vmull.u8 q8, d24, d0 2913*c0909341SAndroid Build Coastguard Worker vmlal.u8 q8, d26, d1 2914*c0909341SAndroid Build Coastguard Worker 2915*c0909341SAndroid Build Coastguard Worker2: 2916*c0909341SAndroid Build Coastguard Worker vld1.8 {q12}, [\sr2], \s_strd 2917*c0909341SAndroid Build Coastguard Worker vld1.8 {q14}, [\src], \s_strd 2918*c0909341SAndroid Build Coastguard Worker vext.8 q13, q12, q12, #1 2919*c0909341SAndroid Build Coastguard Worker vext.8 q15, q14, q14, #1 2920*c0909341SAndroid Build Coastguard Worker vmull.u8 q9, d24, d0 2921*c0909341SAndroid Build Coastguard Worker vmlal.u8 q9, d26, d1 2922*c0909341SAndroid Build Coastguard Worker vmull.u8 q10, d28, d0 2923*c0909341SAndroid Build Coastguard Worker vmlal.u8 q10, d30, d1 2924*c0909341SAndroid Build Coastguard Worker 2925*c0909341SAndroid Build Coastguard Worker vmul.u16 q8, q8, q2 2926*c0909341SAndroid Build Coastguard Worker vmla.u16 q8, q9, q3 2927*c0909341SAndroid Build Coastguard Worker vmul.u16 q9, q9, q2 2928*c0909341SAndroid Build Coastguard Worker vmla.u16 q9, q10, q3 2929*c0909341SAndroid Build 
Coastguard Worker subs \h, \h, #2 2930*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2931*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d16, q8, #8 2932*c0909341SAndroid Build Coastguard Worker vqrshrn.u16 d18, q9, #8 2933*c0909341SAndroid Build Coastguard Worker vst1.8 {d16}, [\dst, :64], \d_strd 2934*c0909341SAndroid Build Coastguard Worker vst1.8 {d18}, [\ds2, :64], \d_strd 2935*c0909341SAndroid Build Coastguard Worker.else 2936*c0909341SAndroid Build Coastguard Worker vrshr.u16 q8, q8, #4 2937*c0909341SAndroid Build Coastguard Worker vrshr.u16 q9, q9, #4 2938*c0909341SAndroid Build Coastguard Worker vst1.16 {q8}, [\dst, :128], \d_strd 2939*c0909341SAndroid Build Coastguard Worker vst1.16 {q9}, [\ds2, :128], \d_strd 2940*c0909341SAndroid Build Coastguard Worker.endif 2941*c0909341SAndroid Build Coastguard Worker ble 9f 2942*c0909341SAndroid Build Coastguard Worker vmov q8, q10 2943*c0909341SAndroid Build Coastguard Worker b 2b 2944*c0909341SAndroid Build Coastguard Worker9: 2945*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 2946*c0909341SAndroid Build Coastguard Worker ble 0f 2947*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2948*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2949*c0909341SAndroid Build Coastguard Worker mls \src, \s_strd, \my, \src 2950*c0909341SAndroid Build Coastguard Worker mls \dst, \d_strd, \my, \dst 2951*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2952*c0909341SAndroid Build Coastguard Worker mov \h, \my 2953*c0909341SAndroid Build Coastguard Worker add \src, \src, #8 2954*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2955*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #8 2956*c0909341SAndroid Build Coastguard Worker.else 2957*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2958*c0909341SAndroid Build Coastguard Worker.endif 2959*c0909341SAndroid Build Coastguard Worker b 1b 2960*c0909341SAndroid Build 
0:
        // Tail of a macro body that begins earlier in the file (presumably
        // filter_fn, which is instantiated right below).
        pop             {r4-r11,pc}
endfunc
.endm

filter_fn put,  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, 10
filter_fn prep, r0, r7, r1, r2, r3, r4, r5, r6, r8, r9, 6

// load_filter_ptr src
//   r12 = r11 + (\src >> 10) * 8
// Computes the address of the 8 byte filter row selected by the position
// in \src. r11 must already hold the filter table base. Clobbers r12.
.macro load_filter_ptr src
        asr             r12, \src, #10
        add             r12, r11, r12, lsl #3
.endm

// load_filter_coef dst, src, inc
// Advances the position \src by \inc and loads the 8 coefficients that r12
// points at into the d register \dst. r12 must have been set up by
// load_filter_ptr *before* this macro is used, since \src is modified here;
// the :64 hint asserts that r12 is 8 byte aligned.
.macro load_filter_coef dst, src, inc
        add             \src, \src, \inc
        vld1.8          {\dst}, [r12, :64]
.endm

// load_filter_row dst, src, inc
// load_filter_ptr followed by load_filter_coef: fetch the filter for the
// current position \src into \dst, then step \src by \inc. Clobbers r12.
.macro load_filter_row dst, src, inc
        load_filter_ptr \src
        load_filter_coef \dst, \src, \inc
.endm

// Horizontal 8-tap filtering of one row of 8 output pixels, each pixel using
// its own filter.
//
// In:
//   r2  = source pointer; 16 bytes are read, then r2 is advanced by r3
//   r3  = source stride
//   r5  = filter position for pixel 0
//   r7  = filter position step between neighbouring pixels
//   r8  = filter position step between rows
//   r11 = filter table base (see load_filter_ptr)
// Out:
//   q2  = the 8 filtered sums as 16 bit lanes (d4 = pixel 0-3,
//         d5 = pixel 4-7), not yet rounded or shifted
//   r2  = advanced to the next row
//   r5  = incoming r5 + r8 (the 8 per-pixel steps are undone first)
// Clobbers: r12, q0-q7. Note that q4-q7 are written, so the caller is
// responsible for preserving them.
//
// The pixels are biased by -128 (veor with 0x80) so that the signed
// vmull.s8 can be used; the resulting offset is left in the output and has
// to be compensated for by the caller.
// The interleaving of pointer calculations, loads and multiplies below is
// deliberate scheduling; keep the instruction order as is.
function warp_filter_horz_neon
        load_filter_ptr r5                  // filter 0
        vld1.16         {q7}, [r2], r3
        vmov.i8         q6,  #128

        load_filter_coef d0, r5,  r7        // filter 0
        load_filter_row d1,  r5,  r7        // filter 1
        load_filter_row d2,  r5,  r7        // filter 2
        load_filter_ptr r5                  // filter 3
        veor            q7,  q7,  q6        // subtract by 128 to allow using vmull
        load_filter_coef d3, r5,  r7        // filter 3
        vext.8          d12, d14, d15, #1   // filter 1 pixels
        vext.8          d13, d14, d15, #2   // filter 2 pixels
        load_filter_ptr r5                  // filter 4
        vmull.s8        q2,  d14, d0        // filter 0 output
        vmull.s8        q3,  d12, d1        // filter 1 output
        load_filter_coef d0, r5,  r7        // filter 4
        load_filter_ptr r5                  // filter 5
        vext.8          d12, d14, d15, #3   // filter 3 pixels
        vmull.s8        q4,  d13, d2        // filter 2 output
        vext.8          d13, d14, d15, #4   // filter 4 pixels
        vpadd.i16       d4,  d4,  d5        // pixel 0 (4x16)
        vpadd.i16       d5,  d6,  d7        // pixel 1 (4x16)
        load_filter_coef d1, r5,  r7        // filter 5
        load_filter_ptr r5                  // filter 6
        vmull.s8        q5,  d12, d3        // filter 3 output
        vext.8          d12, d14, d15, #5   // filter 5 pixels
        vmull.s8        q3,  d13, d0        // filter 4 output
        load_filter_coef d0, r5,  r7        // filter 6
        vext.8          d13, d14, d15, #6   // filter 6 pixels
        load_filter_ptr r5                  // filter 7
        vpadd.i16       d8,  d8,  d9        // pixel 2 (4x16)
        vpadd.i16       d9,  d10, d11       // pixel 3 (4x16)
        vmull.s8        q5,  d12, d1        // filter 5 output
        load_filter_coef d1, r5,  r7        // filter 7
        vext.8          d14, d14, d15, #7   // filter 7 pixels
        vpadd.i16       d6,  d6,  d7        // pixel 4 (4x16)
        vpadd.i16       d10, d10, d11       // pixel 5 (4x16)
        vmull.s8        q6,  d13, d0        // filter 6 output
        vmull.s8        q7,  d14, d1        // filter 7 output

        // Undo the 8 per-pixel steps that were added to r5 above.
        sub             r5,  r5,  r7, lsl #3

        vpadd.i16       d4,  d4,  d5        // pixel 0,1 (2x16)
        vpadd.i16       d5,  d8,  d9        // pixel 2,3 (2x16)
        vpadd.i16       d12, d12, d13       // pixel 6 (4x16)
        vpadd.i16       d14, d14, d15       // pixel 7 (4x16)
        vpadd.i16       d6,  d6,  d10       // pixel 4,5 (2x16)
        vpadd.i16       d10, d12, d14       // pixel 6,7 (2x16)
        vpadd.i16       d4,  d4,  d5        // pixel 0-3
        vpadd.i16       d5,  d6,  d10       // pixel 4-7

        // Move the start position on to the next row.
        add             r5,  r5,  r8

        bx              lr
endfunc

// void
// dav1d_warp_affine_8x8_8bpc_neon(
//         pixel *dst, const ptrdiff_t dst_stride,
//         const pixel *src, const ptrdiff_t src_stride,
//         const int16_t *const abcd, int mx, int my)
//
// The macro below emits warp_affine_8x8\t\()_8bpc_neon. It is instantiated
// twice: with an empty \t (8 bit pixel output) and with \t set (16 bit
// output, function name suffix "t").
//   \t     - empty, or a name suffix selecting the 16 bit output variant
//   \shift - rounding right shift applied to the vertical filter sums
//
// Register use after the setup code:
//   r0  = dst, advanced by r1 for every stored row
//   r1  = dst_stride (doubled when \t is set; presumably the stride is then
//         given in 16 bit elements - confirm against the callers)
//   r2  = src, moved back by 3 rows and 3 pixels; advanced by r3 per row
//   r3  = src_stride
//   r4  = abcd[3], vertical filter position step per output row
//   r5  = mx + 512, horizontal filter position
//   r6  = my + 512, vertical filter position
//   r7  = abcd[0], horizontal filter position step per pixel
//   r8  = abcd[1], horizontal filter position step per row
//   r9  = abcd[2], vertical filter position step per column
//   r10 = number of output rows left (starts at 8)
//   r11 = filter table base, X(mc_warp_filter) + 64*8
//   q8-q15 = sliding window with the 8 most recent horizontally filtered rows
// q4-q7 are callee saved, hence the vpush/vpop pair; the three stack
// arguments are found at sp + 100 after the two pushes (36 + 64 bytes).
.macro warp t, shift
function warp_affine_8x8\t\()_8bpc_neon, export=1
        push            {r4-r11,lr}
        vpush           {q4-q7}
        ldrd            r4,  r5,  [sp, #100]
        ldr             r6,  [sp, #108]
        ldrd            r8,  r9,  [r4]
        sxth            r7,  r8
        asr             r8,  r8,  #16
        asr             r4,  r9,  #16
        sxth            r9,  r9
        mov             r10, #8
        sub             r2,  r2,  r3, lsl #1
        sub             r2,  r2,  r3
        sub             r2,  r2,  #3
        movrel          r11, X(mc_warp_filter), 64*8
.ifnb \t
        lsl             r1,  r1,  #1
.endif
        add             r5,  r5,  #512
        add             r6,  r6,  #512

        // Prime the window with the first 7 rows; the 8th row is produced
        // at the top of the loop.
        bl              warp_filter_horz_neon
        vrshr.s16       q8,  q2,  #3
        bl              warp_filter_horz_neon
        vrshr.s16       q9,  q2,  #3
        bl              warp_filter_horz_neon
        vrshr.s16       q10, q2,  #3
        bl              warp_filter_horz_neon
        vrshr.s16       q11, q2,  #3
        bl              warp_filter_horz_neon
        vrshr.s16       q12, q2,  #3
        bl              warp_filter_horz_neon
        vrshr.s16       q13, q2,  #3
        bl              warp_filter_horz_neon
        vrshr.s16       q14, q2,  #3

1:
        bl              warp_filter_horz_neon
        vrshr.s16       q15, q2,  #3

        // Load one vertical filter per output column, stepping r6 by r9.
        load_filter_row d8,  r6,  r9
        load_filter_row d9,  r6,  r9
        load_filter_row d10, r6,  r9
        load_filter_row d11, r6,  r9
        load_filter_row d12, r6,  r9
        load_filter_row d13, r6,  r9
        load_filter_row d14, r6,  r9
        load_filter_row d15, r6,  r9
        // transpose_8x8b is defined outside this file section; presumably it
        // leaves tap N of all 8 columns in the N:th of d8-d15.
        transpose_8x8b  q4,  q5,  q6,  q7,  d8,  d9,  d10, d11, d12, d13, d14, d15
        vmovl.s8        q1,  d8
        vmovl.s8        q2,  d9
        vmovl.s8        q3,  d10
        vmovl.s8        q4,  d11
        vmovl.s8        q5,  d12
        vmovl.s8        q6,  d13

        // Undo the 8 per-column steps that were added to r6 above.
        sub             r6,  r6,  r9, lsl #3

        // This ordering of vmull/vmlal is highly beneficial for
        // Cortex A8/A9/A53 here, but harmful for Cortex A7.
        vmull.s16       q0,  d16, d2
        vmlal.s16       q0,  d18, d4
        vmlal.s16       q0,  d20, d6
        vmlal.s16       q0,  d22, d8
        vmlal.s16       q0,  d24, d10
        vmlal.s16       q0,  d26, d12
        vmull.s16       q1,  d17, d3
        vmlal.s16       q1,  d19, d5
        vmlal.s16       q1,  d21, d7
        vmlal.s16       q1,  d23, d9
        vmlal.s16       q1,  d25, d11
        vmlal.s16       q1,  d27, d13

        // Taps 6 and 7 are widened late, once q2/q3 are free again.
        vmovl.s8        q2,  d14
        vmovl.s8        q3,  d15

        vmlal.s16       q0,  d28, d4
        vmlal.s16       q0,  d30, d6
        vmlal.s16       q1,  d29, d5
        vmlal.s16       q1,  d31, d7

        // Offset added after the final shift; presumably this compensates
        // for the -128 pixel bias applied in warp_filter_horz_neon.
.ifb \t
        vmov.i16        q7,  #128
.else
        vmov.i16        q7,  #0x800
.endif

        // Slide the row window up by one row, interleaved with the
        // narrowing of the result.
        vmov            q8,  q9
        vmov            q9,  q10
        vqrshrn.s32     d0,  q0,  #\shift
        vmov            q10, q11
        vqrshrn.s32     d1,  q1,  #\shift
        vmov            q11, q12
        vadd.i16        q0,  q0,  q7
        vmov            q12, q13
.ifb \t
        vqmovun.s16     d0,  q0
.endif
        vmov            q13, q14
        vmov            q14, q15
        subs            r10, r10, #1
.ifnb \t
        vst1.16         {q0}, [r0, :128], r1
.else
        vst1.8          {d0}, [r0, :64], r1
.endif

        // Step the vertical filter position on to the next row. The flags
        // from the subs above are still intact here.
        add             r6,  r6,  r4
        bgt             1b

        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
.endm

warp  , 11
warp t, 7

// void dav1d_emu_edge_8bpc_neon(
//         const intptr_t bw, const intptr_t bh,
//         const intptr_t iw, const intptr_t ih,
//         const intptr_t x, const intptr_t y,
//         pixel *dst, const ptrdiff_t dst_stride,
//         const pixel *ref, const ptrdiff_t ref_stride)
//
// Fills a bw x bh block at dst from ref, replicating the nearest valid
// pixel wherever the block reaches outside the iw x ih image.
//
// Register use:
//   r0  = bw on entry; reused as the backup of dst once the extents are known
//   r1  = bh, then center_h (rows copied from ref)
//   r2  = iw, then center_w, and finally bw again for the bottom/top passes
//   r3  = ih, then scratch loop counter
//   r4  = x, then left_ext
//   r5  = y, then top_ext
//   r6  = dst (first moved down by top_ext rows)
//   r7  = dst_stride
//   r8  = ref (first moved to the clipped x/y position)
//   r9  = ref_stride
//   r10 = bottom_ext
//   r11 = right_ext
//   r12, lr = scratch
// x .. ref_stride are the stack arguments; they are at sp + 36 after the
// push of 9 registers.
//
// All copy loops work on whole 16 or 32 byte vectors and run until their
// counter is <= 0, so up to one vector minus one byte is read/written past
// the exact widths. The stores that carry a :128 hint additionally require
// a 16 byte aligned address.
function emu_edge_8bpc_neon, export=1
        push            {r4-r11,lr}
        ldrd            r4,  r5,  [sp, #36]
        ldrd            r6,  r7,  [sp, #44]
        ldrd            r8,  r9,  [sp, #52]

        // ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride)
        // ref += iclip(x, 0, iw - 1)
        sub             r12, r3,  #1           // ih - 1
        cmp             r5,  r3
        sub             lr,  r2,  #1           // iw - 1
        it              lt
        movlt           r12, r5                // min(y, ih - 1)
        cmp             r4,  r2
        bic             r12, r12, r12, asr #31 // max(min(y, ih - 1), 0)
        it              lt
        movlt           lr,  r4                // min(x, iw - 1)
        bic             lr,  lr,  lr,  asr #31 // max(min(x, iw - 1), 0)
        mla             r8,  r12, r9,  r8      // ref += iclip() * stride
        add             r8,  r8,  lr           // ref += iclip()

        // bottom_ext = iclip(y + bh - ih, 0, bh - 1)
        // top_ext = iclip(-y, 0, bh - 1)
        add             r10, r5,  r1           // y + bh
        neg             r5,  r5                // -y
        sub             r10, r10, r3           // y + bh - ih
        sub             r12, r1,  #1           // bh - 1
        cmp             r10, r1
        bic             r5,  r5,  r5,  asr #31 // max(-y, 0)
        it              ge
        movge           r10, r12               // min(y + bh - ih, bh-1)
        cmp             r5,  r1
        bic             r10, r10, r10, asr #31 // max(min(y + bh - ih, bh-1), 0)
        it              ge
        movge           r5,  r12               // min(max(-y, 0), bh-1)

        // right_ext = iclip(x + bw - iw, 0, bw - 1)
        // left_ext = iclip(-x, 0, bw - 1)
        add             r11, r4,  r0           // x + bw
        neg             r4,  r4                // -x
        sub             r11, r11, r2           // x + bw - iw
        sub             lr,  r0,  #1           // bw - 1
        cmp             r11, r0
        bic             r4,  r4,  r4,  asr #31 // max(-x, 0)
        it              ge
        movge           r11, lr                // min(x + bw - iw, bw-1)
        cmp             r4,  r0
        bic             r11, r11, r11, asr #31 // max(min(x + bw - iw, bw-1), 0)
        it              ge
        movge           r4,  lr                // min(max(-x, 0), bw - 1)

        // center_h = bh - top_ext - bottom_ext
        // dst += top_ext * PXSTRIDE(dst_stride)
        // center_w = bw - left_ext - right_ext
        sub             r1,  r1,  r5           // bh - top_ext
        mla             r6,  r5,  r7,  r6
        sub             r2,  r0,  r4           // bw - left_ext
        sub             r1,  r1,  r10          // center_h = bh - top_ext - bottom_ext
        sub             r2,  r2,  r11          // center_w = bw - left_ext - right_ext

        mov             r0,  r6                // backup of dst

// v_loop need_left, need_right
// Emits the loop over the center_h rows that are copied from ref. Per row:
// the first ref pixel is replicated over left_ext bytes (if \need_left),
// center_w bytes are copied, and the last center pixel is replicated over
// right_ext bytes (if \need_right). The center store only carries the :128
// alignment hint when there is no left extension, as the output otherwise
// starts at dst + left_ext.
// Uses r1 as row counter and advances r6/r8 by one row per iteration;
// clobbers r3, r12, lr, q0, q1.
.macro v_loop need_left, need_right
0:
.if \need_left
        vld1.8          {d0[], d1[]}, [r8]
        mov             r12, r6                // out = dst
        mov             r3,  r4
1:
        subs            r3,  r3,  #16
        vst1.8          {q0}, [r12, :128]!
        bgt             1b
.endif
        mov             lr,  r8
        add             r12, r6,  r4           // out = dst + left_ext
        mov             r3,  r2
1:
        vld1.8          {q0, q1}, [lr]!
        subs            r3,  r3,  #32
.if \need_left
        vst1.8          {q0, q1}, [r12]!
.else
        vst1.8          {q0, q1}, [r12, :128]!
.endif
        bgt             1b
.if \need_right
        add             r3,  r8,  r2           // in + center_w
        sub             r3,  r3,  #1           // in + center_w - 1
        add             r12, r6,  r4           // dst + left_ext
        vld1.8          {d0[], d1[]}, [r3]
        add             r12, r12, r2           // out = dst + left_ext + center_w
        mov             r3,  r11
1:
        subs            r3,  r3,  #16
        vst1.8          {q0}, [r12]!
        bgt             1b
.endif

        subs            r1,  r1,  #1           // center_h--
        add             r6,  r6,  r7
        add             r8,  r8,  r9
        bgt             0b
.endm

        // Pick one of the four specialized row loops.
        cmp             r4,  #0
        beq             2f
        // need_left
        cmp             r11, #0
        beq             3f
        // need_left + need_right
        v_loop          1,   1
        b               5f

2:
        // !need_left
        cmp             r11, #0
        beq             4f
        // !need_left + need_right
        v_loop          0,   1
        b               5f

3:
        // need_left + !need_right
        v_loop          1,   0
        b               5f

4:
        // !need_left + !need_right
        v_loop          0,   0

5:
        // The flags set by this cmp are consumed by the beq below; the two
        // adds in between do not update the flags.
        cmp             r10, #0
        // Storing the original dst in r0 overwrote bw, recalculate it here
        add             r2,  r2,  r4           // center_w + left_ext
        add             r2,  r2,  r11          // bw = center_w + left_ext + right_ext

        beq             3f
        // need_bottom
        // r6 points just below the last center row here; replicate that row
        // downwards, one 32 byte wide column strip at a time.
        sub             r8,  r6,  r7           // ref = dst - stride
        mov             r4,  r2
1:
        vld1.8          {q0, q1}, [r8, :128]!
        mov             r3,  r10
2:
        subs            r3,  r3,  #1
        vst1.8          {q0, q1}, [r6, :128], r7
        bgt             2b
        mls             r6,  r7,  r10, r6      // dst -= bottom_ext * stride
        subs            r4,  r4,  #32          // bw -= 32
        add             r6,  r6,  #32          // dst += 32
        bgt             1b

3:
        cmp             r5,  #0
        beq             3f
        // need_top
        // r0 still points at the first center row; replicate it upwards,
        // again in 32 byte wide column strips.
        mls             r6,  r7,  r5,  r0      // dst = stored_dst - top_ext * stride
1:
        vld1.8          {q0, q1}, [r0, :128]!
        mov             r3,  r5
2:
        subs            r3,  r3,  #1
        vst1.8          {q0, q1}, [r6, :128], r7
        bgt             2b
        mls             r6,  r7,  r5,  r6      // dst -= top_ext * stride
        subs            r2,  r2,  #32          // bw -= 32
        add             r6,  r6,  #32          // dst += 32
        bgt             1b

3:
        pop             {r4-r11,pc}
endfunc