/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2019, B Krishnan Iyer
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// void ipred_dc_128_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int a,
//                              const int max_width, const int max_height,
//                              const int bitdepth_max);
//
// Fills the width x height block at dst with the constant
// (bitdepth_max + 1) >> 1. topleft is not read; r2 is reused as the jump
// table base.
//
// Register use after the prologue:
//   r0  = write pointer for rows 0, 2, 4, ...
//   r12 = write pointer for rows 1, 3, 5, ... (dst + stride)
//   r1  = 2 * stride, so each pointer skips the row owned by the other one
//   r4  = remaining row count
//   q0 (and q1 for widths >= 16) = fill value in every 16-bit lane
function ipred_dc_128_16bpc_neon, export=1
        push            {r4, lr}
        // Stack arguments sit 8 bytes higher because of the push above:
        // height is the first stack argument, bitdepth_max the fifth.
        ldr             r4,  [sp, #8]
        ldr             r12, [sp, #24]
        // clz(width) - 25 maps width 64/32/16/8/4 to table index 0..4.
        clz             r3,  r3
        adr             r2,  L(ipred_dc_128_tbl)
        sub             r3,  r3,  #25
        vdup.16         q0,  r12
        ldr             r3,  [r2,  r3,  lsl #2]
        add             r12, r0,  r1
        // Rounding shift: q0 = (bitdepth_max + 1) >> 1 in every lane.
        vrshr.u16       q0,  q0,  #1
        add             r2,  r2,  r3
        lsl             r1,  r1,  #1
        bx              r2

        .align 2
        // Offsets of the per-width entry points relative to the table itself.
        // NOTE(review): CONFIG_THUMB is defined outside this file; presumably
        // it sets the Thumb bit of the bx target in Thumb builds - confirm.
L(ipred_dc_128_tbl):
        .word 640f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 4f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
4:      // width 4: 8 bytes per row, 4 rows per iteration
        vst1.16         {d0},  [r0,  :64],  r1
        vst1.16         {d0},  [r12, :64],  r1
        subs            r4,  r4,  #4
        vst1.16         {d0},  [r0,  :64],  r1
        vst1.16         {d0},  [r12, :64],  r1
        bgt             4b
        pop             {r4, pc}
8:      // width 8: 16 bytes per row, 4 rows per iteration
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        bgt             8b
        pop             {r4, pc}
160:    // width 16: duplicate the fill value so one store covers 32 bytes
        vmov            q1,  q0
16:     // 4 rows per iteration
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:    // width 32: two 32-byte stores per row
        vmov            q1,  q0
        // The first store of each row post-increments by 32 bytes, so the
        // row step applied by the second store is reduced by the same amount.
        sub             r1,  r1,  #32
32:     // 4 rows per iteration
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:    // width 64: four 32-byte stores per row
        vmov            q1,  q0
        // Three post-incrementing stores advance 96 bytes within a row.
        sub             r1,  r1,  #96
64:     // 2 rows per iteration (one through r0, one through r12)
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        subs            r4,  r4,  #2
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc

// void ipred_v_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const topleft,
//                         const int width, const int height, const int a,
//                         const int max_width, const int max_height);
//
// Copies the `width` 16-bit pixels that start one pixel (2 bytes) past
// topleft into every one of the `height` rows at dst.
//
// Register use after the prologue:
//   r0 / r12 = write pointers for even / odd rows, r1 = 2 * stride
//   lr       = remaining row count
//   r2       = source pointer (topleft + 2 bytes)
//   r4       = jump table base, then the branch target
function ipred_v_16bpc_neon, export=1
        push            {r4, lr}
        // height is the first stack argument; +8 accounts for the push above.
        ldr             lr,  [sp, #8]
        // clz(width) - 25 maps width 64/32/16/8/4 to table index 0..4.
        clz             r3,  r3
        adr             r4,  L(ipred_v_tbl)
        sub             r3,  r3,  #25
        ldr             r3,  [r4,  r3,  lsl #2]
        // Skip the pixel at topleft itself.
        add             r2,  r2,  #2
        add             r4,  r4,  r3
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r4

        .align 2
L(ipred_v_tbl):
        .word 640f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 80f  - L(ipred_v_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_v_tbl) + CONFIG_THUMB

40:     // width 4: load the source row once, then store 4 rows per iteration
        vld1.16         {d0},  [r2]
4:
        vst1.16         {d0},  [r0,  :64],  r1
        vst1.16         {d0},  [r12, :64],  r1
        subs            lr,  lr,  #4
        vst1.16         {d0},  [r0,  :64],  r1
        vst1.16         {d0},  [r12, :64],  r1
        bgt             4b
        pop             {r4, pc}
80:     // width 8
        vld1.16         {q0},  [r2]
8:
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        bgt             8b
        pop             {r4, pc}
160:    // width 16
        vld1.16         {q0,  q1},  [r2]
16:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:    // width 32: source row held in q0-q3
        vld1.16         {q0,  q1},  [r2]!
        // One 32-byte post-incrementing store per row precedes the strided one.
        sub             r1,  r1,  #32
        vld1.16         {q2,  q3},  [r2]
32:     // 4 rows per iteration
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.16         {d4,  d5,  d6,  d7},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.16         {d4,  d5,  d6,  d7},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:    // width 64: source row held in q0-q3 and q8-q11
        vld1.16         {q0,  q1},  [r2]!
        // Three 32-byte post-incrementing stores per row precede the strided one.
        sub             r1,  r1,  #96
        vld1.16         {q2,  q3},  [r2]!
        vld1.16         {q8,  q9},  [r2]!
        vld1.16         {q10, q11}, [r2]!
64:     // 2 rows per iteration
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r0,  :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r12, :128]!
        subs            lr,  lr,  #2
        vst1.16         {d16, d17, d18, d19}, [r0,  :128]!
        vst1.16         {d16, d17, d18, d19}, [r12, :128]!
        vst1.16         {d20, d21, d22, d23}, [r0,  :128], r1
        vst1.16         {d20, d21, d22, d23}, [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc

// void ipred_h_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const topleft,
//                         const int width, const int height, const int a,
//                         const int max_width, const int max_height);
//
// Fills each output row with a single replicated pixel: row y is filled
// with the 16-bit pixel stored (1 + y) pixels before topleft.
//
// Register use after the prologue:
//   r0 / r12 = write pointers for even / odd rows, r1 = 2 * stride
//   r4       = remaining row count
//   r2       = source pointer, walking towards lower addresses
//   lr       = source step in bytes (negative)
//   r5       = jump table base, then the branch target
function ipred_h_16bpc_neon, export=1
        push            {r4-r5, lr}
        // height is the first stack argument; +12 accounts for the push above.
        ldr             r4,  [sp, #12]
        // clz(width) - 25 maps width 64/32/16/8/4 to table index 0..4.
        clz             r3,  r3
        adr             r5,  L(ipred_h_tbl)
        sub             r3,  r3,  #25
        ldr             r3,  [r5,  r3,  lsl #2]
        // Start at the pixel immediately before topleft, stepping back one
        // pixel (2 bytes) per load.
        sub             r2,  r2,  #2
        mov             lr,  #-2
        add             r5,  r5,  r3
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r5

        .align 2
L(ipred_h_tbl):
        .word 640f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_h_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_h_tbl) + CONFIG_THUMB
40:     // width 4: fetch four source pixels per load instead of one, so the
        // pointer starts 4 pixels before topleft and steps back 8 bytes.
        sub             r2,  r2,  #6
        mov             lr,  #-8
4:
        // Each of d0..d3 receives one of four consecutive pixels, replicated
        // to all lanes. d3 holds the highest address, i.e. the first row.
        vld4.16         {d0[], d1[], d2[], d3[]},  [r2], lr
        vst1.16         {d3},  [r0,  :64],  r1
        vst1.16         {d2},  [r12, :64],  r1
        subs            r4,  r4,  #4
        vst1.16         {d1},  [r0,  :64],  r1
        vst1.16         {d0},  [r12, :64],  r1
        bgt             4b
        pop             {r4-r5, pc}
8:      // width 8: one replicated q register per row, 4 rows per iteration
        vld1.16         {d0[], d1[]},  [r2], lr
        subs            r4,  r4,  #4
        vld1.16         {d2[], d3[]},  [r2], lr
        vst1.16         {q0},  [r0,  :128], r1
        vld1.16         {d4[], d5[]},  [r2], lr
        vst1.16         {q1},  [r12, :128], r1
        vld1.16         {d6[], d7[]},  [r2], lr
        vst1.16         {q2},  [r0,  :128], r1
        vst1.16         {q3},  [r12, :128], r1
        bgt             8b
        pop             {r4-r5, pc}
160:    // width 16: two 16-byte stores per row, the first post-incrementing
        sub             r1,  r1,  #16
16:     // 4 rows per iteration: q0/q2 go through r0, q1/q3 through r12
        vld1.16         {d0[], d1[]},  [r2], lr
        subs            r4,  r4,  #4
        vld1.16         {d2[], d3[]},  [r2], lr
        vst1.16         {q0},  [r0,  :128]!
        vld1.16         {d4[], d5[]},  [r2], lr
        vst1.16         {q1},  [r12, :128]!
        vld1.16         {d6[], d7[]},  [r2], lr
        vst1.16         {q0},  [r0,  :128], r1
        vst1.16         {q1},  [r12, :128], r1
        vst1.16         {q2},  [r0,  :128]!
        vst1.16         {q3},  [r12, :128]!
        vst1.16         {q2},  [r0,  :128], r1
        vst1.16         {q3},  [r12, :128], r1
        bgt             16b
        pop             {r4-r5, pc}
320:    // width 32: four 16-byte stores per row, three of them post-incrementing
        sub             r1,  r1,  #48
32:     // 4 rows per iteration: q0/q2 go through r0, q1/q3 through r12
        vld1.16         {d0[], d1[]},  [r2], lr
        subs            r4,  r4,  #4
        vld1.16         {d2[], d3[]},  [r2], lr
        vst1.16         {q0},  [r0,  :128]!
        vld1.16         {d4[], d5[]},  [r2], lr
        vst1.16         {q1},  [r12, :128]!
        vld1.16         {d6[], d7[]},  [r2], lr
        vst1.16         {q0},  [r0,  :128]!
        vst1.16         {q1},  [r12, :128]!
        vst1.16         {q0},  [r0,  :128]!
        vst1.16         {q1},  [r12, :128]!
        vst1.16         {q0},  [r0,  :128], r1
        vst1.16         {q1},  [r12, :128], r1
        vst1.16         {q2},  [r0,  :128]!
        vst1.16         {q3},  [r12, :128]!
        vst1.16         {q2},  [r0,  :128]!
        vst1.16         {q3},  [r12, :128]!
        vst1.16         {q2},  [r0,  :128]!
        vst1.16         {q3},  [r12, :128]!
        vst1.16         {q2},  [r0,  :128], r1
        vst1.16         {q3},  [r12, :128], r1
        bgt             32b
        pop             {r4-r5, pc}
640:    // width 64: four 32-byte stores per row, three of them post-incrementing
        sub             r1,  r1,  #96
64:     // 2 rows per iteration: q0/q1 go through r0, q2/q3 through r12
        vld1.16         {d0[], d1[]},  [r2], lr
        subs            r4,  r4,  #2
        vld1.16         {d4[], d5[]},  [r2], lr
        // Duplicate each row value so a single store writes 32 bytes.
        vmov            q1,  q0
        vmov            q3,  q2
        vst1.16         {q0,  q1},  [r0,  :128]!
        vst1.16         {q2,  q3},  [r12, :128]!
        vst1.16         {q0,  q1},  [r0,  :128]!
        vst1.16         {q2,  q3},  [r12, :128]!
        vst1.16         {q0,  q1},  [r0,  :128]!
        vst1.16         {q2,  q3},  [r12, :128]!
        vst1.16         {q0,  q1},  [r0,  :128], r1
        vst1.16         {q2,  q3},  [r12, :128], r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc

// void ipred_dc_top_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int a,
//                              const int max_width, const int max_height);
//
// Fills the block at dst with the rounded average of the `width` 16-bit
// pixels that start one pixel (2 bytes) past topleft:
//   (sum + width / 2) >> log2(width)
//
// Register use after the prologue:
//   r0 / r12 = write pointers for even / odd rows, r1 = 2 * stride
//   r4       = remaining row count
//   r2       = source pointer (topleft + 2 bytes)
//   r5       = jump table base, then the branch target
function ipred_dc_top_16bpc_neon, export=1
        push            {r4-r5, lr}
        // height is the first stack argument; +12 accounts for the push above.
        ldr             r4,  [sp, #12]
        // clz(width) - 25 maps width 64/32/16/8/4 to table index 0..4.
        clz             r3,  r3
        adr             r5,  L(ipred_dc_top_tbl)
        sub             r3,  r3,  #25
        ldr             r3,  [r5,  r3,  lsl #2]
        // Skip the pixel at topleft itself.
        add             r2,  r2,  #2
        add             r5,  r5,  r3
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r5

        .align 2
L(ipred_dc_top_tbl):
        .word 640f - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 80f  - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_dc_top_tbl) + CONFIG_THUMB

40:     // width 4: two pairwise adds leave the sum of all 4 pixels in lane 0
        vld1.16         {d0},  [r2]
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #2
        vdup.16         d0,  d0[0]
4:
        vst1.16         {d0},  [r0,  :64],  r1
        vst1.16         {d0},  [r12, :64],  r1
        subs            r4,  r4,  #4
        vst1.16         {d0},  [r0,  :64],  r1
        vst1.16         {d0},  [r12, :64],  r1
        bgt             4b
        pop             {r4-r5, pc}
80:     // width 8
        vld1.16         {d0,  d1},  [r2]
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #3
        vdup.16         q0,  d0[0]
8:
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        bgt             8b
        pop             {r4-r5, pc}
160:    // width 16
        vld1.16         {d0,  d1,  d2,  d3},  [r2]
        vadd.i16        q0,  q0,  q1
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        // The result goes to d4 so that q0 and q1 can both be filled from it.
        vrshr.u16       d4,  d0,  #4
        vdup.16         q0,  d4[0]
        vdup.16         q1,  d4[0]
16:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             16b
        pop             {r4-r5, pc}
320:    // width 32
        vld1.16         {d0,  d1,  d2,  d3},  [r2]!
        vld1.16         {d4,  d5,  d6,  d7},  [r2]
        vadd.i16        q0,  q0,  q1
        vadd.i16        q2,  q2,  q3
        vadd.i16        q0,  q0,  q2
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        // The last pairwise add widens to 32 bits; the total then sits in
        // the low 32-bit lane of d0.
        vpaddl.u16      d0,  d0
        // Rounding narrowing shift; only lane 0 of d18 is used afterwards.
        vrshrn.i32      d18, q0,  #5
        vdup.16         q0,  d18[0]
        vdup.16         q1,  d18[0]
        // One 32-byte post-incrementing store per row precedes the strided one.
        sub             r1,  r1,  #32
32:     // 4 rows per iteration
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4-r5, pc}
640:    // width 64: loads are interleaved with the first level of additions
        vld1.16         {d0,  d1,  d2,  d3},  [r2]!
        vld1.16         {d4,  d5,  d6,  d7},  [r2]!
        vadd.i16        q0,  q0,  q1
        vld1.16         {d16, d17, d18, d19}, [r2]!
        vadd.i16        q2,  q2,  q3
        vld1.16         {d20, d21, d22, d23}, [r2]
        vadd.i16        q8,  q8,  q9
        vadd.i16        q10, q10, q11
        vadd.i16        q0,  q0,  q2
        vadd.i16        q8,  q8,  q10
        vadd.i16        q0,  q0,  q8
        vadd.i16        d0,  d0,  d1
        // Widen to 32 bits for the last two reduction steps.
        vpaddl.u16      d0,  d0
        vpadd.i32       d0,  d0,  d0
        // Rounding narrowing shift; only lane 0 of d18 is used afterwards.
        vrshrn.i32      d18, q0,  #6
        vdup.16         q0,  d18[0]
        vdup.16         q1,  d18[0]
        // Three 32-byte post-incrementing stores per row precede the strided one.
        sub             r1,  r1,  #96
64:     // 2 rows per iteration
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        subs            r4,  r4,  #2
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc

// void ipred_dc_left_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height, const int a,
//                               const int max_width, const int max_height);
function ipred_dc_left_16bpc_neon, export=1
        push            {r4-r5, lr}
        ldr             r4,  [sp, #12]
        sub             r2,  r2,  r4,  lsl #1
        clz             r3,  r3
        clz             lr,  r4
        sub             lr,  lr,  #25
        adr             r5,  L(ipred_dc_left_tbl)
        sub             r3,  r3,  #20
        ldr             r3,  [r5,  r3,  lsl #2]
        ldr             lr,  [r5,  lr,  lsl #2]
        add             r3,  r5,  r3
        add             r5,  r5,  lr
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r5

        .align 2
L(ipred_dc_left_tbl):
        .word
L(ipred_dc_left_h64) - L(ipred_dc_left_tbl) + CONFIG_THUMB 474*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h32) - L(ipred_dc_left_tbl) + CONFIG_THUMB 475*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h16) - L(ipred_dc_left_tbl) + CONFIG_THUMB 476*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h8) - L(ipred_dc_left_tbl) + CONFIG_THUMB 477*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h4) - L(ipred_dc_left_tbl) + CONFIG_THUMB 478*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w64) - L(ipred_dc_left_tbl) + CONFIG_THUMB 479*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w32) - L(ipred_dc_left_tbl) + CONFIG_THUMB 480*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w16) - L(ipred_dc_left_tbl) + CONFIG_THUMB 481*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w8) - L(ipred_dc_left_tbl) + CONFIG_THUMB 482*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w4) - L(ipred_dc_left_tbl) + CONFIG_THUMB 483*c0909341SAndroid Build Coastguard Worker 484*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h4): 485*c0909341SAndroid Build Coastguard Worker vld1.16 {d0}, [r2, :64] 486*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 487*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 488*c0909341SAndroid Build Coastguard Worker vrshr.u16 d0, d0, #2 489*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 490*c0909341SAndroid Build Coastguard Worker bx r3 491*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w4): 492*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r0, :64], r1 493*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r12, :64], r1 494*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 495*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r0, :64], r1 496*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r12, :64], r1 497*c0909341SAndroid Build Coastguard 
Worker bgt L(ipred_dc_left_w4) 498*c0909341SAndroid Build Coastguard Worker pop {r4-r5, pc} 499*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h8): 500*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1}, [r2, :128] 501*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 502*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 503*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 504*c0909341SAndroid Build Coastguard Worker vrshr.u16 d0, d0, #3 505*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 506*c0909341SAndroid Build Coastguard Worker bx r3 507*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w8): 508*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r0, :128], r1 509*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r12, :128], r1 510*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 511*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r0, :128], r1 512*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r12, :128], r1 513*c0909341SAndroid Build Coastguard Worker bgt L(ipred_dc_left_w8) 514*c0909341SAndroid Build Coastguard Worker pop {r4-r5, pc} 515*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h16): 516*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128] 517*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 518*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 519*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 520*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 521*c0909341SAndroid Build Coastguard Worker vrshr.u16 d0, d0, #4 522*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 523*c0909341SAndroid Build Coastguard Worker bx r3 524*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w16): 525*c0909341SAndroid Build Coastguard Worker vmov q1, q0 526*c0909341SAndroid Build Coastguard Worker1: 527*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, 
d2, d3}, [r0, :128], r1 528*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 529*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 530*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 531*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 532*c0909341SAndroid Build Coastguard Worker bgt 1b 533*c0909341SAndroid Build Coastguard Worker pop {r4-r5, pc} 534*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h32): 535*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! 536*c0909341SAndroid Build Coastguard Worker vld1.16 {d4, d5, d6, d7}, [r2, :128] 537*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 538*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 539*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q2 540*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 541*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 542*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 543*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d0, q0, #5 544*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 545*c0909341SAndroid Build Coastguard Worker bx r3 546*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w32): 547*c0909341SAndroid Build Coastguard Worker sub r1, r1, #32 548*c0909341SAndroid Build Coastguard Worker vmov q1, q0 549*c0909341SAndroid Build Coastguard Worker1: 550*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 551*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 552*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 553*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 554*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 555*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 
556*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 557*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 558*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 559*c0909341SAndroid Build Coastguard Worker bgt 1b 560*c0909341SAndroid Build Coastguard Worker pop {r4-r5, pc} 561*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h64): 562*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! 563*c0909341SAndroid Build Coastguard Worker vld1.16 {d4, d5, d6, d7}, [r2, :128]! 564*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 565*c0909341SAndroid Build Coastguard Worker vld1.16 {d16, d17, d18, d19}, [r2, :128]! 566*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 567*c0909341SAndroid Build Coastguard Worker vld1.16 {d20, d21, d22, d23}, [r2, :128] 568*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q9 569*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q11 570*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q2 571*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q10 572*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q8 573*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 574*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 575*c0909341SAndroid Build Coastguard Worker vpadd.i32 d0, d0, d0 576*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d0, q0, #6 577*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 578*c0909341SAndroid Build Coastguard Worker bx r3 579*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w64): 580*c0909341SAndroid Build Coastguard Worker sub r1, r1, #96 581*c0909341SAndroid Build Coastguard Worker vmov q1, q0 582*c0909341SAndroid Build Coastguard Worker1: 583*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 
584*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 585*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 586*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 587*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 588*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 589*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 590*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 591*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 592*c0909341SAndroid Build Coastguard Worker bgt 1b 593*c0909341SAndroid Build Coastguard Worker pop {r4-r5, pc} 594*c0909341SAndroid Build Coastguard Workerendfunc 595*c0909341SAndroid Build Coastguard Worker 596*c0909341SAndroid Build Coastguard Worker// void ipred_dc_16bpc_neon(pixel *dst, const ptrdiff_t stride, 597*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 598*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 599*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 600*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_16bpc_neon, export=1 601*c0909341SAndroid Build Coastguard Worker push {r4-r6, lr} 602*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #16] 603*c0909341SAndroid Build Coastguard Worker sub r2, r2, r4, lsl #1 604*c0909341SAndroid Build Coastguard Worker add lr, r3, r4 // width + height 605*c0909341SAndroid Build Coastguard Worker clz r3, r3 606*c0909341SAndroid Build Coastguard Worker clz r12, r4 607*c0909341SAndroid Build Coastguard Worker vdup.32 q15, lr // width + height 608*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_dc_tbl) 609*c0909341SAndroid Build Coastguard Worker rbit lr, lr // rbit(width + height) 610*c0909341SAndroid Build Coastguard Worker sub r3, r3, #20 // 25 
leading bits, minus table offset 5 611*c0909341SAndroid Build Coastguard Worker sub r12, r12, #25 612*c0909341SAndroid Build Coastguard Worker clz lr, lr // ctz(width + height) 613*c0909341SAndroid Build Coastguard Worker ldr r3, [r5, r3, lsl #2] 614*c0909341SAndroid Build Coastguard Worker ldr r12, [r5, r12, lsl #2] 615*c0909341SAndroid Build Coastguard Worker neg lr, lr // -ctz(width + height) 616*c0909341SAndroid Build Coastguard Worker add r3, r5, r3 617*c0909341SAndroid Build Coastguard Worker add r5, r5, r12 618*c0909341SAndroid Build Coastguard Worker vshr.u32 q15, q15, #1 // (width + height) >> 1 619*c0909341SAndroid Build Coastguard Worker vdup.32 q14, lr // -ctz(width + height) 620*c0909341SAndroid Build Coastguard Worker add r12, r0, r1 621*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 622*c0909341SAndroid Build Coastguard Worker bx r5 623*c0909341SAndroid Build Coastguard Worker 624*c0909341SAndroid Build Coastguard Worker .align 2 625*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_tbl): 626*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h64) - L(ipred_dc_tbl) + CONFIG_THUMB 627*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h32) - L(ipred_dc_tbl) + CONFIG_THUMB 628*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h16) - L(ipred_dc_tbl) + CONFIG_THUMB 629*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h8) - L(ipred_dc_tbl) + CONFIG_THUMB 630*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h4) - L(ipred_dc_tbl) + CONFIG_THUMB 631*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w64) - L(ipred_dc_tbl) + CONFIG_THUMB 632*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w32) - L(ipred_dc_tbl) + CONFIG_THUMB 633*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w16) - L(ipred_dc_tbl) + CONFIG_THUMB 634*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w8) - L(ipred_dc_tbl) + CONFIG_THUMB 635*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w4) - 
L(ipred_dc_tbl) + CONFIG_THUMB 636*c0909341SAndroid Build Coastguard Worker 637*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h4): 638*c0909341SAndroid Build Coastguard Worker vld1.16 {d0}, [r2, :64]! 639*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 640*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 641*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 642*c0909341SAndroid Build Coastguard Worker bx r3 643*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w4): 644*c0909341SAndroid Build Coastguard Worker vld1.16 {d2}, [r2] 645*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d30 646*c0909341SAndroid Build Coastguard Worker vpadd.i16 d2, d2, d2 647*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d2, d2 648*c0909341SAndroid Build Coastguard Worker cmp r4, #4 649*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d2 650*c0909341SAndroid Build Coastguard Worker vshl.u32 d0, d0, d28 651*c0909341SAndroid Build Coastguard Worker beq 1f 652*c0909341SAndroid Build Coastguard Worker // h = 8/16 653*c0909341SAndroid Build Coastguard Worker cmp r4, #16 654*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 655*c0909341SAndroid Build Coastguard Worker movw r5, #0xAAAB 656*c0909341SAndroid Build Coastguard Worker it ne 657*c0909341SAndroid Build Coastguard Worker movne lr, r5 658*c0909341SAndroid Build Coastguard Worker vdup.32 d24, lr 659*c0909341SAndroid Build Coastguard Worker vmul.i32 d0, d0, d24 660*c0909341SAndroid Build Coastguard Worker vshr.u32 d0, d0, #17 661*c0909341SAndroid Build Coastguard Worker1: 662*c0909341SAndroid Build Coastguard Worker vdup.16 d0, d0[0] 663*c0909341SAndroid Build Coastguard Worker2: 664*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r0, :64], r1 665*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r12, :64], r1 666*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 667*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r0, :64], r1 
668*c0909341SAndroid Build Coastguard Worker vst1.16 {d0}, [r12, :64], r1 669*c0909341SAndroid Build Coastguard Worker bgt 2b 670*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 671*c0909341SAndroid Build Coastguard Worker 672*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h8): 673*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1}, [r2, :128]! 674*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 675*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 676*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 677*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 678*c0909341SAndroid Build Coastguard Worker bx r3 679*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w8): 680*c0909341SAndroid Build Coastguard Worker vld1.16 {d2, d3}, [r2] 681*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d30 682*c0909341SAndroid Build Coastguard Worker vadd.i16 d2, d2, d3 683*c0909341SAndroid Build Coastguard Worker vpadd.i16 d2, d2, d2 684*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d2, d2 685*c0909341SAndroid Build Coastguard Worker cmp r4, #8 686*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d2 687*c0909341SAndroid Build Coastguard Worker vshl.u32 d0, d0, d28 688*c0909341SAndroid Build Coastguard Worker beq 1f 689*c0909341SAndroid Build Coastguard Worker // h = 4/16/32 690*c0909341SAndroid Build Coastguard Worker cmp r4, #32 691*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 692*c0909341SAndroid Build Coastguard Worker movw r5, #0xAAAB 693*c0909341SAndroid Build Coastguard Worker it ne 694*c0909341SAndroid Build Coastguard Worker movne lr, r5 695*c0909341SAndroid Build Coastguard Worker vdup.32 d24, lr 696*c0909341SAndroid Build Coastguard Worker vmul.i32 d0, d0, d24 697*c0909341SAndroid Build Coastguard Worker vshr.u32 d0, d0, #17 698*c0909341SAndroid Build Coastguard Worker1: 699*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 700*c0909341SAndroid Build Coastguard Worker2: 
701*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r0, :128], r1 702*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r12, :128], r1 703*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 704*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r0, :128], r1 705*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1}, [r12, :128], r1 706*c0909341SAndroid Build Coastguard Worker bgt 2b 707*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 708*c0909341SAndroid Build Coastguard Worker 709*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h16): 710*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! 711*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 712*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 713*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 714*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 715*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 716*c0909341SAndroid Build Coastguard Worker bx r3 717*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w16): 718*c0909341SAndroid Build Coastguard Worker vld1.16 {d2, d3, d4, d5}, [r2] 719*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d30 720*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q2 721*c0909341SAndroid Build Coastguard Worker vadd.i16 d2, d2, d3 722*c0909341SAndroid Build Coastguard Worker vpadd.i16 d2, d2, d1 723*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d2, d2 724*c0909341SAndroid Build Coastguard Worker cmp r4, #16 725*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d2 726*c0909341SAndroid Build Coastguard Worker vshl.u32 d4, d0, d28 727*c0909341SAndroid Build Coastguard Worker beq 1f 728*c0909341SAndroid Build Coastguard Worker // h = 4/8/32/64 729*c0909341SAndroid Build Coastguard Worker tst r4, #(32+16+8) // 16 added to make a consecutive bitmask 730*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 731*c0909341SAndroid 
Build Coastguard Worker movw r5, #0xAAAB 732*c0909341SAndroid Build Coastguard Worker it ne 733*c0909341SAndroid Build Coastguard Worker movne lr, r5 734*c0909341SAndroid Build Coastguard Worker vdup.32 d24, lr 735*c0909341SAndroid Build Coastguard Worker vmul.i32 d4, d4, d24 736*c0909341SAndroid Build Coastguard Worker vshr.u32 d4, d4, #17 737*c0909341SAndroid Build Coastguard Worker1: 738*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d4[0] 739*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d4[0] 740*c0909341SAndroid Build Coastguard Worker2: 741*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 742*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 743*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 744*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 745*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 746*c0909341SAndroid Build Coastguard Worker bgt 2b 747*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 748*c0909341SAndroid Build Coastguard Worker 749*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h32): 750*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! 751*c0909341SAndroid Build Coastguard Worker vld1.16 {d4, d5, d6, d7}, [r2, :128]! 752*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 753*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 754*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q2 755*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 756*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 757*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 758*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 759*c0909341SAndroid Build Coastguard Worker bx r3 760*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w32): 761*c0909341SAndroid Build Coastguard Worker vld1.16 {d2, d3, d4, d5}, [r2]! 
762*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d30 763*c0909341SAndroid Build Coastguard Worker vld1.16 {d16, d17, d18, d19}, [r2] 764*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q2 765*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q9 766*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q8 767*c0909341SAndroid Build Coastguard Worker vadd.i16 d2, d2, d3 768*c0909341SAndroid Build Coastguard Worker vpadd.i16 d2, d2, d2 769*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d2, d2 770*c0909341SAndroid Build Coastguard Worker cmp r4, #32 771*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d2 772*c0909341SAndroid Build Coastguard Worker vshl.u32 d4, d0, d28 773*c0909341SAndroid Build Coastguard Worker beq 1f 774*c0909341SAndroid Build Coastguard Worker // h = 8/16/64 775*c0909341SAndroid Build Coastguard Worker cmp r4, #8 776*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 777*c0909341SAndroid Build Coastguard Worker movw r5, #0xAAAB 778*c0909341SAndroid Build Coastguard Worker it ne 779*c0909341SAndroid Build Coastguard Worker movne lr, r5 780*c0909341SAndroid Build Coastguard Worker vdup.32 d24, lr 781*c0909341SAndroid Build Coastguard Worker vmul.i32 d4, d4, d24 782*c0909341SAndroid Build Coastguard Worker vshr.u32 d4, d4, #17 783*c0909341SAndroid Build Coastguard Worker1: 784*c0909341SAndroid Build Coastguard Worker sub r1, r1, #32 785*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d4[0] 786*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d4[0] 787*c0909341SAndroid Build Coastguard Worker2: 788*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 789*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 
790*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 791*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 792*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 793*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 794*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 795*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 796*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 797*c0909341SAndroid Build Coastguard Worker bgt 2b 798*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 799*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h64): 800*c0909341SAndroid Build Coastguard Worker vld1.16 {d0, d1, d2, d3}, [r2, :128]! 801*c0909341SAndroid Build Coastguard Worker vld1.16 {d4, d5, d6, d7}, [r2, :128]! 802*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 803*c0909341SAndroid Build Coastguard Worker vld1.16 {d16, d17, d18, d19}, [r2, :128]! 804*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 805*c0909341SAndroid Build Coastguard Worker vld1.16 {d20, d21, d22, d23}, [r2, :128]! 806*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q9 807*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q11 808*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q2 809*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q10 810*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q8 811*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 812*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 813*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 814*c0909341SAndroid Build Coastguard Worker vpadd.i32 d0, d0, d0 815*c0909341SAndroid Build Coastguard Worker bx r3 816*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w64): 817*c0909341SAndroid Build Coastguard Worker vld1.16 {d2, d3, d4, d5}, [r2]! 
818*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d30 819*c0909341SAndroid Build Coastguard Worker vld1.16 {d16, d17, d18, d19}, [r2]! 820*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q2 821*c0909341SAndroid Build Coastguard Worker vld1.16 {d20, d21, d22, d23}, [r2]! 822*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q9 823*c0909341SAndroid Build Coastguard Worker vld1.16 {d24, d25, d26, d27}, [r2]! 824*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q11 825*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q13 826*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q8 827*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q12 828*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q10 829*c0909341SAndroid Build Coastguard Worker vadd.i16 d2, d2, d3 830*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d2, d2 831*c0909341SAndroid Build Coastguard Worker vpadd.i32 d2, d2, d2 832*c0909341SAndroid Build Coastguard Worker cmp r4, #64 833*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d2 834*c0909341SAndroid Build Coastguard Worker vshl.u32 d4, d0, d28 835*c0909341SAndroid Build Coastguard Worker beq 1f 836*c0909341SAndroid Build Coastguard Worker // h = 16/32 837*c0909341SAndroid Build Coastguard Worker cmp r4, #16 838*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 839*c0909341SAndroid Build Coastguard Worker movw r5, #0xAAAB 840*c0909341SAndroid Build Coastguard Worker it ne 841*c0909341SAndroid Build Coastguard Worker movne lr, r5 842*c0909341SAndroid Build Coastguard Worker vdup.32 d24, lr 843*c0909341SAndroid Build Coastguard Worker vmul.i32 d4, d4, d24 844*c0909341SAndroid Build Coastguard Worker vshr.u32 d4, d4, #17 845*c0909341SAndroid Build Coastguard Worker1: 846*c0909341SAndroid Build Coastguard Worker sub r1, r1, #96 847*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d4[0] 848*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d4[0] 
849*c0909341SAndroid Build Coastguard Worker2: 850*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 851*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 852*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 853*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 854*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 855*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128]! 856*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128]! 857*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r0, :128], r1 858*c0909341SAndroid Build Coastguard Worker vst1.16 {d0, d1, d2, d3}, [r12, :128], r1 859*c0909341SAndroid Build Coastguard Worker bgt 2b 860*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 861*c0909341SAndroid Build Coastguard Workerendfunc 862*c0909341SAndroid Build Coastguard Worker 863*c0909341SAndroid Build Coastguard Worker// void ipred_paeth_16bpc_neon(pixel *dst, const ptrdiff_t stride, 864*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 865*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 866*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 867*c0909341SAndroid Build Coastguard Workerfunction ipred_paeth_16bpc_neon, export=1 868*c0909341SAndroid Build Coastguard Worker push {r4-r6, lr} 869*c0909341SAndroid Build Coastguard Worker vpush {q4} 870*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #32] 871*c0909341SAndroid Build Coastguard Worker clz lr, r3 872*c0909341SAndroid Build Coastguard Worker adr r12, L(ipred_paeth_tbl) 873*c0909341SAndroid Build Coastguard Worker sub lr, lr, #25 874*c0909341SAndroid Build Coastguard Worker ldr lr, [r12, lr, lsl #2] 875*c0909341SAndroid Build Coastguard Worker vld1.16 {d4[], d5[]}, [r2] 876*c0909341SAndroid Build 
Coastguard Worker add r6, r2, #2 877*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 878*c0909341SAndroid Build Coastguard Worker add r12, r12, lr 879*c0909341SAndroid Build Coastguard Worker mov r5, #-4 880*c0909341SAndroid Build Coastguard Worker add lr, r0, r1 881*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 882*c0909341SAndroid Build Coastguard Worker bx r12 883*c0909341SAndroid Build Coastguard Worker 884*c0909341SAndroid Build Coastguard Worker .align 2 885*c0909341SAndroid Build Coastguard WorkerL(ipred_paeth_tbl): 886*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_paeth_tbl) + CONFIG_THUMB 887*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_paeth_tbl) + CONFIG_THUMB 888*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_paeth_tbl) + CONFIG_THUMB 889*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_paeth_tbl) + CONFIG_THUMB 890*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_paeth_tbl) + CONFIG_THUMB 891*c0909341SAndroid Build Coastguard Worker 892*c0909341SAndroid Build Coastguard Worker40: 893*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 894*c0909341SAndroid Build Coastguard Worker mov r5, #-8 895*c0909341SAndroid Build Coastguard Worker vld1.16 {d6}, [r6] 896*c0909341SAndroid Build Coastguard Worker vsub.i16 d16, d6, d4 // top - topleft 897*c0909341SAndroid Build Coastguard Worker vmov d7, d6 898*c0909341SAndroid Build Coastguard Worker vmov d17, d16 899*c0909341SAndroid Build Coastguard Worker4: 900*c0909341SAndroid Build Coastguard Worker vld4.16 {d0[], d1[], d2[], d3[]}, [r2, :64], r5 901*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q8, q0 // base 902*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q8, q1 903*c0909341SAndroid Build Coastguard Worker vabd.s16 q11, q3, q9 // tdiff 904*c0909341SAndroid Build Coastguard Worker vabd.s16 q12, q3, q10 905*c0909341SAndroid Build Coastguard Worker vabd.s16 q13, q2, q9 // tldiff 906*c0909341SAndroid 
Build Coastguard Worker vabd.s16 q14, q2, q10 907*c0909341SAndroid Build Coastguard Worker vabd.s16 q9, q0, q9 // ldiff 908*c0909341SAndroid Build Coastguard Worker vabd.s16 q10, q1, q10 909*c0909341SAndroid Build Coastguard Worker vmin.u16 q15, q11, q13 // min(tdiff, tldiff) 910*c0909341SAndroid Build Coastguard Worker vmin.u16 q4, q12, q14 911*c0909341SAndroid Build Coastguard Worker vcge.u16 q11, q13, q11 // tldiff >= tdiff 912*c0909341SAndroid Build Coastguard Worker vcge.u16 q12, q14, q12 913*c0909341SAndroid Build Coastguard Worker vcge.u16 q9, q15, q9 // min(tdiff, tldiff) >= ldiff 914*c0909341SAndroid Build Coastguard Worker vcge.u16 q10, q4, q10 915*c0909341SAndroid Build Coastguard Worker vbsl q12, q3, q2 // tdiff <= tldiff ? top : topleft 916*c0909341SAndroid Build Coastguard Worker vbsl q11, q3, q2 917*c0909341SAndroid Build Coastguard Worker vbit q12, q1, q10 // ldiff <= min ? left : ... 918*c0909341SAndroid Build Coastguard Worker vbit q11, q0, q9 919*c0909341SAndroid Build Coastguard Worker vst1.16 {d25}, [r0, :64], r1 920*c0909341SAndroid Build Coastguard Worker vst1.16 {d24}, [lr, :64], r1 921*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 922*c0909341SAndroid Build Coastguard Worker vst1.16 {d23}, [r0, :64], r1 923*c0909341SAndroid Build Coastguard Worker vst1.16 {d22}, [lr, :64], r1 924*c0909341SAndroid Build Coastguard Worker bgt 4b 925*c0909341SAndroid Build Coastguard Worker vpop {q4} 926*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 927*c0909341SAndroid Build Coastguard Worker80: 928*c0909341SAndroid Build Coastguard Worker160: 929*c0909341SAndroid Build Coastguard Worker320: 930*c0909341SAndroid Build Coastguard Worker640: 931*c0909341SAndroid Build Coastguard Worker vld1.16 {q3}, [r6]! 
932*c0909341SAndroid Build Coastguard Worker mov r12, r3 933*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3, lsl #1 934*c0909341SAndroid Build Coastguard Worker1: 935*c0909341SAndroid Build Coastguard Worker vld2.16 {d0[], d2[]}, [r2, :32], r5 936*c0909341SAndroid Build Coastguard Worker vmov d1, d0 937*c0909341SAndroid Build Coastguard Worker vmov d3, d2 938*c0909341SAndroid Build Coastguard Worker2: 939*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q3, q2 // top - topleft 940*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q8, q0 // base 941*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q8, q1 942*c0909341SAndroid Build Coastguard Worker vabd.s16 q11, q3, q9 // tdiff 943*c0909341SAndroid Build Coastguard Worker vabd.s16 q12, q3, q10 944*c0909341SAndroid Build Coastguard Worker vabd.s16 q13, q2, q9 // tldiff 945*c0909341SAndroid Build Coastguard Worker vabd.s16 q14, q2, q10 946*c0909341SAndroid Build Coastguard Worker vabd.s16 q9, q0, q9 // ldiff 947*c0909341SAndroid Build Coastguard Worker vabd.s16 q10, q1, q10 948*c0909341SAndroid Build Coastguard Worker vmin.u16 q15, q11, q13 // min(tdiff, tldiff) 949*c0909341SAndroid Build Coastguard Worker vmin.u16 q4, q12, q14 950*c0909341SAndroid Build Coastguard Worker vcge.u16 q11, q13, q11 // tldiff >= tdiff 951*c0909341SAndroid Build Coastguard Worker vcge.u16 q12, q14, q12 952*c0909341SAndroid Build Coastguard Worker vcge.u16 q9, q15, q9 // min(tdiff, tldiff) >= ldiff 953*c0909341SAndroid Build Coastguard Worker vcge.u16 q10, q4, q10 954*c0909341SAndroid Build Coastguard Worker vbsl q12, q3, q2 // tdiff <= tldiff ? top : topleft 955*c0909341SAndroid Build Coastguard Worker vbsl q11, q3, q2 956*c0909341SAndroid Build Coastguard Worker vbit q12, q1, q10 // ldiff <= min ? left : ... 957*c0909341SAndroid Build Coastguard Worker vbit q11, q0, q9 958*c0909341SAndroid Build Coastguard Worker subs r3, r3, #8 959*c0909341SAndroid Build Coastguard Worker vst1.16 {q12}, [r0, :128]! 
960*c0909341SAndroid Build Coastguard Worker vst1.16 {q11}, [lr, :128]! 961*c0909341SAndroid Build Coastguard Worker ble 8f 962*c0909341SAndroid Build Coastguard Worker vld1.16 {q3}, [r6]! 963*c0909341SAndroid Build Coastguard Worker b 2b 964*c0909341SAndroid Build Coastguard Worker8: 965*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 966*c0909341SAndroid Build Coastguard Worker ble 9f 967*c0909341SAndroid Build Coastguard Worker // End of horizontal loop, move pointers to next two rows 968*c0909341SAndroid Build Coastguard Worker sub r6, r6, r12, lsl #1 969*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 970*c0909341SAndroid Build Coastguard Worker add lr, lr, r1 971*c0909341SAndroid Build Coastguard Worker vld1.16 {q3}, [r6]! 972*c0909341SAndroid Build Coastguard Worker mov r3, r12 973*c0909341SAndroid Build Coastguard Worker b 1b 974*c0909341SAndroid Build Coastguard Worker9: 975*c0909341SAndroid Build Coastguard Worker vpop {q4} 976*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 977*c0909341SAndroid Build Coastguard Workerendfunc 978*c0909341SAndroid Build Coastguard Worker 979*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_16bpc_neon(pixel *dst, const ptrdiff_t stride, 980*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 981*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 982*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 983*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_16bpc_neon, export=1 984*c0909341SAndroid Build Coastguard Worker push {r4-r10, lr} 985*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #32] 986*c0909341SAndroid Build Coastguard Worker movrel r10, X(sm_weights) 987*c0909341SAndroid Build Coastguard Worker add r12, r10, r4 988*c0909341SAndroid Build Coastguard Worker add r10, r10, r3 989*c0909341SAndroid Build Coastguard Worker clz r9, r3 990*c0909341SAndroid Build Coastguard 
Worker adr r5, L(ipred_smooth_tbl) 991*c0909341SAndroid Build Coastguard Worker sub lr, r2, r4, lsl #1 992*c0909341SAndroid Build Coastguard Worker sub r9, r9, #25 993*c0909341SAndroid Build Coastguard Worker ldr r9, [r5, r9, lsl #2] 994*c0909341SAndroid Build Coastguard Worker vld1.16 {d4[], d5[]}, [lr] // bottom 995*c0909341SAndroid Build Coastguard Worker add r8, r2, #2 996*c0909341SAndroid Build Coastguard Worker add r5, r5, r9 997*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 998*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 999*c0909341SAndroid Build Coastguard Worker bx r5 1000*c0909341SAndroid Build Coastguard Worker 1001*c0909341SAndroid Build Coastguard Worker .align 2 1002*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_tbl): 1003*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_smooth_tbl) + CONFIG_THUMB 1004*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_smooth_tbl) + CONFIG_THUMB 1005*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_smooth_tbl) + CONFIG_THUMB 1006*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_smooth_tbl) + CONFIG_THUMB 1007*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_smooth_tbl) + CONFIG_THUMB 1008*c0909341SAndroid Build Coastguard Worker 1009*c0909341SAndroid Build Coastguard Worker40: 1010*c0909341SAndroid Build Coastguard Worker vld1.16 {d16}, [r8] // top 1011*c0909341SAndroid Build Coastguard Worker vld1.32 {d18[]}, [r10, :32] // weights_hor 1012*c0909341SAndroid Build Coastguard Worker sub r2, r2, #8 1013*c0909341SAndroid Build Coastguard Worker mov r7, #-8 1014*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d16[3] // right 1015*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q2 // top-bottom 1016*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 // weights_hor 1017*c0909341SAndroid Build Coastguard Worker vadd.i16 d19, d4, d6 // bottom+right 1018*c0909341SAndroid Build Coastguard Worker4: 
1019*c0909341SAndroid Build Coastguard Worker vld4.16 {d0[], d1[], d2[], d3[]}, [r2, :64], r7 // left 1020*c0909341SAndroid Build Coastguard Worker vld4.8 {d20[], d21[], d22[], d23[]}, [r12, :32]! // weights_ver 1021*c0909341SAndroid Build Coastguard Worker vshll.u16 q12, d19, #8 // (bottom+right)*256 1022*c0909341SAndroid Build Coastguard Worker vshll.u16 q13, d19, #8 1023*c0909341SAndroid Build Coastguard Worker vshll.u16 q14, d19, #8 1024*c0909341SAndroid Build Coastguard Worker vshll.u16 q15, d19, #8 1025*c0909341SAndroid Build Coastguard Worker vzip.32 d20, d21 // weights_ver 1026*c0909341SAndroid Build Coastguard Worker vzip.32 d22, d23 1027*c0909341SAndroid Build Coastguard Worker vsub.i16 q1, q1, q3 // left-right 1028*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q3 1029*c0909341SAndroid Build Coastguard Worker vmovl.u8 q10, d20 // weights_ver 1030*c0909341SAndroid Build Coastguard Worker vmovl.u8 q11, d22 1031*c0909341SAndroid Build Coastguard Worker vmlal.s16 q12, d3, d18 // += (left-right)*weights_hor 1032*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d2, d18 // (left flipped) 1033*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d1, d18 1034*c0909341SAndroid Build Coastguard Worker vmlal.s16 q15, d0, d18 1035*c0909341SAndroid Build Coastguard Worker vmlal.s16 q12, d16, d20 // += (top-bottom)*weights_ver 1036*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d16, d21 1037*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d16, d22 1038*c0909341SAndroid Build Coastguard Worker vmlal.s16 q15, d16, d23 1039*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d24, q12, #9 1040*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d25, q13, #9 1041*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d26, q14, #9 1042*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d27, q15, #9 1043*c0909341SAndroid Build Coastguard Worker vst1.16 {d24}, [r0, :64], r1 1044*c0909341SAndroid Build Coastguard Worker vst1.16 {d25}, [r6, 
:64], r1 1045*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1046*c0909341SAndroid Build Coastguard Worker vst1.16 {d26}, [r0, :64], r1 1047*c0909341SAndroid Build Coastguard Worker vst1.16 {d27}, [r6, :64], r1 1048*c0909341SAndroid Build Coastguard Worker bgt 4b 1049*c0909341SAndroid Build Coastguard Worker pop {r4-r10, pc} 1050*c0909341SAndroid Build Coastguard Worker80: 1051*c0909341SAndroid Build Coastguard Worker vld1.16 {q8}, [r8] // top 1052*c0909341SAndroid Build Coastguard Worker vld1.8 {d18}, [r10, :64] // weights_hor 1053*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 1054*c0909341SAndroid Build Coastguard Worker mov r7, #-4 1055*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d17[3] // right 1056*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q2 // top-bottom 1057*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 // weights_hor 1058*c0909341SAndroid Build Coastguard Worker vadd.i16 d3, d4, d6 // bottom+right 1059*c0909341SAndroid Build Coastguard Worker8: 1060*c0909341SAndroid Build Coastguard Worker vld2.16 {d0[], d1[]}, [r2, :32], r7 // left 1061*c0909341SAndroid Build Coastguard Worker vld2.8 {d20[], d22[]}, [r12, :16]! 
// weights_ver 1062*c0909341SAndroid Build Coastguard Worker vshll.u16 q12, d3, #8 // (bottom+right)*256 1063*c0909341SAndroid Build Coastguard Worker vshll.u16 q13, d3, #8 1064*c0909341SAndroid Build Coastguard Worker vshll.u16 q14, d3, #8 1065*c0909341SAndroid Build Coastguard Worker vshll.u16 q15, d3, #8 1066*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q3 // left-right 1067*c0909341SAndroid Build Coastguard Worker vmovl.u8 q10, d20 // weights_ver 1068*c0909341SAndroid Build Coastguard Worker vmovl.u8 q11, d22 1069*c0909341SAndroid Build Coastguard Worker vmlal.s16 q12, d1, d18 // += (left-right)*weights_hor 1070*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d1, d19 // (left flipped) 1071*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d0, d18 1072*c0909341SAndroid Build Coastguard Worker vmlal.s16 q15, d0, d19 1073*c0909341SAndroid Build Coastguard Worker vmlal.s16 q12, d16, d20 // += (top-bottom)*weights_ver 1074*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d17, d20 1075*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d16, d22 1076*c0909341SAndroid Build Coastguard Worker vmlal.s16 q15, d17, d22 1077*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d24, q12, #9 1078*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d25, q13, #9 1079*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d26, q14, #9 1080*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d27, q15, #9 1081*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 1082*c0909341SAndroid Build Coastguard Worker vst1.16 {q12}, [r0, :128], r1 1083*c0909341SAndroid Build Coastguard Worker vst1.16 {q13}, [r6, :128], r1 1084*c0909341SAndroid Build Coastguard Worker bgt 8b 1085*c0909341SAndroid Build Coastguard Worker pop {r4-r10, pc} 1086*c0909341SAndroid Build Coastguard Worker160: 1087*c0909341SAndroid Build Coastguard Worker320: 1088*c0909341SAndroid Build Coastguard Worker640: 1089*c0909341SAndroid Build Coastguard Worker add lr, r2, r3, lsl #1 
1090*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 1091*c0909341SAndroid Build Coastguard Worker mov r7, #-4 1092*c0909341SAndroid Build Coastguard Worker vld1.16 {d6[], d7[]}, [lr] // right 1093*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3, lsl #1 1094*c0909341SAndroid Build Coastguard Worker mov r9, r3 1095*c0909341SAndroid Build Coastguard Worker vadd.i16 d3, d4, d6 // bottom+right 1096*c0909341SAndroid Build Coastguard Worker 1097*c0909341SAndroid Build Coastguard Worker1: 1098*c0909341SAndroid Build Coastguard Worker vld2.16 {d0[], d1[]}, [r2, :32], r7 // left 1099*c0909341SAndroid Build Coastguard Worker vld2.8 {d20[], d22[]}, [r12, :16]! // weights_ver 1100*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q3 // left-right 1101*c0909341SAndroid Build Coastguard Worker vmovl.u8 q10, d20 // weights_ver 1102*c0909341SAndroid Build Coastguard Worker vmovl.u8 q11, d22 1103*c0909341SAndroid Build Coastguard Worker2: 1104*c0909341SAndroid Build Coastguard Worker vld1.8 {d18}, [r10, :64]! // weights_hor 1105*c0909341SAndroid Build Coastguard Worker vld1.16 {q8}, [r8]! 
// top 1106*c0909341SAndroid Build Coastguard Worker vshll.u16 q12, d3, #8 // (bottom+right)*256 1107*c0909341SAndroid Build Coastguard Worker vshll.u16 q13, d3, #8 1108*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 // weights_hor 1109*c0909341SAndroid Build Coastguard Worker vshll.u16 q14, d3, #8 1110*c0909341SAndroid Build Coastguard Worker vshll.u16 q15, d3, #8 1111*c0909341SAndroid Build Coastguard Worker vsub.i16 q8, q8, q2 // top-bottom 1112*c0909341SAndroid Build Coastguard Worker vmlal.s16 q12, d1, d18 // += (left-right)*weights_hor 1113*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d1, d19 // (left flipped) 1114*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d0, d18 1115*c0909341SAndroid Build Coastguard Worker vmlal.s16 q15, d0, d19 1116*c0909341SAndroid Build Coastguard Worker vmlal.s16 q12, d16, d20 // += (top-bottom)*weights_ver 1117*c0909341SAndroid Build Coastguard Worker vmlal.s16 q13, d17, d20 1118*c0909341SAndroid Build Coastguard Worker vmlal.s16 q14, d16, d22 1119*c0909341SAndroid Build Coastguard Worker vmlal.s16 q15, d17, d22 1120*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d24, q12, #9 1121*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d25, q13, #9 1122*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d26, q14, #9 1123*c0909341SAndroid Build Coastguard Worker vrshrn.i32 d27, q15, #9 1124*c0909341SAndroid Build Coastguard Worker subs r3, r3, #8 1125*c0909341SAndroid Build Coastguard Worker vst1.16 {q12}, [r0, :128]! 1126*c0909341SAndroid Build Coastguard Worker vst1.16 {q13}, [r6, :128]! 
1127*c0909341SAndroid Build Coastguard Worker bgt 2b 1128*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 1129*c0909341SAndroid Build Coastguard Worker ble 9f 1130*c0909341SAndroid Build Coastguard Worker sub r8, r8, r9, lsl #1 1131*c0909341SAndroid Build Coastguard Worker sub r10, r10, r9 1132*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 1133*c0909341SAndroid Build Coastguard Worker add r6, r6, r1 1134*c0909341SAndroid Build Coastguard Worker mov r3, r9 1135*c0909341SAndroid Build Coastguard Worker b 1b 1136*c0909341SAndroid Build Coastguard Worker9: 1137*c0909341SAndroid Build Coastguard Worker pop {r4-r10, pc} 1138*c0909341SAndroid Build Coastguard Workerendfunc 1139*c0909341SAndroid Build Coastguard Worker 1140*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_v_16bpc_neon(pixel *dst, const ptrdiff_t stride, 1141*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1142*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 1143*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 1144*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_v_16bpc_neon, export=1 1145*c0909341SAndroid Build Coastguard Worker push {r4-r7, lr} 1146*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #20] 1147*c0909341SAndroid Build Coastguard Worker movrel r7, X(sm_weights) 1148*c0909341SAndroid Build Coastguard Worker add r7, r7, r4 1149*c0909341SAndroid Build Coastguard Worker clz lr, r3 1150*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_smooth_v_tbl) 1151*c0909341SAndroid Build Coastguard Worker sub r12, r2, r4, lsl #1 1152*c0909341SAndroid Build Coastguard Worker sub lr, lr, #25 1153*c0909341SAndroid Build Coastguard Worker ldr lr, [r5, lr, lsl #2] 1154*c0909341SAndroid Build Coastguard Worker vld1.16 {d4[], d5[]}, [r12] // bottom 1155*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 1156*c0909341SAndroid Build Coastguard Worker add 
r5, r5, lr 1157*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 1158*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1159*c0909341SAndroid Build Coastguard Worker bx r5 1160*c0909341SAndroid Build Coastguard Worker 1161*c0909341SAndroid Build Coastguard Worker .align 2 1162*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_v_tbl): 1163*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1164*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1165*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1166*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1167*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1168*c0909341SAndroid Build Coastguard Worker 1169*c0909341SAndroid Build Coastguard Worker40: 1170*c0909341SAndroid Build Coastguard Worker vld1.16 {d6}, [r2] // top 1171*c0909341SAndroid Build Coastguard Worker vsub.i16 d6, d6, d4 // top-bottom 1172*c0909341SAndroid Build Coastguard Worker vmov d7, d6 1173*c0909341SAndroid Build Coastguard Worker4: 1174*c0909341SAndroid Build Coastguard Worker vld4.8 {d16[], d17[], d18[], d19[]}, [r7, :32]! 
// weights_ver 1175*c0909341SAndroid Build Coastguard Worker vzip.32 d16, d17 // weights_ver 1176*c0909341SAndroid Build Coastguard Worker vzip.32 d18, d19 1177*c0909341SAndroid Build Coastguard Worker vshll.u8 q8, d16, #7 // weights_ver << 7 1178*c0909341SAndroid Build Coastguard Worker vshll.u8 q9, d18, #7 1179*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q10, q3, q8 // ((top-bottom)*weights_ver + 128) >> 8 1180*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q11, q3, q9 1181*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 1182*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q2 1183*c0909341SAndroid Build Coastguard Worker vst1.16 {d20}, [r0, :64], r1 1184*c0909341SAndroid Build Coastguard Worker vst1.16 {d21}, [r6, :64], r1 1185*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1186*c0909341SAndroid Build Coastguard Worker vst1.16 {d22}, [r0, :64], r1 1187*c0909341SAndroid Build Coastguard Worker vst1.16 {d23}, [r6, :64], r1 1188*c0909341SAndroid Build Coastguard Worker bgt 4b 1189*c0909341SAndroid Build Coastguard Worker pop {r4-r7, pc} 1190*c0909341SAndroid Build Coastguard Worker80: 1191*c0909341SAndroid Build Coastguard Worker vld1.16 {q3}, [r2] // top 1192*c0909341SAndroid Build Coastguard Worker vsub.i16 q3, q3, q2 // top-bottom 1193*c0909341SAndroid Build Coastguard Worker8: 1194*c0909341SAndroid Build Coastguard Worker vld4.8 {d16[], d18[], d20[], d22[]}, [r7, :32]! 
// weights_ver 1195*c0909341SAndroid Build Coastguard Worker vshll.u8 q8, d16, #7 // weights_ver << 7 1196*c0909341SAndroid Build Coastguard Worker vshll.u8 q9, d18, #7 1197*c0909341SAndroid Build Coastguard Worker vshll.u8 q10, d20, #7 1198*c0909341SAndroid Build Coastguard Worker vshll.u8 q11, d22, #7 1199*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q8, q3, q8 // ((top-bottom)*weights_ver + 128) >> 8 1200*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q9, q3, q9 1201*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q10, q3, q10 1202*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q11, q3, q11 1203*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q2 1204*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q2 1205*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 1206*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q2 1207*c0909341SAndroid Build Coastguard Worker vst1.16 {q8}, [r0, :128], r1 1208*c0909341SAndroid Build Coastguard Worker vst1.16 {q9}, [r6, :128], r1 1209*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1210*c0909341SAndroid Build Coastguard Worker vst1.16 {q10}, [r0, :128], r1 1211*c0909341SAndroid Build Coastguard Worker vst1.16 {q11}, [r6, :128], r1 1212*c0909341SAndroid Build Coastguard Worker bgt 8b 1213*c0909341SAndroid Build Coastguard Worker pop {r4-r7, pc} 1214*c0909341SAndroid Build Coastguard Worker160: 1215*c0909341SAndroid Build Coastguard Worker320: 1216*c0909341SAndroid Build Coastguard Worker640: 1217*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 1218*c0909341SAndroid Build Coastguard Worker // Set up pointers for four rows in parallel; r0, r6, r5, lr 1219*c0909341SAndroid Build Coastguard Worker add r5, r0, r1 1220*c0909341SAndroid Build Coastguard Worker add lr, r6, r1 1221*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1222*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3, lsl #1 1223*c0909341SAndroid Build Coastguard Worker mov 
r12, r3 1224*c0909341SAndroid Build Coastguard Worker 1225*c0909341SAndroid Build Coastguard Worker1: 1226*c0909341SAndroid Build Coastguard Worker vld4.8 {d8[], d10[], d12[], d14[]}, [r7, :32]! // weights_ver 1227*c0909341SAndroid Build Coastguard Worker vshll.u8 q4, d8, #7 // weights_ver << 7 1228*c0909341SAndroid Build Coastguard Worker vshll.u8 q5, d10, #7 1229*c0909341SAndroid Build Coastguard Worker vshll.u8 q6, d12, #7 1230*c0909341SAndroid Build Coastguard Worker vshll.u8 q7, d14, #7 1231*c0909341SAndroid Build Coastguard Worker2: 1232*c0909341SAndroid Build Coastguard Worker vld1.16 {q0, q1}, [r2]! // top 1233*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q2 // top-bottom 1234*c0909341SAndroid Build Coastguard Worker vsub.i16 q1, q1, q2 1235*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q8, q0, q4 // ((top-bottom)*weights_ver + 128) >> 8 1236*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q9, q1, q4 1237*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q10, q0, q5 1238*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q11, q1, q5 1239*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q12, q0, q6 1240*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q13, q1, q6 1241*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q14, q0, q7 1242*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q15, q1, q7 1243*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q2 1244*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q2 1245*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 1246*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q2 1247*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q2 1248*c0909341SAndroid Build Coastguard Worker vadd.i16 q13, q13, q2 1249*c0909341SAndroid Build Coastguard Worker vadd.i16 q14, q14, q2 1250*c0909341SAndroid Build Coastguard Worker vadd.i16 q15, q15, q2 1251*c0909341SAndroid Build Coastguard Worker subs r3, r3, #16 1252*c0909341SAndroid 
Build Coastguard Worker vst1.16 {q8, q9}, [r0, :128]! 1253*c0909341SAndroid Build Coastguard Worker vst1.16 {q10, q11}, [r6, :128]! 1254*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [r5, :128]! 1255*c0909341SAndroid Build Coastguard Worker vst1.16 {q14, q15}, [lr, :128]! 1256*c0909341SAndroid Build Coastguard Worker bgt 2b 1257*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1258*c0909341SAndroid Build Coastguard Worker ble 9f 1259*c0909341SAndroid Build Coastguard Worker sub r2, r2, r12, lsl #1 1260*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 1261*c0909341SAndroid Build Coastguard Worker add r6, r6, r1 1262*c0909341SAndroid Build Coastguard Worker add r5, r5, r1 1263*c0909341SAndroid Build Coastguard Worker add lr, lr, r1 1264*c0909341SAndroid Build Coastguard Worker mov r3, r12 1265*c0909341SAndroid Build Coastguard Worker b 1b 1266*c0909341SAndroid Build Coastguard Worker9: 1267*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 1268*c0909341SAndroid Build Coastguard Worker pop {r4-r7, pc} 1269*c0909341SAndroid Build Coastguard Workerendfunc 1270*c0909341SAndroid Build Coastguard Worker 1271*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_h_16bpc_neon(pixel *dst, const ptrdiff_t stride, 1272*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1273*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 1274*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 1275*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_h_16bpc_neon, export=1 1276*c0909341SAndroid Build Coastguard Worker push {r4-r8, lr} 1277*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #24] 1278*c0909341SAndroid Build Coastguard Worker movrel r8, X(sm_weights) 1279*c0909341SAndroid Build Coastguard Worker add r8, r8, r3 1280*c0909341SAndroid Build Coastguard Worker clz lr, r3 1281*c0909341SAndroid Build Coastguard Worker adr r5, 
L(ipred_smooth_h_tbl) 1282*c0909341SAndroid Build Coastguard Worker add r12, r2, r3, lsl #1 1283*c0909341SAndroid Build Coastguard Worker sub lr, lr, #25 1284*c0909341SAndroid Build Coastguard Worker ldr lr, [r5, lr, lsl #2] 1285*c0909341SAndroid Build Coastguard Worker vld1.16 {d4[], d5[]}, [r12] // right 1286*c0909341SAndroid Build Coastguard Worker add r5, r5, lr 1287*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 1288*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1289*c0909341SAndroid Build Coastguard Worker bx r5 1290*c0909341SAndroid Build Coastguard Worker 1291*c0909341SAndroid Build Coastguard Worker .align 2 1292*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_h_tbl): 1293*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1294*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1295*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1296*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1297*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1298*c0909341SAndroid Build Coastguard Worker 1299*c0909341SAndroid Build Coastguard Worker40: 1300*c0909341SAndroid Build Coastguard Worker vld1.32 {d6[]}, [r8, :32] // weights_hor 1301*c0909341SAndroid Build Coastguard Worker sub r2, r2, #8 1302*c0909341SAndroid Build Coastguard Worker mov r7, #-8 1303*c0909341SAndroid Build Coastguard Worker vshll.u8 q3, d6, #7 // weights_hor << 7 1304*c0909341SAndroid Build Coastguard Worker4: 1305*c0909341SAndroid Build Coastguard Worker vld4.16 {d0[], d1[], d2[], d3[]}, [r2, :64], r7 // left 1306*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q2 // left-right 1307*c0909341SAndroid Build Coastguard Worker vsub.i16 q1, q1, q2 1308*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1309*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q8, 
q1, q3 // ((left-right)*weights_hor + 128) >> 8 1310*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q9, q0, q3 // (left flipped) 1311*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q2 1312*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q2 1313*c0909341SAndroid Build Coastguard Worker vst1.16 {d17}, [r0, :64], r1 1314*c0909341SAndroid Build Coastguard Worker vst1.16 {d16}, [r6, :64], r1 1315*c0909341SAndroid Build Coastguard Worker vst1.16 {d19}, [r0, :64], r1 1316*c0909341SAndroid Build Coastguard Worker vst1.16 {d18}, [r6, :64], r1 1317*c0909341SAndroid Build Coastguard Worker bgt 4b 1318*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1319*c0909341SAndroid Build Coastguard Worker80: 1320*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [r8, :64] // weights_hor 1321*c0909341SAndroid Build Coastguard Worker sub r2, r2, #8 1322*c0909341SAndroid Build Coastguard Worker mov r7, #-8 1323*c0909341SAndroid Build Coastguard Worker vshll.u8 q3, d6, #7 // weights_hor << 7 1324*c0909341SAndroid Build Coastguard Worker8: 1325*c0909341SAndroid Build Coastguard Worker vld1.16 {d23}, [r2, :64], r7 // left 1326*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1327*c0909341SAndroid Build Coastguard Worker vsub.i16 d23, d23, d4 // left-right 1328*c0909341SAndroid Build Coastguard Worker vdup.16 q8, d23[3] // flip left 1329*c0909341SAndroid Build Coastguard Worker vdup.16 q9, d23[2] 1330*c0909341SAndroid Build Coastguard Worker vdup.16 q10, d23[1] 1331*c0909341SAndroid Build Coastguard Worker vdup.16 q11, d23[0] 1332*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q8, q8, q3 // ((left-right)*weights_hor + 128) >> 8 1333*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q9, q9, q3 1334*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q10, q10, q3 1335*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q11, q11, q3 1336*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q2 1337*c0909341SAndroid Build 
Coastguard Worker vadd.i16 q9, q9, q2 1338*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 1339*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q2 1340*c0909341SAndroid Build Coastguard Worker vst1.16 {q8}, [r0, :128], r1 1341*c0909341SAndroid Build Coastguard Worker vst1.16 {q9}, [r6, :128], r1 1342*c0909341SAndroid Build Coastguard Worker vst1.16 {q10}, [r0, :128], r1 1343*c0909341SAndroid Build Coastguard Worker vst1.16 {q11}, [r6, :128], r1 1344*c0909341SAndroid Build Coastguard Worker bgt 8b 1345*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1346*c0909341SAndroid Build Coastguard Worker160: 1347*c0909341SAndroid Build Coastguard Worker320: 1348*c0909341SAndroid Build Coastguard Worker640: 1349*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 1350*c0909341SAndroid Build Coastguard Worker sub r2, r2, #8 1351*c0909341SAndroid Build Coastguard Worker mov r7, #-8 1352*c0909341SAndroid Build Coastguard Worker // Set up pointers for four rows in parallel; r0, r6, r5, lr 1353*c0909341SAndroid Build Coastguard Worker add r5, r0, r1 1354*c0909341SAndroid Build Coastguard Worker add lr, r6, r1 1355*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1356*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3, lsl #1 1357*c0909341SAndroid Build Coastguard Worker mov r12, r3 1358*c0909341SAndroid Build Coastguard Worker 1359*c0909341SAndroid Build Coastguard Worker1: 1360*c0909341SAndroid Build Coastguard Worker vld1.16 {d15}, [r2, :64], r7 // left 1361*c0909341SAndroid Build Coastguard Worker vsub.i16 d15, d15, d4 // left-right 1362*c0909341SAndroid Build Coastguard Worker vdup.16 q4, d15[3] // flip left 1363*c0909341SAndroid Build Coastguard Worker vdup.16 q5, d15[2] 1364*c0909341SAndroid Build Coastguard Worker vdup.16 q6, d15[1] 1365*c0909341SAndroid Build Coastguard Worker vdup.16 q7, d15[0] 1366*c0909341SAndroid Build Coastguard Worker2: 1367*c0909341SAndroid Build Coastguard Worker vld1.8 {q1}, [r8, :128]! 
// weights_hor 1368*c0909341SAndroid Build Coastguard Worker subs r3, r3, #16 1369*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d2, #7 // weights_hor << 7 1370*c0909341SAndroid Build Coastguard Worker vshll.u8 q1, d3, #7 1371*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q8, q0, q4 // ((left-right)*weights_hor + 128) >> 8 1372*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q9, q1, q4 1373*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q10, q0, q5 1374*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q11, q1, q5 1375*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q12, q0, q6 1376*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q13, q1, q6 1377*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q14, q0, q7 1378*c0909341SAndroid Build Coastguard Worker vqrdmulh.s16 q15, q1, q7 1379*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q2 1380*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q2 1381*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 1382*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q2 1383*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q2 1384*c0909341SAndroid Build Coastguard Worker vadd.i16 q13, q13, q2 1385*c0909341SAndroid Build Coastguard Worker vadd.i16 q14, q14, q2 1386*c0909341SAndroid Build Coastguard Worker vadd.i16 q15, q15, q2 1387*c0909341SAndroid Build Coastguard Worker vst1.16 {q8, q9}, [r0, :128]! 1388*c0909341SAndroid Build Coastguard Worker vst1.16 {q10, q11}, [r6, :128]! 1389*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [r5, :128]! 1390*c0909341SAndroid Build Coastguard Worker vst1.16 {q14, q15}, [lr, :128]! 
// Tail of a function whose beginning lies above this chunk: close the
// 16-column inner loop, step to the next group of four rows, then return.
        bgt             2b                // more columns left in these rows
        subs            r4,  r4,  #4      // four rows finished
        ble             9f
        sub             r8,  r8,  r12     // rewind the weights pointer by the saved column count
        add             r0,  r0,  r1      // step all four row pointers
        add             r6,  r6,  r1
        add             r5,  r5,  r1
        add             lr,  lr,  r1
        mov             r3,  r12          // reload the column counter
        b               1b
9:
        vpop            {q4-q7}
        pop             {r4-r8, pc}
endfunc

// void ipred_filter_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int filt_idx,
//                              const int max_width, const int max_height,
//                              const int bitdepth_max);
//
// filter_fn emits ipred_filter_<bpc>bpc_neon. Both variants are entered by a
// branch from ipred_filter_16bpc_neon (end of this block), which has already
// pushed r4-r8, lr (24 bytes) and q4-q7 (64 bytes) and loaded bitdepth_max
// into r8. Stack arguments therefore start at [sp, 88] here, and every exit
// path pops what the dispatcher pushed.
//
// The picture is produced in 4x2 blocks. Each block needs seven neighbours:
//   p0 = topleft, p1..p4 = top[0..3], p5 = left[0], p6 = left[1]
// and seven tap vectors f0..f6 with one lane per output pixel (lanes 0-3 are
// the upper row of the block, lanes 4-7 the lower row).
//
// Register use after the prologue:
//   r0      dst, upper row of the current row pair
//   r6      dst + stride, lower row of the current row pair
//   r1      2 * stride (the 16/32 wide path also subtracts the row bytes)
//   r2      topleft - 4 bytes; read with post-decrement r7 = -4
//   r3      width, r4 height (rows left)
//   r8      pointer to top[0]
//   q8-q14  f0..f6, sign-extended to 16 bit
//   q15     bitdepth_max in every lane (upper clamp)
//   q7      zero (lower clamp), 10 bpc variant only
//
// The 10 bpc variant accumulates in 16 bit (vmul/vmla.i16, vrshr.s16) and
// clamps with vmax/vmin. The other variant widens to 32 bit and narrows with
// vqrshrun, which already saturates negative results to zero, so only the
// upper clamp is applied. NOTE(review): presumably the 16-bit form is safe
// because 10-bit input keeps the sums in range; confirm before reusing it.
.macro filter_fn bpc
function ipred_filter_\bpc\()bpc_neon, export=1
        movw            r12, #511
        ldrd            r4,  r5,  [sp, #88]   // r4 = height, r5 = filt_idx
        and             r5,  r5,  r12         // filt_idx & 511
        movrel          r6,  X(filter_intra_taps)
        lsl             r5,  r5,  #6          // 64 bytes per table entry
        add             r6,  r6,  r5
        vld1.8          {d20-d23}, [r6, :128]!    // taps f0..f3, 8 bytes each
        clz             lr,  r3
        adr             r5,  L(ipred_filter\bpc\()_tbl)
        vld1.8          {d27-d29}, [r6, :64]      // taps f4..f6
        sub             lr,  lr,  #26         // width 32/16/8/4 gives index 0/1/2/3
        ldr             lr,  [r5, lr, lsl #2]
        vmovl.s8        q8,  d20              // f0
        vmovl.s8        q9,  d21              // f1
        add             r5,  r5,  lr          // absolute address of the width handler
        vmovl.s8        q10, d22              // f2
        vmovl.s8        q11, d23              // f3
        add             r6,  r0,  r1          // lower row pointer
        lsl             r1,  r1,  #1          // step two rows at a time
        vmovl.s8        q12, d27              // f4
        vmovl.s8        q13, d28              // f5
        vmovl.s8        q14, d29              // f6
        mov             r7,  #-4              // left pointer moves up by two pixels
        vdup.16         q15, r8               // bitdepth_max in all lanes
        add             r8,  r2,  #2          // top[0] follows topleft
        sub             r2,  r2,  #4          // so lanes 0/1/2 load left[1]/left[0]/topleft
.if \bpc == 10
        vmov.i16        q7,  #0
.endif
        bx              r5

        .align 2
        // Offsets relative to the table, widest handler first.
L(ipred_filter\bpc\()_tbl):
        .word 320f - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB
        .word 80f  - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB

        // Width 4: one 4x2 block per iteration.
40:
        vld1.16         {d0}, [r8]            // top[0..3]
4:
        vld1.16         {d2}, [r2], r7        // d2 = left[1], left[0], topleft, (unused)
.if \bpc == 10
        vmul.i16        q2,  q9,  d0[0]       // f1 * top[0]
        vmla.i16        q2,  q10, d0[1]       // f2 * top[1]
        vmla.i16        q2,  q11, d0[2]       // f3 * top[2]
        vmla.i16        q2,  q12, d0[3]       // f4 * top[3]
        vmla.i16        q2,  q8,  d2[2]       // f0 * topleft
        vmla.i16        q2,  q13, d2[1]       // f5 * left[0]
        vmla.i16        q2,  q14, d2[0]       // f6 * left[1]
        vrshr.s16       q2,  q2,  #4          // rounded >> 4
        vmax.s16        q2,  q2,  q7          // clamp below at zero
.else
        // Upper row of the block (low halves of the tap vectors).
        vmull.s16       q2,  d18, d0[0]       // f1 * top[0]
        vmlal.s16       q2,  d20, d0[1]       // f2 * top[1]
        vmlal.s16       q2,  d22, d0[2]       // f3 * top[2]
        vmlal.s16       q2,  d24, d0[3]       // f4 * top[3]
        vmlal.s16       q2,  d16, d2[2]       // f0 * topleft
        vmlal.s16       q2,  d26, d2[1]       // f5 * left[0]
        vmlal.s16       q2,  d28, d2[0]       // f6 * left[1]
        // Lower row of the block (high halves of the tap vectors).
        vmull.s16       q3,  d19, d0[0]       // f1 * top[0]
        vmlal.s16       q3,  d21, d0[1]       // f2 * top[1]
        vmlal.s16       q3,  d23, d0[2]       // f3 * top[2]
        vmlal.s16       q3,  d25, d0[3]       // f4 * top[3]
        vmlal.s16       q3,  d17, d2[2]       // f0 * topleft
        vmlal.s16       q3,  d27, d2[1]       // f5 * left[0]
        vmlal.s16       q3,  d29, d2[0]       // f6 * left[1]
        vqrshrun.s32    d4,  q2,  #4          // rounded >> 4, saturate to unsigned
        vqrshrun.s32    d5,  q3,  #4
.endif
        vmin.s16        q2,  q2,  q15         // clamp above at bitdepth_max
        subs            r4,  r4,  #2
        vst1.16         {d4}, [r0, :64], r1   // upper row
        vst1.16         {d5}, [r6, :64], r1   // lower row
        vmov            d0,  d5               // lower row is the top of the next block
        bgt             4b
        vpop            {q4-q7}
        pop             {r4-r8, pc}

        // Width 8: two 4x2 blocks per iteration; the second block takes its
        // left neighbours from the last column of the first.
80:
        vld1.16         {q0}, [r8]            // top[0..7]
8:
        vld1.16         {d2}, [r2], r7        // d2 = left[1], left[0], topleft, (unused)
.if \bpc == 10
        vmul.i16        q2,  q9,  d0[0]       // f1 * top[0]
        vmla.i16        q2,  q10, d0[1]       // f2 * top[1]
        vmla.i16        q2,  q11, d0[2]       // f3 * top[2]
        vmla.i16        q2,  q12, d0[3]       // f4 * top[3]
        vmla.i16        q2,  q8,  d2[2]       // f0 * topleft
        vmla.i16        q2,  q13, d2[1]       // f5 * left[0]
        vmla.i16        q2,  q14, d2[0]       // f6 * left[1]
        vmul.i16        q3,  q9,  d1[0]       // second block: f1 * top[4]
        vmla.i16        q3,  q10, d1[1]       // f2 * top[5]
        vmla.i16        q3,  q11, d1[2]       // f3 * top[6]
        vrshr.s16       q2,  q2,  #4          // finish the first block
        vmax.s16        q2,  q2,  q7
        vmin.s16        q2,  q2,  q15
        vmla.i16        q3,  q12, d1[3]       // f4 * top[7]
        vmla.i16        q3,  q8,  d0[3]       // f0 * top[3] (acts as topleft)
        vmla.i16        q3,  q13, d4[3]       // f5 * first block, upper row, last pixel
        vmla.i16        q3,  q14, d5[3]       // f6 * first block, lower row, last pixel
        vrshr.s16       q3,  q3,  #4
        vmax.s16        q3,  q3,  q7
.else
        // First block, upper row.
        vmull.s16       q2,  d18, d0[0]       // f1 * top[0]
        vmlal.s16       q2,  d20, d0[1]       // f2 * top[1]
        vmlal.s16       q2,  d22, d0[2]       // f3 * top[2]
        vmlal.s16       q2,  d24, d0[3]       // f4 * top[3]
        vmlal.s16       q2,  d16, d2[2]       // f0 * topleft
        vmlal.s16       q2,  d26, d2[1]       // f5 * left[0]
        vmlal.s16       q2,  d28, d2[0]       // f6 * left[1]
        // First block, lower row.
        vmull.s16       q3,  d19, d0[0]       // f1 * top[0]
        vmlal.s16       q3,  d21, d0[1]       // f2 * top[1]
        vmlal.s16       q3,  d23, d0[2]       // f3 * top[2]
        vmlal.s16       q3,  d25, d0[3]       // f4 * top[3]
        vmlal.s16       q3,  d17, d2[2]       // f0 * topleft
        vmlal.s16       q3,  d27, d2[1]       // f5 * left[0]
        vmlal.s16       q3,  d29, d2[0]       // f6 * left[1]
        vqrshrun.s32    d4,  q2,  #4
        // Second block, upper row; started before the first block is narrowed.
        vmull.s16       q4,  d18, d1[0]       // f1 * top[4]
        vmlal.s16       q4,  d20, d1[1]       // f2 * top[5]
        vmlal.s16       q4,  d22, d1[2]       // f3 * top[6]
        vqrshrun.s32    d5,  q3,  #4
        vmin.s16        q2,  q2,  q15         // first block is final from here on
        vmlal.s16       q4,  d24, d1[3]       // f4 * top[7]
        vmlal.s16       q4,  d16, d0[3]       // f0 * top[3] (acts as topleft)
        vmlal.s16       q4,  d26, d4[3]       // f5 * first block, upper row, last pixel
        vmlal.s16       q4,  d28, d5[3]       // f6 * first block, lower row, last pixel
        // Second block, lower row.
        vmull.s16       q5,  d19, d1[0]       // f1 * top[4]
        vmlal.s16       q5,  d21, d1[1]       // f2 * top[5]
        vmlal.s16       q5,  d23, d1[2]       // f3 * top[6]
        vmlal.s16       q5,  d25, d1[3]       // f4 * top[7]
        vmlal.s16       q5,  d17, d0[3]       // f0 * top[3]
        vmlal.s16       q5,  d27, d4[3]       // f5 * first block, upper row, last pixel
        vmlal.s16       q5,  d29, d5[3]       // f6 * first block, lower row, last pixel
        vqrshrun.s32    d6,  q4,  #4
        vqrshrun.s32    d7,  q5,  #4
.endif
        vmin.s16        q3,  q3,  q15
        vswp            d5,  d6               // q2 = whole upper row, q3 = whole lower row
        subs            r4,  r4,  #2
        vst1.16         {q2}, [r0, :128], r1
        vmov            q0,  q3               // lower row is the top of the next row pair
        vst1.16         {q3}, [r6, :128], r1
        bgt             8b
        vpop            {q4-q7}
        pop             {r4-r8, pc}

        // Width 16 and 32: four chained 4x2 blocks (16 columns) per pass of
        // loop 2, repeated across the row pair; loop 1 steps to the next pair.
160:
320:
        sub             r1,  r1,  r3, lsl #1  // 2 * stride minus the bytes written per row
        mov             lr,  r3               // keep the width for the column counter

1:
        vld1.16         {d0}, [r2], r7        // d0 = left[1], left[0], topleft, (unused)
2:
        vld1.16         {q1, q2}, [r8]!       // top[0..15] of this 16-column chunk
.if \bpc == 10
        // Block 0 -> q3.
        vmul.i16        q3,  q8,  d0[2]       // f0 * topleft
        vmla.i16        q3,  q13, d0[1]       // f5 * left[0]
        vmla.i16        q3,  q14, d0[0]       // f6 * left[1]
        vmla.i16        q3,  q9,  d2[0]       // f1 * top[0]
        vmla.i16        q3,  q10, d2[1]       // f2 * top[1]
        vmla.i16        q3,  q11, d2[2]       // f3 * top[2]
        vmla.i16        q3,  q12, d2[3]       // f4 * top[3]

        // Block 1 -> q4, interleaved with finishing block 0.
        vmul.i16        q4,  q9,  d3[0]       // f1 * top[4]
        vmla.i16        q4,  q10, d3[1]       // f2 * top[5]
        vmla.i16        q4,  q11, d3[2]       // f3 * top[6]
        vrshr.s16       q3,  q3,  #4
        vmax.s16        q3,  q3,  q7
        vmin.s16        q3,  q3,  q15
        vmla.i16        q4,  q12, d3[3]       // f4 * top[7]
        vmla.i16        q4,  q8,  d2[3]       // f0 * top[3]
        vmla.i16        q4,  q13, d6[3]       // f5 * block 0, upper row, last pixel
        vmla.i16        q4,  q14, d7[3]       // f6 * block 0, lower row, last pixel

        // Block 2 -> q5, interleaved with finishing block 1.
        vmul.i16        q5,  q9,  d4[0]       // f1 * top[8]
        vmla.i16        q5,  q10, d4[1]       // f2 * top[9]
        vmla.i16        q5,  q11, d4[2]       // f3 * top[10]
        vrshr.s16       q4,  q4,  #4
        vmax.s16        q4,  q4,  q7
        vmin.s16        q4,  q4,  q15
        vmov            q0,  q4               // q0 = block 1, source of the next left pixels
        vmla.i16        q5,  q12, d4[3]       // f4 * top[11]
        vmla.i16        q5,  q8,  d3[3]       // f0 * top[7]
        vmla.i16        q5,  q13, d0[3]       // f5 * block 1, upper row, last pixel
        vmla.i16        q5,  q14, d1[3]       // f6 * block 1, lower row, last pixel

        // Block 3 -> q6, interleaved with finishing block 2.
        vmul.i16        q6,  q9,  d5[0]       // f1 * top[12]
        vmla.i16        q6,  q10, d5[1]       // f2 * top[13]
        vmla.i16        q6,  q11, d5[2]       // f3 * top[14]
        vrshr.s16       q5,  q5,  #4
        vmax.s16        q5,  q5,  q7
        vmin.s16        q5,  q5,  q15
        vmov            q0,  q5               // q0 = block 2
        vmov.u16        r12, d5[3]            // top[15]: topleft of the next chunk
        vmla.i16        q6,  q12, d5[3]       // f4 * top[15]
        vmla.i16        q6,  q8,  d4[3]       // f0 * top[11]
        vmla.i16        q6,  q13, d0[3]       // f5 * block 2, upper row, last pixel
        vmla.i16        q6,  q14, d1[3]       // f6 * block 2, lower row, last pixel
        vmov.16         d0[2], r12            // install the next topleft
        subs            r3,  r3,  #16         // flags are consumed by the ble below
        vrshr.s16       q6,  q6,  #4
.else
        // Block 0, upper row -> q3, lower row -> q4 (32-bit sums).
        vmull.s16       q3,  d16, d0[2]       // f0 * topleft
        vmlal.s16       q3,  d26, d0[1]       // f5 * left[0]
        vmlal.s16       q3,  d28, d0[0]       // f6 * left[1]
        vmlal.s16       q3,  d18, d2[0]       // f1 * top[0]
        vmlal.s16       q3,  d20, d2[1]       // f2 * top[1]
        vmlal.s16       q3,  d22, d2[2]       // f3 * top[2]
        vmlal.s16       q3,  d24, d2[3]       // f4 * top[3]
        vmull.s16       q4,  d17, d0[2]       // f0 * topleft
        vmlal.s16       q4,  d27, d0[1]       // f5 * left[0]
        vmlal.s16       q4,  d29, d0[0]       // f6 * left[1]
        vmlal.s16       q4,  d19, d2[0]       // f1 * top[0]
        vmlal.s16       q4,  d21, d2[1]       // f2 * top[1]
        vmlal.s16       q4,  d23, d2[2]       // f3 * top[2]
        vmlal.s16       q4,  d25, d2[3]       // f4 * top[3]
        vqrshrun.s32    d6,  q3,  #4
        // Block 1, upper row -> q5, lower row -> q6.
        vmull.s16       q5,  d18, d3[0]       // f1 * top[4]
        vmlal.s16       q5,  d20, d3[1]       // f2 * top[5]
        vqrshrun.s32    d7,  q4,  #4
        vmin.s16        q3,  q3,  q15         // block 0 final in q3
        vmlal.s16       q5,  d22, d3[2]       // f3 * top[6]
        vmlal.s16       q5,  d24, d3[3]       // f4 * top[7]
        vmlal.s16       q5,  d16, d2[3]       // f0 * top[3]
        vmlal.s16       q5,  d26, d6[3]       // f5 * block 0, upper row, last pixel
        vmlal.s16       q5,  d28, d7[3]       // f6 * block 0, lower row, last pixel
        vmull.s16       q6,  d19, d3[0]       // f1 * top[4]
        vmlal.s16       q6,  d21, d3[1]       // f2 * top[5]
        vmlal.s16       q6,  d23, d3[2]       // f3 * top[6]
        vmlal.s16       q6,  d25, d3[3]       // f4 * top[7]
        vmlal.s16       q6,  d17, d2[3]       // f0 * top[3]
        vmlal.s16       q6,  d27, d6[3]       // f5 * block 0, upper row, last pixel
        vmlal.s16       q6,  d29, d7[3]       // f6 * block 0, lower row, last pixel
        vqrshrun.s32    d8,  q5,  #4
        // Block 2, upper row -> q7, lower row -> q6.
        vmull.s16       q7,  d18, d4[0]       // f1 * top[8]
        vmlal.s16       q7,  d20, d4[1]       // f2 * top[9]
        vmlal.s16       q7,  d22, d4[2]       // f3 * top[10]
        vqrshrun.s32    d9,  q6,  #4
        vmin.s16        q0,  q4,  q15         // clamped block 1 in q0 for the left pixels
        vmlal.s16       q7,  d24, d4[3]       // f4 * top[11]
        vmlal.s16       q7,  d16, d3[3]       // f0 * top[7]
        vmlal.s16       q7,  d26, d0[3]       // f5 * block 1, upper row, last pixel
        vmlal.s16       q7,  d28, d1[3]       // f6 * block 1, lower row, last pixel
        vmin.s16        q4,  q4,  q15         // block 1 final in q4
        vmull.s16       q6,  d19, d4[0]       // f1 * top[8]
        vmlal.s16       q6,  d21, d4[1]       // f2 * top[9]
        vmlal.s16       q6,  d23, d4[2]       // f3 * top[10]
        vmlal.s16       q6,  d25, d4[3]       // f4 * top[11]
        vmlal.s16       q6,  d17, d3[3]       // f0 * top[7]
        vmlal.s16       q6,  d27, d0[3]       // f5 * block 1, upper row, last pixel
        vmlal.s16       q6,  d29, d1[3]       // f6 * block 1, lower row, last pixel
        vqrshrun.s32    d10, q7,  #4
        // Block 3, upper row -> q1, lower row -> q7.
        vmull.s16       q1,  d18, d5[0]       // f1 * top[12]
        vmlal.s16       q1,  d20, d5[1]       // f2 * top[13]
        vmlal.s16       q1,  d22, d5[2]       // f3 * top[14]
        vqrshrun.s32    d11, q6,  #4
        vmin.s16        q0,  q5,  q15         // clamped block 2 in q0 for the left pixels
        vmlal.s16       q1,  d24, d5[3]       // f4 * top[15]
        vmlal.s16       q1,  d16, d4[3]       // f0 * top[11]
        vmlal.s16       q1,  d26, d0[3]       // f5 * block 2, upper row, last pixel
        vmlal.s16       q1,  d28, d1[3]       // f6 * block 2, lower row, last pixel
        vmin.s16        q5,  q5,  q15         // block 2 final in q5
        vmov.u16        r12, d5[3]            // top[15]: topleft of the next chunk
        vmull.s16       q7,  d19, d5[0]       // f1 * top[12]
        vmlal.s16       q7,  d21, d5[1]       // f2 * top[13]
        vmlal.s16       q7,  d23, d5[2]       // f3 * top[14]
        vmlal.s16       q7,  d25, d5[3]       // f4 * top[15]
        vmlal.s16       q7,  d17, d4[3]       // f0 * top[11]
        vmlal.s16       q7,  d27, d0[3]       // f5 * block 2, upper row, last pixel
        vmlal.s16       q7,  d29, d1[3]       // f6 * block 2, lower row, last pixel
        vmov.16         d0[2], r12            // install the next topleft
        vqrshrun.s32    d12, q1,  #4
        subs            r3,  r3,  #16         // flags are consumed by the ble below
        vqrshrun.s32    d13, q7,  #4
.endif
        // q3..q6 hold blocks 0..3, each as upper row then lower row. The three
        // swaps regroup them into q3:q4 = upper row and q5:q6 = lower row.
        vswp            q4,  q5
.if \bpc == 10
        vmax.s16        q6,  q6,  q7          // block 3 lower clamp
.endif
        vswp            d7,  d10
        vmin.s16        q6,  q6,  q15         // block 3 upper clamp

        vswp            d9,  d12

        vst1.16         {q3, q4}, [r0, :128]!
        vst1.16         {q5, q6}, [r6, :128]!
        ble             8f                    // row pair complete
        vmov.u16        r12, d13[3]           // lower row, last pixel just written
        vmov.16         d0[0], r12            //   becomes left[1] of the next chunk
        vmov.u16        r12, d9[3]            // upper row, last pixel just written
        vmov.16         d0[1], r12            //   becomes left[0] of the next chunk
        b               2b
8:
        subs            r4,  r4,  #2

        ble             9f
        sub             r8,  r6,  lr, lsl #1  // start of the lower row just written: next top
        add             r0,  r0,  r1
        add             r6,  r6,  r1
        mov             r3,  lr               // reload the column counter
        b               1b
9:
        vpop            {q4-q7}
        pop             {r4-r8, pc}
endfunc
.endm

filter_fn 10
filter_fn 12

// Public entry point. Saves the callee-saved registers on behalf of the
// generated variants, loads bitdepth_max (fifth stack argument, at 104 bytes
// after the 88 bytes pushed here) into r8 and selects the variant:
// bitdepth_max <= 0x3ff runs the 10 bpc code, anything larger the 12 bpc code.
function ipred_filter_16bpc_neon, export=1
        push            {r4-r8, lr}
        vpush           {q4-q7}
        movw            r12, #0x3ff
        ldr             r8,  [sp, #104]       // bitdepth_max
        cmp             r8,  r12
        bgt             ipred_filter_12bpc_neon
        b               ipred_filter_10bpc_neon
endfunc

// void pal_pred_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                          const pixel *const pal, const uint8_t *idx,
//                          const int w, const int h);
//
// Writes a w x h block of 16-bit pixels to dst, looking each pixel up in
// the 16 bytes loaded from pal. Every idx byte carries two indices: the
// first is taken as (byte & 7), the second as (byte >> 4).
//
// Register use:
//   r0  = dst            r1 = stride (rescaled per width case below)
//   r2  = pal, then reused as dst + stride (second row pointer)
//   r3  = idx            r4 = w, r5 = h (loaded from the stack)
//   q14 = vtbl table (the 16 bytes at pal)
//   q13 = 0x07 in every byte, q15 = 0x0100 in every halfword
//
// Dispatch goes through L(pal_pred_tbl), indexed by clz(w) - 25; the table
// entries are ordered for w = 64, 32, 16, 8, 4.
function pal_pred_16bpc_neon, export=1
        push            {r4-r5, lr}
        ldr             r4,  [sp, #12]
        ldr             r5,  [sp, #16]
        vld1.16         {q14}, [r2, :128]
        clz             lr,  r4
        adr             r12, L(pal_pred_tbl)
        sub             lr,  lr,  #25
        vmov.i8         q13, #7
        ldr             lr,  [r12, lr, lsl #2]
        vmov.i16        q15, #0x100
        add             r12, r12, lr
        add             r2,  r0,  r1
        bx              r12

        .align 2
L(pal_pred_tbl):
        .word 640f - L(pal_pred_tbl) + CONFIG_THUMB
        .word 320f - L(pal_pred_tbl) + CONFIG_THUMB
        .word 160f - L(pal_pred_tbl) + CONFIG_THUMB
        .word 80f - L(pal_pred_tbl) + CONFIG_THUMB
        .word 40f - L(pal_pred_tbl) + CONFIG_THUMB

40:
        // r1 = 2 * stride: r0 walks rows 0, 2, ... and r2 rows 1, 3, ...
        lsl             r1,  r1,  #1
4:
        // 8 idx bytes -> 16 pixels -> 4 rows of 4 pixels per iteration.
        vld1.8          {d2}, [r3, :64]!
        subs            r5,  r5,  #4
        vshr.u8         d3,  d2,  #4
        vand.u8         d2,  d2,  d26
        vzip.8          d2,  d3
        // Restructure q1 from a, b, c, ... into 2*a, 2*a+1, 2*b, 2*b+1, 2*c, 2*c+1, ...
        vadd.i8         q0,  q1,  q1
        vadd.i8         q1,  q1,  q1
        vzip.8          q0,  q1
        vadd.i16        q0,  q0,  q15
        vadd.i16        q1,  q1,  q15
        vtbl.8          d0,  {q14}, d0
        vtbl.8          d1,  {q14}, d1
        vst1.16         {d0}, [r0, :64], r1
        vtbl.8          d2,  {q14}, d2
        vst1.16         {d1}, [r2, :64], r1
        vtbl.8          d3,  {q14}, d3
        vst1.16         {d2}, [r0, :64], r1
        vst1.16         {d3}, [r2, :64], r1
        bgt             4b
        pop             {r4-r5, pc}
80:
        // r1 = 2 * stride, rows alternate between r0 and r2 as above.
        lsl             r1,  r1,  #1
8:
        // 16 idx bytes -> 32 pixels -> 4 rows of 8 pixels per iteration.
        vld1.8          {q1}, [r3, :64]!
        subs            r5,  r5,  #4
        vshr.u8         q2,  q1,  #4
        vand.u8         q1,  q1,  q13
        vzip.8          q1,  q2
        // Prefer doing the adds twice, instead of chaining a vmov after
        // the add.
        vadd.i8         q0,  q1,  q1
        vadd.i8         q1,  q1,  q1
        vadd.i8         q3,  q2,  q2
        vadd.i8         q2,  q2,  q2
        vzip.8          q0,  q1
        vzip.8          q2,  q3
        vadd.i16        q0,  q0,  q15
        vadd.i16        q1,  q1,  q15
        vtbl.8          d0,  {q14}, d0
        vadd.i16        q2,  q2,  q15
        vtbl.8          d1,  {q14}, d1
        vadd.i16        q3,  q3,  q15
        vtbl.8          d2,  {q14}, d2
        vtbl.8          d3,  {q14}, d3
        vtbl.8          d4,  {q14}, d4
        vtbl.8          d5,  {q14}, d5
        vst1.16         {q0}, [r0, :128], r1
        vtbl.8          d6,  {q14}, d6
        vst1.16         {q1}, [r2, :128], r1
        vtbl.8          d7,  {q14}, d7
        vst1.16         {q2}, [r0, :128], r1
        vst1.16         {q3}, [r2, :128], r1
        bgt             8b
        pop             {r4-r5, pc}
160:
        // r1 = 2 * stride, rows alternate between r0 and r2 as above.
        lsl             r1,  r1,  #1
16:
        // 32 idx bytes -> 64 pixels -> 4 rows of 16 pixels per iteration.
        vld1.8          {q10, q11}, [r3, :64]!
        subs            r5,  r5,  #4
        vand.u8         q2,  q10, q13
        vshr.u8         q3,  q10, #4
        vand.u8         q10, q11, q13
        vshr.u8         q11, q11, #4
        vzip.8          q2,  q3
        vzip.8          q10, q11
        vadd.i8         q0,  q2,  q2
        vadd.i8         q1,  q2,  q2
        vadd.i8         q2,  q3,  q3
        vadd.i8         q3,  q3,  q3
        vadd.i8         q8,  q10, q10
        vadd.i8         q9,  q10, q10
        vadd.i8         q10, q11, q11
        vzip.8          q0,  q1
        vadd.i8         q11, q11, q11
        vzip.8          q2,  q3
        vzip.8          q8,  q9
        vadd.i16        q0,  q0,  q15
        vzip.8          q10, q11
        vadd.i16        q1,  q1,  q15
        vadd.i16        q2,  q2,  q15
        vadd.i16        q3,  q3,  q15
        vadd.i16        q8,  q8,  q15
        vadd.i16        q9,  q9,  q15
        vadd.i16        q10, q10, q15
        vtbl.8          d0,  {q14}, d0
        vadd.i16        q11, q11, q15
        vtbl.8          d1,  {q14}, d1
        vtbl.8          d2,  {q14}, d2
        vtbl.8          d3,  {q14}, d3
        vtbl.8          d4,  {q14}, d4
        vtbl.8          d5,  {q14}, d5
        vtbl.8          d6,  {q14}, d6
        vtbl.8          d7,  {q14}, d7
        vtbl.8          d16, {q14}, d16
        vtbl.8          d17, {q14}, d17
        vtbl.8          d18, {q14}, d18
        vst1.16         {q0, q1}, [r0, :128], r1
        vtbl.8          d19, {q14}, d19
        vtbl.8          d20, {q14}, d20
        vst1.16         {q2, q3}, [r2, :128], r1
        vtbl.8          d21, {q14}, d21
        vtbl.8          d22, {q14}, d22
        vst1.16         {q8, q9}, [r0, :128], r1
        vtbl.8          d23, {q14}, d23
        vst1.16         {q10, q11}, [r2, :128], r1
        bgt             16b
        pop             {r4-r5, pc}
320:
        // r1 = 2 * stride - 32: the first store of each row post-increments
        // its pointer by 32 bytes, the second store adds r1.
        lsl             r1,  r1,  #1
        sub             r1,  r1,  #32
32:
        // 32 idx bytes -> 64 pixels -> 2 rows of 32 pixels per iteration.
        vld1.8          {q10, q11}, [r3, :64]!
        subs            r5,  r5,  #2
        vand.u8         q2,  q10, q13
        vshr.u8         q3,  q10, #4
        vand.u8         q10, q11, q13
        vshr.u8         q11, q11, #4
        vzip.8          q2,  q3
        vzip.8          q10, q11
        vadd.i8         q0,  q2,  q2
        vadd.i8         q1,  q2,  q2
        vadd.i8         q2,  q3,  q3
        vadd.i8         q3,  q3,  q3
        vadd.i8         q8,  q10, q10
        vadd.i8         q9,  q10, q10
        vadd.i8         q10, q11, q11
        vzip.8          q0,  q1
        vadd.i8         q11, q11, q11
        vzip.8          q2,  q3
        vzip.8          q8,  q9
        vadd.i16        q0,  q0,  q15
        vzip.8          q10, q11
        vadd.i16        q1,  q1,  q15
        vadd.i16        q2,  q2,  q15
        vadd.i16        q3,  q3,  q15
        vadd.i16        q8,  q8,  q15
        vadd.i16        q9,  q9,  q15
        vadd.i16        q10, q10, q15
        vtbl.8          d0,  {q14}, d0
        vadd.i16        q11, q11, q15
        vtbl.8          d1,  {q14}, d1
        vtbl.8          d2,  {q14}, d2
        vtbl.8          d3,  {q14}, d3
        vtbl.8          d4,  {q14}, d4
        vtbl.8          d5,  {q14}, d5
        vtbl.8          d6,  {q14}, d6
        vtbl.8          d7,  {q14}, d7
        vtbl.8          d16, {q14}, d16
        vtbl.8          d17, {q14}, d17
        vtbl.8          d18, {q14}, d18
        vst1.16         {q0, q1}, [r0, :128]!
        vtbl.8          d19, {q14}, d19
        vtbl.8          d20, {q14}, d20
        vst1.16         {q2, q3}, [r0, :128], r1
        vtbl.8          d21, {q14}, d21
        vtbl.8          d22, {q14}, d22
        vst1.16         {q8, q9}, [r2, :128]!
        vtbl.8          d23, {q14}, d23
        vst1.16         {q10, q11}, [r2, :128], r1
        bgt             32b
        pop             {r4-r5, pc}
640:
        // r1 = stride - 96: three post-incremented 32-byte stores precede
        // the final store of each row. Only r0 is used in this case.
        sub             r1,  r1,  #96
64:
        // 32 idx bytes -> 64 pixels -> 1 row of 64 pixels per iteration.
        vld1.8          {q10, q11}, [r3, :64]!
        subs            r5,  r5,  #1
        vand.u8         q2,  q10, q13
        vshr.u8         q3,  q10, #4
        vand.u8         q10, q11, q13
        vshr.u8         q11, q11, #4
        vzip.8          q2,  q3
        vzip.8          q10, q11
        vadd.i8         q0,  q2,  q2
        vadd.i8         q1,  q2,  q2
        vadd.i8         q2,  q3,  q3
        vadd.i8         q3,  q3,  q3
        vadd.i8         q8,  q10, q10
        vadd.i8         q9,  q10, q10
        vadd.i8         q10, q11, q11
        vzip.8          q0,  q1
        vadd.i8         q11, q11, q11
        vzip.8          q2,  q3
        vzip.8          q8,  q9
        vadd.i16        q0,  q0,  q15
        vzip.8          q10, q11
        vadd.i16        q1,  q1,  q15
        vadd.i16        q2,  q2,  q15
        vadd.i16        q3,  q3,  q15
        vadd.i16        q8,  q8,  q15
        vadd.i16        q9,  q9,  q15
        vadd.i16        q10, q10, q15
        vtbl.8          d0,  {q14}, d0
        vadd.i16        q11, q11, q15
        vtbl.8          d1,  {q14}, d1
        vtbl.8          d2,  {q14}, d2
        vtbl.8          d3,  {q14}, d3
        vtbl.8          d4,  {q14}, d4
        vtbl.8          d5,  {q14}, d5
        vtbl.8          d6,  {q14}, d6
        vtbl.8          d7,  {q14}, d7
        vtbl.8          d16, {q14}, d16
        vtbl.8          d17, {q14}, d17
        vtbl.8          d18, {q14}, d18
        vst1.16         {q0, q1}, [r0, :128]!
        vtbl.8          d19, {q14}, d19
        vtbl.8          d20, {q14}, d20
        vst1.16         {q2, q3}, [r0, :128]!
        vtbl.8          d21, {q14}, d21
        vtbl.8          d22, {q14}, d22
        vst1.16         {q8, q9}, [r0, :128]!
        vtbl.8          d23, {q14}, d23
        vst1.16         {q10, q11}, [r0, :128], r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc

// void ipred_cfl_128_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height,
//                               const int16_t *ac, const int alpha,
//                               const int bitdepth_max);
//
// Uses dc = (bitdepth_max + 1) >> 1 and then runs one of the
// L(ipred_cfl_splat_w*) tails below, which store
//   clamp(dc + ((ac * alpha + sign + 32) >> 6), 0, bitdepth_max)
// for every pixel. ipred_cfl_top and ipred_cfl_left jump into the same
// tails with their own dc, so the register contract on entry to a tail is:
//   r0  = dst (first row)    r6 = dst + stride (second row)
//   r1  = 2 * stride         r3 = width, r4 = height, r5 = ac
//   q0  = dc, q1 = alpha, q14 = 0, q15 = bitdepth_max
// The tail is picked from the table by clz(width) - 26; the entries are
// ordered for width = 32, 16, 8, 4, with 32 and 16 sharing the w16 tail.
function ipred_cfl_128_16bpc_neon, export=1
        push            {r4-r8, lr}
        ldrd            r4,  r5,  [sp, #24]
        ldrd            r6,  r7,  [sp, #32]
        clz             lr,  r3
        vdup.16         q15, r7              // bitdepth_max
        adr             r12, L(ipred_cfl_128_tbl)
        sub             lr,  lr,  #26
        ldr             lr,  [r12, lr, lsl #2]
        vrshr.u16       q0,  q15, #1         // dc = (bitdepth_max + 1) >> 1
        vdup.16         q1,  r6              // alpha
        add             r12, r12, lr
        add             r6,  r0,  r1
        lsl             r1,  r1,  #1
        vmov.i16        q14, #0
        bx              r12

        .align 2
L(ipred_cfl_128_tbl):
L(ipred_cfl_splat_tbl):
        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_splat_w8) - L(ipred_cfl_128_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_splat_w4) - L(ipred_cfl_128_tbl) + CONFIG_THUMB

L(ipred_cfl_splat_w4):
        // 16 ac values -> 4 rows of 4 pixels per iteration.
        vld1.16         {q8, q9}, [r5, :128]!
        vmull.s16       q2,  d16, d2         // diff = ac * alpha
        vmull.s16       q3,  d17, d3
        vmull.s16       q8,  d18, d2
        vmull.s16       q9,  d19, d3
        vshr.s32        q10, q2,  #31        // sign = diff >> 31 (-1 if diff < 0, else 0)
        vshr.s32        q11, q3,  #31
        vshr.s32        q12, q8,  #31
        vshr.s32        q13, q9,  #31
        vadd.i32        q2,  q2,  q10        // diff + sign
        vadd.i32        q3,  q3,  q11
        vadd.i32        q8,  q8,  q12
        vadd.i32        q9,  q9,  q13
        vrshrn.i32      d4,  q2,  #6         // (diff + sign + 32) >> 6 = apply_sign()
        vrshrn.i32      d5,  q3,  #6
        vrshrn.i32      d6,  q8,  #6
        vrshrn.i32      d7,  q9,  #6
        vadd.i16        q2,  q2,  q0         // dc + apply_sign()
        vadd.i16        q3,  q3,  q0
        vmax.s16        q2,  q2,  q14
        vmax.s16        q3,  q3,  q14
        vmin.s16        q2,  q2,  q15
        vmin.s16        q3,  q3,  q15
        vst1.16         {d4}, [r0, :64], r1
        vst1.16         {d5}, [r6, :64], r1
        subs            r4,  r4,  #4
        vst1.16         {d6}, [r0, :64], r1
        vst1.16         {d7}, [r6, :64], r1
        bgt             L(ipred_cfl_splat_w4)
        pop             {r4-r8, pc}
L(ipred_cfl_splat_w8):
        // 16 ac values -> 2 rows of 8 pixels per iteration.
        vld1.16         {q8, q9}, [r5, :128]!
        subs            r4,  r4,  #2
        vmull.s16       q2,  d16, d2         // diff = ac * alpha
        vmull.s16       q3,  d17, d3
        vmull.s16       q8,  d18, d2
        vmull.s16       q9,  d19, d3
        vshr.s32        q10, q2,  #31        // sign = diff >> 31 (-1 if diff < 0, else 0)
        vshr.s32        q11, q3,  #31
        vshr.s32        q12, q8,  #31
        vshr.s32        q13, q9,  #31
        vadd.i32        q2,  q2,  q10        // diff + sign
        vadd.i32        q3,  q3,  q11
        vadd.i32        q8,  q8,  q12
        vadd.i32        q9,  q9,  q13
        vrshrn.i32      d4,  q2,  #6         // (diff + sign + 32) >> 6 = apply_sign()
        vrshrn.i32      d5,  q3,  #6
        vrshrn.i32      d6,  q8,  #6
        vrshrn.i32      d7,  q9,  #6
        vadd.i16        q2,  q2,  q0         // dc + apply_sign()
        vadd.i16        q3,  q3,  q0
        vmax.s16        q2,  q2,  q14
        vmax.s16        q3,  q3,  q14
        vmin.s16        q2,  q2,  q15
        vmin.s16        q3,  q3,  q15
        vst1.16         {q2}, [r0, :128], r1
        vst1.16         {q3}, [r6, :128], r1
        bgt             L(ipred_cfl_splat_w8)
        pop             {r4-r8, pc}
L(ipred_cfl_splat_w16):
        // Handles two rows at a time in chunks of 16 pixels:
        //   r5 = ac for the first row, r12 = ac + 2 * width bytes (second row)
        //   r1 = 2 * stride - 2 * width (the stores post-increment r0/r6)
        //   lr = saved width, used to reload r3 after each row pair
        vpush           {q4-q7}
        add             r12, r5,  r3,  lsl #1
        sub             r1,  r1,  r3,  lsl #1
        mov             lr,  r3
1:
        vld1.16         {q6, q7}, [r5, :128]!
        vmull.s16       q2,  d12, d2         // diff = ac * alpha
        vld1.16         {q8, q9}, [r12, :128]!
        vmull.s16       q3,  d13, d3
        vmull.s16       q4,  d14, d2
        vmull.s16       q5,  d15, d3
        vmull.s16       q6,  d16, d2
        vmull.s16       q7,  d17, d3
        vmull.s16       q8,  d18, d2
        vmull.s16       q9,  d19, d3
        vshr.s32        q10, q2,  #31        // sign = diff >> 31 (-1 if diff < 0, else 0)
        vshr.s32        q11, q3,  #31
        vshr.s32        q12, q4,  #31
        vshr.s32        q13, q5,  #31
        vadd.i32        q2,  q2,  q10        // diff + sign
        vshr.s32        q10, q6,  #31
        vadd.i32        q3,  q3,  q11
        vshr.s32        q11, q7,  #31
        vadd.i32        q4,  q4,  q12
        vshr.s32        q12, q8,  #31
        vadd.i32        q5,  q5,  q13
        vshr.s32        q13, q9,  #31
        vadd.i32        q6,  q6,  q10
        vadd.i32        q7,  q7,  q11
        vadd.i32        q8,  q8,  q12
        vadd.i32        q9,  q9,  q13
        vrshrn.i32      d4,  q2,  #6         // (diff + sign + 32) >> 6 = apply_sign()
        vrshrn.i32      d5,  q3,  #6
        vrshrn.i32      d6,  q4,  #6
        vrshrn.i32      d7,  q5,  #6
        vadd.i16        q2,  q2,  q0         // dc + apply_sign()
        vrshrn.i32      d8,  q6,  #6
        vrshrn.i32      d9,  q7,  #6
        vadd.i16        q3,  q3,  q0
        vrshrn.i32      d10, q8,  #6
        vrshrn.i32      d11, q9,  #6
        vadd.i16        q4,  q4,  q0
        vadd.i16        q5,  q5,  q0
        vmax.s16        q2,  q2,  q14
        vmax.s16        q3,  q3,  q14
        vmax.s16        q4,  q4,  q14
        vmax.s16        q5,  q5,  q14
        vmin.s16        q2,  q2,  q15
        vmin.s16        q3,  q3,  q15
        vmin.s16        q4,  q4,  q15
        vmin.s16        q5,  q5,  q15
        subs            r3,  r3,  #16
        vst1.16         {q2, q3}, [r0, :128]!
        vst1.16         {q4, q5}, [r6, :128]!
        bgt             1b
        subs            r4,  r4,  #2
        // Both ac pointers skip the row the other one just consumed.
        add             r5,  r5,  lr,  lsl #1
        add             r12, r12, lr,  lsl #1
        add             r0,  r0,  r1
        add             r6,  r6,  r1
        mov             r3,  lr
        bgt             1b
        vpop            {q4-q7}
        pop             {r4-r8, pc}
endfunc

// void ipred_cfl_top_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height,
//                               const int16_t *ac, const int alpha,
//                               const int bitdepth_max);
//
// Computes dc as the rounded average of the `width` pixels that start one
// pixel (2 bytes) after topleft, then branches into the matching
// L(ipred_cfl_splat_w*) tail of ipred_cfl_128_16bpc_neon with the same
// register setup as that function. The table is indexed by
// clz(width) - 26, with entries ordered for width = 32, 16, 8, 4.
function ipred_cfl_top_16bpc_neon, export=1
        push            {r4-r8, lr}
        ldrd            r4,  r5,  [sp, #24]
        ldrd            r6,  r7,  [sp, #32]
        clz             lr,  r3
        vdup.16         q15, r7              // bitdepth_max
        adr             r12, L(ipred_cfl_top_tbl)
        sub             lr,  lr,  #26
        ldr             lr,  [r12, lr, lsl #2]
        vdup.16         q1,  r6              // alpha
        add             r2,  r2,  #2
        add             r12, r12, lr
        add             r6,  r0,  r1
        lsl             r1,  r1,  #1
        vmov.i16        q14, #0
        bx              r12

        .align 2
L(ipred_cfl_top_tbl):
        .word 32f - L(ipred_cfl_top_tbl) + CONFIG_THUMB
        .word 16f - L(ipred_cfl_top_tbl) + CONFIG_THUMB
        .word 8f - L(ipred_cfl_top_tbl) + CONFIG_THUMB
        .word 4f - L(ipred_cfl_top_tbl) + CONFIG_THUMB

4:
        vld1.16         {d0}, [r2]
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #2
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w4)
8:
        vld1.16         {q0}, [r2]
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #3
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w8)
16:
        vld1.16         {q2, q3}, [r2]
        vadd.i16        q0,  q2,  q3
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #4
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w16)
32:
        vld1.16         {q8, q9}, [r2]!
        vld1.16         {q10, q11}, [r2]
        vadd.i16        q8,  q8,  q9
        vadd.i16        q10, q10, q11
        vadd.i16        q0,  q8,  q10
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        // The last pairwise add widens to 32 bits before the rounding
        // narrowing shift by 5.
        vpaddl.u16      d0,  d0
        vrshrn.i32      d0,  q0,  #5
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w16)
endfunc

// void ipred_cfl_left_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                                const pixel *const topleft,
//                                const int width, const int height,
//                                const int16_t *ac, const int alpha,
//                                const int bitdepth_max);
//
// Computes dc as the rounded average of the `height` pixels that end just
// before topleft (r2 = topleft - 2 * height bytes), then jumps to an
// L(ipred_cfl_splat_w*) tail of ipred_cfl_128_16bpc_neon.
//   r7  = averaging code, from L(ipred_cfl_left_tbl) by clz(height) - 26
//   r12 = splat tail, from L(ipred_cfl_splat_tbl) by clz(width) - 26
function ipred_cfl_left_16bpc_neon, export=1
        push            {r4-r8, lr}
        ldrd            r4,  r5,  [sp, #24]
        ldrd            r6,  r7,  [sp, #32]
        sub             r2,  r2,  r4,  lsl #1
        clz             lr,  r3
        clz             r8,  r4
        vdup.16         q15, r7              // bitdepth_max
        adr             r12, L(ipred_cfl_splat_tbl)
        adr             r7,  L(ipred_cfl_left_tbl)
        sub             lr,  lr,  #26
        sub             r8,  r8,  #26
        ldr             lr,  [r12, lr, lsl #2]
        ldr             r8,  [r7, r8, lsl #2]
        vdup.16         q1,  r6              // alpha
        add             r12, r12, lr
        add             r7,  r7,  r8
        add             r6,  r0,  r1
        lsl             r1,  r1,  #1
        vmov.i16        q14, #0
        bx              r7

        .align 2
L(ipred_cfl_left_tbl):
        .word L(ipred_cfl_left_h32) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_left_h16) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_left_h8) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_left_h4) - L(ipred_cfl_left_tbl) + CONFIG_THUMB

L(ipred_cfl_left_h4):
        vld1.16         {d0}, [r2, :64]
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #2
        vdup.16         q0,  d0[0]
        bx              r12

L(ipred_cfl_left_h8):
        vld1.16         {q0}, [r2, :128]
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #3
        vdup.16         q0,  d0[0]
        bx              r12

L(ipred_cfl_left_h16):
        vld1.16         {q2, q3}, [r2, :128]
        vadd.i16        q0,  q2,  q3
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        vpadd.i16       d0,  d0,  d0
        vrshr.u16       d0,  d0,  #4
        vdup.16         q0,  d0[0]
        bx              r12

L(ipred_cfl_left_h32):
        vld1.16         {q8, q9}, [r2, :128]!
        vld1.16         {q10, q11}, [r2, :128]
        vadd.i16        q8,  q8,  q9
        vadd.i16        q10, q10, q11
        vadd.i16        q0,  q8,  q10
        vadd.i16        d0,  d0,  d1
        vpadd.i16       d0,  d0,  d0
        // The last pairwise add widens to 32 bits before the rounding
        // narrowing shift by 5.
        vpaddl.u16      d0,  d0
        vrshrn.i32      d0,  q0,  #5
        vdup.16         q0,  d0[0]
        bx              r12
endfunc

// void ipred_cfl_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                           const pixel *const topleft,
//                           const int width, const int height,
//                           const int16_t *ac, const int alpha,
//                           const int bitdepth_max);
function ipred_cfl_16bpc_neon, export=1
        push            {r4-r8, lr}
        ldrd            r4,  r5,  [sp, #24]
        ldrd            r6,  r7,  [sp, #32]
        sub             r2,  r2,  r4,  lsl #1
        add             r8,  r3,  r4         // width + height
        vdup.16         q1,  r6              // alpha
        clz             lr,  r3
        clz             r6,  r4
        vdup.32         d16, r8              // width + height
        vdup.16         q15, r7              // bitdepth_max
        adr             r7,  L(ipred_cfl_tbl)
        rbit            r8,  r8              // rbit(width + height)
        sub             lr,  lr,  #22        // 26 leading bits, minus table offset 4
        sub             r6,  r6,  #26
        clz             r8,  r8              // ctz(width + height)
        ldr             lr,  [r7, lr, lsl #2]
        ldr             r6,  [r7, r6, lsl #2]
        neg             r8,  r8              // -ctz(width + height)
        add             r12, r7,  lr
        add             r7,  r7,  r6
        vshr.u32        d16, d16, #1         // (width + height) >> 1
        vdup.32         d17, r8              // -ctz(width + height)
        add             r6,  r0,  r1
        lsl             r1,  r1,  #1
        vmov.i16        q14, #0
        bx              r7

        .align 2
L(ipred_cfl_tbl):
        .word L(ipred_cfl_h32) - L(ipred_cfl_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_h16) - L(ipred_cfl_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_h8) - L(ipred_cfl_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_h4) - L(ipred_cfl_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_w32) - L(ipred_cfl_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_w16) - L(ipred_cfl_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_w8) - L(ipred_cfl_tbl)
+ CONFIG_THUMB 2321*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w4) - L(ipred_cfl_tbl) + CONFIG_THUMB 2322*c0909341SAndroid Build Coastguard Worker 2323*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h4): 2324*c0909341SAndroid Build Coastguard Worker vld1.16 {d0}, [r2, :64]! 2325*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 2326*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 2327*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 2328*c0909341SAndroid Build Coastguard Worker bx r12 2329*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w4): 2330*c0909341SAndroid Build Coastguard Worker vld1.16 {d1}, [r2] 2331*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d16 2332*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d1, d1 2333*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d1, d1 2334*c0909341SAndroid Build Coastguard Worker cmp r4, #4 2335*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d1 2336*c0909341SAndroid Build Coastguard Worker vshl.u32 d0, d0, d17 2337*c0909341SAndroid Build Coastguard Worker beq 1f 2338*c0909341SAndroid Build Coastguard Worker // h = 8/16 2339*c0909341SAndroid Build Coastguard Worker cmp r4, #16 2340*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 2341*c0909341SAndroid Build Coastguard Worker movw r8, #0xAAAB 2342*c0909341SAndroid Build Coastguard Worker it ne 2343*c0909341SAndroid Build Coastguard Worker movne lr, r8 2344*c0909341SAndroid Build Coastguard Worker vdup.32 d18, lr 2345*c0909341SAndroid Build Coastguard Worker vmul.i32 d0, d0, d18 2346*c0909341SAndroid Build Coastguard Worker vshr.u32 d0, d0, #17 2347*c0909341SAndroid Build Coastguard Worker1: 2348*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2349*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w4) 2350*c0909341SAndroid Build Coastguard Worker 2351*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h8): 2352*c0909341SAndroid Build Coastguard Worker 
vld1.16 {q0}, [r2, :128]! 2353*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2354*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 2355*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 2356*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 2357*c0909341SAndroid Build Coastguard Worker bx r12 2358*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w8): 2359*c0909341SAndroid Build Coastguard Worker vld1.16 {q2}, [r2] 2360*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d16 2361*c0909341SAndroid Build Coastguard Worker vadd.i16 d1, d4, d5 2362*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d1, d1 2363*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d1, d1 2364*c0909341SAndroid Build Coastguard Worker cmp r4, #8 2365*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d1 2366*c0909341SAndroid Build Coastguard Worker vshl.u32 d0, d0, d17 2367*c0909341SAndroid Build Coastguard Worker beq 1f 2368*c0909341SAndroid Build Coastguard Worker // h = 4/16/32 2369*c0909341SAndroid Build Coastguard Worker cmp r4, #32 2370*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 2371*c0909341SAndroid Build Coastguard Worker movw r8, #0xAAAB 2372*c0909341SAndroid Build Coastguard Worker it ne 2373*c0909341SAndroid Build Coastguard Worker movne lr, r8 2374*c0909341SAndroid Build Coastguard Worker vdup.32 d18, lr 2375*c0909341SAndroid Build Coastguard Worker vmul.i32 d0, d0, d18 2376*c0909341SAndroid Build Coastguard Worker vshr.u32 d0, d0, #17 2377*c0909341SAndroid Build Coastguard Worker1: 2378*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2379*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w8) 2380*c0909341SAndroid Build Coastguard Worker 2381*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h16): 2382*c0909341SAndroid Build Coastguard Worker vld1.16 {q2, q3}, [r2, :128]! 
2383*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q2, q3 2384*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2385*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 2386*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 2387*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 2388*c0909341SAndroid Build Coastguard Worker bx r12 2389*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w16): 2390*c0909341SAndroid Build Coastguard Worker vld1.16 {q2, q3}, [r2] 2391*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d16 2392*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2393*c0909341SAndroid Build Coastguard Worker vadd.i16 d1, d4, d5 2394*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d1, d1 2395*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d1, d1 2396*c0909341SAndroid Build Coastguard Worker cmp r4, #16 2397*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d1 2398*c0909341SAndroid Build Coastguard Worker vshl.u32 d0, d0, d17 2399*c0909341SAndroid Build Coastguard Worker beq 1f 2400*c0909341SAndroid Build Coastguard Worker // h = 4/8/32/64 2401*c0909341SAndroid Build Coastguard Worker tst r4, #(32+16+8) // 16 added to make a consecutive bitmask 2402*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 2403*c0909341SAndroid Build Coastguard Worker movw r8, #0xAAAB 2404*c0909341SAndroid Build Coastguard Worker it ne 2405*c0909341SAndroid Build Coastguard Worker movne lr, r8 2406*c0909341SAndroid Build Coastguard Worker vdup.32 d18, lr 2407*c0909341SAndroid Build Coastguard Worker vmul.i32 d0, d0, d18 2408*c0909341SAndroid Build Coastguard Worker vshr.u32 d0, d0, #17 2409*c0909341SAndroid Build Coastguard Worker1: 2410*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2411*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w16) 2412*c0909341SAndroid Build Coastguard Worker 2413*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h32): 2414*c0909341SAndroid 
Build Coastguard Worker vld1.16 {q2, q3}, [r2, :128]! 2415*c0909341SAndroid Build Coastguard Worker vld1.16 {q10, q11}, [r2, :128]! 2416*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2417*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q11 2418*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q2, q10 2419*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2420*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d0 2421*c0909341SAndroid Build Coastguard Worker add r2, r2, #2 2422*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d0, d0 2423*c0909341SAndroid Build Coastguard Worker bx r12 2424*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w32): 2425*c0909341SAndroid Build Coastguard Worker vld1.16 {q2, q3}, [r2]! 2426*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d16 2427*c0909341SAndroid Build Coastguard Worker vld1.16 {q10, q11}, [r2]! 2428*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2429*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q11 2430*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q10 2431*c0909341SAndroid Build Coastguard Worker vadd.i16 d1, d4, d5 2432*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d1, d1 2433*c0909341SAndroid Build Coastguard Worker vpaddl.u16 d1, d1 2434*c0909341SAndroid Build Coastguard Worker cmp r4, #32 2435*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d1 2436*c0909341SAndroid Build Coastguard Worker vshl.u32 d0, d0, d17 2437*c0909341SAndroid Build Coastguard Worker beq 1f 2438*c0909341SAndroid Build Coastguard Worker // h = 8/16/64 2439*c0909341SAndroid Build Coastguard Worker cmp r4, #8 2440*c0909341SAndroid Build Coastguard Worker movw lr, #0x6667 2441*c0909341SAndroid Build Coastguard Worker movw r8, #0xAAAB 2442*c0909341SAndroid Build Coastguard Worker it ne 2443*c0909341SAndroid Build Coastguard Worker movne lr, r8 2444*c0909341SAndroid Build Coastguard Worker vdup.32 d18, lr 2445*c0909341SAndroid 
Build Coastguard Worker vmul.i32 d0, d0, d18 2446*c0909341SAndroid Build Coastguard Worker vshr.u32 d0, d0, #17 2447*c0909341SAndroid Build Coastguard Worker1: 2448*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2449*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w16) 2450*c0909341SAndroid Build Coastguard Workerendfunc 2451*c0909341SAndroid Build Coastguard Worker 2452*c0909341SAndroid Build Coastguard Worker// void cfl_ac_420_16bpc_neon(int16_t *const ac, const pixel *const ypx, 2453*c0909341SAndroid Build Coastguard Worker// const ptrdiff_t stride, const int w_pad, 2454*c0909341SAndroid Build Coastguard Worker// const int h_pad, const int cw, const int ch); 2455*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_420_16bpc_neon, export=1 2456*c0909341SAndroid Build Coastguard Worker push {r4-r8,lr} 2457*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #24] 2458*c0909341SAndroid Build Coastguard Worker ldr r6, [sp, #32] 2459*c0909341SAndroid Build Coastguard Worker clz r8, r5 2460*c0909341SAndroid Build Coastguard Worker lsl r4, r4, #2 2461*c0909341SAndroid Build Coastguard Worker adr r7, L(ipred_cfl_ac_420_tbl) 2462*c0909341SAndroid Build Coastguard Worker sub r8, r8, #27 2463*c0909341SAndroid Build Coastguard Worker ldr r8, [r7, r8, lsl #2] 2464*c0909341SAndroid Build Coastguard Worker vmov.i32 q8, #0 2465*c0909341SAndroid Build Coastguard Worker vmov.i32 q9, #0 2466*c0909341SAndroid Build Coastguard Worker vmov.i32 q10, #0 2467*c0909341SAndroid Build Coastguard Worker vmov.i32 q11, #0 2468*c0909341SAndroid Build Coastguard Worker add r7, r7, r8 2469*c0909341SAndroid Build Coastguard Worker sub r8, r6, r4 // height - h_pad 2470*c0909341SAndroid Build Coastguard Worker rbit lr, r5 // rbit(width) 2471*c0909341SAndroid Build Coastguard Worker rbit r12, r6 // rbit(height) 2472*c0909341SAndroid Build Coastguard Worker clz lr, lr // ctz(width) 2473*c0909341SAndroid Build Coastguard Worker clz r12, r12 // ctz(height) 
2474*c0909341SAndroid Build Coastguard Worker add lr, lr, r12 // log2sz 2475*c0909341SAndroid Build Coastguard Worker add r12, r1, r2 2476*c0909341SAndroid Build Coastguard Worker vdup.32 d31, lr 2477*c0909341SAndroid Build Coastguard Worker lsl r2, r2, #1 2478*c0909341SAndroid Build Coastguard Worker vneg.s32 d31, d31 // -log2sz 2479*c0909341SAndroid Build Coastguard Worker bx r7 2480*c0909341SAndroid Build Coastguard Worker 2481*c0909341SAndroid Build Coastguard Worker .align 2 2482*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_tbl): 2483*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB 2484*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w8) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB 2485*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w4) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB 2486*c0909341SAndroid Build Coastguard Worker 2487*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4): 2488*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input 2489*c0909341SAndroid Build Coastguard Worker vld1.16 {q0}, [r1, :128], r2 2490*c0909341SAndroid Build Coastguard Worker vld1.16 {q1}, [r12, :128], r2 2491*c0909341SAndroid Build Coastguard Worker vld1.16 {q2}, [r1, :128], r2 2492*c0909341SAndroid Build Coastguard Worker vld1.16 {q3}, [r12, :128], r2 2493*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 2494*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2495*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d1 2496*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d4, d5 2497*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2498*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2499*c0909341SAndroid Build Coastguard Worker vst1.16 {q0}, [r0, :128]! 
2500*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2501*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2502*c0909341SAndroid Build Coastguard Worker bgt 1b 2503*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2504*c0909341SAndroid Build Coastguard Worker vmov d0, d1 2505*c0909341SAndroid Build Coastguard Worker vmov d2, d1 2506*c0909341SAndroid Build Coastguard Worker vmov d3, d1 2507*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_hpad): 2508*c0909341SAndroid Build Coastguard Worker beq 3f // This assumes that all callers already did "cmp r4, #0" 2509*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 2510*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 2511*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2512*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2513*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2514*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2515*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2516*c0909341SAndroid Build Coastguard Worker bgt 2b 2517*c0909341SAndroid Build Coastguard Worker3: 2518*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_calc_subtract_dc): 2519*c0909341SAndroid Build Coastguard Worker // Aggregate the sums 2520*c0909341SAndroid Build Coastguard Worker vadd.i32 q8, q8, q9 2521*c0909341SAndroid Build Coastguard Worker vadd.i32 q10, q10, q11 2522*c0909341SAndroid Build Coastguard Worker vadd.i32 q0, q8, q10 2523*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d1 2524*c0909341SAndroid Build Coastguard Worker vpadd.i32 d0, d0, d0 // sum 2525*c0909341SAndroid Build Coastguard Worker sub r0, r0, r6, lsl #3 2526*c0909341SAndroid Build Coastguard Worker vrshl.u32 d16, d0, d31 // (sum + (1 << (log2sz - 1))) >>= log2sz 2527*c0909341SAndroid Build Coastguard Worker vdup.16 q8, d16[0] 2528*c0909341SAndroid Build Coastguard Worker6: // Subtract dc 
from ac 2529*c0909341SAndroid Build Coastguard Worker vld1.16 {q0, q1}, [r0, :128] 2530*c0909341SAndroid Build Coastguard Worker subs r6, r6, #4 2531*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q8 2532*c0909341SAndroid Build Coastguard Worker vsub.i16 q1, q1, q8 2533*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2534*c0909341SAndroid Build Coastguard Worker bgt 6b 2535*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 2536*c0909341SAndroid Build Coastguard Worker 2537*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8): 2538*c0909341SAndroid Build Coastguard Worker cmp r3, #0 2539*c0909341SAndroid Build Coastguard Worker bne L(ipred_cfl_ac_420_w8_wpad) 2540*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, without padding 2541*c0909341SAndroid Build Coastguard Worker vld1.16 {q0, q1}, [r1, :128], r2 2542*c0909341SAndroid Build Coastguard Worker vld1.16 {q2, q3}, [r12, :128], r2 2543*c0909341SAndroid Build Coastguard Worker vld1.16 {q12, q13}, [r1, :128], r2 2544*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q2 2545*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q3 2546*c0909341SAndroid Build Coastguard Worker vld1.16 {q2, q3}, [r12, :128], r2 2547*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d1 2548*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d2, d3 2549*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q2 2550*c0909341SAndroid Build Coastguard Worker vadd.i16 q13, q13, q3 2551*c0909341SAndroid Build Coastguard Worker vpadd.i16 d2, d24, d25 2552*c0909341SAndroid Build Coastguard Worker vpadd.i16 d3, d26, d27 2553*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2554*c0909341SAndroid Build Coastguard Worker vshl.i16 q1, q1, #1 2555*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2556*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2557*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2558*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2559*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2560*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2561*c0909341SAndroid Build Coastguard Worker bgt 1b 2562*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2563*c0909341SAndroid Build Coastguard Worker vmov q0, q1 2564*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w8_hpad) 2565*c0909341SAndroid Build Coastguard Worker 2566*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_wpad): 2567*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 4 2568*c0909341SAndroid Build Coastguard Worker vld1.16 {q0}, [r1, :128], r2 2569*c0909341SAndroid Build Coastguard Worker vld1.16 {q1}, [r12, :128], r2 2570*c0909341SAndroid Build Coastguard Worker vld1.16 {q2}, [r1, :128], r2 2571*c0909341SAndroid Build Coastguard Worker vld1.16 {q3}, [r12, :128], r2 2572*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 2573*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2574*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d1 2575*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d4, d5 2576*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2577*c0909341SAndroid Build Coastguard Worker vdup.16 d3, d1[3] 2578*c0909341SAndroid Build Coastguard Worker vmov d2, d1 2579*c0909341SAndroid Build Coastguard Worker vdup.16 d1, d0[3] 2580*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2581*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2582*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2583*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2584*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2585*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2586*c0909341SAndroid Build Coastguard Worker bgt 1b 2587*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2588*c0909341SAndroid Build Coastguard Worker vmov q0, q1 2589*c0909341SAndroid Build Coastguard Worker 2590*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_hpad): 2591*c0909341SAndroid Build Coastguard Worker beq 3f // This assumes that all callers already did "cmp r4, #0" 2592*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 2593*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 2594*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2595*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2596*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2597*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2598*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2599*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2600*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2601*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2602*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2603*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2604*c0909341SAndroid Build Coastguard Worker bgt 2b 2605*c0909341SAndroid Build Coastguard Worker3: 2606*c0909341SAndroid Build Coastguard Worker 2607*c0909341SAndroid Build Coastguard Worker // Double the height and reuse the w4 summing/subtracting 2608*c0909341SAndroid Build Coastguard Worker lsl r6, r6, #1 2609*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w4_calc_subtract_dc) 2610*c0909341SAndroid Build Coastguard Worker 2611*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16): 2612*c0909341SAndroid Build Coastguard Worker adr r7, L(ipred_cfl_ac_420_w16_tbl) 2613*c0909341SAndroid Build Coastguard Worker ldr r3, [r7, r3, lsl #2] 2614*c0909341SAndroid Build Coastguard Worker add r7, r7, r3 2615*c0909341SAndroid Build Coastguard Worker bx r7 2616*c0909341SAndroid Build Coastguard Worker 2617*c0909341SAndroid Build Coastguard Worker .align 2 2618*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_tbl): 2619*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad0) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2620*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad1) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2621*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad2) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2622*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad3) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2623*c0909341SAndroid Build Coastguard Worker 2624*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad0): 2625*c0909341SAndroid Build Coastguard Worker sub r2, r2, #32 2626*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, 
without padding 2627*c0909341SAndroid Build Coastguard Worker vld1.16 {q0, q1}, [r1, :128]! 2628*c0909341SAndroid Build Coastguard Worker vld1.16 {q12, q13}, [r12, :128]! 2629*c0909341SAndroid Build Coastguard Worker vld1.16 {q2, q3}, [r1, :128], r2 2630*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q12 2631*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q13 2632*c0909341SAndroid Build Coastguard Worker vld1.16 {q12, q13}, [r12, :128], r2 2633*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d1 2634*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d2, d3 2635*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q12 2636*c0909341SAndroid Build Coastguard Worker vadd.i16 q3, q3, q13 2637*c0909341SAndroid Build Coastguard Worker vpadd.i16 d2, d4, d5 2638*c0909341SAndroid Build Coastguard Worker vpadd.i16 d3, d6, d7 2639*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2640*c0909341SAndroid Build Coastguard Worker vshl.i16 q1, q1, #1 2641*c0909341SAndroid Build Coastguard Worker subs r8, r8, #1 2642*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2643*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2644*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2645*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2646*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2647*c0909341SAndroid Build Coastguard Worker bgt 1b 2648*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2649*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2650*c0909341SAndroid Build Coastguard Worker 2651*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad1): 2652*c0909341SAndroid Build Coastguard Worker sub r2, r2, #32 2653*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 4 2654*c0909341SAndroid Build Coastguard Worker vld1.16 {q0, q1}, [r1, :128]! 
2655*c0909341SAndroid Build Coastguard Worker vld1.16 {q12, q13}, [r12, :128]! 2656*c0909341SAndroid Build Coastguard Worker vld1.16 {q2}, [r1, :128], r2 2657*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q12 2658*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q13 2659*c0909341SAndroid Build Coastguard Worker vld1.16 {q12}, [r12, :128], r2 2660*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d1 2661*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q12 2662*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d2, d3 2663*c0909341SAndroid Build Coastguard Worker vpadd.i16 d2, d4, d5 2664*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2665*c0909341SAndroid Build Coastguard Worker vshl.i16 d2, d2, #1 2666*c0909341SAndroid Build Coastguard Worker subs r8, r8, #1 2667*c0909341SAndroid Build Coastguard Worker vdup.16 d3, d2[3] 2668*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2669*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2670*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2671*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2672*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2673*c0909341SAndroid Build Coastguard Worker bgt 1b 2674*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2675*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2676*c0909341SAndroid Build Coastguard Worker 2677*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad2): 2678*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 8 2679*c0909341SAndroid Build Coastguard Worker vld1.16 {q0, q1}, [r1, :128], r2 2680*c0909341SAndroid Build Coastguard Worker vld1.16 {q12, q13}, [r12, :128], r2 2681*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q12 2682*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q13 2683*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d1 
2684*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d2, d3 2685*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2686*c0909341SAndroid Build Coastguard Worker subs r8, r8, #1 2687*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d1[3] 2688*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2689*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2690*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2691*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2692*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2693*c0909341SAndroid Build Coastguard Worker bgt 1b 2694*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2695*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2696*c0909341SAndroid Build Coastguard Worker 2697*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad3): 2698*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 12 2699*c0909341SAndroid Build Coastguard Worker vld1.16 {q0}, [r1, :128], r2 2700*c0909341SAndroid Build Coastguard Worker vld1.16 {q12}, [r12, :128], r2 2701*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q12 2702*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0, d1 2703*c0909341SAndroid Build Coastguard Worker vshl.i16 d0, d0, #1 2704*c0909341SAndroid Build Coastguard Worker subs r8, r8, #1 2705*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d0[3] 2706*c0909341SAndroid Build Coastguard Worker vdup.16 d1, d0[3] 2707*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2708*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2709*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2710*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2711*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2712*c0909341SAndroid Build Coastguard Worker bgt 1b 2713*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2714*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2715*c0909341SAndroid Build Coastguard Worker 2716*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_hpad): 2717*c0909341SAndroid Build Coastguard Worker beq 3f // This assumes that all callers already did "cmp r4, #0" 2718*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 2719*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 2720*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2721*c0909341SAndroid Build Coastguard Worker vaddw.u16 q8, q8, d0 2722*c0909341SAndroid Build Coastguard Worker vaddw.u16 q9, q9, d1 2723*c0909341SAndroid Build Coastguard Worker vaddw.u16 q10, q10, d2 2724*c0909341SAndroid Build Coastguard Worker vaddw.u16 q11, q11, d3 2725*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
// NOTE(review): the instructions from here down to the first "endfunc" are
// the tail of a function that starts before this point in the file; they are
// kept exactly as they were.
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        bgt             2b
3:

        // Quadruple the height and reuse the w4 summing/subtracting
        lsl             r6,  r6,  #2
        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
endfunc

// void ipred_cfl_ac_422_16bpc_neon(int16_t *const ac, const pixel *const ypx,
//                                  const ptrdiff_t stride, const int w_pad,
//                                  const int h_pad, const int cw, const int ch);
//
// Builds the 4:2:2 "ac" buffer: every output sample is the sum of two
// horizontally adjacent input samples (vpadd), shifted left by 2.
//
// Register use, as established by the code below:
//   r0  = ac (output, post-incremented by every store)
//   r1  = ypx, pointer for rows 0, 2, 4, ...
//   r12 = ypx + stride, pointer for rows 1, 3, 5, ...
//   r2  = stride, doubled in the prologue so r1/r12 each step two rows
//   r3  = w_pad
//   r4  = h_pad * 4 (number of rows to pad vertically)
//   r5  = cw, r6 = ch
//   r8  = ch - h_pad * 4, the number of rows read from the input
//   q8-q11 = running 32-bit sums of everything stored to ac
//   d31 = -(ctz(cw) + ctz(ch))
// The vertical padding and the final steps are done by code shared with the
// 4:2:0 function (the L(ipred_cfl_ac_420_*) labels), which is entered with the
// flags of "cmp r4, #0" still live and the last output row replicated into
// q0/q1. NOTE(review): d31 and r6 are presumably consumed by that shared
// code; it is outside this block.
function ipred_cfl_ac_422_16bpc_neon, export=1
        push            {r4-r8,lr}
        // Six registers were pushed, so the stack arguments start at sp + 24.
        ldrd            r4,  r5,  [sp, #24]   // r4 = h_pad, r5 = cw
        ldr             r6,  [sp, #32]        // r6 = ch
        clz             r8,  r5
        lsl             r4,  r4,  #2          // h_pad * 4
        adr             r7,  L(ipred_cfl_ac_422_tbl)
        sub             r8,  r8,  #27         // cw = 16, 8, 4 -> index 0, 1, 2
        ldr             r8,  [r7, r8, lsl #2]
        vmov.i16        q8,  #0
        vmov.i16        q9,  #0
        vmov.i16        q10, #0
        vmov.i16        q11, #0
        add             r7,  r7,  r8
        sub             r8,  r6,  r4  // height - h_pad
        rbit            lr,  r5       // rbit(width)
        rbit            r12, r6       // rbit(height)
        clz             lr,  lr       // ctz(width)
        clz             r12, r12      // ctz(height)
        add             lr,  lr,  r12 // log2sz
        add             r12, r1,  r2  // second row pointer
        vdup.32         d31, lr
        lsl             r2,  r2,  #1  // each pointer now advances two rows
        vneg.s32        d31, d31      // -log2sz
        bx              r7

        .align 2
        // Offsets relative to the table itself, indexed by clz(cw) - 27.
        // NOTE(review): CONFIG_THUMB is defined elsewhere; presumably it sets
        // the low address bit so that "bx" stays in Thumb state.
L(ipred_cfl_ac_422_tbl):
        .word L(ipred_cfl_ac_422_w16) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w8)  - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w4)  - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB

L(ipred_cfl_ac_422_w4):
1:      // Copy and subsample input
        // Four rows per iteration; 8 input samples give 4 output samples.
        vld1.16         {q0},  [r1,  :128], r2
        vld1.16         {q1},  [r12, :128], r2
        vld1.16         {q2},  [r1,  :128], r2
        vld1.16         {q3},  [r12, :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d4,  d5
        vpadd.i16       d3,  d6,  d7
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        bgt             1b
        cmp             r4,  #0
        // Replicate the last output row (d3) for the vertical padding code.
        vmov            d0,  d3
        vmov            d1,  d3
        vmov            d2,  d3
        b               L(ipred_cfl_ac_420_w4_hpad)

L(ipred_cfl_ac_422_w8):
        cmp             r3,  #0
        bne             L(ipred_cfl_ac_422_w8_wpad)
1:      // Copy and subsample input, without padding
        // Four rows per iteration; 16 input samples give 8 output samples.
        vld1.16         {q0,  q1},  [r1,  :128], r2
        vld1.16         {q2,  q3},  [r12, :128], r2
        vld1.16         {q12, q13}, [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d4,  d5
        vpadd.i16       d3,  d6,  d7
        vld1.16         {q2,  q3},  [r12, :128], r2
        vpadd.i16       d24, d24, d25
        vpadd.i16       d25, d26, d27
        vpadd.i16       d26, d4,  d5
        vpadd.i16       d27, d6,  d7
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        vshl.i16        q2,  q12, #2
        vshl.i16        q3,  q13, #2
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // q3 holds the last output row; replicate it for vertical padding.
        vmov            q0,  q3
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_422_w8_wpad):
1:      // Copy and subsample input, padding 4
        // Only 8 input samples per row are used; output samples 4-7 of each
        // row are copies of output sample 3.
        vld1.16         {q0},  [r1,  :128], r2
        vld1.16         {q2},  [r12, :128], r2
        vld1.16         {q12}, [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d4,  d5
        // Load a single q register, like the three loads above: d6/d7 are
        // overwritten below (vdup d7 / vmov d6) before anything reads them,
        // so loading q3 as well would only read 16 bytes that are never used.
        vld1.16         {q2},  [r12, :128], r2
        vpadd.i16       d24, d24, d25
        vpadd.i16       d25, d4,  d5
        vshl.i16        q0,  q0,  #2
        vshl.i16        q12, q12, #2
        // Spread the four half-rows d0, d1, d24, d25 into q0-q3, filling the
        // upper half of each row with the last valid sample. The order
        // matters: each source register is read before it is overwritten.
        vdup.16         d7,  d25[3]
        vmov            d6,  d25
        vdup.16         d5,  d24[3]
        vmov            d4,  d24
        vdup.16         d3,  d1[3]
        vmov            d2,  d1
        vdup.16         d1,  d0[3]
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q3
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_422_w16):
        // Second-level dispatch on w_pad (r3 = 0..3).
        adr             r7,  L(ipred_cfl_ac_422_w16_tbl)
        ldr             r3,  [r7, r3, lsl #2]
        add             r7,  r7,  r3
        bx              r7

        .align 2
L(ipred_cfl_ac_422_w16_tbl):
        .word L(ipred_cfl_ac_422_w16_wpad0) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad1) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad2) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad3) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB

L(ipred_cfl_ac_422_w16_wpad0):
        // The first load of each row post-increments by 32 bytes, so take
        // that off the row step used by the second load.
        sub             r2,  r2,  #32
1:      // Copy and subsample input, without padding
        // Two rows per iteration; 32 input samples give 16 output samples.
        vld1.16         {q0,  q1},  [r1,  :128]!
        vld1.16         {q2,  q3},  [r12, :128]!
        vld1.16         {q12, q13}, [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d24, d25
        vpadd.i16       d3,  d26, d27
        vld1.16         {q12, q13}, [r12, :128], r2
        vpadd.i16       d4,  d4,  d5
        vpadd.i16       d5,  d6,  d7
        vpadd.i16       d6,  d24, d25
        vpadd.i16       d7,  d26, d27
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        vshl.i16        q2,  q2,  #2
        vshl.i16        q3,  q3,  #2
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // q2/q3 hold the last output row; replicate it for vertical padding.
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad1):
        sub             r2,  r2,  #32
1:      // Copy and subsample input, padding 4
        // 24 input samples per row are read; output samples 12-15 are copies
        // of output sample 11.
        vld1.16         {q0,  q1},  [r1,  :128]!
        vld1.16         {q2,  q3},  [r12, :128]!
        vld1.16         {q12},      [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d24, d25
        vld1.16         {q12},      [r12, :128], r2
        vpadd.i16       d4,  d4,  d5
        vpadd.i16       d5,  d6,  d7
        vpadd.i16       d6,  d24, d25
        vshl.i16        q0,  q0,  #2
        vshl.i16        d2,  d2,  #2
        vshl.i16        q2,  q2,  #2
        vshl.i16        d6,  d6,  #2
        vdup.16         d3,  d2[3]
        vdup.16         d7,  d6[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad2):
1:      // Copy and subsample input, padding 8
        // 16 input samples per row are read; output samples 8-15 are copies
        // of output sample 7.
        vld1.16         {q0,  q1},  [r1,  :128], r2
        vld1.16         {q2,  q3},  [r12, :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d4,  d4,  d5
        vpadd.i16       d5,  d6,  d7
        vshl.i16        q0,  q0,  #2
        vshl.i16        q2,  q2,  #2
        vdup.16         q1,  d1[3]
        vdup.16         q3,  d5[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad3):
1:      // Copy and subsample input, padding 12
        // 8 input samples per row are read; output samples 4-15 are copies of
        // output sample 3.
        vld1.16         {q0},  [r1,  :128], r2
        vld1.16         {q2},  [r12, :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d4,  d5
        vshl.i16        q0,  q0,  #2
        // d0 = first row, d1 = second row. Build the padded rows in q0/q1 and
        // q2/q3; d1 is overwritten last because the second row is read from it.
        vdup.16         q3,  d1[3]
        vdup.16         q1,  d0[3]
        vdup.16         d5,  d1[3]
        vmov            d4,  d1
        vdup.16         d1,  d0[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)
endfunc

// void ipred_cfl_ac_444_16bpc_neon(int16_t *const ac, const pixel *const ypx,
//                                  const ptrdiff_t stride, const int w_pad,
//                                  const int h_pad, const int cw, const int ch);
//
// Builds the 4:4:4 "ac" buffer: every input sample is copied to the output
// shifted left by 3, with no subsampling.
//
// The prologue and register use are the same as in
// ipred_cfl_ac_422_16bpc_neon (r0 = ac, r1/r12 = even/odd row pointers,
// r2 = 2 * stride, r3 = w_pad, r4 = h_pad * 4, r5 = cw, r6 = ch,
// r8 = rows to read, q8-q11 = 32-bit sums, d31 = -log2sz), except that the
// width table has a fourth entry for cw = 32. Widths 4, 8 and 16 hand over to
// the vertical padding code of the 4:2:0 function; width 32 has its own
// vertical padding loop at L(ipred_cfl_ac_444_w32_hpad).
function ipred_cfl_ac_444_16bpc_neon, export=1
        push            {r4-r8,lr}
        // Six registers were pushed, so the stack arguments start at sp + 24.
        ldrd            r4,  r5,  [sp, #24]   // r4 = h_pad, r5 = cw
        ldr             r6,  [sp, #32]        // r6 = ch
        clz             r8,  r5
        lsl             r4,  r4,  #2          // h_pad * 4
        adr             r7,  L(ipred_cfl_ac_444_tbl)
        sub             r8,  r8,  #26         // cw = 32, 16, 8, 4 -> index 0..3
        ldr             r8,  [r7, r8, lsl #2]
        vmov.i16        q8,  #0
        vmov.i16        q9,  #0
        vmov.i16        q10, #0
        vmov.i16        q11, #0
        add             r7,  r7,  r8
        sub             r8,  r6,  r4  // height - h_pad
        rbit            lr,  r5       // rbit(width)
        rbit            r12, r6       // rbit(height)
        clz             lr,  lr       // ctz(width)
        clz             r12, r12      // ctz(height)
        add             lr,  lr,  r12 // log2sz
        add             r12, r1,  r2  // second row pointer
        vdup.32         d31, lr
        lsl             r2,  r2,  #1  // each pointer now advances two rows
        vneg.s32        d31, d31      // -log2sz
        bx              r7

        .align 2
        // Offsets relative to the table itself, indexed by clz(cw) - 26.
L(ipred_cfl_ac_444_tbl):
        .word L(ipred_cfl_ac_444_w32) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_444_w16) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_444_w8)  - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_444_w4)  - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB

L(ipred_cfl_ac_444_w4):
1:      // Copy and expand input
        // Four rows of 4 samples per iteration.
        vld1.16         {d0},  [r1,  :64], r2
        vld1.16         {d1},  [r12, :64], r2
        vld1.16         {d2},  [r1,  :64], r2
        vld1.16         {d3},  [r12, :64], r2
        vshl.i16        q0,  q0,  #3
        vshl.i16        q1,  q1,  #3
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        bgt             1b
        cmp             r4,  #0
        // Replicate the last output row (d3) for the vertical padding code.
        vmov            d0,  d3
        vmov            d1,  d3
        vmov            d2,  d3
        b               L(ipred_cfl_ac_420_w4_hpad)

L(ipred_cfl_ac_444_w8):
1:      // Copy and expand input
        // Four rows of 8 samples per iteration.
        vld1.16         {q0},  [r1,  :128], r2
        vld1.16         {q1},  [r12, :128], r2
        vld1.16         {q2},  [r1,  :128], r2
        vld1.16         {q3},  [r12, :128], r2
        vshl.i16        q0,  q0,  #3
        vshl.i16        q1,  q1,  #3
        vshl.i16        q2,  q2,  #3
        vshl.i16        q3,  q3,  #3
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // q3 holds the last output row; replicate it for vertical padding.
        vmov            q0,  q3
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_444_w16):
        cmp             r3,  #0
        bne             L(ipred_cfl_ac_444_w16_wpad)
1:      // Copy and expand input, without padding
        // Two rows of 16 samples per iteration.
        vld1.16         {q0, q1}, [r1,  :128], r2
        vld1.16         {q2, q3}, [r12, :128], r2
        vshl.i16        q0,  q0,  #3
        vshl.i16        q1,  q1,  #3
        vshl.i16        q2,  q2,  #3
        vshl.i16        q3,  q3,  #3
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // q2/q3 hold the last output row; replicate it for vertical padding.
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_444_w16_wpad):
1:      // Copy and expand input, padding 8
        // 8 input samples per row are read; output samples 8-15 are copies of
        // output sample 7.
        vld1.16         {q0},  [r1,  :128], r2
        vld1.16         {q2},  [r12, :128], r2
        vshl.i16        q0,  q0,  #3
        vshl.i16        q2,  q2,  #3
        vdup.16         q1,  d1[3]
        vdup.16         q3,  d5[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_444_w32):
        // Second-level dispatch on w_pad. The table has entries for
        // w_pad = 0, 2, 4, 6, so w_pad << 1 is the byte offset of the entry.
        adr             r7,  L(ipred_cfl_ac_444_w32_tbl)
        ldr             r3,  [r7, r3, lsl #1] // (w3>>1) << 2
        // The w32 loops handle one row per iteration through r1 only, so undo
        // the stride doubling done in the prologue.
        asr             r2,  r2,  #1
        add             r7,  r7,  r3
        bx              r7

        .align 2
L(ipred_cfl_ac_444_w32_tbl):
        .word L(ipred_cfl_ac_444_w32_wpad0) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_444_w32_wpad2) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_444_w32_wpad4) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_444_w32_wpad6) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB

L(ipred_cfl_ac_444_w32_wpad0):
        // The first load of each row post-increments by 32 bytes, so take
        // that off the row step used by the second load.
        sub             r2,  r2,  #32
1:      // Copy and expand input, without padding
        vld1.16         {q0, q1}, [r1, :128]!
        vld1.16         {q2, q3}, [r1, :128], r2
        vshl.i16        q0,  q0,  #3
        vshl.i16        q1,  q1,  #3
        vshl.i16        q2,  q2,  #3
        vshl.i16        q3,  q3,  #3
        subs            r8,  r8,  #1
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad2):
        sub             r2,  r2,  #32
1:      // Copy and expand input, padding 8
        // 24 input samples are read; output samples 24-31 are copies of
        // output sample 23.
        vld1.16         {q0, q1}, [r1, :128]!
        vld1.16         {q2},     [r1, :128], r2
        vshl.i16        q0,  q0,  #3
        vshl.i16        q1,  q1,  #3
        vshl.i16        q2,  q2,  #3
        subs            r8,  r8,  #1
        vst1.16         {q0, q1}, [r0, :128]!
        vdup.16         q3,  d5[3]
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad4):
1:      // Copy and expand input, padding 16
        // 16 input samples are read; output samples 16-31 are copies of
        // output sample 15.
        vld1.16         {q0, q1}, [r1, :128], r2
        vshl.i16        q0,  q0,  #3
        vshl.i16        q1,  q1,  #3
        subs            r8,  r8,  #1
        vst1.16         {q0, q1}, [r0, :128]!
        vdup.16         q2,  d3[3]
        vdup.16         q3,  d3[3]
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad6):
1:      // Copy and expand input, padding 24
        // 8 input samples are read; output samples 8-31 are copies of output
        // sample 7.
        vld1.16         {q0},  [r1, :128], r2
        vshl.i16        q0,  q0,  #3
        subs            r8,  r8,  #1
        vdup.16         q1,  d1[3]
        vst1.16         {q0, q1}, [r0, :128]!
        vdup.16         q2,  d1[3]
        vdup.16         q3,  d1[3]
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // Falls through into the vertical padding code below.

L(ipred_cfl_ac_444_w32_hpad):
        beq             3f // This assumes that all callers already did "cmp r4, #0"
2:      // Vertical padding (h_pad > 0)
        // q0-q3 still hold the last row written above; store it once more per
        // iteration (r4 counts the padding rows) and keep summing it.
        subs            r4,  r4,  #1
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             2b
3:

        // Multiply the height by eight and reuse the w4 subtracting
        lsl             r6,  r6,  #3
        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
endfunc