/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * Copyright © 2019, B Krishnan Iyer
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// Conventions shared by every routine below:
//
//  * Register arguments: r0 = dst, r1 = stride, r2 = topleft, r3 = width.
//    height is the fifth argument and is read from the stack just above
//    the registers pushed in the prologue.
//  * Dispatch: (clz(width) - 25) is used as a word index into a per-function
//    table ordered 64, 32, 16, 8, 4, i.e. index 0 for width 64 up to index 4
//    for width 4. This assumes width is a power of two in [4, 64].
//  * Table entries are offsets relative to the table label, with
//    CONFIG_THUMB added. CONFIG_THUMB is not defined in this file;
//    presumably it sets the Thumb bit of the "bx" target in Thumb builds
//    (TODO confirm against the build configuration).
//  * Row addressing: r12 = dst + stride and r1 is doubled, so r0 walks the
//    even rows and r12 the odd rows. Every loop body writes four rows and
//    subtracts 4 from the remaining height; height is therefore assumed to
//    be a multiple of 4.
//  * The :32/:64/:128 qualifiers on stores are alignment hints for dst rows.

// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                             const pixel *const topleft,
//                             const int width, const int height, const int a,
//                             const int max_width, const int max_height);
//
// Fills the width x height block at dst with the constant value 128.
// topleft is not read; r2 is reused as the jump-table base.
function ipred_dc_128_8bpc_neon, export=1
        push            {r4, lr}
        ldr             r4,  [sp, #8]              // r4 = height (8 bytes were pushed)
        clz             r3,  r3
        adr             r2,  L(ipred_dc_128_tbl)
        sub             r3,  r3,  #25              // table index from width
        ldr             r3,  [r2,  r3,  lsl #2]
        vmov.i8         q0,  #128                  // 16 bytes of 128
        add             r2,  r2,  r3               // absolute branch target
        add             r12, r0,  r1               // r12 = second row
        lsl             r1,  r1,  #1               // step two rows at a time
        bx              r2

        .align 2
L(ipred_dc_128_tbl):
        .word 640f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 16f  - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 4f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
4:      // width 4: one 32-bit lane per row
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        subs            r4,  r4,  #4
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             4b
        pop             {r4, pc}
8:      // width 8: one d register per row
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             8b
        pop             {r4, pc}
16:     // width 16: one q register per row
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:    // width 32: needs a second q register of 128s
        vmov.i8         q1,  #128
32:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:    // width 64: two 32-byte stores per row
        vmov.i8         q1,  #128
        sub             r1,  r1,  #32              // first store of a row already advances by 32
64:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc

// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                        const pixel *const topleft,
//                        const int width, const int height, const int a,
//                        const int max_width, const int max_height);
//
// Vertical prediction: loads `width` bytes starting at topleft + 1 once and
// stores that same vector to every row of the block.
// Here the height counter lives in lr and r4 holds the jump-table base.
function ipred_v_8bpc_neon, export=1
        push            {r4, lr}
        ldr             lr,  [sp, #8]              // lr = height
        clz             r3,  r3
        adr             r4,  L(ipred_v_tbl)
        sub             r3,  r3,  #25              // table index from width
        ldr             r3,  [r4,  r3,  lsl #2]
        add             r2,  r2,  #1               // r2 = topleft + 1 (first top pixel)
        add             r4,  r4,  r3
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r4

        .align 2
L(ipred_v_tbl):
        .word 640f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 80f  - L(ipred_v_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_v_tbl) + CONFIG_THUMB
40:     // width 4
        vld1.32         {d0[]},   [r2]             // 4 top pixels, replicated in both halves of d0
4:
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        subs            lr,  lr,  #4
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             4b
        pop             {r4, pc}
80:     // width 8
        vld1.8          {d0},  [r2]
8:
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        subs            lr,  lr,  #4
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             8b
        pop             {r4, pc}
160:    // width 16
        vld1.8          {q0},  [r2]
16:
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:    // width 32
        vld1.8          {q0,  q1},  [r2]
32:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:    // width 64: top row held in q0-q3
        vld1.8          {q0,  q1},  [r2]!
        sub             r1,  r1,  #32              // first store of a row already advances by 32
        vld1.8          {q2,  q3},  [r2]
64:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.8          {d4,  d5,  d6,  d7},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.8          {d4,  d5,  d6,  d7},  [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc

// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                        const pixel *const topleft,
//                        const int width, const int height, const int a,
//                        const int max_width, const int max_height);
//
// Horizontal prediction: every output row is filled with a single byte read
// from below topleft. Row 0 uses the byte at topleft - 1, row 1 the byte at
// topleft - 2, and so on with the read pointer moving towards lower
// addresses. r4 = height counter, r5 = jump-table base, lr = read stride.
function ipred_h_8bpc_neon, export=1
        push            {r4-r5, lr}
        ldr             r4,  [sp, #12]             // r4 = height (12 bytes were pushed)
        clz             r3,  r3
        adr             r5,  L(ipred_h_tbl)
        sub             r3,  r3,  #25              // table index from width
        ldr             r3,  [r5,  r3,  lsl #2]
        sub             r2,  r2,  #4               // widths 4/8 read four bytes per iteration
        mov             lr,  #-4                   // ... and then step back by four
        add             r5,  r5,  r3
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r5

        .align 2
L(ipred_h_tbl):
        .word 640f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_h_tbl) + CONFIG_THUMB
        .word 4f   - L(ipred_h_tbl) + CONFIG_THUMB
4:      // width 4
        // Four consecutive bytes, each replicated across one d register:
        // d3 holds the highest address (first row), d0 the lowest (fourth row).
        vld4.8          {d0[],  d1[],  d2[],  d3[]},  [r2, :32], lr
        vst1.32         {d3[0]},  [r0,  :32], r1
        vst1.32         {d2[0]},  [r12, :32], r1
        subs            r4,  r4,  #4
        vst1.32         {d1[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             4b
        pop             {r4-r5, pc}
8:      // width 8: same load pattern, full d register per row
        vld4.8          {d0[],  d1[],  d2[],  d3[]},  [r2, :32], lr
        vst1.8          {d3},  [r0,  :64], r1
        vst1.8          {d2},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.8          {d1},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             8b
        pop             {r4-r5, pc}
160:    // width 16: switch to one replicated byte load per row
        add             r2,  r2,  #3               // undo 3 of the 4: r2 = topleft - 1
        mov             lr,  #-1
16:
        vld1.8          {d0[],  d1[]},  [r2], lr   // q0 = row n
        subs            r4,  r4,  #4
        vld1.8          {d2[],  d3[]},  [r2], lr   // q1 = row n + 1
        vst1.8          {q0},  [r0,  :128], r1
        vld1.8          {d4[],  d5[]},  [r2], lr   // q2 = row n + 2
        vst1.8          {q1},  [r12, :128], r1
        vld1.8          {d6[],  d7[]},  [r2], lr   // q3 = row n + 3
        vst1.8          {q2},  [r0,  :128], r1
        vst1.8          {q3},  [r12, :128], r1
        bgt             16b
        pop             {r4-r5, pc}
320:    // width 32: each row is the same q register stored twice
        add             r2,  r2,  #3
        mov             lr,  #-1
        sub             r1,  r1,  #16              // first store of a row already advances by 16
32:
        vld1.8          {d0[],  d1[]},  [r2], lr
        subs            r4,  r4,  #4
        vld1.8          {d2[],  d3[]},  [r2], lr
        vst1.8          {q0},  [r0,  :128]!
        vld1.8          {d4[],  d5[]},  [r2], lr
        vst1.8          {q1},  [r12, :128]!
        vld1.8          {d6[],  d7[]},  [r2], lr
        vst1.8          {q0},  [r0,  :128], r1
        vst1.8          {q1},  [r12, :128], r1
        vst1.8          {q2},  [r0,  :128]!
        vst1.8          {q3},  [r12, :128]!
        vst1.8          {q2},  [r0,  :128], r1
        vst1.8          {q3},  [r12, :128], r1
        bgt             32b
        pop             {r4-r5, pc}
640:    // width 64: each row is the same q register stored four times
        add             r2,  r2,  #3
        mov             lr,  #-1
        sub             r1,  r1,  #48              // first three stores of a row advance by 48
64:
        vld1.8          {d0[],  d1[]},  [r2], lr
        subs            r4,  r4,  #4
        vld1.8          {d2[],  d3[]},  [r2], lr
        vst1.8          {q0},  [r0,  :128]!
        vld1.8          {d4[],  d5[]},  [r2], lr
        vst1.8          {q1},  [r12, :128]!
        vld1.8          {d6[],  d7[]},  [r2], lr
        vst1.8          {q0},  [r0,  :128]!
        vst1.8          {q1},  [r12, :128]!
        vst1.8          {q0},  [r0,  :128]!
        vst1.8          {q1},  [r12, :128]!
        vst1.8          {q0},  [r0,  :128], r1
        vst1.8          {q1},  [r12, :128], r1
        vst1.8          {q2},  [r0,  :128]!
        vst1.8          {q3},  [r12, :128]!
        vst1.8          {q2},  [r0,  :128]!
        vst1.8          {q3},  [r12, :128]!
        vst1.8          {q2},  [r0,  :128]!
        vst1.8          {q3},  [r12, :128]!
        vst1.8          {q2},  [r0,  :128], r1
        vst1.8          {q3},  [r12, :128], r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc

// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                             const pixel *const topleft,
//                             const int width, const int height, const int a,
//                             const int max_width, const int max_height);
//
// DC prediction from the top edge only: sums the `width` bytes starting at
// topleft + 1, divides by width with rounding (rounding right shift by
// log2(width)) and fills the block with the resulting byte.
// r4 = height counter, r5 = jump-table base.
function ipred_dc_top_8bpc_neon, export=1
        push            {r4-r5, lr}
        ldr             r4,  [sp, #12]             // r4 = height
        clz             r3,  r3
        adr             r5,  L(ipred_dc_top_tbl)
        sub             r3,  r3,  #25              // table index from width
        ldr             r3,  [r5,  r3,  lsl #2]
        add             r2,  r2,  #1               // r2 = topleft + 1 (first top pixel)
        add             r5,  r5,  r3
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r5

        .align 2
L(ipred_dc_top_tbl):
        .word 640f - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 80f  - L(ipred_dc_top_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_dc_top_tbl) + CONFIG_THUMB
40:     // width 4
        vld1.32         {d0[]},  [r2]              // 4 pixels, replicated in both halves of d0
        vpaddl.u8       d0,  d0                    // widen to u16 pair sums
        vpadd.u16       d0,  d0                    // every lane = sum of the 4 pixels
        vrshrn.u16      d0,  q0,  #2               // (sum + 2) >> 2; only lane 0 is used
        vdup.8          d0,  d0[0]
4:
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        subs            r4,  r4,  #4
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             4b
        pop             {r4-r5, pc}
80:     // width 8
        vld1.8          {d0},  [r2]
        vpaddl.u8       d0,  d0
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0                    // every lane = sum of the 8 pixels
        vrshrn.u16      d0,  q0,  #3
        vdup.8          d0,  d0[0]
8:
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             8b
        pop             {r4-r5, pc}
160:    // width 16
        vld1.8          {d0,  d1},  [r2]
        vaddl.u8        q0,  d0,  d1               // 8 widened partial sums
        vadd.u16        d0,  d0,  d1               // 4 partial sums
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0                    // every lane = sum of the 16 pixels
        vrshrn.u16      d0,  q0,  #4
        vdup.8          q0,  d0[0]
16:
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        bgt             16b
        pop             {r4-r5, pc}
320:    // width 32
        vld1.8          {d0,  d1,  d2,  d3},  [r2]
        vaddl.u8        q0,  d0,  d1
        vaddl.u8        q1,  d2,  d3
        vadd.u16        q0,  q0,  q1
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0                    // every lane = sum of the 32 pixels
        vrshrn.u16      d4,  q0,  #5               // result kept in d4 so q0 and q1 can both be filled
        vdup.8          q0,  d4[0]
        vdup.8          q1,  d4[0]
32:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4-r5, pc}
640:    // width 64
        vld1.8          {d0,  d1,  d2,  d3},  [r2]!
        vaddl.u8        q0,  d0,  d1
        vld1.8          {d4,  d5,  d6,  d7},  [r2]
        vaddl.u8        q1,  d2,  d3
        vaddl.u8        q2,  d4,  d5
        vaddl.u8        q3,  d6,  d7
        vadd.u16        q0,  q0,  q1
        vadd.u16        q1,  q2,  q3
        vadd.u16        q0,  q0,  q1
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0                    // every lane = sum of the 64 pixels
        vrshrn.u16      d18, q0,  #6               // result kept in d18 so q0 and q1 can both be filled
        vdup.8          q0,  d18[0]
        vdup.8          q1,  d18[0]
        sub             r1,  r1,  #32              // first store of a row already advances by 32
64:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc

// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int a,
//                              const int max_width, const int max_height);
//
// DC prediction from the left edge only: averages the `height` bytes that
// start at topleft - height and fills the block with the result.
//
// Dispatch is two-stage through a single 10-entry table:
//   entries 0-4: L(ipred_dc_left_hN), selected by clz(height) - 25; these
//                compute the average into q0 and end with "bx r3".
//   entries 5-9: L(ipred_dc_left_wN), selected by clz(width) - 20, i.e. the
//                usual width index plus 5; these are the store loops.
// r3 is resolved to the store loop up front and r5 to the summing routine.
// The hN loads carry :32/:64/:128 alignment hints on topleft - height;
// NOTE(review): that relies on the caller's edge-buffer alignment, which is
// not visible in this file.
function ipred_dc_left_8bpc_neon, export=1
        push            {r4-r5, lr}
        ldr             r4,  [sp, #12]             // r4 = height
        sub             r2,  r2,  r4               // r2 = topleft - height
        clz             r3,  r3
        clz             lr,  r4
        sub             lr,  lr,  #25              // index of the height routine (0-4)
        adr             r5,  L(ipred_dc_left_tbl)
        sub             r3,  r3,  #20              // index of the width routine (5-9)
        ldr             r3,  [r5,  r3,  lsl #2]
        ldr             lr,  [r5,  lr,  lsl #2]
        add             r3,  r5,  r3               // r3 = store loop for this width
        add             r5,  r5,  lr               // r5 = summing routine for this height
        add             r12, r0,  r1
        lsl             r1,  r1,  #1
        bx              r5

        .align 2
L(ipred_dc_left_tbl):
        .word L(ipred_dc_left_h64) - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_h32) - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_h16) - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_h8)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_h4)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_w64) - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_w32) - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_w16) - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_w8)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
        .word L(ipred_dc_left_w4)  - L(ipred_dc_left_tbl) + CONFIG_THUMB

L(ipred_dc_left_h4):
        // height 4: average of 4 left pixels
        vld1.32         {d0[]},  [r2,  :32]
        vpaddl.u8       d0,  d0
        vpadd.u16       d0,  d0
        vrshrn.u16      d0,  q0,  #2
        vdup.8          q0,  d0[0]                 // fill all of q0: the width is not known here
        bx              r3
L(ipred_dc_left_w4):
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        subs            r4,  r4,  #4
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             L(ipred_dc_left_w4)
        pop             {r4-r5, pc}
L(ipred_dc_left_h8):
        // height 8: average of 8 left pixels
        vld1.8          {d0},  [r2,  :64]
        vpaddl.u8       d0,  d0
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0
        vrshrn.u16      d0,  q0,  #3
        vdup.8          q0,  d0[0]
        bx              r3
L(ipred_dc_left_w8):
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             L(ipred_dc_left_w8)
        pop             {r4-r5, pc}
L(ipred_dc_left_h16):
        // height 16: average of 16 left pixels
        vld1.8          {d0,  d1},  [r2,  :128]
        vaddl.u8        q0,  d0,  d1
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0
        vrshrn.u16      d0,  q0,  #4
        vdup.8          q0,  d0[0]
        bx              r3
L(ipred_dc_left_w16):
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        bgt             L(ipred_dc_left_w16)
        pop             {r4-r5, pc}
L(ipred_dc_left_h32):
        // height 32: average of 32 left pixels
        vld1.8          {d0,  d1,  d2,  d3},  [r2,  :128]
        vaddl.u8        q0,  d0,  d1
        vaddl.u8        q1,  d2,  d3
        vadd.u16        q0,  q0,  q1
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0
        vrshrn.u16      d0,  q0,  #5
        vdup.8          q0,  d0[0]
        bx              r3
L(ipred_dc_left_w32):
        vmov.8          q1,  q0                    // second half of the 32-byte row
1:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             1b
        pop             {r4-r5, pc}
L(ipred_dc_left_h64):
        // height 64: load the first 32 left pixels, advancing r2 by 32
        vld1.8          {d0,  d1,  d2,  d3},  [r2,  :128]!
521*c0909341SAndroid Build Coastguard Worker vld1.8 {d4, d5, d6, d7}, [r2, :128] 522*c0909341SAndroid Build Coastguard Worker vaddl.u8 q0, d0, d1 523*c0909341SAndroid Build Coastguard Worker vaddl.u8 q1, d2, d3 524*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 525*c0909341SAndroid Build Coastguard Worker vaddl.u8 q3, d6, d7 526*c0909341SAndroid Build Coastguard Worker vadd.u16 q0, q0, q1 527*c0909341SAndroid Build Coastguard Worker vadd.u16 q1, q2, q3 528*c0909341SAndroid Build Coastguard Worker vadd.u16 q0, q0, q1 529*c0909341SAndroid Build Coastguard Worker vadd.u16 d0, d0, d1 530*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 531*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 532*c0909341SAndroid Build Coastguard Worker vrshrn.u16 d0, q0, #6 533*c0909341SAndroid Build Coastguard Worker vdup.8 q0, d0[0] 534*c0909341SAndroid Build Coastguard Worker bx r3 535*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w64): 536*c0909341SAndroid Build Coastguard Worker vmov.8 q1, q0 537*c0909341SAndroid Build Coastguard Worker sub r1, r1, #32 538*c0909341SAndroid Build Coastguard Worker1: 539*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128]! 540*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128]! 541*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128], r1 542*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128], r1 543*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 544*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128]! 545*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128]! 
546*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128], r1 547*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128], r1 548*c0909341SAndroid Build Coastguard Worker bgt 1b 549*c0909341SAndroid Build Coastguard Worker pop {r4-r5, pc} 550*c0909341SAndroid Build Coastguard Workerendfunc 551*c0909341SAndroid Build Coastguard Worker 552*c0909341SAndroid Build Coastguard Worker// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride, 553*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 554*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 555*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 556*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_8bpc_neon, export=1 557*c0909341SAndroid Build Coastguard Worker push {r4-r6, lr} 558*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #16] 559*c0909341SAndroid Build Coastguard Worker sub r2, r2, r4 560*c0909341SAndroid Build Coastguard Worker add lr, r3, r4 // width + height 561*c0909341SAndroid Build Coastguard Worker clz r3, r3 562*c0909341SAndroid Build Coastguard Worker clz r12, r4 563*c0909341SAndroid Build Coastguard Worker vdup.16 q15, lr // width + height 564*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_dc_tbl) 565*c0909341SAndroid Build Coastguard Worker rbit lr, lr // rbit(width + height) 566*c0909341SAndroid Build Coastguard Worker sub r3, r3, #20 // 25 leading bits, minus table offset 5 567*c0909341SAndroid Build Coastguard Worker sub r12, r12, #25 568*c0909341SAndroid Build Coastguard Worker clz lr, lr // ctz(width + height) 569*c0909341SAndroid Build Coastguard Worker ldr r3, [r5, r3, lsl #2] 570*c0909341SAndroid Build Coastguard Worker ldr r12, [r5, r12, lsl #2] 571*c0909341SAndroid Build Coastguard Worker neg lr, lr // -ctz(width + height) 572*c0909341SAndroid Build Coastguard Worker add r3, r5, r3 573*c0909341SAndroid Build Coastguard 
Worker add r5, r5, r12 574*c0909341SAndroid Build Coastguard Worker vshr.u16 q15, q15, #1 // (width + height) >> 1 575*c0909341SAndroid Build Coastguard Worker vdup.16 q14, lr // -ctz(width + height) 576*c0909341SAndroid Build Coastguard Worker add r12, r0, r1 577*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 578*c0909341SAndroid Build Coastguard Worker bx r5 579*c0909341SAndroid Build Coastguard Worker 580*c0909341SAndroid Build Coastguard Worker .align 2 581*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_tbl): 582*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h64) - L(ipred_dc_tbl) + CONFIG_THUMB 583*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h32) - L(ipred_dc_tbl) + CONFIG_THUMB 584*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h16) - L(ipred_dc_tbl) + CONFIG_THUMB 585*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h8) - L(ipred_dc_tbl) + CONFIG_THUMB 586*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_h4) - L(ipred_dc_tbl) + CONFIG_THUMB 587*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w64) - L(ipred_dc_tbl) + CONFIG_THUMB 588*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w32) - L(ipred_dc_tbl) + CONFIG_THUMB 589*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w16) - L(ipred_dc_tbl) + CONFIG_THUMB 590*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w8) - L(ipred_dc_tbl) + CONFIG_THUMB 591*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_w4) - L(ipred_dc_tbl) + CONFIG_THUMB 592*c0909341SAndroid Build Coastguard Worker 593*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h4): 594*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [r2, :32]! 
595*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d0, d0 596*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 597*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 598*c0909341SAndroid Build Coastguard Worker bx r3 599*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w4): 600*c0909341SAndroid Build Coastguard Worker vld1.32 {d1[]}, [r2] 601*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d30 602*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d1, d1 603*c0909341SAndroid Build Coastguard Worker vpadd.u16 d1, d1 604*c0909341SAndroid Build Coastguard Worker cmp r4, #4 605*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d1 606*c0909341SAndroid Build Coastguard Worker vshl.u16 d0, d0, d28 607*c0909341SAndroid Build Coastguard Worker beq 1f 608*c0909341SAndroid Build Coastguard Worker // h = 8/16 609*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 610*c0909341SAndroid Build Coastguard Worker movw r5, #(0x5556/2) 611*c0909341SAndroid Build Coastguard Worker cmp r4, #16 612*c0909341SAndroid Build Coastguard Worker it ne 613*c0909341SAndroid Build Coastguard Worker movne lr, r5 614*c0909341SAndroid Build Coastguard Worker vdup.16 d30, lr 615*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d0, d0, d30 616*c0909341SAndroid Build Coastguard Worker1: 617*c0909341SAndroid Build Coastguard Worker vdup.8 d0, d0[0] 618*c0909341SAndroid Build Coastguard Worker2: 619*c0909341SAndroid Build Coastguard Worker vst1.32 {d0[0]}, [r0, :32], r1 620*c0909341SAndroid Build Coastguard Worker vst1.32 {d0[0]}, [r12, :32], r1 621*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 622*c0909341SAndroid Build Coastguard Worker vst1.32 {d0[0]}, [r0, :32], r1 623*c0909341SAndroid Build Coastguard Worker vst1.32 {d0[0]}, [r12, :32], r1 624*c0909341SAndroid Build Coastguard Worker bgt 2b 625*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 626*c0909341SAndroid Build Coastguard Worker 627*c0909341SAndroid Build Coastguard 
WorkerL(ipred_dc_h8): 628*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r2, :64]! 629*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d0, d0 630*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 631*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 632*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 633*c0909341SAndroid Build Coastguard Worker bx r3 634*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w8): 635*c0909341SAndroid Build Coastguard Worker vld1.8 {d2}, [r2] 636*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d30 637*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d2, d2 638*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 639*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 640*c0909341SAndroid Build Coastguard Worker cmp r4, #8 641*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d2 642*c0909341SAndroid Build Coastguard Worker vshl.u16 d0, d0, d28 643*c0909341SAndroid Build Coastguard Worker beq 1f 644*c0909341SAndroid Build Coastguard Worker // h = 4/16/32 645*c0909341SAndroid Build Coastguard Worker cmp r4, #32 646*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 647*c0909341SAndroid Build Coastguard Worker movw r5, #(0x5556/2) 648*c0909341SAndroid Build Coastguard Worker it ne 649*c0909341SAndroid Build Coastguard Worker movne lr, r5 650*c0909341SAndroid Build Coastguard Worker vdup.16 d24, lr 651*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d0, d0, d24 652*c0909341SAndroid Build Coastguard Worker1: 653*c0909341SAndroid Build Coastguard Worker vdup.8 d0, d0[0] 654*c0909341SAndroid Build Coastguard Worker2: 655*c0909341SAndroid Build Coastguard Worker vst1.8 {d0}, [r0, :64], r1 656*c0909341SAndroid Build Coastguard Worker vst1.8 {d0}, [r12, :64], r1 657*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 658*c0909341SAndroid Build Coastguard Worker vst1.8 {d0}, [r0, :64], r1 659*c0909341SAndroid Build Coastguard Worker vst1.8 {d0}, [r12, :64], r1 
660*c0909341SAndroid Build Coastguard Worker bgt 2b 661*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 662*c0909341SAndroid Build Coastguard Worker 663*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h16): 664*c0909341SAndroid Build Coastguard Worker vld1.8 {d0, d1}, [r2, :128]! 665*c0909341SAndroid Build Coastguard Worker vaddl.u8 q0, d0, d1 666*c0909341SAndroid Build Coastguard Worker vadd.u16 d0, d0, d1 667*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 668*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 669*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 670*c0909341SAndroid Build Coastguard Worker bx r3 671*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w16): 672*c0909341SAndroid Build Coastguard Worker vld1.8 {d2, d3}, [r2] 673*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d30 674*c0909341SAndroid Build Coastguard Worker vaddl.u8 q1, d2, d3 675*c0909341SAndroid Build Coastguard Worker vadd.u16 d2, d2, d3 676*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 677*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 678*c0909341SAndroid Build Coastguard Worker cmp r4, #16 679*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d2 680*c0909341SAndroid Build Coastguard Worker vshl.u16 d0, d0, d28 681*c0909341SAndroid Build Coastguard Worker beq 1f 682*c0909341SAndroid Build Coastguard Worker // h = 4/8/32/64 683*c0909341SAndroid Build Coastguard Worker tst r4, #(32+16+8) // 16 added to make a consecutive bitmask 684*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 685*c0909341SAndroid Build Coastguard Worker movw r5, #(0x5556/2) 686*c0909341SAndroid Build Coastguard Worker it ne 687*c0909341SAndroid Build Coastguard Worker movne lr, r5 688*c0909341SAndroid Build Coastguard Worker vdup.16 d24, lr 689*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d0, d0, d24 690*c0909341SAndroid Build Coastguard Worker1: 691*c0909341SAndroid Build Coastguard Worker vdup.8 q0, d0[0] 
692*c0909341SAndroid Build Coastguard Worker2: 693*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1}, [r0, :128], r1 694*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1}, [r12, :128], r1 695*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 696*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1}, [r0, :128], r1 697*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1}, [r12, :128], r1 698*c0909341SAndroid Build Coastguard Worker bgt 2b 699*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 700*c0909341SAndroid Build Coastguard Worker 701*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h32): 702*c0909341SAndroid Build Coastguard Worker vld1.8 {d0, d1, d2, d3}, [r2, :128]! 703*c0909341SAndroid Build Coastguard Worker vaddl.u8 q0, d0, d1 704*c0909341SAndroid Build Coastguard Worker vaddl.u8 q1, d2, d3 705*c0909341SAndroid Build Coastguard Worker vadd.u16 q0, q0, q1 706*c0909341SAndroid Build Coastguard Worker vadd.u16 d0, d0, d1 707*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 708*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 709*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 710*c0909341SAndroid Build Coastguard Worker bx r3 711*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w32): 712*c0909341SAndroid Build Coastguard Worker vld1.8 {d2, d3, d4, d5}, [r2] 713*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d30 714*c0909341SAndroid Build Coastguard Worker vaddl.u8 q1, d2, d3 715*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 716*c0909341SAndroid Build Coastguard Worker vadd.u16 q1, q1, q2 717*c0909341SAndroid Build Coastguard Worker vadd.u16 d2, d2, d3 718*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 719*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 720*c0909341SAndroid Build Coastguard Worker cmp r4, #32 721*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d2 722*c0909341SAndroid Build Coastguard Worker vshl.u16 d4, d0, d28 
723*c0909341SAndroid Build Coastguard Worker beq 1f 724*c0909341SAndroid Build Coastguard Worker // h = 8/16/64 725*c0909341SAndroid Build Coastguard Worker cmp r4, #8 726*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 727*c0909341SAndroid Build Coastguard Worker movw r5, #(0x5556/2) 728*c0909341SAndroid Build Coastguard Worker it ne 729*c0909341SAndroid Build Coastguard Worker movne lr, r5 730*c0909341SAndroid Build Coastguard Worker vdup.16 d24, lr 731*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d4, d4, d24 732*c0909341SAndroid Build Coastguard Worker1: 733*c0909341SAndroid Build Coastguard Worker vdup.8 q0, d4[0] 734*c0909341SAndroid Build Coastguard Worker vdup.8 q1, d4[0] 735*c0909341SAndroid Build Coastguard Worker2: 736*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128], r1 737*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128], r1 738*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 739*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128], r1 740*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128], r1 741*c0909341SAndroid Build Coastguard Worker bgt 2b 742*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 743*c0909341SAndroid Build Coastguard Worker 744*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h64): 745*c0909341SAndroid Build Coastguard Worker vld1.8 {d0, d1, d2, d3}, [r2, :128]! 746*c0909341SAndroid Build Coastguard Worker vaddl.u8 q0, d0, d1 747*c0909341SAndroid Build Coastguard Worker vld1.8 {d4, d5, d6, d7}, [r2, :128]! 
748*c0909341SAndroid Build Coastguard Worker vaddl.u8 q1, d2, d3 749*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 750*c0909341SAndroid Build Coastguard Worker vaddl.u8 q3, d6, d7 751*c0909341SAndroid Build Coastguard Worker vadd.u16 q0, q0, q1 752*c0909341SAndroid Build Coastguard Worker vadd.u16 q1, q2, q3 753*c0909341SAndroid Build Coastguard Worker vadd.u16 q0, q0, q1 754*c0909341SAndroid Build Coastguard Worker vadd.u16 d0, d0, d1 755*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 756*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 757*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 758*c0909341SAndroid Build Coastguard Worker bx r3 759*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w64): 760*c0909341SAndroid Build Coastguard Worker vld1.8 {d2, d3, d4, d5}, [r2]! 761*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d30 762*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 763*c0909341SAndroid Build Coastguard Worker vaddl.u8 q1, d2, d3 764*c0909341SAndroid Build Coastguard Worker vadd.u16 d4, d4, d5 765*c0909341SAndroid Build Coastguard Worker vadd.u16 d2, d2, d3 766*c0909341SAndroid Build Coastguard Worker vld1.8 {d16, d17, d18, d19}, [r2] 767*c0909341SAndroid Build Coastguard Worker vpadd.u16 d4, d4 768*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 769*c0909341SAndroid Build Coastguard Worker vpadd.u16 d4, d4 770*c0909341SAndroid Build Coastguard Worker vpadd.u16 d2, d2 771*c0909341SAndroid Build Coastguard Worker vaddl.u8 q8, d16, d17 772*c0909341SAndroid Build Coastguard Worker vaddl.u8 q9, d18, d19 773*c0909341SAndroid Build Coastguard Worker vadd.u16 d16, d16, d17 774*c0909341SAndroid Build Coastguard Worker vadd.u16 d18, d18, d19 775*c0909341SAndroid Build Coastguard Worker vpadd.u16 d16, d16 776*c0909341SAndroid Build Coastguard Worker vpadd.u16 d18, d18 777*c0909341SAndroid Build Coastguard Worker vpadd.u16 d16, d16 778*c0909341SAndroid Build Coastguard Worker vpadd.u16 
d18, d18 779*c0909341SAndroid Build Coastguard Worker vadd.u16 d2, d2, d4 780*c0909341SAndroid Build Coastguard Worker vadd.u16 d3, d16, d18 781*c0909341SAndroid Build Coastguard Worker cmp r4, #64 782*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d2 783*c0909341SAndroid Build Coastguard Worker vadd.s16 d0, d0, d3 784*c0909341SAndroid Build Coastguard Worker vshl.u16 d18, d0, d28 785*c0909341SAndroid Build Coastguard Worker beq 1f 786*c0909341SAndroid Build Coastguard Worker // h = 16/32 787*c0909341SAndroid Build Coastguard Worker movw lr, #(0x5556/2) 788*c0909341SAndroid Build Coastguard Worker movt lr, #(0x3334/2) 789*c0909341SAndroid Build Coastguard Worker and r5, r4, #31 790*c0909341SAndroid Build Coastguard Worker lsr lr, lr, r5 791*c0909341SAndroid Build Coastguard Worker vdup.16 d30, lr 792*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d18, d18, d30 793*c0909341SAndroid Build Coastguard Worker1: 794*c0909341SAndroid Build Coastguard Worker sub r1, r1, #32 795*c0909341SAndroid Build Coastguard Worker vdup.8 q0, d18[0] 796*c0909341SAndroid Build Coastguard Worker vdup.8 q1, d18[0] 797*c0909341SAndroid Build Coastguard Worker2: 798*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128]! 799*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128]! 800*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128], r1 801*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128], r1 802*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 803*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128]! 804*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128]! 
805*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r0, :128], r1 806*c0909341SAndroid Build Coastguard Worker vst1.8 {d0, d1, d2, d3}, [r12, :128], r1 807*c0909341SAndroid Build Coastguard Worker bgt 2b 808*c0909341SAndroid Build Coastguard Worker pop {r4-r6, pc} 809*c0909341SAndroid Build Coastguard Workerendfunc 810*c0909341SAndroid Build Coastguard Worker 811*c0909341SAndroid Build Coastguard Worker// void ipred_paeth_8bpc_neon(pixel *dst, const ptrdiff_t stride, 812*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 813*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 814*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 815*c0909341SAndroid Build Coastguard Workerfunction ipred_paeth_8bpc_neon, export=1 816*c0909341SAndroid Build Coastguard Worker push {r4-r8, lr} 817*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #24] 818*c0909341SAndroid Build Coastguard Worker clz lr, r3 819*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_paeth_tbl) 820*c0909341SAndroid Build Coastguard Worker sub lr, lr, #25 821*c0909341SAndroid Build Coastguard Worker ldr lr, [r5, lr, lsl #2] 822*c0909341SAndroid Build Coastguard Worker vld1.8 {d4[], d5[]}, [r2] 823*c0909341SAndroid Build Coastguard Worker add r8, r2, #1 824*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 825*c0909341SAndroid Build Coastguard Worker add r5, r5, lr 826*c0909341SAndroid Build Coastguard Worker mov r7, #-4 827*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 828*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 829*c0909341SAndroid Build Coastguard Worker bx r5 830*c0909341SAndroid Build Coastguard Worker 831*c0909341SAndroid Build Coastguard Worker .align 2 832*c0909341SAndroid Build Coastguard WorkerL(ipred_paeth_tbl): 833*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_paeth_tbl) + CONFIG_THUMB 834*c0909341SAndroid Build Coastguard Worker 
.word 320f - L(ipred_paeth_tbl) + CONFIG_THUMB 835*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_paeth_tbl) + CONFIG_THUMB 836*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_paeth_tbl) + CONFIG_THUMB 837*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_paeth_tbl) + CONFIG_THUMB 838*c0909341SAndroid Build Coastguard Worker 839*c0909341SAndroid Build Coastguard Worker40: 840*c0909341SAndroid Build Coastguard Worker vld1.32 {d6[], d7[]}, [r8] 841*c0909341SAndroid Build Coastguard Worker vsubl.u8 q8, d6, d4 // top - topleft 842*c0909341SAndroid Build Coastguard Worker4: 843*c0909341SAndroid Build Coastguard Worker vld4.8 {d0[], d1[], d2[], d3[]}, [r2, :32], r7 844*c0909341SAndroid Build Coastguard Worker vzip.32 d0, d1 845*c0909341SAndroid Build Coastguard Worker vzip.32 d2, d3 846*c0909341SAndroid Build Coastguard Worker vaddw.u8 q9, q8, d0 847*c0909341SAndroid Build Coastguard Worker vaddw.u8 q10, q8, d2 848*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d18, q9 // base 849*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d19, q10 850*c0909341SAndroid Build Coastguard Worker vmov d1, d2 851*c0909341SAndroid Build Coastguard Worker vabd.u8 q10, q3, q9 // tdiff 852*c0909341SAndroid Build Coastguard Worker vabd.u8 q11, q2, q9 // tldiff 853*c0909341SAndroid Build Coastguard Worker vabd.u8 q9, q0, q9 // ldiff 854*c0909341SAndroid Build Coastguard Worker vmin.u8 q12, q10, q11 // min(tdiff, tldiff) 855*c0909341SAndroid Build Coastguard Worker vcge.u8 q10, q11, q10 // tldiff >= tdiff 856*c0909341SAndroid Build Coastguard Worker vcge.u8 q9, q12, q9 // min(tdiff, tldiff) >= ldiff 857*c0909341SAndroid Build Coastguard Worker vbsl q10, q3, q2 // tdiff <= tldiff ? top : topleft 858*c0909341SAndroid Build Coastguard Worker vbit q10, q0, q9 // ldiff <= min ? left : ... 
859*c0909341SAndroid Build Coastguard Worker vst1.32 {d21[1]}, [r0, :32], r1 860*c0909341SAndroid Build Coastguard Worker vst1.32 {d21[0]}, [r6, :32], r1 861*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 862*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[1]}, [r0, :32], r1 863*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[0]}, [r6, :32], r1 864*c0909341SAndroid Build Coastguard Worker bgt 4b 865*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 866*c0909341SAndroid Build Coastguard Worker80: 867*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [r8] 868*c0909341SAndroid Build Coastguard Worker vsubl.u8 q8, d6, d4 // top - topleft 869*c0909341SAndroid Build Coastguard Worker vmov d7, d6 870*c0909341SAndroid Build Coastguard Worker8: 871*c0909341SAndroid Build Coastguard Worker vld4.8 {d0[], d1[], d2[], d3[]}, [r2, :32], r7 872*c0909341SAndroid Build Coastguard Worker vaddw.u8 q9, q8, d0 873*c0909341SAndroid Build Coastguard Worker vaddw.u8 q10, q8, d1 874*c0909341SAndroid Build Coastguard Worker vaddw.u8 q11, q8, d2 875*c0909341SAndroid Build Coastguard Worker vaddw.u8 q12, q8, d3 876*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d18, q9 // base 877*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d19, q10 878*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d20, q11 879*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d21, q12 880*c0909341SAndroid Build Coastguard Worker vabd.u8 q11, q3, q9 // tdiff 881*c0909341SAndroid Build Coastguard Worker vabd.u8 q12, q3, q10 882*c0909341SAndroid Build Coastguard Worker vabd.u8 q13, q2, q9 // tldiff 883*c0909341SAndroid Build Coastguard Worker vabd.u8 q14, q2, q10 884*c0909341SAndroid Build Coastguard Worker vabd.u8 q10, q1, q10 // ldiff 885*c0909341SAndroid Build Coastguard Worker vabd.u8 q9, q0, q9 886*c0909341SAndroid Build Coastguard Worker vmin.u8 q15, q12, q14 // min(tdiff, tldiff) 887*c0909341SAndroid Build Coastguard Worker vcge.u8 q12, q14, q12 // tldiff >= tdiff 
888*c0909341SAndroid Build Coastguard Worker vmin.u8 q14, q11, q13 // min(tdiff, tldiff) 889*c0909341SAndroid Build Coastguard Worker vcge.u8 q11, q13, q11 // tldiff >= tdiff 890*c0909341SAndroid Build Coastguard Worker vcge.u8 q10, q15, q10 // min(tdiff, tldiff) >= ldiff 891*c0909341SAndroid Build Coastguard Worker vcge.u8 q9, q14, q9 892*c0909341SAndroid Build Coastguard Worker vbsl q12, q3, q2 // tdiff <= tldiff ? top : topleft 893*c0909341SAndroid Build Coastguard Worker vbsl q11, q3, q2 894*c0909341SAndroid Build Coastguard Worker vbit q12, q1, q10 // ldiff <= min ? left : ... 895*c0909341SAndroid Build Coastguard Worker vbit q11, q0, q9 896*c0909341SAndroid Build Coastguard Worker vst1.8 {d25}, [r0, :64], r1 897*c0909341SAndroid Build Coastguard Worker vst1.8 {d24}, [r6, :64], r1 898*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 899*c0909341SAndroid Build Coastguard Worker vst1.8 {d23}, [r0, :64], r1 900*c0909341SAndroid Build Coastguard Worker vst1.8 {d22}, [r6, :64], r1 901*c0909341SAndroid Build Coastguard Worker bgt 8b 902*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 903*c0909341SAndroid Build Coastguard Worker160: 904*c0909341SAndroid Build Coastguard Worker320: 905*c0909341SAndroid Build Coastguard Worker640: 906*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [r8]! 
907*c0909341SAndroid Build Coastguard Worker mov r12, r3 908*c0909341SAndroid Build Coastguard Worker // Set up pointers for four rows in parallel; r0, r6, r5, lr 909*c0909341SAndroid Build Coastguard Worker add r5, r0, r1 910*c0909341SAndroid Build Coastguard Worker add lr, r6, r1 911*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 912*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3 913*c0909341SAndroid Build Coastguard Worker1: 914*c0909341SAndroid Build Coastguard Worker vld4.8 {d0[], d1[], d2[], d3[]}, [r2, :32], r7 915*c0909341SAndroid Build Coastguard Worker2: 916*c0909341SAndroid Build Coastguard Worker vsubl.u8 q8, d6, d4 // top - topleft 917*c0909341SAndroid Build Coastguard Worker vmov d7, d6 918*c0909341SAndroid Build Coastguard Worker vaddw.u8 q9, q8, d0 919*c0909341SAndroid Build Coastguard Worker vaddw.u8 q10, q8, d1 920*c0909341SAndroid Build Coastguard Worker vaddw.u8 q11, q8, d2 921*c0909341SAndroid Build Coastguard Worker vaddw.u8 q12, q8, d3 922*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d18, q9 // base 923*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d19, q10 924*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d20, q11 925*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d21, q12 926*c0909341SAndroid Build Coastguard Worker vabd.u8 q11, q3, q9 // tdiff 927*c0909341SAndroid Build Coastguard Worker vabd.u8 q12, q3, q10 928*c0909341SAndroid Build Coastguard Worker vabd.u8 q13, q2, q9 // tldiff 929*c0909341SAndroid Build Coastguard Worker vabd.u8 q14, q2, q10 930*c0909341SAndroid Build Coastguard Worker vabd.u8 q10, q1, q10 // ldiff 931*c0909341SAndroid Build Coastguard Worker vabd.u8 q9, q0, q9 932*c0909341SAndroid Build Coastguard Worker vmin.u8 q15, q12, q14 // min(tdiff, tldiff) 933*c0909341SAndroid Build Coastguard Worker vcge.u8 q12, q14, q12 // tldiff >= tdiff 934*c0909341SAndroid Build Coastguard Worker vmin.u8 q14, q11, q13 // min(tdiff, tldiff) 935*c0909341SAndroid Build Coastguard Worker vcge.u8 
q11, q13, q11 // tldiff >= tdiff 936*c0909341SAndroid Build Coastguard Worker vcge.u8 q10, q15, q10 // min(tdiff, tldiff) >= ldiff 937*c0909341SAndroid Build Coastguard Worker vcge.u8 q9, q14, q9 938*c0909341SAndroid Build Coastguard Worker vbsl q12, q3, q2 // tdiff <= tldiff ? top : topleft 939*c0909341SAndroid Build Coastguard Worker vbsl q11, q3, q2 940*c0909341SAndroid Build Coastguard Worker vbit q12, q1, q10 // ldiff <= min ? left : ... 941*c0909341SAndroid Build Coastguard Worker vbit q11, q0, q9 942*c0909341SAndroid Build Coastguard Worker subs r3, r3, #8 943*c0909341SAndroid Build Coastguard Worker vst1.8 {d25}, [r0, :64]! 944*c0909341SAndroid Build Coastguard Worker vst1.8 {d24}, [r6, :64]! 945*c0909341SAndroid Build Coastguard Worker vst1.8 {d23}, [r5, :64]! 946*c0909341SAndroid Build Coastguard Worker vst1.8 {d22}, [lr, :64]! 947*c0909341SAndroid Build Coastguard Worker ble 8f 948*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [r8]! 949*c0909341SAndroid Build Coastguard Worker b 2b 950*c0909341SAndroid Build Coastguard Worker8: 951*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 952*c0909341SAndroid Build Coastguard Worker ble 9f 953*c0909341SAndroid Build Coastguard Worker // End of horizontal loop, move pointers to next four rows 954*c0909341SAndroid Build Coastguard Worker sub r8, r8, r12 955*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 956*c0909341SAndroid Build Coastguard Worker add r6, r6, r1 957*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [r8]! 
958*c0909341SAndroid Build Coastguard Worker add r5, r5, r1 959*c0909341SAndroid Build Coastguard Worker add lr, lr, r1 960*c0909341SAndroid Build Coastguard Worker mov r3, r12 961*c0909341SAndroid Build Coastguard Worker b 1b 962*c0909341SAndroid Build Coastguard Worker9: 963*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 964*c0909341SAndroid Build Coastguard Workerendfunc 965*c0909341SAndroid Build Coastguard Worker 966*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_8bpc_neon(pixel *dst, const ptrdiff_t stride, 967*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 968*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 969*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); /* ipred_smooth_8bpc_neon: SMOOTH intra prediction (8 bpc, NEON). Every output pixel is vrshrn(vhadd(right*256 + (left-right)*weights_hor, bottom*256 + (top-bottom)*weights_ver), 8). Prologue register setup: r4 = height (first stack argument, at sp+32 after the 8-register push), r12 = sm_weights + height (weights_ver), r10 = sm_weights + width (weights_hor), d4 = bottom pixel read from topleft - height and replicated to all lanes, r8 = topleft + 1 (top row), r6 = dst + stride, r1 = 2 * stride. The size-specific code is entered through L(ipred_smooth_tbl), indexed by clz(width) - 25 (index 0 is the 64-wide entry, index 4 the 4-wide entry). */970*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_8bpc_neon, export=1 971*c0909341SAndroid Build Coastguard Worker push {r4-r10, lr} 972*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #32] 973*c0909341SAndroid Build Coastguard Worker movrel r10, X(sm_weights) 974*c0909341SAndroid Build Coastguard Worker add r12, r10, r4 975*c0909341SAndroid Build Coastguard Worker add r10, r10, r3 976*c0909341SAndroid Build Coastguard Worker clz r9, r3 977*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_smooth_tbl) 978*c0909341SAndroid Build Coastguard Worker sub lr, r2, r4 979*c0909341SAndroid Build Coastguard Worker sub r9, r9, #25 980*c0909341SAndroid Build Coastguard Worker ldr r9, [r5, r9, lsl #2] 981*c0909341SAndroid Build Coastguard Worker vld1.8 {d4[]}, [lr] // bottom 982*c0909341SAndroid Build Coastguard Worker add r8, r2, #1 983*c0909341SAndroid Build Coastguard Worker add r5, r5, r9 984*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 985*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 986*c0909341SAndroid Build Coastguard Worker bx r5 987*c0909341SAndroid Build Coastguard Worker 988*c0909341SAndroid Build Coastguard Worker .align 2 
989*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_tbl): 990*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_smooth_tbl) + CONFIG_THUMB 991*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_smooth_tbl) + CONFIG_THUMB 992*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_smooth_tbl) + CONFIG_THUMB 993*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_smooth_tbl) + CONFIG_THUMB 994*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_smooth_tbl) + CONFIG_THUMB 995*c0909341SAndroid Build Coastguard Worker 996*c0909341SAndroid Build Coastguard Worker40: 997*c0909341SAndroid Build Coastguard Worker vld1.32 {d16[]}, [r8] // top /* width 4: top and weights_hor are loaded once, then each pass of loop 4 produces four rows, reading four left pixels (r2 steps back by 4) and four weights_ver bytes (r12 steps forward) */998*c0909341SAndroid Build Coastguard Worker vld1.32 {d18[]}, [r10, :32] // weights_hor 999*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 1000*c0909341SAndroid Build Coastguard Worker mov r7, #-4 1001*c0909341SAndroid Build Coastguard Worker vdup.8 q3, d16[3] // right 1002*c0909341SAndroid Build Coastguard Worker vsubl.u8 q8, d16, d4 // top-bottom 1003*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 // weights_hor 1004*c0909341SAndroid Build Coastguard Worker4: 1005*c0909341SAndroid Build Coastguard Worker vld4.8 {d0[], d1[], d2[], d3[]}, [r2, :32], r7 // left 1006*c0909341SAndroid Build Coastguard Worker vld4.8 {d20[], d21[], d22[], d23[]}, [r12, :32]! 
// weights_ver 1007*c0909341SAndroid Build Coastguard Worker vshll.i8 q12, d6, #8 // right*256 1008*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d6, #8 1009*c0909341SAndroid Build Coastguard Worker vzip.32 d1, d0 // left, flipped 1010*c0909341SAndroid Build Coastguard Worker vzip.32 d3, d2 1011*c0909341SAndroid Build Coastguard Worker vzip.32 d20, d21 // weights_ver 1012*c0909341SAndroid Build Coastguard Worker vzip.32 d22, d23 1013*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d4, #8 // bottom*256 1014*c0909341SAndroid Build Coastguard Worker vshll.i8 q15, d4, #8 1015*c0909341SAndroid Build Coastguard Worker vsubl.u8 q0, d1, d6 // left-right 1016*c0909341SAndroid Build Coastguard Worker vsubl.u8 q1, d3, d6 1017*c0909341SAndroid Build Coastguard Worker vmovl.u8 q10, d20 // weights_ver 1018*c0909341SAndroid Build Coastguard Worker vmovl.u8 q11, d22 1019*c0909341SAndroid Build Coastguard Worker vmla.i16 q12, q1, q9 // right*256 + (left-right)*weights_hor 1020*c0909341SAndroid Build Coastguard Worker vmla.i16 q13, q0, q9 // (left flipped) 1021*c0909341SAndroid Build Coastguard Worker vmla.i16 q14, q8, q10 // bottom*256 + (top-bottom)*weights_ver 1022*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q8, q11 1023*c0909341SAndroid Build Coastguard Worker vhadd.u16 q12, q12, q14 1024*c0909341SAndroid Build Coastguard Worker vhadd.u16 q13, q13, q15 1025*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d24, q12, #8 1026*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d25, q13, #8 1027*c0909341SAndroid Build Coastguard Worker vst1.32 {d24[0]}, [r0, :32], r1 1028*c0909341SAndroid Build Coastguard Worker vst1.32 {d24[1]}, [r6, :32], r1 1029*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1030*c0909341SAndroid Build Coastguard Worker vst1.32 {d25[0]}, [r0, :32], r1 1031*c0909341SAndroid Build Coastguard Worker vst1.32 {d25[1]}, [r6, :32], r1 1032*c0909341SAndroid Build Coastguard Worker bgt 4b 1033*c0909341SAndroid Build Coastguard 
Worker pop {r4-r10, pc} 1034*c0909341SAndroid Build Coastguard Worker80: 1035*c0909341SAndroid Build Coastguard Worker vld1.8 {d16}, [r8] // top /* width 8: same scheme as the 4-wide path, but loop 8 produces two rows per pass (r2 steps back by 2, r12 forward by 2) */1036*c0909341SAndroid Build Coastguard Worker vld1.8 {d18}, [r10, :64] // weights_hor 1037*c0909341SAndroid Build Coastguard Worker sub r2, r2, #2 1038*c0909341SAndroid Build Coastguard Worker mov r7, #-2 1039*c0909341SAndroid Build Coastguard Worker vdup.8 q3, d16[7] // right 1040*c0909341SAndroid Build Coastguard Worker vsubl.u8 q8, d16, d4 // top-bottom 1041*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 // weights_hor 1042*c0909341SAndroid Build Coastguard Worker8: 1043*c0909341SAndroid Build Coastguard Worker vld2.8 {d0[], d1[]}, [r2, :16], r7 // left 1044*c0909341SAndroid Build Coastguard Worker vld2.8 {d20[], d22[]}, [r12, :16]! // weights_ver 1045*c0909341SAndroid Build Coastguard Worker vshll.i8 q12, d6, #8 // right*256 1046*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d6, #8 1047*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d4, #8 // bottom*256 1048*c0909341SAndroid Build Coastguard Worker vshll.i8 q15, d4, #8 1049*c0909341SAndroid Build Coastguard Worker vsubl.u8 q1, d0, d6 // left-right (left flipped) 1050*c0909341SAndroid Build Coastguard Worker vsubl.u8 q0, d1, d6 1051*c0909341SAndroid Build Coastguard Worker vmovl.u8 q10, d20 // weights_ver 1052*c0909341SAndroid Build Coastguard Worker vmovl.u8 q11, d22 1053*c0909341SAndroid Build Coastguard Worker vmla.i16 q12, q0, q9 // right*256 + (left-right)*weights_hor 1054*c0909341SAndroid Build Coastguard Worker vmla.i16 q13, q1, q9 1055*c0909341SAndroid Build Coastguard Worker vmla.i16 q14, q8, q10 // bottom*256 + (top-bottom)*weights_ver 1056*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q8, q11 1057*c0909341SAndroid Build Coastguard Worker vhadd.u16 q12, q12, q14 1058*c0909341SAndroid Build Coastguard Worker 
vhadd.u16 q13, q13, q15 1059*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d24, q12, #8 1060*c0909341SAndroid 
Build Coastguard Worker vrshrn.i16 d25, q13, #8 1061*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 1062*c0909341SAndroid Build Coastguard Worker vst1.8 {d24}, [r0, :64], r1 1063*c0909341SAndroid Build Coastguard Worker vst1.8 {d25}, [r6, :64], r1 1064*c0909341SAndroid Build Coastguard Worker bgt 8b 1065*c0909341SAndroid Build Coastguard Worker pop {r4-r10, pc} 1066*c0909341SAndroid Build Coastguard Worker160: 1067*c0909341SAndroid Build Coastguard Worker320: 1068*c0909341SAndroid Build Coastguard Worker640: /* widths 16, 32 and 64 share this path: right is loaded from topleft + width, r1 becomes 2 * stride - width so that adding it after a finished row pair moves both row pointers down two rows, and r9 keeps the width for resetting r3, r8 and r10 */1069*c0909341SAndroid Build Coastguard Worker add lr, r2, r3 1070*c0909341SAndroid Build Coastguard Worker sub r2, r2, #2 1071*c0909341SAndroid Build Coastguard Worker mov r7, #-2 1072*c0909341SAndroid Build Coastguard Worker vld1.8 {d6[], d7[]}, [lr] // right 1073*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3 1074*c0909341SAndroid Build Coastguard Worker mov r9, r3 1075*c0909341SAndroid Build Coastguard Worker 1076*c0909341SAndroid Build Coastguard Worker1: /* outer loop: one pass per pair of rows, loading two left pixels and two weights_ver bytes */1077*c0909341SAndroid Build Coastguard Worker vld2.8 {d0[], d1[]}, [r2, :16], r7 // left 1078*c0909341SAndroid Build Coastguard Worker vld2.8 {d20[], d22[]}, [r12, :16]! // weights_ver 1079*c0909341SAndroid Build Coastguard Worker vsubl.u8 q1, d0, d6 // left-right (left flipped) 1080*c0909341SAndroid Build Coastguard Worker vsubl.u8 q0, d1, d6 1081*c0909341SAndroid Build Coastguard Worker vmovl.u8 q10, d20 // weights_ver 1082*c0909341SAndroid Build Coastguard Worker vmovl.u8 q11, d22 1083*c0909341SAndroid Build Coastguard Worker2: /* inner loop: eight columns of both rows per pass */1084*c0909341SAndroid Build Coastguard Worker vld1.8 {d16}, [r8]! // top 1085*c0909341SAndroid Build Coastguard Worker vld1.8 {d18}, [r10, :64]! 
// weights_hor 1086*c0909341SAndroid Build Coastguard Worker vshll.i8 q12, d6, #8 // right*256 1087*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d6, #8 1088*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 // weights_hor 1089*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d4, #8 // bottom*256 1090*c0909341SAndroid Build Coastguard Worker vshll.i8 q15, d4, #8 1091*c0909341SAndroid Build Coastguard Worker vsubl.u8 q8, d16, d4 // top-bottom 1092*c0909341SAndroid Build Coastguard Worker vmla.i16 q12, q0, q9 // right*256 + (left-right)*weights_hor 1093*c0909341SAndroid Build Coastguard Worker vmla.i16 q13, q1, q9 1094*c0909341SAndroid Build Coastguard Worker vmla.i16 q14, q8, q10 // bottom*256 + (top-bottom)*weights_ver 1095*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q8, q11 1096*c0909341SAndroid Build Coastguard Worker vhadd.u16 q12, q12, q14 1097*c0909341SAndroid Build Coastguard Worker vhadd.u16 q13, q13, q15 1098*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d24, q12, #8 1099*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d25, q13, #8 1100*c0909341SAndroid Build Coastguard Worker subs r3, r3, #8 1101*c0909341SAndroid Build Coastguard Worker vst1.8 {d24}, [r0, :64]! 1102*c0909341SAndroid Build Coastguard Worker vst1.8 {d25}, [r6, :64]! 
1103*c0909341SAndroid Build Coastguard Worker bgt 2b 1104*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 1105*c0909341SAndroid Build Coastguard Worker ble 9f 1106*c0909341SAndroid Build Coastguard Worker sub r8, r8, r9 1107*c0909341SAndroid Build Coastguard Worker sub r10, r10, r9 1108*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 1109*c0909341SAndroid Build Coastguard Worker add r6, r6, r1 1110*c0909341SAndroid Build Coastguard Worker mov r3, r9 1111*c0909341SAndroid Build Coastguard Worker b 1b 1112*c0909341SAndroid Build Coastguard Worker9: 1113*c0909341SAndroid Build Coastguard Worker pop {r4-r10, pc} 1114*c0909341SAndroid Build Coastguard Workerendfunc 1115*c0909341SAndroid Build Coastguard Worker 1116*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_v_8bpc_neon(pixel *dst, const ptrdiff_t stride, 1117*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1118*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 1119*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); /* ipred_smooth_v_8bpc_neon: SMOOTH_V intra prediction (8 bpc, NEON), blending vertically only. Every output pixel is vrshrn(bottom*256 + (top-bottom)*weights_ver, 8). Prologue register setup: r4 = height (first stack argument, at sp+20 after the 5-register push), r7 = sm_weights + height (weights_ver), d4 = bottom pixel read from topleft - height and replicated to all lanes, r2 = topleft + 1 (top row), r6 = dst + stride, r1 = 2 * stride. The size-specific code is entered through L(ipred_smooth_v_tbl), indexed by clz(width) - 25 (index 0 is the 64-wide entry, index 4 the 4-wide entry). */1120*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_v_8bpc_neon, export=1 1121*c0909341SAndroid Build Coastguard Worker push {r4-r7, lr} 1122*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #20] 1123*c0909341SAndroid Build Coastguard Worker movrel r7, X(sm_weights) 1124*c0909341SAndroid Build Coastguard Worker add r7, r7, r4 1125*c0909341SAndroid Build Coastguard Worker clz lr, r3 1126*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_smooth_v_tbl) 1127*c0909341SAndroid Build Coastguard Worker sub r12, r2, r4 1128*c0909341SAndroid Build Coastguard Worker sub lr, lr, #25 1129*c0909341SAndroid Build Coastguard Worker ldr lr, [r5, lr, lsl #2] 1130*c0909341SAndroid Build Coastguard Worker vld1.8 {d4[]}, [r12] // bottom 1131*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 1132*c0909341SAndroid Build Coastguard Worker add r5, r5, lr 
1133*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 1134*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1135*c0909341SAndroid Build Coastguard Worker bx r5 1136*c0909341SAndroid Build Coastguard Worker 1137*c0909341SAndroid Build Coastguard Worker .align 2 1138*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_v_tbl): 1139*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1140*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1141*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1142*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1143*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_smooth_v_tbl) + CONFIG_THUMB 1144*c0909341SAndroid Build Coastguard Worker 1145*c0909341SAndroid Build Coastguard Worker40: 1146*c0909341SAndroid Build Coastguard Worker vld1.32 {d6[]}, [r2] // top /* width 4: top-bottom is computed once, then each pass of loop 4 reads four weights_ver bytes and writes four rows */1147*c0909341SAndroid Build Coastguard Worker vsubl.u8 q3, d6, d4 // top-bottom 1148*c0909341SAndroid Build Coastguard Worker4: 1149*c0909341SAndroid Build Coastguard Worker vld4.8 {d16[], d17[], d18[], d19[]}, [r7, :32]! 
// weights_ver 1150*c0909341SAndroid Build Coastguard Worker vshll.i8 q10, d4, #8 // bottom*256 1151*c0909341SAndroid Build Coastguard Worker vshll.i8 q11, d4, #8 1152*c0909341SAndroid Build Coastguard Worker vzip.32 d16, d17 // weights_ver 1153*c0909341SAndroid Build Coastguard Worker vzip.32 d18, d19 1154*c0909341SAndroid Build Coastguard Worker vmovl.u8 q8, d16 // weights_ver 1155*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 1156*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1157*c0909341SAndroid Build Coastguard Worker vmla.i16 q10, q3, q8 // bottom*256 + (top-bottom)*weights_ver 1158*c0909341SAndroid Build Coastguard Worker vmla.i16 q11, q3, q9 1159*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d20, q10, #8 1160*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d21, q11, #8 1161*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[0]}, [r0, :32], r1 1162*c0909341SAndroid Build Coastguard Worker vst1.32 {d20[1]}, [r6, :32], r1 1163*c0909341SAndroid Build Coastguard Worker vst1.32 {d21[0]}, [r0, :32], r1 1164*c0909341SAndroid Build Coastguard Worker vst1.32 {d21[1]}, [r6, :32], r1 1165*c0909341SAndroid Build Coastguard Worker bgt 4b 1166*c0909341SAndroid Build Coastguard Worker pop {r4-r7, pc} 1167*c0909341SAndroid Build Coastguard Worker80: 1168*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [r2] // top /* width 8: same scheme as the 4-wide path, four rows per pass of loop 8 */1169*c0909341SAndroid Build Coastguard Worker vsubl.u8 q3, d6, d4 // top-bottom 1170*c0909341SAndroid Build Coastguard Worker8: 1171*c0909341SAndroid Build Coastguard Worker vld4.8 {d16[], d18[], d20[], d22[]}, [r7, :32]! 
// weights_ver 1172*c0909341SAndroid Build Coastguard Worker vshll.i8 q12, d4, #8 // bottom*256 1173*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d4, #8 1174*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d4, #8 1175*c0909341SAndroid Build Coastguard Worker vshll.i8 q15, d4, #8 1176*c0909341SAndroid Build Coastguard Worker vmovl.u8 q8, d16 // weights_ver 1177*c0909341SAndroid Build Coastguard Worker vmovl.u8 q9, d18 1178*c0909341SAndroid Build Coastguard Worker vmovl.u8 q10, d20 1179*c0909341SAndroid Build Coastguard Worker vmovl.u8 q11, d22 1180*c0909341SAndroid Build Coastguard Worker vmla.i16 q12, q3, q8 // bottom*256 + (top-bottom)*weights_ver 1181*c0909341SAndroid Build Coastguard Worker vmla.i16 q13, q3, q9 1182*c0909341SAndroid Build Coastguard Worker vmla.i16 q14, q3, q10 1183*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q3, q11 1184*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d24, q12, #8 1185*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d25, q13, #8 1186*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d26, q14, #8 1187*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d27, q15, #8 1188*c0909341SAndroid Build Coastguard Worker vst1.8 {d24}, [r0, :64], r1 1189*c0909341SAndroid Build Coastguard Worker vst1.8 {d25}, [r6, :64], r1 1190*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1191*c0909341SAndroid Build Coastguard Worker vst1.8 {d26}, [r0, :64], r1 1192*c0909341SAndroid Build Coastguard Worker vst1.8 {d27}, [r6, :64], r1 1193*c0909341SAndroid Build Coastguard Worker bgt 8b 1194*c0909341SAndroid Build Coastguard Worker pop {r4-r7, pc} 1195*c0909341SAndroid Build Coastguard Worker160: 1196*c0909341SAndroid Build Coastguard Worker320: 1197*c0909341SAndroid Build Coastguard Worker640: /* widths 16, 32 and 64 share this path: q4-q7 hold the widened weights_ver for four rows and are saved and restored around it, r0, r6, r5 and lr address four consecutive rows, r1 becomes 4 * stride - width and r12 keeps the width. Loop 1 handles four rows per pass, loop 2 sixteen columns of those rows, and r2 is rewound by the width before the next four rows */1198*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 1199*c0909341SAndroid Build Coastguard Worker // Set up pointers for four rows in parallel; r0, r6, r5, lr 1200*c0909341SAndroid Build Coastguard Worker add 
r5, r0, r1 1201*c0909341SAndroid Build Coastguard Worker add lr, r6, r1 1202*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1203*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3 1204*c0909341SAndroid Build Coastguard Worker mov r12, r3 1205*c0909341SAndroid Build Coastguard Worker 1206*c0909341SAndroid Build Coastguard Worker1: 1207*c0909341SAndroid Build Coastguard Worker vld4.8 {d8[], d10[], d12[], d14[]}, [r7, :32]! // weights_ver 1208*c0909341SAndroid Build Coastguard Worker vmovl.u8 q4, d8 // weights_ver 1209*c0909341SAndroid Build Coastguard Worker vmovl.u8 q5, d10 1210*c0909341SAndroid Build Coastguard Worker vmovl.u8 q6, d12 1211*c0909341SAndroid Build Coastguard Worker vmovl.u8 q7, d14 1212*c0909341SAndroid Build Coastguard Worker2: 1213*c0909341SAndroid Build Coastguard Worker vld1.8 {q3}, [r2]! // top 1214*c0909341SAndroid Build Coastguard Worker vshll.i8 q8, d4, #8 // bottom*256 1215*c0909341SAndroid Build Coastguard Worker vshll.i8 q9, d4, #8 1216*c0909341SAndroid Build Coastguard Worker vshll.i8 q10, d4, #8 1217*c0909341SAndroid Build Coastguard Worker vshll.i8 q11, d4, #8 1218*c0909341SAndroid Build Coastguard Worker vsubl.u8 q0, d6, d4 // top-bottom 1219*c0909341SAndroid Build Coastguard Worker vsubl.u8 q1, d7, d4 1220*c0909341SAndroid Build Coastguard Worker vshll.i8 q12, d4, #8 1221*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d4, #8 1222*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d4, #8 1223*c0909341SAndroid Build Coastguard Worker vshll.i8 q15, d4, #8 1224*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q0, q4 // bottom*256 + (top-bottom)*weights_ver 1225*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q1, q4 1226*c0909341SAndroid Build Coastguard Worker vmla.i16 q10, q0, q5 1227*c0909341SAndroid Build Coastguard Worker vmla.i16 q11, q1, q5 1228*c0909341SAndroid Build Coastguard Worker vmla.i16 q12, q0, q6 // bottom*256 + (top-bottom)*weights_ver 1229*c0909341SAndroid Build Coastguard Worker 
vmla.i16 q13, q1, q6 1230*c0909341SAndroid Build Coastguard Worker vmla.i16 q14, q0, q7 1231*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q1, q7 1232*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d16, q8, #8 1233*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d17, q9, #8 1234*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d18, q10, #8 1235*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d19, q11, #8 1236*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d20, q12, #8 1237*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d21, q13, #8 1238*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d22, q14, #8 1239*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d23, q15, #8 1240*c0909341SAndroid Build Coastguard Worker subs r3, r3, #16 1241*c0909341SAndroid Build Coastguard Worker vst1.8 {q8}, [r0, :128]! 1242*c0909341SAndroid Build Coastguard Worker vst1.8 {q9}, [r6, :128]! 1243*c0909341SAndroid Build Coastguard Worker vst1.8 {q10}, [r5, :128]! 1244*c0909341SAndroid Build Coastguard Worker vst1.8 {q11}, [lr, :128]! 
1245*c0909341SAndroid Build Coastguard Worker bgt 2b 1246*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1247*c0909341SAndroid Build Coastguard Worker ble 9f 1248*c0909341SAndroid Build Coastguard Worker sub r2, r2, r12 1249*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 1250*c0909341SAndroid Build Coastguard Worker add r6, r6, r1 1251*c0909341SAndroid Build Coastguard Worker add r5, r5, r1 1252*c0909341SAndroid Build Coastguard Worker add lr, lr, r1 1253*c0909341SAndroid Build Coastguard Worker mov r3, r12 1254*c0909341SAndroid Build Coastguard Worker b 1b 1255*c0909341SAndroid Build Coastguard Worker9: 1256*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 1257*c0909341SAndroid Build Coastguard Worker pop {r4-r7, pc} 1258*c0909341SAndroid Build Coastguard Workerendfunc 1259*c0909341SAndroid Build Coastguard Worker 1260*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_h_8bpc_neon(pixel *dst, const ptrdiff_t stride, 1261*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1262*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 1263*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); /* ipred_smooth_h_8bpc_neon: SMOOTH_H intra prediction (8 bpc, NEON), blending horizontally only. Every output pixel is vrshrn(right*256 + (left-right)*weights_hor, 8). Prologue register setup: r4 = height (first stack argument, at sp+24 after the 6-register push), r8 = sm_weights + width (weights_hor), d4 = right pixel read from topleft + width and replicated to all lanes, r6 = dst + stride, r1 = 2 * stride. The size-specific code is entered through L(ipred_smooth_h_tbl), indexed by clz(width) - 25 (index 0 is the 64-wide entry, index 4 the 4-wide entry). */1264*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_h_8bpc_neon, export=1 1265*c0909341SAndroid Build Coastguard Worker push {r4-r8, lr} 1266*c0909341SAndroid Build Coastguard Worker ldr r4, [sp, #24] 1267*c0909341SAndroid Build Coastguard Worker movrel r8, X(sm_weights) 1268*c0909341SAndroid Build Coastguard Worker add r8, r8, r3 1269*c0909341SAndroid Build Coastguard Worker clz lr, r3 1270*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_smooth_h_tbl) 1271*c0909341SAndroid Build Coastguard Worker add r12, r2, r3 1272*c0909341SAndroid Build Coastguard Worker sub lr, lr, #25 1273*c0909341SAndroid Build Coastguard Worker ldr lr, [r5, lr, lsl #2] 1274*c0909341SAndroid Build Coastguard Worker vld1.8 {d4[]}, [r12] // right 
1275*c0909341SAndroid Build Coastguard Worker add r5, r5, lr 1276*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 1277*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1278*c0909341SAndroid Build Coastguard Worker bx r5 1279*c0909341SAndroid Build Coastguard Worker 1280*c0909341SAndroid Build Coastguard Worker .align 2 1281*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_h_tbl): 1282*c0909341SAndroid Build Coastguard Worker .word 640f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1283*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1284*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1285*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1286*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_smooth_h_tbl) + CONFIG_THUMB 1287*c0909341SAndroid Build Coastguard Worker 1288*c0909341SAndroid Build Coastguard Worker40: 1289*c0909341SAndroid Build Coastguard Worker vld1.32 {d6[]}, [r8, :32] // weights_hor /* width 4: r2 is moved to topleft - 4 and then steps back by 4, so each pass of loop 4 reads four left pixels and writes four rows */1290*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 1291*c0909341SAndroid Build Coastguard Worker mov r7, #-4 1292*c0909341SAndroid Build Coastguard Worker vmovl.u8 q3, d6 // weights_hor 1293*c0909341SAndroid Build Coastguard Worker4: 1294*c0909341SAndroid Build Coastguard Worker vld4.8 {d0[], d1[], d2[], d3[]}, [r2, :32], r7 // left 1295*c0909341SAndroid Build Coastguard Worker vshll.i8 q8, d4, #8 // right*256 1296*c0909341SAndroid Build Coastguard Worker vshll.i8 q9, d4, #8 1297*c0909341SAndroid Build Coastguard Worker vzip.32 d3, d2 // left, flipped 1298*c0909341SAndroid Build Coastguard Worker vzip.32 d1, d0 1299*c0909341SAndroid Build Coastguard Worker vsubl.u8 q1, d3, d4 // left-right 1300*c0909341SAndroid Build Coastguard Worker vsubl.u8 q0, d1, d4 1301*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1302*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q1, q3 // right*256 + 
(left-right)*weights_hor 1303*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q0, q3 1304*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d16, q8, #8 1305*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d17, q9, #8 1306*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[0]}, [r0, :32], r1 1307*c0909341SAndroid Build Coastguard Worker vst1.32 {d16[1]}, [r6, :32], r1 1308*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[0]}, [r0, :32], r1 1309*c0909341SAndroid Build Coastguard Worker vst1.32 {d17[1]}, [r6, :32], r1 1310*c0909341SAndroid Build Coastguard Worker bgt 4b 1311*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1312*c0909341SAndroid Build Coastguard Worker80: 1313*c0909341SAndroid Build Coastguard Worker vld1.8 {d6}, [r8, :64] // weights_hor /* width 8: same left-pixel walk as the 4-wide path, four rows per pass of loop 8. The lane loaded last (d22) feeds the first stored row */1314*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 1315*c0909341SAndroid Build Coastguard Worker mov r7, #-4 1316*c0909341SAndroid Build Coastguard Worker vmovl.u8 q3, d6 // weights_hor 1317*c0909341SAndroid Build Coastguard Worker8: 1318*c0909341SAndroid Build Coastguard Worker vld4.8 {d16[], d18[], d20[], d22[]}, [r2, :32], r7 // left 1319*c0909341SAndroid Build Coastguard Worker vshll.i8 q12, d4, #8 // right*256 1320*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d4, #8 1321*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d4, #8 1322*c0909341SAndroid Build Coastguard Worker vshll.i8 q15, d4, #8 1323*c0909341SAndroid Build Coastguard Worker vsubl.u8 q11, d22, d4 // left-right 1324*c0909341SAndroid Build Coastguard Worker vsubl.u8 q10, d20, d4 1325*c0909341SAndroid Build Coastguard Worker vsubl.u8 q9, d18, d4 1326*c0909341SAndroid Build Coastguard Worker vsubl.u8 q8, d16, d4 1327*c0909341SAndroid Build Coastguard Worker vmla.i16 q12, q11, q3 // right*256 + (left-right)*weights_hor 1328*c0909341SAndroid Build Coastguard Worker vmla.i16 q13, q10, q3 // (left flipped) 1329*c0909341SAndroid Build Coastguard Worker vmla.i16 q14, q9, q3 1330*c0909341SAndroid Build 
Coastguard Worker vmla.i16 q15, q8, q3 1331*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d24, q12, #8 1332*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d25, q13, #8 1333*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d26, q14, #8 1334*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d27, q15, #8 1335*c0909341SAndroid Build Coastguard Worker vst1.8 {d24}, [r0, :64], r1 1336*c0909341SAndroid Build Coastguard Worker vst1.8 {d25}, [r6, :64], r1 1337*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1338*c0909341SAndroid Build Coastguard Worker vst1.8 {d26}, [r0, :64], r1 1339*c0909341SAndroid Build Coastguard Worker vst1.8 {d27}, [r6, :64], r1 1340*c0909341SAndroid Build Coastguard Worker bgt 8b 1341*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1342*c0909341SAndroid Build Coastguard Worker160: 1343*c0909341SAndroid Build Coastguard Worker320: 1344*c0909341SAndroid Build Coastguard Worker640: /* widths 16, 32 and 64 share this path: q4-q7 hold left-right for four rows and are saved and restored around it, r0, r6, r5 and lr address four consecutive rows, r1 becomes 4 * stride - width and r12 keeps the width. Loop 1 handles four rows per pass, loop 2 sixteen columns of those rows, and r8 is rewound by the width before the next four rows */1345*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 1346*c0909341SAndroid Build Coastguard Worker sub r2, r2, #4 1347*c0909341SAndroid Build Coastguard Worker mov r7, #-4 1348*c0909341SAndroid Build Coastguard Worker // Set up pointers for four rows in parallel; r0, r6, r5, lr 1349*c0909341SAndroid Build Coastguard Worker add r5, r0, r1 1350*c0909341SAndroid Build Coastguard Worker add lr, r6, r1 1351*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1352*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3 1353*c0909341SAndroid Build Coastguard Worker mov r12, r3 1354*c0909341SAndroid Build Coastguard Worker 1355*c0909341SAndroid Build Coastguard Worker1: 1356*c0909341SAndroid Build Coastguard Worker vld4.8 {d8[], d10[], d12[], d14[]}, [r2, :32], r7 // left 1357*c0909341SAndroid Build Coastguard Worker vsubl.u8 q4, d8, d4 // left-right 1358*c0909341SAndroid Build Coastguard Worker vsubl.u8 q5, d10, d4 1359*c0909341SAndroid Build Coastguard Worker vsubl.u8 q6, d12, d4 1360*c0909341SAndroid Build Coastguard Worker vsubl.u8 
q7, d14, d4 1361*c0909341SAndroid Build Coastguard Worker2: 1362*c0909341SAndroid Build Coastguard Worker vld1.8 {q1}, [r8, :128]! // weights_hor 1363*c0909341SAndroid Build Coastguard Worker vshll.i8 q8, d4, #8 // right*256 1364*c0909341SAndroid Build Coastguard Worker vshll.i8 q9, d4, #8 1365*c0909341SAndroid Build Coastguard Worker vshll.i8 q10, d4, #8 1366*c0909341SAndroid Build Coastguard Worker vshll.i8 q11, d4, #8 1367*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d2 // weights_hor 1368*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d3 1369*c0909341SAndroid Build Coastguard Worker vshll.i8 q12, d4, #8 1370*c0909341SAndroid Build Coastguard Worker vshll.i8 q13, d4, #8 1371*c0909341SAndroid Build Coastguard Worker vshll.i8 q14, d4, #8 1372*c0909341SAndroid Build Coastguard Worker vshll.i8 q15, d4, #8 1373*c0909341SAndroid Build Coastguard Worker vmla.i16 q8, q7, q0 // right*256 + (left-right)*weights_hor 1374*c0909341SAndroid Build Coastguard Worker vmla.i16 q9, q7, q1 // (left flipped) 1375*c0909341SAndroid Build Coastguard Worker vmla.i16 q10, q6, q0 1376*c0909341SAndroid Build Coastguard Worker vmla.i16 q11, q6, q1 1377*c0909341SAndroid Build Coastguard Worker vmla.i16 q12, q5, q0 1378*c0909341SAndroid Build Coastguard Worker vmla.i16 q13, q5, q1 1379*c0909341SAndroid Build Coastguard Worker vmla.i16 q14, q4, q0 1380*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q4, q1 1381*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d16, q8, #8 1382*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d17, q9, #8 1383*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d18, q10, #8 1384*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d19, q11, #8 1385*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d20, q12, #8 1386*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d21, q13, #8 1387*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d22, q14, #8 1388*c0909341SAndroid Build Coastguard Worker vrshrn.i16 d23, q15, #8 
1389*c0909341SAndroid Build Coastguard Worker subs r3, r3, #16 1390*c0909341SAndroid Build Coastguard Worker vst1.8 {q8}, [r0, :128]! 1391*c0909341SAndroid Build Coastguard Worker vst1.8 {q9}, [r6, :128]! 1392*c0909341SAndroid Build Coastguard Worker vst1.8 {q10}, [r5, :128]! 1393*c0909341SAndroid Build Coastguard Worker vst1.8 {q11}, [lr, :128]! 1394*c0909341SAndroid Build Coastguard Worker bgt 2b 1395*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 1396*c0909341SAndroid Build Coastguard Worker ble 9f 1397*c0909341SAndroid Build Coastguard Worker sub r8, r8, r12 1398*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 1399*c0909341SAndroid Build Coastguard Worker add r6, r6, r1 1400*c0909341SAndroid Build Coastguard Worker add r5, r5, r1 1401*c0909341SAndroid Build Coastguard Worker add lr, lr, r1 1402*c0909341SAndroid Build Coastguard Worker mov r3, r12 1403*c0909341SAndroid Build Coastguard Worker b 1b 1404*c0909341SAndroid Build Coastguard Worker9: 1405*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 1406*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1407*c0909341SAndroid Build Coastguard Workerendfunc 1408*c0909341SAndroid Build Coastguard Worker 1409*c0909341SAndroid Build Coastguard Worker// void ipred_filter_8bpc_neon(pixel *dst, const ptrdiff_t stride, 1410*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1411*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int filt_idx, 1412*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 1413*c0909341SAndroid Build Coastguard Workerfunction ipred_filter_8bpc_neon, export=1 1414*c0909341SAndroid Build Coastguard Worker push {r4-r8, lr} 1415*c0909341SAndroid Build Coastguard Worker movw r12, #511 1416*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #24] 1417*c0909341SAndroid Build Coastguard Worker and r5, r5, r12 // 511 1418*c0909341SAndroid Build Coastguard Worker movrel r6, 
X(filter_intra_taps) 1419*c0909341SAndroid Build Coastguard Worker lsl r5, r5, #6 1420*c0909341SAndroid Build Coastguard Worker add r6, r6, r5 1421*c0909341SAndroid Build Coastguard Worker vld1.8 {d20, d21, d22, d23}, [r6, :128]! 1422*c0909341SAndroid Build Coastguard Worker clz lr, r3 1423*c0909341SAndroid Build Coastguard Worker adr r5, L(ipred_filter_tbl) 1424*c0909341SAndroid Build Coastguard Worker vld1.8 {d27, d28, d29}, [r6, :64] 1425*c0909341SAndroid Build Coastguard Worker sub lr, lr, #26 1426*c0909341SAndroid Build Coastguard Worker ldr lr, [r5, lr, lsl #2] 1427*c0909341SAndroid Build Coastguard Worker vmovl.s8 q8, d20 1428*c0909341SAndroid Build Coastguard Worker vmovl.s8 q9, d21 1429*c0909341SAndroid Build Coastguard Worker add r5, r5, lr 1430*c0909341SAndroid Build Coastguard Worker vmovl.s8 q10, d22 1431*c0909341SAndroid Build Coastguard Worker vmovl.s8 q11, d23 1432*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 1433*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1434*c0909341SAndroid Build Coastguard Worker vmovl.s8 q12, d27 1435*c0909341SAndroid Build Coastguard Worker vmovl.s8 q13, d28 1436*c0909341SAndroid Build Coastguard Worker vmovl.s8 q14, d29 1437*c0909341SAndroid Build Coastguard Worker add r8, r2, #1 1438*c0909341SAndroid Build Coastguard Worker sub r2, r2, #2 1439*c0909341SAndroid Build Coastguard Worker mov r7, #-2 1440*c0909341SAndroid Build Coastguard Worker bx r5 1441*c0909341SAndroid Build Coastguard Worker 1442*c0909341SAndroid Build Coastguard Worker .align 2 1443*c0909341SAndroid Build Coastguard WorkerL(ipred_filter_tbl): 1444*c0909341SAndroid Build Coastguard Worker .word 320f - L(ipred_filter_tbl) + CONFIG_THUMB 1445*c0909341SAndroid Build Coastguard Worker .word 160f - L(ipred_filter_tbl) + CONFIG_THUMB 1446*c0909341SAndroid Build Coastguard Worker .word 80f - L(ipred_filter_tbl) + CONFIG_THUMB 1447*c0909341SAndroid Build Coastguard Worker .word 40f - L(ipred_filter_tbl) + CONFIG_THUMB 1448*c0909341SAndroid 
Build Coastguard Worker 1449*c0909341SAndroid Build Coastguard Worker40: 1450*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [r8] // top (0-3) 1451*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d0 // top (0-3) 1452*c0909341SAndroid Build Coastguard Worker4: 1453*c0909341SAndroid Build Coastguard Worker vld1.32 {d2[]}, [r2], r7 // left (0-1) + topleft (2) 1454*c0909341SAndroid Build Coastguard Worker vmul.i16 q2, q9, d0[0] // p1(top[0]) * filter(1) 1455*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q10, d0[1] // p2(top[1]) * filter(2) 1456*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q11, d0[2] // p3(top[2]) * filter(3) 1457*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d2 // left (0-1) + topleft (2) 1458*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q12, d0[3] // p4(top[3]) * filter(4) 1459*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q8, d2[2] // p0(topleft) * filter(0) 1460*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q13, d2[1] // p5(left[0]) * filter(5) 1461*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q14, d2[0] // p6(left[1]) * filter(6) 1462*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d4, q2, #4 1463*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 1464*c0909341SAndroid Build Coastguard Worker vst1.32 {d4[0]}, [r0, :32], r1 1465*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d4 1466*c0909341SAndroid Build Coastguard Worker vst1.32 {d4[1]}, [r6, :32], r1 1467*c0909341SAndroid Build Coastguard Worker vmov d0, d1 // move top from [4-7] to [0-3] 1468*c0909341SAndroid Build Coastguard Worker bgt 4b 1469*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1470*c0909341SAndroid Build Coastguard Worker80: 1471*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r8] // top (0-7) 1472*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d0 // top (0-7) 1473*c0909341SAndroid Build Coastguard Worker8: 1474*c0909341SAndroid Build Coastguard Worker vld1.32 
{d2[]}, [r2], r7 // left (0-1) + topleft (2) 1475*c0909341SAndroid Build Coastguard Worker vmul.i16 q2, q9, d0[0] // p1(top[0]) * filter(1) 1476*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q10, d0[1] // p2(top[1]) * filter(2) 1477*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q11, d0[2] // p3(top[2]) * filter(3) 1478*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d2 // left (0-1) + topleft (2) 1479*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q12, d0[3] // p4(top[3]) * filter(4) 1480*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q8, d2[2] // p0(topleft) * filter(0) 1481*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q13, d2[1] // p5(left[0]) * filter(5) 1482*c0909341SAndroid Build Coastguard Worker vmla.i16 q2, q14, d2[0] // p6(left[1]) * filter(6) 1483*c0909341SAndroid Build Coastguard Worker vmul.i16 q3, q9, d1[0] // p1(top[0]) * filter(1) 1484*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q10, d1[1] // p2(top[1]) * filter(2) 1485*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q11, d1[2] // p3(top[2]) * filter(3) 1486*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d4, q2, #4 1487*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d4 // first block, in 16 bit 1488*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q12, d1[3] // p4(top[3]) * filter(4) 1489*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q8, d0[3] // p0(topleft) * filter(0) 1490*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q13, d2[3] // p5(left[0]) * filter(5) 1491*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q14, d3[3] // p6(left[1]) * filter(6) 1492*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d5, q3, #4 1493*c0909341SAndroid Build Coastguard Worker vzip.32 d4, d5 1494*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 1495*c0909341SAndroid Build Coastguard Worker vst1.8 {d4}, [r0, :64], r1 1496*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d5 1497*c0909341SAndroid Build 
Coastguard Worker vst1.8 {d5}, [r6, :64], r1 1498*c0909341SAndroid Build Coastguard Worker bgt 8b 1499*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1500*c0909341SAndroid Build Coastguard Worker160: 1501*c0909341SAndroid Build Coastguard Worker320: 1502*c0909341SAndroid Build Coastguard Worker vpush {q4-q5} 1503*c0909341SAndroid Build Coastguard Worker sub r1, r1, r3 1504*c0909341SAndroid Build Coastguard Worker mov lr, r3 1505*c0909341SAndroid Build Coastguard Worker 1506*c0909341SAndroid Build Coastguard Worker1: 1507*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [r2], r7 // left (0-1) + topleft (2) 1508*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d0 // left (0-1) + topleft (2) 1509*c0909341SAndroid Build Coastguard Worker2: 1510*c0909341SAndroid Build Coastguard Worker vld1.8 {q2}, [r8]! // top(0-15) 1511*c0909341SAndroid Build Coastguard Worker vmul.i16 q3, q8, d0[2] // p0(topleft) * filter(0) 1512*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q13, d0[1] // p5(left[0]) * filter(5) 1513*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d4 // top(0-7) 1514*c0909341SAndroid Build Coastguard Worker vmovl.u8 q2, d5 // top(8-15) 1515*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q14, d0[0] // p6(left[1]) * filter(6) 1516*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q9, d2[0] // p1(top[0]) * filter(1) 1517*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q10, d2[1] // p2(top[1]) * filter(2) 1518*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q11, d2[2] // p3(top[2]) * filter(3) 1519*c0909341SAndroid Build Coastguard Worker vmla.i16 q3, q12, d2[3] // p4(top[3]) * filter(4) 1520*c0909341SAndroid Build Coastguard Worker 1521*c0909341SAndroid Build Coastguard Worker vmul.i16 q4, q9, d3[0] // p1(top[0]) * filter(1) 1522*c0909341SAndroid Build Coastguard Worker vmla.i16 q4, q10, d3[1] // p2(top[1]) * filter(2) 1523*c0909341SAndroid Build Coastguard Worker vmla.i16 q4, q11, d3[2] // p3(top[2]) * 
filter(3) 1524*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d6, q3, #4 1525*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d6 // first block, in 16 bit 1526*c0909341SAndroid Build Coastguard Worker vmla.i16 q4, q12, d3[3] // p4(top[3]) * filter(4) 1527*c0909341SAndroid Build Coastguard Worker vmla.i16 q4, q8, d2[3] // p0(topleft) * filter(0) 1528*c0909341SAndroid Build Coastguard Worker vmla.i16 q4, q13, d0[3] // p5(left[0]) * filter(5) 1529*c0909341SAndroid Build Coastguard Worker vmla.i16 q4, q14, d1[3] // p6(left[1]) * filter(6) 1530*c0909341SAndroid Build Coastguard Worker 1531*c0909341SAndroid Build Coastguard Worker vmul.i16 q5, q9, d4[0] // p1(top[0]) * filter(1) 1532*c0909341SAndroid Build Coastguard Worker vmla.i16 q5, q10, d4[1] // p2(top[1]) * filter(2) 1533*c0909341SAndroid Build Coastguard Worker vmla.i16 q5, q11, d4[2] // p3(top[2]) * filter(3) 1534*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d7, q4, #4 1535*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d7 // second block, in 16 bit 1536*c0909341SAndroid Build Coastguard Worker vmla.i16 q5, q12, d4[3] // p4(top[3]) * filter(4) 1537*c0909341SAndroid Build Coastguard Worker vmla.i16 q5, q8, d3[3] // p0(topleft) * filter(0) 1538*c0909341SAndroid Build Coastguard Worker vmla.i16 q5, q13, d0[3] // p5(left[0]) * filter(5) 1539*c0909341SAndroid Build Coastguard Worker vmla.i16 q5, q14, d1[3] // p6(left[1]) * filter(6) 1540*c0909341SAndroid Build Coastguard Worker 1541*c0909341SAndroid Build Coastguard Worker vmul.i16 q15, q9, d5[0] // p1(top[0]) * filter(1) 1542*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q10, d5[1] // p2(top[1]) * filter(2) 1543*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q11, d5[2] // p3(top[2]) * filter(3) 1544*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d8, q5, #4 1545*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d8 // third block, in 16 bit 1546*c0909341SAndroid Build Coastguard Worker vmov.u8 r12, d5[6] 
1547*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q12, d5[3] // p4(top[3]) * filter(4) 1548*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q8, d4[3] // p0(topleft) * filter(0) 1549*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q13, d0[3] // p5(left[0]) * filter(5) 1550*c0909341SAndroid Build Coastguard Worker vmla.i16 q15, q14, d1[3] // p6(left[1]) * filter(6) 1551*c0909341SAndroid Build Coastguard Worker vmov.8 d0[4], r12 1552*c0909341SAndroid Build Coastguard Worker 1553*c0909341SAndroid Build Coastguard Worker subs r3, r3, #16 1554*c0909341SAndroid Build Coastguard Worker vqrshrun.s16 d9, q15, #4 1555*c0909341SAndroid Build Coastguard Worker 1556*c0909341SAndroid Build Coastguard Worker vst4.32 {d6[0], d7[0], d8[0], d9[0]}, [r0, :128]! 1557*c0909341SAndroid Build Coastguard Worker vst4.32 {d6[1], d7[1], d8[1], d9[1]}, [r6, :128]! 1558*c0909341SAndroid Build Coastguard Worker ble 8f 1559*c0909341SAndroid Build Coastguard Worker vmov.u8 r12, d9[7] 1560*c0909341SAndroid Build Coastguard Worker vmov.8 d0[0], r12 1561*c0909341SAndroid Build Coastguard Worker vmov.u8 r12, d9[3] 1562*c0909341SAndroid Build Coastguard Worker vmov.8 d0[2], r12 1563*c0909341SAndroid Build Coastguard Worker b 2b 1564*c0909341SAndroid Build Coastguard Worker8: 1565*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 1566*c0909341SAndroid Build Coastguard Worker 1567*c0909341SAndroid Build Coastguard Worker ble 9f 1568*c0909341SAndroid Build Coastguard Worker sub r8, r6, lr 1569*c0909341SAndroid Build Coastguard Worker add r0, r0, r1 1570*c0909341SAndroid Build Coastguard Worker add r6, r6, r1 1571*c0909341SAndroid Build Coastguard Worker mov r3, lr 1572*c0909341SAndroid Build Coastguard Worker b 1b 1573*c0909341SAndroid Build Coastguard Worker9: 1574*c0909341SAndroid Build Coastguard Worker vpop {q4-q5} 1575*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 1576*c0909341SAndroid Build Coastguard Workerendfunc 1577*c0909341SAndroid Build Coastguard 
// void pal_pred_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const pal, const uint8_t *idx,
//                         const int w, const int h);
//
// Register use after the prologue:
//   r0 = dst, r2 = dst + stride (r2 is reused once the palette is loaded),
//   r1 = stride, r3 = idx, r4 = w, r5 = h,
//   d0 = 8 palette entries, q15 = 0x07 in every byte.
// Every idx byte carries two palette indices: (byte & 7) is the first
// pixel and (byte >> 4) the second; vzip.8 restores pixel order before
// the vtbl.8 palette lookups.
function pal_pred_8bpc_neon, export=1
        push            {r4-r5, lr}
        ldrd            r4,  r5,  [sp, #12]       // r4 = w, r5 = h
        vld1.8          {d0}, [r2, :64]
        clz             lr,  r4
        adr             r12, L(pal_pred_tbl)
        sub             lr,  lr,  #25             // clz(w) - 25: 64->0, 32->1, 16->2, 8->3, 4->4
        vmov.i8         q15, #7
        ldr             lr,  [r12, lr, lsl #2]
        add             r12, r12, lr
        add             r2,  r0,  r1
        bx              r12

        .align 2
L(pal_pred_tbl):
        .word 640f - L(pal_pred_tbl) + CONFIG_THUMB
        .word 320f - L(pal_pred_tbl) + CONFIG_THUMB
        .word 160f - L(pal_pred_tbl) + CONFIG_THUMB
        .word 80f  - L(pal_pred_tbl) + CONFIG_THUMB
        .word 40f  - L(pal_pred_tbl) + CONFIG_THUMB

40:     // w == 4: 8 idx bytes -> 4 rows of 4 pixels per iteration
        lsl             r1,  r1,  #1
4:
        vld1.8          {d2}, [r3, :64]!
        subs            r5,  r5,  #4
        vshr.u8         d3,  d2,  #4
        vand.u8         d2,  d2,  d30
        vzip.8          d2,  d3
        vtbl.8          d2, {d0}, d2
        vtbl.8          d3, {d0}, d3
        vst1.32         {d2[0]}, [r0, :32], r1
        vst1.32         {d2[1]}, [r2, :32], r1
        vst1.32         {d3[0]}, [r0, :32], r1
        vst1.32         {d3[1]}, [r2, :32], r1
        bgt             4b
        pop             {r4-r5, pc}
80:     // w == 8: 16 idx bytes -> 4 rows of 8 pixels per iteration
        lsl             r1,  r1,  #1
8:
        vld1.8          {q1}, [r3, :64]!
        subs            r5,  r5,  #4
        vshr.u8         q2,  q1,  #4
        vand.u8         q1,  q1,  q15
        vzip.8          q1,  q2
        vtbl.8          d2, {d0}, d2
        vtbl.8          d3, {d0}, d3
        vst1.8          {d2}, [r0, :64], r1
        vtbl.8          d4, {d0}, d4
        vst1.8          {d3}, [r2, :64], r1
        vtbl.8          d5, {d0}, d5
        vst1.8          {d4}, [r0, :64], r1
        vst1.8          {d5}, [r2, :64], r1
        bgt             8b
        pop             {r4-r5, pc}
160:    // w == 16: 32 idx bytes -> 4 rows of 16 pixels per iteration
        lsl             r1,  r1,  #1
16:
        vld1.8          {q10, q11}, [r3, :64]!
        subs            r5,  r5,  #4
        vand.u8         q8,  q10, q15
        vshr.u8         q9,  q10, #4
        vand.u8         q10, q11, q15
        vshr.u8         q11, q11, #4
        vzip.8          q8,  q9
        vzip.8          q10, q11
        vtbl.8          d16, {d0}, d16
        vtbl.8          d17, {d0}, d17
        vtbl.8          d18, {d0}, d18
        vtbl.8          d19, {d0}, d19
        vtbl.8          d20, {d0}, d20
        vtbl.8          d21, {d0}, d21
        vst1.8          {q8},  [r0, :128], r1
        vtbl.8          d22, {d0}, d22
        vst1.8          {q9},  [r2, :128], r1
        vtbl.8          d23, {d0}, d23
        vst1.8          {q10}, [r0, :128], r1
        vst1.8          {q11}, [r2, :128], r1
        bgt             16b
        pop             {r4-r5, pc}
320:    // w == 32: 32 idx bytes -> 2 rows of 32 pixels per iteration
        lsl             r1,  r1,  #1
32:
        vld1.8          {q10, q11}, [r3, :64]!
        subs            r5,  r5,  #2
        vand.u8         q8,  q10, q15
        vshr.u8         q9,  q10, #4
        vand.u8         q10, q11, q15
        vshr.u8         q11, q11, #4
        vzip.8          q8,  q9
        vzip.8          q10, q11
        vtbl.8          d16, {d0}, d16
        vtbl.8          d17, {d0}, d17
        vtbl.8          d18, {d0}, d18
        vtbl.8          d19, {d0}, d19
        vtbl.8          d20, {d0}, d20
        vtbl.8          d21, {d0}, d21
        vst1.8          {q8,  q9},  [r0, :128], r1
        vtbl.8          d22, {d0}, d22
        vtbl.8          d23, {d0}, d23
        vst1.8          {q10, q11}, [r2, :128], r1
        bgt             32b
        pop             {r4-r5, pc}
640:    // w == 64: 32 idx bytes -> 1 row of 64 pixels per iteration
        sub             r1,  r1,  #32             // the first store already advances r0 by 32
64:
        vld1.8          {q10, q11}, [r3, :64]!
        subs            r5,  r5,  #1
        vand.u8         q8,  q10, q15
        vshr.u8         q9,  q10, #4
        vand.u8         q10, q11, q15
        vshr.u8         q11, q11, #4
        vzip.8          q8,  q9
        vzip.8          q10, q11
        vtbl.8          d16, {d0}, d16
        vtbl.8          d17, {d0}, d17
        vtbl.8          d18, {d0}, d18
        vtbl.8          d19, {d0}, d19
        vtbl.8          d20, {d0}, d20
        vtbl.8          d21, {d0}, d21
        vst1.8          {q8,  q9},  [r0, :128]!
        vtbl.8          d22, {d0}, d22
        vtbl.8          d23, {d0}, d23
        vst1.8          {q10, q11}, [r0, :128], r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc

// void ipred_cfl_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height,
//                              const int16_t *ac, const int alpha);
//
// Register use after the prologue (also the entry state expected by the
// L(ipred_cfl_splat_w*) loops below, which the other ipred_cfl_* entry
// points branch into):
//   r0 = dst (even rows), r6 = dst + stride (odd rows), r1 = 2 * stride,
//   r3 = width, r4 = height, r5 = ac,
//   q0 = dc in every 16 bit lane (128 here), q1 = alpha in every lane.
function ipred_cfl_128_8bpc_neon, export=1
        push            {r4-r8, lr}
        ldrd            r4,  r5,  [sp, #24]       // r4 = height, r5 = ac
        ldr             r6,  [sp, #32]            // alpha
        clz             lr,  r3
        adr             r12, L(ipred_cfl_128_tbl)
        sub             lr,  lr,  #26             // clz(width) - 26: 32->0, 16->1, 8->2, 4->3
        ldr             lr,  [r12, lr, lsl #2]
        vmov.i16        q0,  #128     // dc
        vdup.i16        q1,  r6       // alpha
        add             r12, r12, lr
        add             r6,  r0,  r1
        lsl             r1,  r1,  #1
        bx              r12

        .align 2
L(ipred_cfl_128_tbl):
L(ipred_cfl_splat_tbl):
        // Widths 32 and 16 share the w16 loop, which iterates over the row
        // in 16 pixel steps.
        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_splat_w8)  - L(ipred_cfl_128_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_splat_w4)  - L(ipred_cfl_128_tbl) + CONFIG_THUMB

L(ipred_cfl_splat_w4):
        // 16 ac coefficients -> 4 rows of 4 pixels per iteration
        vld1.16         {q2, q3}, [r5, :128]!
        vmul.i16        q2,  q2,  q1  // diff = ac * alpha
        vmul.i16        q3,  q3,  q1
        vshr.s16        q8,  q2,  #15 // sign = diff >> 15
        vshr.s16        q9,  q3,  #15
        vadd.i16        q2,  q2,  q8  // diff + sign
        vadd.i16        q3,  q3,  q9
        vrshr.s16       q2,  q2,  #6  // (diff + sign + 32) >> 6 = apply_sign()
        vrshr.s16       q3,  q3,  #6
        vadd.i16        q2,  q2,  q0  // dc + apply_sign()
        vadd.i16        q3,  q3,  q0
        vqmovun.s16     d4,  q2       // iclip_pixel(dc + apply_sign())
        vqmovun.s16     d5,  q3
        vst1.32         {d4[0]}, [r0, :32], r1
        vst1.32         {d4[1]}, [r6, :32], r1
        subs            r4,  r4,  #4
        vst1.32         {d5[0]}, [r0, :32], r1
        vst1.32         {d5[1]}, [r6, :32], r1
        bgt             L(ipred_cfl_splat_w4)
        pop             {r4-r8, pc}
L(ipred_cfl_splat_w8):
        // 32 ac coefficients -> 4 rows of 8 pixels per iteration
        vld1.16         {q8,  q9},  [r5, :128]!
        vld1.16         {q10, q11}, [r5, :128]!
        vmul.i16        q8,  q8,  q1  // diff = ac * alpha
        vmul.i16        q9,  q9,  q1
        vmul.i16        q10, q10, q1
        vmul.i16        q11, q11, q1
        vshr.s16        q12, q8,  #15 // sign = diff >> 15
        vshr.s16        q13, q9,  #15
        vshr.s16        q14, q10, #15
        vshr.s16        q15, q11, #15
        vadd.i16        q8,  q8,  q12 // diff + sign
        vadd.i16        q9,  q9,  q13
        vadd.i16        q10, q10, q14
        vadd.i16        q11, q11, q15
        vrshr.s16       q8,  q8,  #6  // (diff + sign + 32) >> 6 = apply_sign()
        vrshr.s16       q9,  q9,  #6
        vrshr.s16       q10, q10, #6
        vrshr.s16       q11, q11, #6
        vadd.i16        q8,  q8,  q0  // dc + apply_sign()
        vadd.i16        q9,  q9,  q0
        vadd.i16        q10, q10, q0
        vadd.i16        q11, q11, q0
        vqmovun.s16     d16, q8       // iclip_pixel(dc + apply_sign())
        vqmovun.s16     d17, q9
        vqmovun.s16     d18, q10
        vqmovun.s16     d19, q11
        vst1.8          {d16}, [r0, :64], r1
        vst1.8          {d17}, [r6, :64], r1
        subs            r4,  r4,  #4
        vst1.8          {d18}, [r0, :64], r1
        vst1.8          {d19}, [r6, :64], r1
        bgt             L(ipred_cfl_splat_w8)
        pop             {r4-r8, pc}
L(ipred_cfl_splat_w16):
        // Two rows at a time, 16 pixels per inner iteration:
        // r5 walks the first ac row, r12 the second.
        add             r12, r5,  r3,  lsl #1     // ac + width * sizeof(int16_t)
        sub             r1,  r1,  r3              // r0/r6 advance by width inside the row loop
        mov             lr,  r3                   // keep width; r3 counts down per row pair
1:
        vld1.16         {q8,  q9},  [r5,  :128]!
        vmul.i16        q8,  q8,  q1  // diff = ac * alpha
        vld1.16         {q10, q11}, [r12, :128]!
        vmul.i16        q9,  q9,  q1
        vmul.i16        q10, q10, q1
        vmul.i16        q11, q11, q1
        vshr.s16        q12, q8,  #15 // sign = diff >> 15
        vshr.s16        q13, q9,  #15
        vshr.s16        q14, q10, #15
        vshr.s16        q15, q11, #15
        vadd.i16        q8,  q8,  q12 // diff + sign
        vadd.i16        q9,  q9,  q13
        vadd.i16        q10, q10, q14
        vadd.i16        q11, q11, q15
        vrshr.s16       q8,  q8,  #6  // (diff + sign + 32) >> 6 = apply_sign()
        vrshr.s16       q9,  q9,  #6
        vrshr.s16       q10, q10, #6
        vrshr.s16       q11, q11, #6
        vadd.i16        q8,  q8,  q0  // dc + apply_sign()
        vadd.i16        q9,  q9,  q0
        vadd.i16        q10, q10, q0
        vadd.i16        q11, q11, q0
        vqmovun.s16     d16, q8       // iclip_pixel(dc + apply_sign())
        vqmovun.s16     d17, q9
        vqmovun.s16     d18, q10
        vqmovun.s16     d19, q11
        subs            r3,  r3,  #16
        vst1.16         {q8}, [r0, :128]!
        vst1.16         {q9}, [r6, :128]!
        bgt             1b
        subs            r4,  r4,  #2              // flags are consumed by the bgt below;
                                                  // the add/mov in between leave them intact
        add             r5,  r5,  lr,  lsl #1     // skip the ac row already read via r12
        add             r12, r12, lr,  lsl #1
        add             r0,  r0,  r1
        add             r6,  r6,  r1
        mov             r3,  lr
        bgt             1b
        pop             {r4-r8, pc}
endfunc

// void ipred_cfl_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height,
//                              const int16_t *ac, const int alpha);
//
// Computes dc as the rounded average of the `width` pixels starting at
// topleft + 1, broadcasts it into q0 and continues in the
// L(ipred_cfl_splat_w*) loop matching the width.
function ipred_cfl_top_8bpc_neon, export=1
        push            {r4-r8, lr}
        ldrd            r4,  r5,  [sp, #24]       // r4 = height, r5 = ac
        ldr             r6,  [sp, #32]            // alpha
        clz             lr,  r3
        adr             r12, L(ipred_cfl_top_tbl)
        sub             lr,  lr,  #26             // clz(width) - 26: 32->0, 16->1, 8->2, 4->3
        ldr             lr,  [r12, lr, lsl #2]
        vdup.16         q1,  r6   // alpha
        add             r2,  r2,  #1              // r2 = top row
        add             r12, r12, lr
        add             r6,  r0,  r1
        lsl             r1,  r1,  #1
        bx              r12

        .align 2
L(ipred_cfl_top_tbl):
        .word 32f - L(ipred_cfl_top_tbl) + CONFIG_THUMB
        .word 16f - L(ipred_cfl_top_tbl) + CONFIG_THUMB
        .word 8f  - L(ipred_cfl_top_tbl) + CONFIG_THUMB
        .word 4f  - L(ipred_cfl_top_tbl) + CONFIG_THUMB

4:
        vld1.32         {d0[]}, [r2]
        vpaddl.u8       d0,  d0
        vpadd.u16       d0,  d0
        vrshr.u16       d0,  d0,  #2              // (sum + 2) >> 2
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w4)
8:
        vld1.8          {d0}, [r2]
        vpaddl.u8       d0,  d0
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0
        vrshr.u16       d0,  d0,  #3              // (sum + 4) >> 3
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w8)
16:
        vld1.8          {q0}, [r2]
        vaddl.u8        q0,  d0,  d1
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0
        vrshr.u16       d0,  d0,  #4              // (sum + 8) >> 4
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w16)
32:
        vld1.8          {q2, q3}, [r2]
        vaddl.u8        q2,  d4,  d5
        vaddl.u8        q3,  d6,  d7
        vadd.u16        q0,  q2,  q3
        vadd.u16        d0,  d0,  d1
        vpadd.u16       d0,  d0
        vpadd.u16       d0,  d0
        vrshr.u16       d0,  d0,  #5              // (sum + 16) >> 5
        vdup.16         q0,  d0[0]
        b               L(ipred_cfl_splat_w16)
endfunc

// void ipred_cfl_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height,
//                               const int16_t *ac, const int alpha);
//
// Two dispatches are prepared: r7 (indexed by height) is the code jumped
// to first, and r12 (indexed by width through L(ipred_cfl_splat_tbl))
// holds the address of the matching splat loop.
// NOTE(review): this function continues past the end of the reviewed
// span; only its visible beginning is reproduced here.
function ipred_cfl_left_8bpc_neon, export=1
        push            {r4-r8, lr}
        ldrd            r4,  r5,  [sp, #24]       // r4 = height, r5 = ac
        ldr             r6,  [sp, #32]            // alpha
        sub             r2,  r2,  r4              // r2 = topleft - height
        clz             lr,  r3
        clz             r8,  r4
        adr             r12, L(ipred_cfl_splat_tbl)
        adr             r7,  L(ipred_cfl_left_tbl)
        sub             lr,  lr,  #26
        sub             r8,  r8,  #26
        ldr             lr,  [r12, lr, lsl #2]
        ldr             r8,  [r7, r8, lsl #2]
        vdup.16         q1,  r6   // alpha
        add             r12, r12, lr
        add             r7,  r7,  r8
        add             r6,  r0,  r1
        lsl             r1,  r1,  #1
        bx              r7

        .align 2
L(ipred_cfl_left_tbl):
        .word L(ipred_cfl_left_h32) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_left_h16) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_left_h8)  - L(ipred_cfl_left_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_left_h4)  - L(ipred_cfl_left_tbl) + CONFIG_THUMB

L(ipred_cfl_left_h4):
        vld1.32         {d0[]}, [r2, :32]
1932*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d0, d0 1933*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 1934*c0909341SAndroid Build Coastguard Worker vrshr.u16 d0, d0, #2 1935*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 1936*c0909341SAndroid Build Coastguard Worker bx r12 1937*c0909341SAndroid Build Coastguard Worker 1938*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h8): 1939*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r2, :64] 1940*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d0, d0 1941*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 1942*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 1943*c0909341SAndroid Build Coastguard Worker vrshr.u16 d0, d0, #3 1944*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 1945*c0909341SAndroid Build Coastguard Worker bx r12 1946*c0909341SAndroid Build Coastguard Worker 1947*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h16): 1948*c0909341SAndroid Build Coastguard Worker vld1.8 {q0}, [r2, :128] 1949*c0909341SAndroid Build Coastguard Worker vaddl.u8 q0, d0, d1 1950*c0909341SAndroid Build Coastguard Worker vadd.u16 d0, d0, d1 1951*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 1952*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 1953*c0909341SAndroid Build Coastguard Worker vrshr.u16 d0, d0, #4 1954*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 1955*c0909341SAndroid Build Coastguard Worker bx r12 1956*c0909341SAndroid Build Coastguard Worker 1957*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h32): 1958*c0909341SAndroid Build Coastguard Worker vld1.8 {q2, q3}, [r2, :128] 1959*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 1960*c0909341SAndroid Build Coastguard Worker vaddl.u8 q3, d6, d7 1961*c0909341SAndroid Build Coastguard Worker vadd.u16 q0, q2, q3 1962*c0909341SAndroid Build Coastguard Worker vadd.u16 d0, d0, d1 1963*c0909341SAndroid Build Coastguard Worker vpadd.u16 
d0, d0 1964*c0909341SAndroid Build Coastguard Worker vpadd.u16 d0, d0 1965*c0909341SAndroid Build Coastguard Worker vrshr.u16 d0, d0, #5 1966*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 1967*c0909341SAndroid Build Coastguard Worker bx r12 1968*c0909341SAndroid Build Coastguard Workerendfunc 1969*c0909341SAndroid Build Coastguard Worker 1970*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_8bpc_neon(pixel *dst, const ptrdiff_t stride, 1971*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1972*c0909341SAndroid Build Coastguard Worker// const int width, const int height, 1973*c0909341SAndroid Build Coastguard Worker// const int16_t *ac, const int alpha); 1974*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_8bpc_neon, export=1 1975*c0909341SAndroid Build Coastguard Worker push {r4-r8, lr} 1976*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #24] 1977*c0909341SAndroid Build Coastguard Worker ldr r6, [sp, #32] 1978*c0909341SAndroid Build Coastguard Worker sub r2, r2, r4 1979*c0909341SAndroid Build Coastguard Worker add r8, r3, r4 // width + height 1980*c0909341SAndroid Build Coastguard Worker vdup.16 q1, r6 // alpha 1981*c0909341SAndroid Build Coastguard Worker clz lr, r3 1982*c0909341SAndroid Build Coastguard Worker clz r6, r4 1983*c0909341SAndroid Build Coastguard Worker vdup.16 d16, r8 // width + height 1984*c0909341SAndroid Build Coastguard Worker adr r7, L(ipred_cfl_tbl) 1985*c0909341SAndroid Build Coastguard Worker rbit r8, r8 // rbit(width + height) 1986*c0909341SAndroid Build Coastguard Worker sub lr, lr, #22 // 26 leading bits, minus table offset 4 1987*c0909341SAndroid Build Coastguard Worker sub r6, r6, #26 1988*c0909341SAndroid Build Coastguard Worker clz r8, r8 // ctz(width + height) 1989*c0909341SAndroid Build Coastguard Worker ldr lr, [r7, lr, lsl #2] 1990*c0909341SAndroid Build Coastguard Worker ldr r6, [r7, r6, lsl #2] 1991*c0909341SAndroid Build Coastguard Worker neg r8, r8 // 
-ctz(width + height) 1992*c0909341SAndroid Build Coastguard Worker add r12, r7, lr 1993*c0909341SAndroid Build Coastguard Worker add r7, r7, r6 1994*c0909341SAndroid Build Coastguard Worker vshr.u16 d16, d16, #1 // (width + height) >> 1 1995*c0909341SAndroid Build Coastguard Worker vdup.16 d17, r8 // -ctz(width + height) 1996*c0909341SAndroid Build Coastguard Worker add r6, r0, r1 1997*c0909341SAndroid Build Coastguard Worker lsl r1, r1, #1 1998*c0909341SAndroid Build Coastguard Worker bx r7 1999*c0909341SAndroid Build Coastguard Worker 2000*c0909341SAndroid Build Coastguard Worker .align 2 2001*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_tbl): 2002*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h32) - L(ipred_cfl_tbl) + CONFIG_THUMB 2003*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h16) - L(ipred_cfl_tbl) + CONFIG_THUMB 2004*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h8) - L(ipred_cfl_tbl) + CONFIG_THUMB 2005*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h4) - L(ipred_cfl_tbl) + CONFIG_THUMB 2006*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w32) - L(ipred_cfl_tbl) + CONFIG_THUMB 2007*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w16) - L(ipred_cfl_tbl) + CONFIG_THUMB 2008*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w8) - L(ipred_cfl_tbl) + CONFIG_THUMB 2009*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w4) - L(ipred_cfl_tbl) + CONFIG_THUMB 2010*c0909341SAndroid Build Coastguard Worker 2011*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h4): 2012*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [r2, :32]! 
2013*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d0, d0 2014*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 2015*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0 2016*c0909341SAndroid Build Coastguard Worker bx r12 2017*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w4): 2018*c0909341SAndroid Build Coastguard Worker vld1.32 {d1[]}, [r2] 2019*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d16 2020*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d1, d1 2021*c0909341SAndroid Build Coastguard Worker vpadd.u16 d1, d1 2022*c0909341SAndroid Build Coastguard Worker cmp r4, #4 2023*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2024*c0909341SAndroid Build Coastguard Worker vshl.u16 d0, d0, d17 2025*c0909341SAndroid Build Coastguard Worker beq 1f 2026*c0909341SAndroid Build Coastguard Worker // h = 8/16 2027*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 2028*c0909341SAndroid Build Coastguard Worker movw r8, #(0x5556/2) 2029*c0909341SAndroid Build Coastguard Worker cmp r4, #16 2030*c0909341SAndroid Build Coastguard Worker it ne 2031*c0909341SAndroid Build Coastguard Worker movne lr, r8 2032*c0909341SAndroid Build Coastguard Worker vdup.16 d18, lr 2033*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d0, d0, d18 2034*c0909341SAndroid Build Coastguard Worker1: 2035*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2036*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w4) 2037*c0909341SAndroid Build Coastguard Worker 2038*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h8): 2039*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r2, :64]! 
2040*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d0, d0 2041*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0 2042*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 2043*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0 2044*c0909341SAndroid Build Coastguard Worker bx r12 2045*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w8): 2046*c0909341SAndroid Build Coastguard Worker vld1.8 {d1}, [r2] 2047*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d16 2048*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d1, d1 2049*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d1 2050*c0909341SAndroid Build Coastguard Worker vpadd.i16 d1, d1 2051*c0909341SAndroid Build Coastguard Worker cmp r4, #8 2052*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2053*c0909341SAndroid Build Coastguard Worker vshl.u16 d0, d0, d17 2054*c0909341SAndroid Build Coastguard Worker beq 1f 2055*c0909341SAndroid Build Coastguard Worker // h = 4/16/32 2056*c0909341SAndroid Build Coastguard Worker cmp r4, #32 2057*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 2058*c0909341SAndroid Build Coastguard Worker movw r8, #(0x5556/2) 2059*c0909341SAndroid Build Coastguard Worker it ne 2060*c0909341SAndroid Build Coastguard Worker movne lr, r8 2061*c0909341SAndroid Build Coastguard Worker vdup.16 d18, lr 2062*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d0, d0, d18 2063*c0909341SAndroid Build Coastguard Worker1: 2064*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2065*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w8) 2066*c0909341SAndroid Build Coastguard Worker 2067*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h16): 2068*c0909341SAndroid Build Coastguard Worker vld1.8 {q0}, [r2, :128]! 
2069*c0909341SAndroid Build Coastguard Worker vaddl.u8 q0, d0, d1 2070*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2071*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0 2072*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 2073*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0 2074*c0909341SAndroid Build Coastguard Worker bx r12 2075*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w16): 2076*c0909341SAndroid Build Coastguard Worker vld1.8 {q2}, [r2] 2077*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d16 2078*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 2079*c0909341SAndroid Build Coastguard Worker vadd.i16 d4, d4, d5 2080*c0909341SAndroid Build Coastguard Worker vpadd.i16 d4, d4 2081*c0909341SAndroid Build Coastguard Worker vpadd.i16 d4, d4 2082*c0909341SAndroid Build Coastguard Worker cmp r4, #16 2083*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d4 2084*c0909341SAndroid Build Coastguard Worker vshl.u16 d0, d0, d17 2085*c0909341SAndroid Build Coastguard Worker beq 1f 2086*c0909341SAndroid Build Coastguard Worker // h = 4/8/32/64 2087*c0909341SAndroid Build Coastguard Worker tst r4, #(32+16+8) // 16 added to make a consecutive bitmask 2088*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 2089*c0909341SAndroid Build Coastguard Worker movw r8, #(0x5556/2) 2090*c0909341SAndroid Build Coastguard Worker it ne 2091*c0909341SAndroid Build Coastguard Worker movne lr, r8 2092*c0909341SAndroid Build Coastguard Worker vdup.16 d18, lr 2093*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d0, d0, d18 2094*c0909341SAndroid Build Coastguard Worker1: 2095*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2096*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w16) 2097*c0909341SAndroid Build Coastguard Worker 2098*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h32): 2099*c0909341SAndroid Build Coastguard Worker vld1.8 {q2, q3}, [r2, :128]! 
2100*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 2101*c0909341SAndroid Build Coastguard Worker vaddl.u8 q3, d6, d7 2102*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q2, q3 2103*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2104*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0 2105*c0909341SAndroid Build Coastguard Worker add r2, r2, #1 2106*c0909341SAndroid Build Coastguard Worker vpadd.i16 d0, d0 2107*c0909341SAndroid Build Coastguard Worker bx r12 2108*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w32): 2109*c0909341SAndroid Build Coastguard Worker vld1.8 {q2, q3}, [r2] 2110*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d16 2111*c0909341SAndroid Build Coastguard Worker vaddl.u8 q2, d4, d5 2112*c0909341SAndroid Build Coastguard Worker vaddl.u8 q3, d6, d7 2113*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2114*c0909341SAndroid Build Coastguard Worker vadd.i16 d4, d4, d5 2115*c0909341SAndroid Build Coastguard Worker vpadd.i16 d4, d4 2116*c0909341SAndroid Build Coastguard Worker vpadd.i16 d4, d4 2117*c0909341SAndroid Build Coastguard Worker cmp r4, #32 2118*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d4 2119*c0909341SAndroid Build Coastguard Worker vshl.u16 d0, d0, d17 2120*c0909341SAndroid Build Coastguard Worker beq 1f 2121*c0909341SAndroid Build Coastguard Worker // h = 8/16/64 2122*c0909341SAndroid Build Coastguard Worker cmp r4, #8 2123*c0909341SAndroid Build Coastguard Worker movw lr, #(0x3334/2) 2124*c0909341SAndroid Build Coastguard Worker movw r8, #(0x5556/2) 2125*c0909341SAndroid Build Coastguard Worker it ne 2126*c0909341SAndroid Build Coastguard Worker movne lr, r8 2127*c0909341SAndroid Build Coastguard Worker vdup.16 d18, lr 2128*c0909341SAndroid Build Coastguard Worker vqdmulh.s16 d0, d0, d18 2129*c0909341SAndroid Build Coastguard Worker1: 2130*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d0[0] 2131*c0909341SAndroid Build Coastguard Worker b 
L(ipred_cfl_splat_w16) 2132*c0909341SAndroid Build Coastguard Workerendfunc 2133*c0909341SAndroid Build Coastguard Worker 2134*c0909341SAndroid Build Coastguard Worker// void cfl_ac_420_8bpc_neon(int16_t *const ac, const pixel *const ypx, 2135*c0909341SAndroid Build Coastguard Worker// const ptrdiff_t stride, const int w_pad, 2136*c0909341SAndroid Build Coastguard Worker// const int h_pad, const int cw, const int ch); 2137*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_420_8bpc_neon, export=1 2138*c0909341SAndroid Build Coastguard Worker push {r4-r8,lr} 2139*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #24] 2140*c0909341SAndroid Build Coastguard Worker ldr r6, [sp, #32] 2141*c0909341SAndroid Build Coastguard Worker clz r8, r5 2142*c0909341SAndroid Build Coastguard Worker lsl r4, r4, #2 2143*c0909341SAndroid Build Coastguard Worker adr r7, L(ipred_cfl_ac_420_tbl) 2144*c0909341SAndroid Build Coastguard Worker sub r8, r8, #27 2145*c0909341SAndroid Build Coastguard Worker ldr r8, [r7, r8, lsl #2] 2146*c0909341SAndroid Build Coastguard Worker vmov.i16 q8, #0 2147*c0909341SAndroid Build Coastguard Worker vmov.i16 q9, #0 2148*c0909341SAndroid Build Coastguard Worker vmov.i16 q10, #0 2149*c0909341SAndroid Build Coastguard Worker vmov.i16 q11, #0 2150*c0909341SAndroid Build Coastguard Worker add r7, r7, r8 2151*c0909341SAndroid Build Coastguard Worker sub r8, r6, r4 // height - h_pad 2152*c0909341SAndroid Build Coastguard Worker rbit lr, r5 // rbit(width) 2153*c0909341SAndroid Build Coastguard Worker rbit r12, r6 // rbit(height) 2154*c0909341SAndroid Build Coastguard Worker clz lr, lr // ctz(width) 2155*c0909341SAndroid Build Coastguard Worker clz r12, r12 // ctz(height) 2156*c0909341SAndroid Build Coastguard Worker add lr, lr, r12 // log2sz 2157*c0909341SAndroid Build Coastguard Worker add r12, r1, r2 2158*c0909341SAndroid Build Coastguard Worker vdup.32 d31, lr 2159*c0909341SAndroid Build Coastguard Worker lsl r2, r2, #1 2160*c0909341SAndroid 
Build Coastguard Worker vneg.s32 d31, d31 // -log2sz 2161*c0909341SAndroid Build Coastguard Worker bx r7 2162*c0909341SAndroid Build Coastguard Worker 2163*c0909341SAndroid Build Coastguard Worker .align 2 2164*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_tbl): 2165*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB 2166*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w8) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB 2167*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w4) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB 2168*c0909341SAndroid Build Coastguard Worker 2169*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4): 2170*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input 2171*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r1, :64], r2 2172*c0909341SAndroid Build Coastguard Worker vld1.8 {d2}, [r12, :64], r2 2173*c0909341SAndroid Build Coastguard Worker vld1.8 {d1}, [r1, :64], r2 2174*c0909341SAndroid Build Coastguard Worker vld1.8 {d3}, [r12, :64], r2 2175*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q0, q0 2176*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q1, q1 2177*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 2178*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2179*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2180*c0909341SAndroid Build Coastguard Worker vst1.16 {q0}, [r0, :128]! 
2181*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2182*c0909341SAndroid Build Coastguard Worker bgt 1b 2183*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2184*c0909341SAndroid Build Coastguard Worker vmov d0, d1 2185*c0909341SAndroid Build Coastguard Worker vmov d2, d1 2186*c0909341SAndroid Build Coastguard Worker vmov d3, d1 2187*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_hpad): 2188*c0909341SAndroid Build Coastguard Worker beq 3f // This assumes that all callers already did "cmp r4, #0" 2189*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 2190*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 2191*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2192*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2193*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q1 2194*c0909341SAndroid Build Coastguard Worker bgt 2b 2195*c0909341SAndroid Build Coastguard Worker3: 2196*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_calc_subtract_dc): 2197*c0909341SAndroid Build Coastguard Worker // Aggregate the sums 2198*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q8, q9 2199*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q10, q11 2200*c0909341SAndroid Build Coastguard Worker vpaddl.u16 q0, q0 2201*c0909341SAndroid Build Coastguard Worker vpaddl.u16 q1, q1 2202*c0909341SAndroid Build Coastguard Worker vadd.i32 q0, q1 2203*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d1 2204*c0909341SAndroid Build Coastguard Worker vpadd.i32 d0, d0, d0 // sum 2205*c0909341SAndroid Build Coastguard Worker sub r0, r0, r6, lsl #3 2206*c0909341SAndroid Build Coastguard Worker vrshl.u32 d16, d0, d31 // (sum + (1 << (log2sz - 1))) >>= log2sz 2207*c0909341SAndroid Build Coastguard Worker vdup.16 q8, d16[0] 2208*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_subtract_dc): 2209*c0909341SAndroid Build Coastguard Worker6: // Subtract dc from 
ac 2210*c0909341SAndroid Build Coastguard Worker vld1.16 {q0, q1}, [r0, :128] 2211*c0909341SAndroid Build Coastguard Worker subs r6, r6, #4 2212*c0909341SAndroid Build Coastguard Worker vsub.i16 q0, q0, q8 2213*c0909341SAndroid Build Coastguard Worker vsub.i16 q1, q1, q8 2214*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2215*c0909341SAndroid Build Coastguard Worker bgt 6b 2216*c0909341SAndroid Build Coastguard Worker pop {r4-r8, pc} 2217*c0909341SAndroid Build Coastguard Worker 2218*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8): 2219*c0909341SAndroid Build Coastguard Worker cmp r3, #0 2220*c0909341SAndroid Build Coastguard Worker bne L(ipred_cfl_ac_420_w8_wpad) 2221*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, without padding 2222*c0909341SAndroid Build Coastguard Worker vld1.8 {q0}, [r1, :128], r2 2223*c0909341SAndroid Build Coastguard Worker vld1.8 {q1}, [r12, :128], r2 2224*c0909341SAndroid Build Coastguard Worker vld1.8 {q2}, [r1, :128], r2 2225*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q0, q0 2226*c0909341SAndroid Build Coastguard Worker vld1.8 {q3}, [r12, :128], r2 2227*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q1, q1 2228*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q2, q2 2229*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q3, q3 2230*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 2231*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2232*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2233*c0909341SAndroid Build Coastguard Worker vshl.i16 q1, q2, #1 2234*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2235*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2236*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2237*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2238*c0909341SAndroid Build Coastguard Worker bgt 1b 2239*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2240*c0909341SAndroid Build Coastguard Worker vmov q0, q1 2241*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w8_hpad) 2242*c0909341SAndroid Build Coastguard Worker 2243*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_wpad): 2244*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 4 2245*c0909341SAndroid Build Coastguard Worker vld1.16 {d0}, [r1, :64], r2 2246*c0909341SAndroid Build Coastguard Worker vld1.16 {d2}, [r12, :64], r2 2247*c0909341SAndroid Build Coastguard Worker vld1.16 {d1}, [r1, :64], r2 2248*c0909341SAndroid Build Coastguard Worker vld1.16 {d3}, [r12, :64], r2 2249*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q0, q0 2250*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q1, q1 2251*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 2252*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2253*c0909341SAndroid Build Coastguard Worker vdup.16 d3, d1[3] 2254*c0909341SAndroid Build Coastguard Worker vmov d2, d1 2255*c0909341SAndroid Build Coastguard Worker vdup.16 d1, d0[3] 2256*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2257*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2258*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2259*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2260*c0909341SAndroid Build Coastguard Worker bgt 1b 2261*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2262*c0909341SAndroid Build Coastguard Worker vmov q0, q1 2263*c0909341SAndroid Build Coastguard Worker 2264*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_hpad): 2265*c0909341SAndroid Build Coastguard Worker beq 3f // This assumes that all callers already did "cmp r4, #0" 2266*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 2267*c0909341SAndroid Build Coastguard Worker subs r4, r4, #4 2268*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2269*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2270*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2271*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2272*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q0 2273*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q1 2274*c0909341SAndroid Build Coastguard Worker bgt 2b 2275*c0909341SAndroid Build Coastguard Worker3: 2276*c0909341SAndroid Build Coastguard Worker 2277*c0909341SAndroid Build Coastguard Worker // Double the height and reuse the w4 summing/subtracting 2278*c0909341SAndroid Build Coastguard Worker lsl r6, r6, #1 2279*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w4_calc_subtract_dc) 2280*c0909341SAndroid Build Coastguard Worker 2281*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16): 2282*c0909341SAndroid Build Coastguard Worker adr r7, L(ipred_cfl_ac_420_w16_tbl) 2283*c0909341SAndroid Build Coastguard Worker ldr r3, [r7, r3, lsl #2] 2284*c0909341SAndroid Build Coastguard Worker add r7, r7, r3 2285*c0909341SAndroid Build Coastguard Worker bx r7 2286*c0909341SAndroid Build Coastguard Worker 2287*c0909341SAndroid Build Coastguard Worker .align 2 2288*c0909341SAndroid 
Build Coastguard WorkerL(ipred_cfl_ac_420_w16_tbl): 2289*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad0) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2290*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad1) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2291*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad2) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2292*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad3) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB 2293*c0909341SAndroid Build Coastguard Worker 2294*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad0): 2295*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, without padding 2296*c0909341SAndroid Build Coastguard Worker vld1.8 {q0, q1}, [r1, :128], r2 2297*c0909341SAndroid Build Coastguard Worker vld1.8 {q2, q3}, [r12, :128], r2 2298*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q0, q0 2299*c0909341SAndroid Build Coastguard Worker vld1.8 {q12, q13}, [r1, :128], r2 2300*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q1, q1 2301*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q2, q2 2302*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q3, q3 2303*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q2 2304*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q3 2305*c0909341SAndroid Build Coastguard Worker vld1.8 {q2, q3}, [r12, :128], r2 2306*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q12, q12 2307*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q13, q13 2308*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q2, q2 2309*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q3, q3 2310*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q2 2311*c0909341SAndroid Build Coastguard Worker vadd.i16 q13, q13, q3 2312*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2313*c0909341SAndroid Build Coastguard Worker vshl.i16 q1, q1, #1 
2314*c0909341SAndroid Build Coastguard Worker vshl.i16 q2, q12, #1 2315*c0909341SAndroid Build Coastguard Worker vshl.i16 q3, q13, #1 2316*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2317*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2318*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2319*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2320*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 2321*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2322*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2323*c0909341SAndroid Build Coastguard Worker bgt 1b 2324*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2325*c0909341SAndroid Build Coastguard Worker vmov q0, q2 2326*c0909341SAndroid Build Coastguard Worker vmov q1, q3 2327*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2328*c0909341SAndroid Build Coastguard Worker 2329*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad1): 2330*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 4 2331*c0909341SAndroid Build Coastguard Worker vldr d2, [r1, #16] 2332*c0909341SAndroid Build Coastguard Worker vld1.8 {q0}, [r1, :128], r2 2333*c0909341SAndroid Build Coastguard Worker vldr d6, [r12, #16] 2334*c0909341SAndroid Build Coastguard Worker vld1.8 {q2}, [r12, :128], r2 2335*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d2, d2 2336*c0909341SAndroid Build Coastguard Worker vldr d26, [r1, #16] 2337*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q0, q0 2338*c0909341SAndroid Build Coastguard Worker vld1.8 {q12}, [r1, :128], r2 2339*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d6, d6 2340*c0909341SAndroid Build Coastguard Worker vldr d30, [r12, #16] 2341*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q2, q2 2342*c0909341SAndroid Build Coastguard Worker vld1.8 {q14}, [r12, :128], r2 2343*c0909341SAndroid Build Coastguard Worker vpaddl.u8 
d26, d26 2344*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q12, q12 2345*c0909341SAndroid Build Coastguard Worker vpaddl.u8 d30, d30 2346*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q14, q14 2347*c0909341SAndroid Build Coastguard Worker vadd.i16 d2, d2, d6 2348*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q2 2349*c0909341SAndroid Build Coastguard Worker vadd.i16 d26, d26, d30 2350*c0909341SAndroid Build Coastguard Worker vadd.i16 q12, q12, q14 2351*c0909341SAndroid Build Coastguard Worker vshl.i16 d2, d2, #1 2352*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2353*c0909341SAndroid Build Coastguard Worker vshl.i16 d6, d26, #1 2354*c0909341SAndroid Build Coastguard Worker vshl.i16 q2, q12, #1 2355*c0909341SAndroid Build Coastguard Worker vdup.16 d3, d2[3] 2356*c0909341SAndroid Build Coastguard Worker vdup.16 d7, d6[3] 2357*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2358*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2359*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2360*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2361*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 
2362*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2363*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2364*c0909341SAndroid Build Coastguard Worker bgt 1b 2365*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2366*c0909341SAndroid Build Coastguard Worker vmov q0, q2 2367*c0909341SAndroid Build Coastguard Worker vmov q1, q3 2368*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2369*c0909341SAndroid Build Coastguard Worker 2370*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad2): 2371*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 8 2372*c0909341SAndroid Build Coastguard Worker vld1.8 {q0}, [r1, :128], r2 2373*c0909341SAndroid Build Coastguard Worker vld1.8 {q1}, [r12, :128], r2 2374*c0909341SAndroid Build Coastguard Worker vld1.8 {q2}, [r1, :128], r2 2375*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q0, q0 2376*c0909341SAndroid Build Coastguard Worker vld1.8 {q3}, [r12, :128], r2 2377*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q1, q1 2378*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q2, q2 2379*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q3, q3 2380*c0909341SAndroid Build Coastguard Worker vadd.i16 q0, q0, q1 2381*c0909341SAndroid Build Coastguard Worker vadd.i16 q2, q2, q3 2382*c0909341SAndroid Build Coastguard Worker vshl.i16 q0, q0, #1 2383*c0909341SAndroid Build Coastguard Worker vshl.i16 q2, q2, #1 2384*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d1[3] 2385*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d5[3] 2386*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2387*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2388*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2389*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2390*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 
2391*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2392*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2393*c0909341SAndroid Build Coastguard Worker bgt 1b 2394*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2395*c0909341SAndroid Build Coastguard Worker vmov q0, q2 2396*c0909341SAndroid Build Coastguard Worker vmov q1, q3 2397*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2398*c0909341SAndroid Build Coastguard Worker 2399*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad3): 2400*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 12 2401*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r1, :64], r2 2402*c0909341SAndroid Build Coastguard Worker vld1.8 {d1}, [r12, :64], r2 2403*c0909341SAndroid Build Coastguard Worker vld1.8 {d4}, [r1, :64], r2 2404*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q0, q0 2405*c0909341SAndroid Build Coastguard Worker vld1.8 {d5}, [r12, :64], r2 2406*c0909341SAndroid Build Coastguard Worker vpaddl.u8 q2, q2 2407*c0909341SAndroid Build Coastguard Worker vadd.i16 d0, d0, d1 2408*c0909341SAndroid Build Coastguard Worker vadd.i16 d4, d4, d5 2409*c0909341SAndroid Build Coastguard Worker vshl.i16 d0, d0, #1 2410*c0909341SAndroid Build Coastguard Worker vshl.i16 d4, d4, #1 2411*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d0[3] 2412*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d4[3] 2413*c0909341SAndroid Build Coastguard Worker vdup.16 d1, d0[3] 2414*c0909341SAndroid Build Coastguard Worker vdup.16 d5, d4[3] 2415*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2416*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2417*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2418*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2419*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 
2420*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2421*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2422*c0909341SAndroid Build Coastguard Worker bgt 1b 2423*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2424*c0909341SAndroid Build Coastguard Worker vmov q0, q2 2425*c0909341SAndroid Build Coastguard Worker vmov q1, q3 2426*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2427*c0909341SAndroid Build Coastguard Worker 2428*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_hpad): 2429*c0909341SAndroid Build Coastguard Worker beq 3f // This assumes that all callers already did "cmp r4, #0" 2430*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 2431*c0909341SAndroid Build Coastguard Worker subs r4, r4, #2 2432*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2433*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2434*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2435*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 
// (continuation of the "2:" vertical padding loop of
// L(ipred_cfl_ac_420_w16_hpad); instructions unchanged)
        vadd.i16        q10, q10, q2
        vadd.i16        q11, q11, q3
        bgt             2b
3:

        // Quadruple the height and reuse the w4 summing/subtracting
        lsl             r6,  r6,  #2
        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
endfunc

// void ipred_cfl_ac_422_8bpc_neon(int16_t *const ac, const pixel *const ypx,
//                                 const ptrdiff_t stride, const int w_pad,
//                                 const int h_pad, const int cw, const int ch);
//
// Fills the "ac" buffer from 8 bit source pixels: every stored 16 bit sample
// is the sum of two horizontally adjacent input bytes, shifted left by 2.
// One input row produces one output row (there is no vertical summing here).
//
// Register use, as set up by the prologue below:
//   r0  = ac (output, post-incremented by every store)
//   r1  = ypx, walks rows 0, 2, 4, ...   r12 = ypx + stride, walks rows 1, 3, ...
//   r2  = 2 * stride (step for both row pointers)
//   r3  = w_pad (first four arguments arrive in r0-r3)
//   r4  = h_pad * 4, r5 = cw, r6 = ch (loaded from the stack)
//   r8  = ch - 4 * h_pad, the number of rows actually read from ypx
//   q8-q11 = running 16 bit sums of every sample that is stored
//   d31 = -(ctz(cw) + ctz(ch)) in both 32 bit lanes; it is not used in this
//         function body, presumably it is consumed by the shared
//         ipred_cfl_ac_420_* tail that every path branches into
//
// Every width path ends with "cmp r4, #0" followed by a branch to one of the
// L(ipred_cfl_ac_420_w*_hpad) labels with the last produced row(s) in q0-q3.
// Those labels live in the preceding function; the registers pushed here are
// not popped in this function, so the epilogue is presumably there as well.
function ipred_cfl_ac_422_8bpc_neon, export=1
        push            {r4-r8,lr}
        // Six registers were pushed, so the stack arguments start at sp + 24.
        ldrd            r4,  r5,  [sp, #24]
        ldr             r6,  [sp, #32]
        clz             r8,  r5
        lsl             r4,  r4,  #2
        adr             r7,  L(ipred_cfl_ac_422_tbl)
        // clz(16) = 27, clz(8) = 28, clz(4) = 29 -> table index 0, 1, 2.
        sub             r8,  r8,  #27
        ldr             r8,  [r7, r8, lsl #2]
        vmov.i16        q8,  #0
        vmov.i16        q9,  #0
        vmov.i16        q10, #0
        vmov.i16        q11, #0
        add             r7,  r7,  r8
        sub             r8,  r6,  r4  // height - h_pad
        rbit            lr,  r5       // rbit(width)
        rbit            r12, r6       // rbit(height)
        clz             lr,  lr       // ctz(width)
        clz             r12, r12      // ctz(height)
        add             lr,  lr,  r12 // log2sz
        add             r12, r1,  r2
        vdup.32         d31, lr
        lsl             r2,  r2,  #1
        vneg.s32        d31, d31      // -log2sz
        bx              r7

        // Offsets are relative to the table itself; CONFIG_THUMB is added to
        // each entry (presumably the Thumb state bit for "bx" - it is defined
        // outside this file).
        .align 2
L(ipred_cfl_ac_422_tbl):
        .word L(ipred_cfl_ac_422_w16) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w8)  - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w4)  - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB

        // cw == 4: four rows of 4 output samples per iteration.
L(ipred_cfl_ac_422_w4):
1:      // Copy and subsample input
        vld1.8          {d0}, [r1,  :64], r2
        vld1.8          {d1}, [r12, :64], r2
        vld1.8          {d2}, [r1,  :64], r2
        vld1.8          {d3}, [r12, :64], r2
        vpaddl.u8       q0,  q0
        vpaddl.u8       q1,  q1
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vadd.i16        q8,  q8,  q0
        vadd.i16        q9,  q9,  q1
        bgt             1b
        cmp             r4,  #0
        // d3 holds the last row; replicate it into all of q0/q1.
        vmov            d0,  d3
        vmov            d1,  d3
        vmov            d2,  d3
        b               L(ipred_cfl_ac_420_w4_hpad)

        // cw == 8: four rows of 8 output samples per iteration.
L(ipred_cfl_ac_422_w8):
        cmp             r3,  #0
        bne             L(ipred_cfl_ac_422_w8_wpad)
1:      // Copy and subsample input, without padding
        vld1.8          {q0}, [r1,  :128], r2
        vld1.8          {q1}, [r12, :128], r2
        vld1.8          {q2}, [r1,  :128], r2
        vpaddl.u8       q0,  q0
        vld1.8          {q3}, [r12, :128], r2
        vpaddl.u8       q1,  q1
        vpaddl.u8       q2,  q2
        vpaddl.u8       q3,  q3
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        vshl.i16        q2,  q2,  #2
        vshl.i16        q3,  q3,  #2
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vadd.i16        q8,  q8,  q0
        vadd.i16        q9,  q9,  q1
        vst1.16         {q2, q3}, [r0, :128]!
        vadd.i16        q10, q10, q2
        vadd.i16        q11, q11, q3
        bgt             1b
        cmp             r4,  #0
        // q3 holds the last row; hand it over in q0 and q1.
        vmov            q0,  q3
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_422_w8_wpad):
1:      // Copy and subsample input, padding 4
        vld1.8          {d0}, [r1,  :64], r2
        vld1.8          {d1}, [r12, :64], r2
        vld1.8          {d2}, [r1,  :64], r2
        vld1.8          {d3}, [r12, :64], r2
        vpaddl.u8       q0,  q0
        vpaddl.u8       q1,  q1
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        // Spread the four 4-sample rows in d0-d3 over q0-q3, filling the
        // upper half of each row with that row's last sample. This runs from
        // the last row backwards so that no source register is overwritten
        // before it has been read.
        vdup.16         d7,  d3[3]
        vmov            d6,  d3
        vdup.16         d5,  d2[3]
        vmov            d4,  d2
        vdup.16         d3,  d1[3]
        vmov            d2,  d1
        vdup.16         d1,  d0[3]
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vadd.i16        q8,  q8,  q0
        vadd.i16        q9,  q9,  q1
        vst1.16         {q2, q3}, [r0, :128]!
        vadd.i16        q10, q10, q2
        vadd.i16        q11, q11, q3
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q3
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w8_hpad)

        // cw == 16: second dispatch, indexed directly by w_pad (r3).
L(ipred_cfl_ac_422_w16):
        adr             r7,  L(ipred_cfl_ac_422_w16_tbl)
        ldr             r3,  [r7, r3, lsl #2]
        add             r7,  r7,  r3
        bx              r7

        .align 2
L(ipred_cfl_ac_422_w16_tbl):
        .word L(ipred_cfl_ac_422_w16_wpad0) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad1) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad2) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad3) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB

        // All w16 variants handle two rows of 16 output samples per
        // iteration: row 0 ends up in q0/q1 and row 1 in q2/q3. After the
        // loop the last row (q2/q3) is copied to q0/q1 so that both register
        // pairs stored by L(ipred_cfl_ac_420_w16_hpad) hold that row.
L(ipred_cfl_ac_422_w16_wpad0):
1:      // Copy and subsample input, without padding
        vld1.8          {q0, q1}, [r1,  :128], r2
        vld1.8          {q2, q3}, [r12, :128], r2
        vpaddl.u8       q0,  q0
        vpaddl.u8       q1,  q1
        vpaddl.u8       q2,  q2
        vpaddl.u8       q3,  q3
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        vshl.i16        q2,  q2,  #2
        vshl.i16        q3,  q3,  #2
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vadd.i16        q8,  q8,  q0
        vadd.i16        q9,  q9,  q1
        vst1.16         {q2, q3}, [r0, :128]!
        vadd.i16        q10, q10, q2
        vadd.i16        q11, q11, q3
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad1):
1:      // Copy and subsample input, padding 4
        // 24 input bytes per row: bytes 16-23 via vldr (before the
        // post-incrementing vld1 moves the pointer), bytes 0-15 via vld1.
        vldr            d2,   [r1,  #16]
        vld1.8          {q0}, [r1,  :128], r2
        vldr            d6,   [r12, #16]
        vld1.8          {q2}, [r12, :128], r2
        vpaddl.u8       d2,  d2
        vpaddl.u8       q0,  q0
        vpaddl.u8       d6,  d6
        vpaddl.u8       q2,  q2
        vshl.i16        d2,  d2,  #2
        vshl.i16        q0,  q0,  #2
        vshl.i16        d6,  d6,  #2
        vshl.i16        q2,  q2,  #2
        // Last 4 samples of each row = copy of the last valid sample.
        vdup.16         d3,  d2[3]
        vdup.16         d7,  d6[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vadd.i16        q8,  q8,  q0
        vadd.i16        q9,  q9,  q1
        vst1.16         {q2, q3}, [r0, :128]!
        vadd.i16        q10, q10, q2
        vadd.i16        q11, q11, q3
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad2):
1:      // Copy and subsample input, padding 8
        vld1.8          {q0}, [r1,  :128], r2
        vld1.8          {q2}, [r12, :128], r2
        vpaddl.u8       q0,  q0
        vpaddl.u8       q2,  q2
        vshl.i16        q0,  q0,  #2
        vshl.i16        q2,  q2,  #2
        // Upper 8 samples of each row = copy of the last valid sample.
        vdup.16         q1,  d1[3]
        vdup.16         q3,  d5[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vadd.i16        q8,  q8,  q0
        vadd.i16        q9,  q9,  q1
        vst1.16         {q2, q3}, [r0, :128]!
        vadd.i16        q10, q10, q2
        vadd.i16        q11, q11, q3
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad3):
1:      // Copy and subsample input, padding 12
        // Only 4 valid samples per row: row 0 in d0, row 1 in d1.
        vld1.8          {d0}, [r1,  :64], r2
        vld1.8          {d1}, [r12, :64], r2
        vpaddl.u8       q0,  q0
        vshl.i16        q0,  q0,  #2
        // Build row 1 in q2/q3 first (it still needs d1), then finish row 0
        // in q0/q1 by overwriting d1 last.
        vdup.16         q3,  d1[3]
        vdup.16         q1,  d0[3]
        vdup.16         d5,  d1[3]
        vmov            d4,  d1
        vdup.16         d1,  d0[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vadd.i16        q8,  q8,  q0
        vadd.i16        q9,  q9,  q1
        vst1.16         {q2, q3}, [r0, :128]!
        vadd.i16        q10, q10, q2
        vadd.i16        q11, q11, q3
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)
endfunc

// void ipred_cfl_ac_444_8bpc_neon(int16_t *const ac, const pixel *const ypx,
//                                 const ptrdiff_t stride, const int w_pad,
//                                 const int h_pad, const int cw, const int ch);
function ipred_cfl_ac_444_8bpc_neon, export=1
        push            {r4-r8,lr}
        ldrd            r4,  r5,  [sp, #24]
        ldr             r6,  [sp, #32]
        clz             r8,  r5
        lsl             r4,  r4,  #2
        adr             r7,  L(ipred_cfl_ac_444_tbl)
        sub             r8,  r8,  #26
        ldr             r8,  [r7, r8, lsl #2]
        vmov.i16        q8,  #0
        vmov.i16        q9,  #0
        vmov.i16        q10, #0
        vmov.i16        q11, #0
        add             r7,  r7,  r8
        sub             r8,  r6,  r4  // height - h_pad
        rbit            lr,  r5       // rbit(width)
        rbit            r12, r6       // rbit(height)
2696*c0909341SAndroid Build Coastguard Worker clz lr, lr // ctz(width) 2697*c0909341SAndroid Build Coastguard Worker clz r12, r12 // ctz(height) 2698*c0909341SAndroid Build Coastguard Worker add lr, lr, r12 // log2sz 2699*c0909341SAndroid Build Coastguard Worker add r12, r1, r2 2700*c0909341SAndroid Build Coastguard Worker vdup.32 d31, lr 2701*c0909341SAndroid Build Coastguard Worker lsl r2, r2, #1 2702*c0909341SAndroid Build Coastguard Worker vneg.s32 d31, d31 // -log2sz 2703*c0909341SAndroid Build Coastguard Worker bx r7 2704*c0909341SAndroid Build Coastguard Worker 2705*c0909341SAndroid Build Coastguard Worker .align 2 2706*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_tbl): 2707*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB 2708*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w16) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB 2709*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w8) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB 2710*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w4) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB 2711*c0909341SAndroid Build Coastguard Worker 2712*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w4): 2713*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input 2714*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[]}, [r1, :32], r2 2715*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[1]}, [r12, :32], r2 2716*c0909341SAndroid Build Coastguard Worker vld1.32 {d2[]}, [r1, :32], r2 2717*c0909341SAndroid Build Coastguard Worker vld1.32 {d2[1]}, [r12, :32], r2 2718*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d0, #3 2719*c0909341SAndroid Build Coastguard Worker vshll.u8 q1, d2, #3 2720*c0909341SAndroid Build Coastguard Worker subs r8, r8, #4 2721*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2722*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2723*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2724*c0909341SAndroid Build Coastguard Worker bgt 1b 2725*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2726*c0909341SAndroid Build Coastguard Worker vmov d0, d3 2727*c0909341SAndroid Build Coastguard Worker vmov d1, d3 2728*c0909341SAndroid Build Coastguard Worker vmov d2, d3 2729*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w4_hpad) 2730*c0909341SAndroid Build Coastguard Worker 2731*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w8): 2732*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input 2733*c0909341SAndroid Build Coastguard Worker vld1.16 {d0}, [r1, :64], r2 2734*c0909341SAndroid Build Coastguard Worker vld1.16 {d2}, [r12, :64], r2 2735*c0909341SAndroid Build Coastguard Worker vld1.16 {d4}, [r1, :64], r2 2736*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d0, #3 2737*c0909341SAndroid Build Coastguard Worker vld1.16 {d6}, [r12, :64], r2 2738*c0909341SAndroid Build Coastguard Worker vshll.u8 q1, d2, #3 2739*c0909341SAndroid Build Coastguard Worker vshll.u8 q2, d4, #3 2740*c0909341SAndroid Build Coastguard Worker vshll.u8 q3, d6, #3 2741*c0909341SAndroid Build Coastguard Worker subs r8, r8, #4 2742*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2743*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2744*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2745*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 
2746*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2747*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2748*c0909341SAndroid Build Coastguard Worker bgt 1b 2749*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2750*c0909341SAndroid Build Coastguard Worker vmov q0, q3 2751*c0909341SAndroid Build Coastguard Worker vmov q1, q3 2752*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w8_hpad) 2753*c0909341SAndroid Build Coastguard Worker 2754*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w16): 2755*c0909341SAndroid Build Coastguard Worker cmp r3, #0 2756*c0909341SAndroid Build Coastguard Worker bne L(ipred_cfl_ac_444_w16_wpad) 2757*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input, without padding 2758*c0909341SAndroid Build Coastguard Worker vld1.8 {q1}, [r1, :128], r2 2759*c0909341SAndroid Build Coastguard Worker vld1.8 {q3}, [r12, :128], r2 2760*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d2, #3 2761*c0909341SAndroid Build Coastguard Worker vshll.u8 q1, d3, #3 2762*c0909341SAndroid Build Coastguard Worker vshll.u8 q2, d6, #3 2763*c0909341SAndroid Build Coastguard Worker vshll.u8 q3, d7, #3 2764*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2765*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2766*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2767*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2768*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 
2769*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2770*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2771*c0909341SAndroid Build Coastguard Worker bgt 1b 2772*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2773*c0909341SAndroid Build Coastguard Worker vmov q0, q2 2774*c0909341SAndroid Build Coastguard Worker vmov q1, q3 2775*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2776*c0909341SAndroid Build Coastguard Worker 2777*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w16_wpad): 2778*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input, padding 8 2779*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r1, :64], r2 2780*c0909341SAndroid Build Coastguard Worker vld1.8 {d4}, [r12, :64], r2 2781*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d0, #3 2782*c0909341SAndroid Build Coastguard Worker vshll.u8 q2, d4, #3 2783*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d1[3] 2784*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d5[3] 2785*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2786*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2787*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2788*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2789*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 
2790*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2791*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2792*c0909341SAndroid Build Coastguard Worker bgt 1b 2793*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2794*c0909341SAndroid Build Coastguard Worker vmov q0, q2 2795*c0909341SAndroid Build Coastguard Worker vmov q1, q3 2796*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 2797*c0909341SAndroid Build Coastguard Worker 2798*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32): 2799*c0909341SAndroid Build Coastguard Worker adr r7, L(ipred_cfl_ac_444_w32_tbl) 2800*c0909341SAndroid Build Coastguard Worker ldr r3, [r7, r3, lsl #1] // (w3>>1) << 2 2801*c0909341SAndroid Build Coastguard Worker add r7, r7, r3 2802*c0909341SAndroid Build Coastguard Worker bx r7 2803*c0909341SAndroid Build Coastguard Worker 2804*c0909341SAndroid Build Coastguard Worker .align 2 2805*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_tbl): 2806*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad0) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB 2807*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad2) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB 2808*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad4) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB 2809*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad6) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB 2810*c0909341SAndroid Build Coastguard Worker 2811*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad0): 2812*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input, without padding 2813*c0909341SAndroid Build Coastguard Worker vld1.8 {q2, q3}, [r1, :128], r2 2814*c0909341SAndroid Build Coastguard Worker vld1.8 {q13, q14}, [r12, :128], r2 2815*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d4, #3 2816*c0909341SAndroid Build Coastguard 
Worker vshll.u8 q1, d5, #3 2817*c0909341SAndroid Build Coastguard Worker vshll.u8 q2, d6, #3 2818*c0909341SAndroid Build Coastguard Worker vshll.u8 q3, d7, #3 2819*c0909341SAndroid Build Coastguard Worker vshll.u8 q12, d26, #3 2820*c0909341SAndroid Build Coastguard Worker vshll.u8 q13, d27, #3 2821*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2822*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2823*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2824*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2825*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d28, #3 2826*c0909341SAndroid Build Coastguard Worker vshll.u8 q1, d29, #3 2827*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 2828*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2829*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2830*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [r0, :128]! 2831*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q12 2832*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q13 2833*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 
2834*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q0 2835*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q1 2836*c0909341SAndroid Build Coastguard Worker bgt 1b 2837*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2838*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_444_w32_hpad) 2839*c0909341SAndroid Build Coastguard Worker 2840*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad2): 2841*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input, padding 8 2842*c0909341SAndroid Build Coastguard Worker vldr d4, [r1, #16] 2843*c0909341SAndroid Build Coastguard Worker vld1.8 {q1}, [r1, :128], r2 2844*c0909341SAndroid Build Coastguard Worker vldr d28, [r12, #16] 2845*c0909341SAndroid Build Coastguard Worker vld1.8 {q13}, [r12, :128], r2 2846*c0909341SAndroid Build Coastguard Worker vshll.u8 q2, d4, #3 2847*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d2, #3 2848*c0909341SAndroid Build Coastguard Worker vshll.u8 q1, d3, #3 2849*c0909341SAndroid Build Coastguard Worker vshll.u8 q12, d26, #3 2850*c0909341SAndroid Build Coastguard Worker vshll.u8 q13, d27, #3 2851*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d5[3] 2852*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2853*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2854*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2855*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2856*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d28, #3 2857*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 2858*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2859*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2860*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d1[3] 2861*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [r0, :128]! 
2862*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q12 2863*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q13 2864*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2865*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q0 2866*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q1 2867*c0909341SAndroid Build Coastguard Worker bgt 1b 2868*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2869*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_444_w32_hpad) 2870*c0909341SAndroid Build Coastguard Worker 2871*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad4): 2872*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input, padding 16 2873*c0909341SAndroid Build Coastguard Worker vld1.8 {q1}, [r1, :128], r2 2874*c0909341SAndroid Build Coastguard Worker vld1.8 {q13}, [r12, :128], r2 2875*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d2, #3 2876*c0909341SAndroid Build Coastguard Worker vshll.u8 q1, d3, #3 2877*c0909341SAndroid Build Coastguard Worker vshll.u8 q12, d26, #3 2878*c0909341SAndroid Build Coastguard Worker vshll.u8 q13, d27, #3 2879*c0909341SAndroid Build Coastguard Worker vdup.16 q2, d3[3] 2880*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d3[3] 2881*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2882*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2883*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2884*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2885*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d27[3] 2886*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d27[3] 2887*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 2888*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2889*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2890*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [r0, :128]! 
2891*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q12 2892*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q13 2893*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2894*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q0 2895*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q1 2896*c0909341SAndroid Build Coastguard Worker bgt 1b 2897*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2898*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_444_w32_hpad) 2899*c0909341SAndroid Build Coastguard Worker 2900*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad6): 2901*c0909341SAndroid Build Coastguard Worker1: // Copy and expand input, padding 24 2902*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r1, :64], r2 2903*c0909341SAndroid Build Coastguard Worker vld1.8 {d24}, [r12, :64], r2 2904*c0909341SAndroid Build Coastguard Worker vshll.u8 q0, d0, #3 2905*c0909341SAndroid Build Coastguard Worker vshll.u8 q12, d24, #3 2906*c0909341SAndroid Build Coastguard Worker subs r8, r8, #2 2907*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d1[3] 2908*c0909341SAndroid Build Coastguard Worker vdup.16 q2, d1[3] 2909*c0909341SAndroid Build Coastguard Worker vdup.16 q3, d1[3] 2910*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2911*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q0 2912*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q1 2913*c0909341SAndroid Build Coastguard Worker vdup.16 q13, d25[3] 2914*c0909341SAndroid Build Coastguard Worker vdup.16 q0, d25[3] 2915*c0909341SAndroid Build Coastguard Worker vdup.16 q1, d25[3] 2916*c0909341SAndroid Build Coastguard Worker vst1.16 {q2, q3}, [r0, :128]! 2917*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q2 2918*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q3 2919*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [r0, :128]! 
2920*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q12 2921*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q13 2922*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2923*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q0 2924*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q1 2925*c0909341SAndroid Build Coastguard Worker bgt 1b 2926*c0909341SAndroid Build Coastguard Worker cmp r4, #0 2927*c0909341SAndroid Build Coastguard Worker 2928*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_hpad): 2929*c0909341SAndroid Build Coastguard Worker beq 3f // This assumes that all callers already did "cmp r4, #0" 2930*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 2931*c0909341SAndroid Build Coastguard Worker subs r4, r4, #1 2932*c0909341SAndroid Build Coastguard Worker vst1.16 {q12, q13}, [r0, :128]! 2933*c0909341SAndroid Build Coastguard Worker vadd.i16 q8, q8, q12 2934*c0909341SAndroid Build Coastguard Worker vadd.i16 q9, q9, q13 2935*c0909341SAndroid Build Coastguard Worker vst1.16 {q0, q1}, [r0, :128]! 2936*c0909341SAndroid Build Coastguard Worker vadd.i16 q10, q10, q0 2937*c0909341SAndroid Build Coastguard Worker vadd.i16 q11, q11, q1 2938*c0909341SAndroid Build Coastguard Worker bgt 2b 2939*c0909341SAndroid Build Coastguard Worker3: 2940*c0909341SAndroid Build Coastguard Worker 2941*c0909341SAndroid Build Coastguard Worker // Multiply the height by eight and reuse the w4 subtracting 2942*c0909341SAndroid Build Coastguard Worker lsl r6, r6, #3 2943*c0909341SAndroid Build Coastguard Worker // Aggregate the sums, with wider intermediates earlier than in 2944*c0909341SAndroid Build Coastguard Worker // ipred_cfl_ac_420_w8_calc_subtract_dc. 
2945*c0909341SAndroid Build Coastguard Worker vpaddl.u16 q0, q8 2946*c0909341SAndroid Build Coastguard Worker vpaddl.u16 q1, q9 2947*c0909341SAndroid Build Coastguard Worker vpaddl.u16 q2, q10 2948*c0909341SAndroid Build Coastguard Worker vpaddl.u16 q3, q11 2949*c0909341SAndroid Build Coastguard Worker vadd.i32 q0, q0, q1 2950*c0909341SAndroid Build Coastguard Worker vadd.i32 q2, q2, q3 2951*c0909341SAndroid Build Coastguard Worker vadd.i32 q0, q0, q2 2952*c0909341SAndroid Build Coastguard Worker vadd.i32 d0, d0, d1 2953*c0909341SAndroid Build Coastguard Worker vpadd.i32 d0, d0, d0 // sum 2954*c0909341SAndroid Build Coastguard Worker sub r0, r0, r6, lsl #3 2955*c0909341SAndroid Build Coastguard Worker vrshl.u32 d16, d0, d31 // (sum + (1 << (log2sz - 1))) >>= log2sz 2956*c0909341SAndroid Build Coastguard Worker vdup.16 q8, d16[0] 2957*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w4_subtract_dc) 2958*c0909341SAndroid Build Coastguard Workerendfunc 2959