/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2019, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// Conventions shared by the routines below:
//   x0 = dst, x1 = stride (bytes), x2 = topleft, w3 = width, w4 = height.
//   Each routine dispatches on width through a table of 32-bit offsets
//   relative to the table itself. The index is clz(width) - 25, i.e. 0 for
//   width 64 down to 4 for width 4, matching the 640/320/160/80/40 entry order.
//   Rows are written through two pointers, x0 (even rows) and x6 = x0 + stride
//   (odd rows), both advanced by 2*stride, so every loop iteration emits four
//   rows and decrements the row counter w4 by 4.
//   For width 64 a row is 128 bytes and needs two 64-byte stores; the first
//   post-increments by #64, so x1 is pre-reduced by 64 to keep the net
//   per-row-pair advance at 2*stride.

// void ipred_dc_128_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int a,
//                              const int max_width, const int max_height,
//                              const int bitdepth_max);
//
// Fills the block with the constant (bitdepth_max + 1) >> 1.
function ipred_dc_128_16bpc_neon, export=1
        ldr             w8,  [sp]                // bitdepth_max: 9th argument per the prototype, read from the stack
        clz             w3,  w3
        movrel          x5,  ipred_dc_128_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]   // signed 32-bit offset of the width handler
        dup             v0.8h,   w8
        add             x5,  x5,  x3             // absolute address of the width handler
        add             x6,  x0,  x1             // x6 = second row
        lsl             x1,  x1,  #1             // step two rows at a time
        urshr           v0.8h,   v0.8h,  #1      // rounding halve: (bitdepth_max + 1) >> 1
        br              x5
40:
        AARCH64_VALID_JUMP_TARGET
4:
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
        mov             v1.16b,  v0.16b
16:
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
        mov             v1.16b,  v0.16b
        mov             v2.16b,  v0.16b
        mov             v3.16b,  v0.16b
32:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        b.gt            32b
        ret
640:
        AARCH64_VALID_JUMP_TARGET
        mov             v1.16b,  v0.16b
        mov             v2.16b,  v0.16b
        mov             v3.16b,  v0.16b
        sub             x1,  x1,  #64            // compensate for the #64 post-increment below
64:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        b.gt            64b
        ret
endfunc

// Width dispatch table for ipred_dc_128_16bpc_neon, widest first.
jumptable ipred_dc_128_tbl
        .word 640b - ipred_dc_128_tbl
        .word 320b - ipred_dc_128_tbl
        .word 160b - ipred_dc_128_tbl
        .word 80b  - ipred_dc_128_tbl
        .word 40b  - ipred_dc_128_tbl
endjumptable

// void ipred_v_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const topleft,
//                         const int width, const int height, const int a,
//                         const int max_width, const int max_height);
//
// Vertical prediction: loads `width` pixels starting one pixel (2 bytes)
// after topleft and copies that row into every output row.
function ipred_v_16bpc_neon, export=1
        clz             w3,  w3
        movrel          x5,  ipred_v_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]
        add             x2,  x2,  #2             // skip the topleft pixel itself
        add             x5,  x5,  x3
        add             x6,  x0,  x1             // x6 = second row
        lsl             x1,  x1,  #1             // step two rows at a time
        br              x5
40:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.4h},  [x2]
4:
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h},  [x2]
8:
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h}, [x2]
16:
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x2]
32:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        b.gt            32b
        ret
640:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x2], #64
        sub             x1,  x1,  #64            // compensate for the #64 post-increment below
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x2]
64:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], x1
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], x1
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], x1
        b.gt            64b
        ret
endfunc

// Width dispatch table for ipred_v_16bpc_neon, widest first.
jumptable ipred_v_tbl
        .word 640b - ipred_v_tbl
        .word 320b - ipred_v_tbl
        .word 160b - ipred_v_tbl
        .word 80b  - ipred_v_tbl
        .word 40b  - ipred_v_tbl
endjumptable

// void ipred_h_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const topleft,
//                         const int width, const int height, const int a,
//                         const int max_width, const int max_height);
//
// Horizontal prediction: each output row is filled with one pixel read from
// below topleft in memory. Every iteration loads the four pixels at x2 with
// ld4r, which broadcasts them into v0..v3 in ascending address order, then
// moves x2 back by 8 bytes. The highest-addressed pixel (v3) is therefore
// used for the first row of the group and v0 for the fourth.
function ipred_h_16bpc_neon, export=1
        clz             w3,  w3
        movrel          x5,  ipred_h_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]
        sub             x2,  x2,  #8             // point at the 4 pixels just before topleft
        add             x5,  x5,  x3
        mov             x7,  #-8                 // ld4r post-index: walk backwards 4 pixels at a time
        add             x6,  x0,  x1             // x6 = second row
        lsl             x1,  x1,  #1             // step two rows at a time
        br              x5
40:
        AARCH64_VALID_JUMP_TARGET
4:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7
        st1             {v3.4h},  [x0], x1
        st1             {v2.4h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v1.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7
        st1             {v3.8h},  [x0], x1
        st1             {v2.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v1.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
16:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7
        // Upper part of each row goes out via offset stores first; the final
        // st1 writes bytes 0..15 and advances the row pointer.
        str             q3,  [x0, #16]
        str             q2,  [x6, #16]
        st1             {v3.8h},  [x0], x1
        st1             {v2.8h},  [x6], x1
        subs            w4,  w4,  #4
        str             q1,  [x0, #16]
        str             q0,  [x6, #16]
        st1             {v1.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
32:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7
        str             q3,  [x0, #16]
        str             q2,  [x6, #16]
        stp             q3,  q3,  [x0, #32]
        stp             q2,  q2,  [x6, #32]
        st1             {v3.8h},  [x0], x1
        st1             {v2.8h},  [x6], x1
        subs            w4,  w4,  #4
        str             q1,  [x0, #16]
        str             q0,  [x6, #16]
        stp             q1,  q1,  [x0, #32]
        stp             q0,  q0,  [x6, #32]
        st1             {v1.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            32b
        ret
640:
        AARCH64_VALID_JUMP_TARGET
64:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7
        str             q3,  [x0, #16]
        str             q2,  [x6, #16]
        stp             q3,  q3,  [x0, #32]
        stp             q2,  q2,  [x6, #32]
        stp             q3,  q3,  [x0, #64]
        stp             q2,  q2,  [x6, #64]
        stp             q3,  q3,  [x0, #96]
        stp             q2,  q2,  [x6, #96]
        st1             {v3.8h},  [x0], x1
        st1             {v2.8h},  [x6], x1
        subs            w4,  w4,  #4
        str             q1,  [x0, #16]
        str             q0,  [x6, #16]
        stp             q1,  q1,  [x0, #32]
        stp             q0,  q0,  [x6, #32]
        stp             q1,  q1,  [x0, #64]
        stp             q0,  q0,  [x6, #64]
        stp             q1,  q1,  [x0, #96]
        stp             q0,  q0,  [x6, #96]
        st1             {v1.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            64b
        ret
endfunc

// Width dispatch table for ipred_h_16bpc_neon, widest first.
jumptable ipred_h_tbl
        .word 640b - ipred_h_tbl
        .word 320b - ipred_h_tbl
        .word 160b - ipred_h_tbl
        .word 80b  - ipred_h_tbl
        .word 40b  - ipred_h_tbl
endjumptable

// void ipred_dc_top_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int a,
//                              const int max_width, const int max_height);
//
// DC prediction from the top row only: sums the `width` pixels that follow
// topleft, applies a rounding right shift by log2(width), and fills the block
// with the result.
function ipred_dc_top_16bpc_neon, export=1
        clz             w3,  w3
        movrel          x5,  ipred_dc_top_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]
        add             x2,  x2,  #2             // skip the topleft pixel itself
        add             x5,  x5,  x3
        add             x6,  x0,  x1             // x6 = second row
        lsl             x1,  x1,  #1             // step two rows at a time
        br              x5
40:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.4h},  [x2]
        addv            h0,      v0.4h           // horizontal sum of 4 pixels
        urshr           v0.4h,   v0.4h,   #2     // rounded average
        dup             v0.4h,   v0.h[0]
4:
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h},  [x2]
        addv            h0,      v0.8h           // horizontal sum of 8 pixels
        urshr           v0.4h,   v0.4h,   #3     // rounded average
        dup             v0.8h,   v0.h[0]
8:
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h}, [x2]
        addp            v0.8h,   v0.8h,   v1.8h  // 16 pixels -> 8 pair sums
        addv            h0,      v0.8h
        urshr           v2.4h,   v0.4h,   #4     // rounded average
        dup             v0.8h,   v2.h[0]
        dup             v1.8h,   v2.h[0]
16:
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x2]
        addp            v0.8h,   v0.8h,   v1.8h
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v0.8h,   v0.8h,   v2.8h  // 32 pixels -> 8 partial sums
        uaddlv          s0,      v0.8h           // final sum widened to 32 bits (presumably to avoid 16-bit overflow)
        rshrn           v4.4h,   v0.4s,   #5     // rounded average, narrowed back to 16 bits
        dup             v0.8h,   v4.h[0]
        dup             v1.8h,   v4.h[0]
        dup             v2.8h,   v4.h[0]
        dup             v3.8h,   v4.h[0]
32:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        b.gt            32b
        ret
640:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x2], #64
        addp            v0.8h,   v0.8h,   v1.8h
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x2]
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v4.8h,   v4.8h,   v5.8h
        addp            v6.8h,   v6.8h,   v7.8h
        addp            v0.8h,   v0.8h,   v2.8h
        addp            v4.8h,   v4.8h,   v6.8h
        addp            v0.8h,   v0.8h,   v4.8h  // 64 pixels -> 8 partial sums
        uaddlv          s0,      v0.8h           // final sum widened to 32 bits
        rshrn           v4.4h,   v0.4s,   #6     // rounded average, narrowed back to 16 bits
        sub             x1,  x1,  #64            // compensate for the #64 post-increment below
        dup             v0.8h,   v4.h[0]
        dup             v1.8h,   v4.h[0]
        dup             v2.8h,   v4.h[0]
        dup             v3.8h,   v4.h[0]
64:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        b.gt            64b
        ret
endfunc

// Width dispatch table for ipred_dc_top_16bpc_neon, widest first.
jumptable ipred_dc_top_tbl
        .word 640b - ipred_dc_top_tbl
        .word 320b - ipred_dc_top_tbl
        .word 160b - ipred_dc_top_tbl
        .word 80b  - ipred_dc_top_tbl
        .word 40b  - ipred_dc_top_tbl
endjumptable

// void ipred_dc_left_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height, const int a,
//                               const int max_width, const int max_height);
//
// DC prediction from the left column only. Dispatch is two-stage: the first
// jump (x5, selected by height) goes to an `_h*` routine that averages
// `height` pixels read from topleft - height, which then branches through x3
// (selected by width) to the matching `_w*` store loop.
function ipred_dc_left_16bpc_neon, export=1
        sub             x2,  x2,  w4, uxtw #1    // x2 = topleft - height pixels (2 bytes each)
        clz             w3,  w3
        clz             w7,  w4
        movrel          x5,  ipred_dc_left_tbl
        sub             w3,  w3,  #20 // 25 leading bits, minus table offset 5
        sub             w7,  w7,  #25            // height index into the same table
        ldrsw           x3,  [x5, w3, uxtw #2]
        ldrsw           x7,  [x5, w7, uxtw #2]
        add             x3,  x5,  x3             // x3 = width store loop
        add             x5,  x5,  x7             // x5 = height averaging routine
        add             x6,  x0,  x1             // x6 = second row
        lsl             x1,  x1,  #1             // step two rows at a time
        br              x5

L(ipred_dc_left_h4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.4h},  [x2]
        addv            h0,      v0.4h
        urshr           v0.4h,   v0.4h,   #2     // rounded average of 4 pixels
        dup             v0.8h,   v0.h[0]
        br              x3
L(ipred_dc_left_w4):
        AARCH64_VALID_JUMP_TARGET
1:
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        b.gt            1b
        ret

L(ipred_dc_left_h8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h},  [x2]
        addv            h0,      v0.8h
        urshr           v0.4h,   v0.4h,   #3     // rounded average of 8 pixels
        dup             v0.8h,   v0.h[0]
        br              x3
L(ipred_dc_left_w8):
        AARCH64_VALID_JUMP_TARGET
1:
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            1b
        ret

L(ipred_dc_left_h16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h}, [x2]
        addp            v0.8h,   v0.8h,   v1.8h
        addv            h0,      v0.8h
        urshr           v2.4h,   v0.4h,   #4     // rounded average of 16 pixels
        dup             v0.8h,   v2.h[0]
        dup             v1.8h,   v2.h[0]
        br              x3
L(ipred_dc_left_w16):
        AARCH64_VALID_JUMP_TARGET
        mov             v1.16b,  v0.16b          // the _h* routines only guarantee v0
1:
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h}, [x0], x1
        st1             {v0.8h, v1.8h}, [x6], x1
        b.gt            1b
        ret

L(ipred_dc_left_h32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x2]
        addp            v0.8h,   v0.8h,   v1.8h
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v0.8h,   v0.8h,   v2.8h
        uaddlp          v0.4s,   v0.8h           // widen partial sums to 32 bits
        addv            s0,      v0.4s
        rshrn           v4.4h,   v0.4s,   #5     // rounded average of 32 pixels
        dup             v0.8h,   v4.h[0]
        br              x3
L(ipred_dc_left_w32):
        AARCH64_VALID_JUMP_TARGET
        mov             v1.16b,  v0.16b
        mov             v2.16b,  v0.16b
        mov             v3.16b,  v0.16b
1:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1
        b.gt            1b
ret 531*c0909341SAndroid Build Coastguard Worker 532*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h64): 533*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 534*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x2], #64 535*c0909341SAndroid Build Coastguard Worker addp v0.8h, v0.8h, v1.8h 536*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x2] 537*c0909341SAndroid Build Coastguard Worker addp v2.8h, v2.8h, v3.8h 538*c0909341SAndroid Build Coastguard Worker addp v4.8h, v4.8h, v5.8h 539*c0909341SAndroid Build Coastguard Worker addp v6.8h, v6.8h, v7.8h 540*c0909341SAndroid Build Coastguard Worker addp v0.8h, v0.8h, v2.8h 541*c0909341SAndroid Build Coastguard Worker addp v4.8h, v4.8h, v6.8h 542*c0909341SAndroid Build Coastguard Worker addp v0.8h, v0.8h, v4.8h 543*c0909341SAndroid Build Coastguard Worker uaddlv s0, v0.8h 544*c0909341SAndroid Build Coastguard Worker rshrn v4.4h, v0.4s, #6 545*c0909341SAndroid Build Coastguard Worker dup v0.8h, v4.h[0] 546*c0909341SAndroid Build Coastguard Worker br x3 547*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w64): 548*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 549*c0909341SAndroid Build Coastguard Worker mov v1.16b, v0.16b 550*c0909341SAndroid Build Coastguard Worker mov v2.16b, v0.16b 551*c0909341SAndroid Build Coastguard Worker mov v3.16b, v0.16b 552*c0909341SAndroid Build Coastguard Worker sub x1, x1, #64 553*c0909341SAndroid Build Coastguard Worker1: 554*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 555*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64 556*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1 557*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1 558*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 559*c0909341SAndroid Build Coastguard Worker st1 
{v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 560*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], #64 561*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1 562*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x6], x1 563*c0909341SAndroid Build Coastguard Worker b.gt 1b 564*c0909341SAndroid Build Coastguard Worker ret 565*c0909341SAndroid Build Coastguard Workerendfunc 566*c0909341SAndroid Build Coastguard Worker 567*c0909341SAndroid Build Coastguard Workerjumptable ipred_dc_left_tbl 568*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h64) - ipred_dc_left_tbl 569*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h32) - ipred_dc_left_tbl 570*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h16) - ipred_dc_left_tbl 571*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h8) - ipred_dc_left_tbl 572*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_h4) - ipred_dc_left_tbl 573*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w64) - ipred_dc_left_tbl 574*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w32) - ipred_dc_left_tbl 575*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w16) - ipred_dc_left_tbl 576*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w8) - ipred_dc_left_tbl 577*c0909341SAndroid Build Coastguard Worker .word L(ipred_dc_left_w4) - ipred_dc_left_tbl 578*c0909341SAndroid Build Coastguard Workerendjumptable 579*c0909341SAndroid Build Coastguard Worker 580*c0909341SAndroid Build Coastguard Worker// void ipred_dc_16bpc_neon(pixel *dst, const ptrdiff_t stride, 581*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 582*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 583*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 584*c0909341SAndroid Build 
// DC intra prediction, 16 bpc: fill the block with
//     (sum(left) + sum(top) + ((width + height) >> 1)) / (width + height)
//
// Register use as read from the code below (arguments presumably arrive per
// AAPCS64 in the order of the C prototype):
//   x0 = dst, x1 = stride in bytes, x2 = topleft, w3 = width, w4 = height
//
// The work is split in two table-dispatched stages:
//   L(ipred_dc_hN)  sums the N left pixels into s0, leaves x2 pointing at the
//                   top row and jumps to the width handler held in x3.
//   L(ipred_dc_wN)  sums the N top pixels, finishes the division and stores
//                   the result over the whole block, four rows per iteration.
//
// The division is a right shift by ctz(width + height); for non-square blocks
// that leaves a factor of 3 or 5, which is removed with a fixed-point
// reciprocal multiply (see the macro below).

// Multiply the .2s lanes of \acc by 0x6667 (~2^17/5) when the condition flags
// read "eq", otherwise by 0xAAAB (~2^17/3), then shift right by 17.
// The flags must have been set by the caller's preceding cmp/tst.
// Clobbers w16, w17 and v16.
.macro dc_16bpc_div3_or_div5 acc
        mov             w16, #0x6667
        mov             w17, #0xAAAB
        csel            w16, w16, w17, eq
        dup             v16.2s,  w16
        mul             \acc\().2s, \acc\().2s, v16.2s
        ushr            \acc\().2s, \acc\().2s, #17
.endm

function ipred_dc_16bpc_neon, export=1
        sub             x2,  x2,  w4, uxtw #1     // x2 = topleft - height pixels: start of the left edge
        add             w7,  w3,  w4              // w7 = width + height
        clz             w3,  w3
        clz             w6,  w4
        dup             v16.4s,  w7               // v16 = width + height (halved below)
        movrel          x5,  ipred_dc_tbl
        rbit            w7,  w7                   // bit-reverse so that clz counts trailing zeros
        sub             w3,  w3,  #20             // width index: clz - 25, plus 5 to skip the height entries
        sub             w6,  w6,  #25             // height index: clz(64) == 25 maps to entry 0
        clz             w7,  w7                   // w7 = ctz(width + height)
        ldrsw           x3,  [x5, w3, uxtw #2]    // table entries are offsets relative to the table
        ldrsw           x6,  [x5, w6, uxtw #2]
        neg             w7,  w7                   // negative count: ushl then shifts right
        add             x3,  x5,  x3              // x3 = width handler, entered from the height handler
        add             x5,  x5,  x6              // x5 = height handler
        ushr            v16.4s,  v16.4s,  #1      // v16 = rounding bias, (width + height) >> 1
        dup             v17.4s,  w7               // v17 = -ctz(width + height)
        add             x6,  x0,  x1              // x6 = second row; x0/x6 each advance two rows at a time
        lsl             x1,  x1,  #1              // x1 = 2 * stride
        br              x5

L(ipred_dc_h4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.4h},  [x2], #8        // 4 left pixels
        uaddlv          s0,  v0.4h                // s0 = sum(left)
        add             x2,  x2,  #2              // step over the topleft pixel to the top row
        br              x3
L(ipred_dc_w4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.4h},  [x2]            // 4 top pixels
        add             v0.2s,   v0.2s,   v16.2s  // sum(left) + bias
        uaddlv          s1,  v1.4h                // s1 = sum(top)
        cmp             w4,  #4
        add             v0.2s,   v0.2s,   v1.2s
        ushl            v0.2s,   v0.2s,   v17.2s  // >> ctz(width + height)
        b.eq            1f                        // square block: the shift was the whole division
        // h = 8/16
        cmp             w4,  #16                  // eq selects the 1/5 reciprocal, else 1/3
        dc_16bpc_div3_or_div5 v0
1:
        dup             v0.4h,   v0.h[0]          // broadcast the DC value
2:
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        subs            w4,  w4,  #4              // four rows written per iteration
        st1             {v0.4h},  [x0], x1
        st1             {v0.4h},  [x6], x1
        b.gt            2b
        ret

L(ipred_dc_h8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h},  [x2], #16       // 8 left pixels
        uaddlv          s0,  v0.8h                // s0 = sum(left)
        add             x2,  x2,  #2              // step over the topleft pixel to the top row
        br              x3
L(ipred_dc_w8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.8h},  [x2]            // 8 top pixels
        add             v0.2s,   v0.2s,   v16.2s  // sum(left) + bias
        uaddlv          s1,  v1.8h                // s1 = sum(top)
        cmp             w4,  #8
        add             v0.2s,   v0.2s,   v1.2s
        ushl            v0.2s,   v0.2s,   v17.2s  // >> ctz(width + height)
        b.eq            1f                        // square block
        // h = 4/16/32
        cmp             w4,  #32                  // eq selects the 1/5 reciprocal, else 1/3
        dc_16bpc_div3_or_div5 v0
1:
        dup             v0.8h,   v0.h[0]          // broadcast the DC value
2:
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h},  [x0], x1
        st1             {v0.8h},  [x6], x1
        b.gt            2b
        ret

L(ipred_dc_h16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h},  [x2], #32 // 16 left pixels
        addp            v0.8h,   v0.8h,   v1.8h   // pairwise fold 16 -> 8 lanes
        add             x2,  x2,  #2              // step over the topleft pixel to the top row
        uaddlv          s0,  v0.8h                // s0 = sum(left)
        br              x3
L(ipred_dc_w16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.8h, v2.8h},  [x2]     // 16 top pixels
        add             v0.2s,   v0.2s,   v16.2s  // sum(left) + bias
        addp            v1.8h,   v1.8h,   v2.8h
        uaddlv          s1,  v1.8h                // s1 = sum(top)
        cmp             w4,  #16
        add             v0.2s,   v0.2s,   v1.2s
        ushl            v4.2s,   v0.2s,   v17.2s  // >> ctz(width + height); result kept in v4
        b.eq            1f                        // square block
        // h = 4/8/32/64
        tst             w4,  #(32+16+8)           // 16 added to make a consecutive bitmask;
                                                  // eq (h = 4 or 64) selects 1/5, else 1/3
        dc_16bpc_div3_or_div5 v4
1:
        dup             v0.8h,   v4.h[0]          // broadcast the DC value
        dup             v1.8h,   v4.h[0]
2:
        st1             {v0.8h, v1.8h},  [x0], x1
        st1             {v0.8h, v1.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h},  [x0], x1
        st1             {v0.8h, v1.8h},  [x6], x1
        b.gt            2b
        ret

L(ipred_dc_h32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], #64 // 32 left pixels
        addp            v0.8h,   v0.8h,   v1.8h   // pairwise fold 32 -> 8 lanes
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v0.8h,   v0.8h,   v2.8h
        add             x2,  x2,  #2              // step over the topleft pixel to the top row
        uaddlv          s0,  v0.8h                // s0 = sum(left)
        br              x3
L(ipred_dc_w32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.8h, v2.8h, v3.8h, v4.8h},  [x2] // 32 top pixels
        add             v0.2s,   v0.2s,   v16.2s  // sum(left) + bias
        addp            v1.8h,   v1.8h,   v2.8h
        addp            v3.8h,   v3.8h,   v4.8h
        addp            v1.8h,   v1.8h,   v3.8h
        uaddlv          s1,  v1.8h                // s1 = sum(top)
        cmp             w4,  #32
        add             v0.2s,   v0.2s,   v1.2s
        ushl            v4.2s,   v0.2s,   v17.2s  // >> ctz(width + height); result kept in v4
        b.eq            1f                        // square block
        // h = 8/16/64
        cmp             w4,  #8                   // eq selects the 1/5 reciprocal, else 1/3
        dc_16bpc_div3_or_div5 v4
1:
        dup             v0.8h,   v4.h[0]          // broadcast the DC value
        dup             v1.8h,   v4.h[0]
        dup             v2.8h,   v4.h[0]
        dup             v3.8h,   v4.h[0]
2:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x6], x1
        b.gt            2b
        ret

L(ipred_dc_h64):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], #64 // left pixels 0-31
        addp            v0.8h,   v0.8h,   v1.8h
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h},  [x2], #64 // left pixels 32-63
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v4.8h,   v4.8h,   v5.8h
        addp            v6.8h,   v6.8h,   v7.8h
        addp            v0.8h,   v0.8h,   v2.8h
        addp            v4.8h,   v4.8h,   v6.8h
        addp            v0.8h,   v0.8h,   v4.8h   // 64 -> 8 lanes
        add             x2,  x2,  #2              // step over the topleft pixel to the top row
        uaddlv          s0,  v0.8h                // s0 = sum(left)
        br              x3
L(ipred_dc_w64):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.8h, v2.8h, v3.8h, v4.8h},  [x2], #64 // top pixels 0-31
        add             v0.2s,   v0.2s,   v16.2s  // sum(left) + bias
        addp            v1.8h,   v1.8h,   v2.8h
        ld1             {v20.8h, v21.8h, v22.8h, v23.8h},  [x2]  // top pixels 32-63
        addp            v3.8h,   v3.8h,   v4.8h
        addp            v20.8h,  v20.8h,  v21.8h
        addp            v22.8h,  v22.8h,  v23.8h
        addp            v1.8h,   v1.8h,   v3.8h
        addp            v20.8h,  v20.8h,  v22.8h
        addp            v1.8h,   v1.8h,   v20.8h  // 64 -> 8 lanes
        uaddlv          s1,  v1.8h                // s1 = sum(top)
        cmp             w4,  #64
        add             v0.2s,   v0.2s,   v1.2s
        ushl            v4.2s,   v0.2s,   v17.2s  // >> ctz(width + height); result kept in v4
        b.eq            1f                        // square block
        // h = 16/32
        cmp             w4,  #16                  // eq selects the 1/5 reciprocal, else 1/3
        dc_16bpc_div3_or_div5 v4
1:
        sub             x1,  x1,  #64             // each row is two 64-byte stores; the first advances by 64
        dup             v0.8h,   v4.h[0]          // broadcast the DC value
        dup             v1.8h,   v4.h[0]
        dup             v2.8h,   v4.h[0]
        dup             v3.8h,   v4.h[0]
2:
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x6], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x0], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x6], #64
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x0], x1
        st1             {v0.8h, v1.8h, v2.8h, v3.8h},  [x6], x1
        b.gt            2b
        ret
endfunc

// Entries 0-4: height handlers for 64..4 (index clz(height) - 25).
// Entries 5-9: width handlers for 64..4 (index clz(width) - 20).
jumptable ipred_dc_tbl
        .word L(ipred_dc_h64) - ipred_dc_tbl
        .word L(ipred_dc_h32) - ipred_dc_tbl
        .word L(ipred_dc_h16) - ipred_dc_tbl
        .word L(ipred_dc_h8)  - ipred_dc_tbl
        .word L(ipred_dc_h4)  - ipred_dc_tbl
        .word L(ipred_dc_w64) - ipred_dc_tbl
        .word L(ipred_dc_w32) - ipred_dc_tbl
        .word L(ipred_dc_w16) - ipred_dc_tbl
        .word L(ipred_dc_w8)  - ipred_dc_tbl
        .word L(ipred_dc_w4)  - ipred_dc_tbl
endjumptable
// void ipred_paeth_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                             const pixel *const topleft,
//                             const int width, const int height, const int a,
//                             const int max_width, const int max_height);
//
// Paeth intra prediction, 16 bpc. For every output pixel:
//     base   = left + top - topleft
//     ldiff  = |left - base|, tdiff = |top - base|, tldiff = |topleft - base|
//     result = ldiff <= min(tdiff, tldiff) ? left
//            : tdiff <= tldiff             ? top
//            :                               topleft
//
// Register roles as used below:
//   x0/x6 (and x5/x10 in the wide path) = destination row pointers
//   x2 = left-edge cursor, stepped by x7 = -8 bytes (four pixels) per pass
//   x8 = top-row cursor
//   v4 = topleft broadcast, v5 = top pixels, v6 = top - topleft
//   v0-v3 = left pixels, one row per register, each broadcast across lanes
function ipred_paeth_16bpc_neon, export=1
        clz             w9,  w3
        movrel          x5,  ipred_paeth_tbl
        sub             w9,  w9,  #25             // clz(64) == 25 maps to table entry 0
        ldrsw           x9,  [x5, w9, uxtw #2]    // table entries are offsets relative to the table
        ld1r            {v4.8h},  [x2]            // v4 = topleft in every lane
        add             x8,  x2,  #2              // x8 = first top pixel
        sub             x2,  x2,  #8              // x2 = the four left pixels just before topleft
        add             x5,  x5,  x9
        mov             x7,  #-8                  // left-edge step: four pixels backwards
        add             x6,  x0,  x1              // x6 = second row
        lsl             x1,  x1,  #1              // x1 = 2 * stride
        br              x5
40:
        AARCH64_VALID_JUMP_TARGET
        // Width 4: two rows share one 128-bit register.
        ld1r            {v5.2d},  [x8]            // four top pixels, repeated in both halves
        sub             v6.8h,   v5.8h,   v4.8h   // top - topleft
4:
        ld4r            {v0.4h, v1.4h, v2.4h, v3.4h},  [x2], x7 // four left pixels, one per register
        zip1            v0.2d,   v0.2d,   v1.2d   // v0 = fourth row (low half) | third row (high half)
        zip1            v2.2d,   v2.2d,   v3.2d   // v2 = second row (low half) | first row (high half)
        add             v16.8h,  v6.8h,   v0.8h   // base = left + top - topleft
        add             v17.8h,  v6.8h,   v2.8h
        sabd            v20.8h,  v5.8h,   v16.8h  // tdiff  = |top - base|
        sabd            v21.8h,  v5.8h,   v17.8h
        sabd            v22.8h,  v4.8h,   v16.8h  // tldiff = |topleft - base|
        sabd            v23.8h,  v4.8h,   v17.8h
        sabd            v16.8h,  v0.8h,   v16.8h  // ldiff  = |left - base| (overwrites base)
        sabd            v17.8h,  v2.8h,   v17.8h
        umin            v18.8h,  v20.8h,  v22.8h  // min(tdiff, tldiff)
        umin            v19.8h,  v21.8h,  v23.8h
        cmge            v20.8h,  v22.8h,  v20.8h  // mask: tldiff >= tdiff
        cmge            v21.8h,  v23.8h,  v21.8h
        cmge            v16.8h,  v18.8h,  v16.8h  // mask: min(tdiff, tldiff) >= ldiff
        cmge            v17.8h,  v19.8h,  v17.8h
        bsl             v21.16b, v5.16b,  v4.16b  // mask set ? top : topleft
        bsl             v20.16b, v5.16b,  v4.16b
        bit             v21.16b, v2.16b,  v17.16b // mask set ? left : keep previous choice
        bit             v20.16b, v0.16b,  v16.16b
        st1             {v21.d}[1],  [x0], x1     // first row
        st1             {v21.d}[0],  [x6], x1     // second row
        subs            w4,  w4,  #4              // four rows written per iteration
        st1             {v20.d}[1],  [x0], x1     // third row
        st1             {v20.d}[0],  [x6], x1     // fourth row
        b.gt            4b
        ret
80:
160:
320:
640:
        AARCH64_VALID_JUMP_TARGET
        // Width >= 8: four rows at a time, eight columns per inner iteration.
        ld1             {v5.8h},  [x8], #16       // first eight top pixels
        mov             w9,  w3                   // keep the width; w3 becomes the column counter
        // Set up pointers for four rows in parallel; x0, x6, x5, x10
        add             x5,  x0,  x1              // third row  (x1 is 2 * stride here)
        add             x10, x6,  x1              // fourth row
        lsl             x1,  x1,  #1              // x1 = 4 * stride
        sub             x1,  x1,  w3, uxtw #1     // minus the bytes the post-incrementing stores cover per row
1:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7 // left pixels; v3 = first row ... v0 = fourth row
2:
        sub             v6.8h,   v5.8h,   v4.8h   // top - topleft
        add             v16.8h,  v6.8h,   v0.8h   // base = left + top - topleft
        add             v17.8h,  v6.8h,   v1.8h
        add             v18.8h,  v6.8h,   v2.8h
        add             v19.8h,  v6.8h,   v3.8h
        sabd            v20.8h,  v5.8h,   v16.8h  // tdiff  = |top - base|
        sabd            v21.8h,  v5.8h,   v17.8h
        sabd            v22.8h,  v5.8h,   v18.8h
        sabd            v23.8h,  v5.8h,   v19.8h
        sabd            v24.8h,  v4.8h,   v16.8h  // tldiff = |topleft - base|
        sabd            v25.8h,  v4.8h,   v17.8h
        sabd            v26.8h,  v4.8h,   v18.8h
        sabd            v27.8h,  v4.8h,   v19.8h
        sabd            v16.8h,  v0.8h,   v16.8h  // ldiff  = |left - base| (overwrites base)
        sabd            v17.8h,  v1.8h,   v17.8h
        sabd            v18.8h,  v2.8h,   v18.8h
        sabd            v19.8h,  v3.8h,   v19.8h
        umin            v28.8h,  v20.8h,  v24.8h  // min(tdiff, tldiff)
        umin            v29.8h,  v21.8h,  v25.8h
        umin            v30.8h,  v22.8h,  v26.8h
        umin            v31.8h,  v23.8h,  v27.8h
        cmge            v20.8h,  v24.8h,  v20.8h  // mask: tldiff >= tdiff
        cmge            v21.8h,  v25.8h,  v21.8h
        cmge            v22.8h,  v26.8h,  v22.8h
        cmge            v23.8h,  v27.8h,  v23.8h
        cmge            v16.8h,  v28.8h,  v16.8h  // mask: min(tdiff, tldiff) >= ldiff
        cmge            v17.8h,  v29.8h,  v17.8h
        cmge            v18.8h,  v30.8h,  v18.8h
        cmge            v19.8h,  v31.8h,  v19.8h
        bsl             v23.16b, v5.16b,  v4.16b  // mask set ? top : topleft
        bsl             v22.16b, v5.16b,  v4.16b
        bsl             v21.16b, v5.16b,  v4.16b
        bsl             v20.16b, v5.16b,  v4.16b
        bit             v23.16b, v3.16b,  v19.16b // mask set ? left : keep previous choice
        bit             v22.16b, v2.16b,  v18.16b
        bit             v21.16b, v1.16b,  v17.16b
        bit             v20.16b, v0.16b,  v16.16b
        st1             {v23.8h},  [x0], #16      // first row
        st1             {v22.8h},  [x6], #16      // second row
        subs            w3,  w3,  #8              // eight columns done
        st1             {v21.8h},  [x5], #16      // third row
        st1             {v20.8h},  [x10], #16     // fourth row
        b.le            8f
        ld1             {v5.8h},  [x8], #16       // next eight top pixels
        b               2b
8:
        subs            w4,  w4,  #4              // four rows done
        b.le            9f
        // End of horizontal loop, move pointers to next four rows
        sub             x8,  x8,  w9, uxtw #1     // rewind the top-row cursor by width pixels
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        // Load the top row as early as possible
        ld1             {v5.8h},  [x8], #16
        add             x5,  x5,  x1
        add             x10, x10, x1
        mov             w3,  w9                   // restore the column counter
        b               1b
9:
        ret
endfunc

// Width-dispatch offsets for the function that ends just above. Every entry
// is a backward reference to one of that function's numeric labels, stored
// relative to the table's own address, ordered from label 640 down to 40.
jumptable ipred_paeth_tbl
        .word 640b - ipred_paeth_tbl
        .word 320b - ipred_paeth_tbl
        .word 160b - ipred_paeth_tbl
        .word 80b  - ipred_paeth_tbl
        .word 40b  - ipred_paeth_tbl
endjumptable

// void ipred_smooth_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int a,
//                              const int max_width, const int max_height);
//
// Two-dimensional smooth predictor. Every output pixel is
//
//   ((bottom + right) * 256
//    + (left[y] - right)  * weights_hor[x]
//    + (top[x]  - bottom) * weights_ver[y] + 256) >> 9
//
// where "bottom" is read from topleft - 2*height bytes and "right" is the
// pixel just past the last top pixel of the block.
//
// Registers on entry (following the prototype): x0 = dst, x1 = stride,
// x2 = topleft, w3 = width, w4 = height. The remaining prototype arguments
// are not read by this routine; x5-x7 are reused as scratch.
//
// Register roles after the prologue:
//   x10 = sm_weights + width   (horizontal weights, one byte each)
//   x11 = sm_weights + height  (vertical weights, one byte each)
//   x8  = topleft + 2          (first top pixel)
//   x6  = dst + stride         (odd output rows)
//   x1  = 2 * stride
//   v4  = bottom, broadcast to all lanes
//
// NOTE(review): the row loops step height by 4 (or 2 in the wide path) and
// exit on <= 0, so height is presumably always a multiple of that step.
function ipred_smooth_16bpc_neon, export=1
        movrel          x10, X(sm_weights)
        add             x11, x10, w4, uxtw
        add             x10, x10, w3, uxtw
        // clz(width) - 25 maps width 64/32/16/8/4 to table index 0/1/2/3/4.
        clz             w9,  w3
        movrel          x5,  ipred_smooth_tbl
        sub             x12, x2,  w4, uxtw #1
        sub             w9,  w9,  #25
        ldrsw           x9,  [x5, w9, uxtw #2]
        ld1r            {v4.8h},  [x12]           // v4 = bottom
        add             x8,  x2,  #2
        add             x5,  x5,  x9
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        br              x5

        // ---- width 4: four rows per iteration, one row per accumulator ----
40:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v6.2d}, [x8]             // top[0..3], duplicated in both halves
        ld1r            {v7.2s}, [x10]            // weights_hor[0..3], duplicated
        sub             x2,  x2,  #8              // x2 -> the four left pixels nearest topleft
        mov             x7,  #-8                  // step to the next four left pixels
        dup             v5.8h,   v6.h[3]          // v5 = right (last top pixel)
        sub             v6.8h,   v6.8h,   v4.8h   // v6 = top - bottom
        uxtl            v7.8h,   v7.8b            // widen weights_hor to 16 bit
        add             v31.4h,  v4.4h,   v5.4h   // v31 = bottom + right
4:
        ld4r            {v0.4h, v1.4h, v2.4h, v3.4h},  [x2], x7 // four left pixels, each broadcast
        ld4r            {v16.8b, v17.8b, v18.8b, v19.8b}, [x11], #4 // four weights_ver, each broadcast
        // Seed v20..v23 with (bottom+right)*256. The .irp expands to the
        // same four ushll instructions in this order.
.irp acc, v20.4s, v21.4s, v22.4s, v23.4s
        ushll           \acc,    v31.4h,  #8
.endr
        // Pack two rows per register; the left pixels were loaded in
        // reverse row order, so this also flips them.
        zip1            v1.2d,   v1.2d,   v0.2d
        zip1            v0.2d,   v3.2d,   v2.2d
        zip1            v16.2s,  v16.2s,  v17.2s  // weights_ver for rows 0,1
        zip1            v18.2s,  v18.2s,  v19.2s  // weights_ver for rows 2,3
        sub             v0.8h,   v0.8h,   v5.8h   // left - right
        sub             v1.8h,   v1.8h,   v5.8h
        uxtl            v16.8h,  v16.8b           // widen weights_ver to 16 bit
        uxtl            v18.8h,  v18.8b
        // acc += (left - right) * weights_hor
        smlal           v20.4s,  v0.4h,   v7.4h
        smlal2          v21.4s,  v0.8h,   v7.8h
        smlal           v22.4s,  v1.4h,   v7.4h
        smlal2          v23.4s,  v1.8h,   v7.8h
        // acc += (top - bottom) * weights_ver
        smlal           v20.4s,  v6.4h,   v16.4h
        smlal2          v21.4s,  v6.8h,   v16.8h
        smlal           v22.4s,  v6.4h,   v18.4h
        smlal2          v23.4s,  v6.8h,   v18.8h
        // Rounding shift right by 9 and narrow back to 16-bit pixels.
        rshrn           v20.4h,  v20.4s,  #9
        rshrn           v21.4h,  v21.4s,  #9
        rshrn           v22.4h,  v22.4s,  #9
        rshrn           v23.4h,  v23.4s,  #9
        st1             {v20.4h}, [x0], x1
        st1             {v21.4h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v22.4h}, [x0], x1
        st1             {v23.4h}, [x6], x1
        b.gt            4b
        ret

        // ---- width 8: four rows per iteration, two accumulators per row ----
80:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v6.8h}, [x8]             // top[0..7]
        ld1             {v7.8b}, [x10]            // weights_hor[0..7]
        sub             x2,  x2,  #8              // x2 -> the four left pixels nearest topleft
        mov             x7,  #-8
        dup             v5.8h,   v6.h[7]          // v5 = right (last top pixel)
        sub             v6.8h,   v6.8h,   v4.8h   // v6 = top - bottom
        uxtl            v7.8h,   v7.8b            // widen weights_hor to 16 bit
        add             v31.4h,  v4.4h,   v5.4h   // v31 = bottom + right
8:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7 // four left pixels, each broadcast
        ld4r            {v16.8b, v17.8b, v18.8b, v19.8b}, [x11], #4 // four weights_ver, each broadcast
        // Seed v20..v27 with (bottom+right)*256.
.irp acc, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s
        ushll           \acc,    v31.4h,  #8
.endr
        // left - right for the four rows.
.irp lft, v0.8h, v1.8h, v2.8h, v3.8h
        sub             \lft,    \lft,    v5.8h
.endr
        uxtl            v16.8h,  v16.8b           // widen weights_ver to 16 bit
        uxtl            v17.8h,  v17.8b
        uxtl            v18.8h,  v18.8b
        uxtl            v19.8h,  v19.8b
        // acc += (left - right) * weights_hor. v3 holds the left pixel of
        // the first row, hence the descending register order.
        smlal           v20.4s,  v3.4h,   v7.4h
        smlal2          v21.4s,  v3.8h,   v7.8h
        smlal           v22.4s,  v2.4h,   v7.4h
        smlal2          v23.4s,  v2.8h,   v7.8h
        smlal           v24.4s,  v1.4h,   v7.4h
        smlal2          v25.4s,  v1.8h,   v7.8h
        smlal           v26.4s,  v0.4h,   v7.4h
        smlal2          v27.4s,  v0.8h,   v7.8h
        // acc += (top - bottom) * weights_ver
        smlal           v20.4s,  v6.4h,   v16.4h
        smlal2          v21.4s,  v6.8h,   v16.8h
        smlal           v22.4s,  v6.4h,   v17.4h
        smlal2          v23.4s,  v6.8h,   v17.8h
        smlal           v24.4s,  v6.4h,   v18.4h
        smlal2          v25.4s,  v6.8h,   v18.8h
        smlal           v26.4s,  v6.4h,   v19.4h
        smlal2          v27.4s,  v6.8h,   v19.8h
        // Rounding shift right by 9; each accumulator pair becomes one row.
        rshrn           v20.4h,  v20.4s,  #9
        rshrn2          v20.8h,  v21.4s,  #9
        rshrn           v21.4h,  v22.4s,  #9
        rshrn2          v21.8h,  v23.4s,  #9
        rshrn           v22.4h,  v24.4s,  #9
        rshrn2          v22.8h,  v25.4s,  #9
        rshrn           v23.4h,  v26.4s,  #9
        rshrn2          v23.8h,  v27.4s,  #9
        st1             {v20.8h}, [x0], x1
        st1             {v21.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v22.8h}, [x0], x1
        st1             {v23.8h}, [x6], x1
        b.gt            8b
        ret

        // ---- width 16/32/64: two rows per outer pass, 16 pixels per inner pass ----
160:
320:
640:
        AARCH64_VALID_JUMP_TARGET
        add             x12, x2,  w3, uxtw #1
        // The inner loop advances each row pointer by 2*width bytes, so
        // shrink the (already doubled) stride by that amount.
        sub             x1,  x1,  w3, uxtw #1
        ld1r            {v5.8h}, [x12]            // v5 = right (topleft[width])
        sub             x2,  x2,  #4              // x2 -> the two left pixels nearest topleft
        mov             x7,  #-4
        mov             w9,  w3                   // keep width to restart each row pair
        add             v31.4h,  v4.4h,   v5.4h   // v31 = bottom + right

1:
        ld2r            {v0.8h, v1.8h},   [x2],  x7 // two left pixels, each broadcast
        ld2r            {v16.8b, v17.8b}, [x11], #2 // two weights_ver, each broadcast
        sub             v0.8h,   v0.8h,   v5.8h   // left - right
        sub             v1.8h,   v1.8h,   v5.8h
        uxtl            v16.8h,  v16.8b           // widen weights_ver to 16 bit
        uxtl            v17.8h,  v17.8b
2:
        ld1             {v7.16b}, [x10],  #16     // next 16 weights_hor
        ld1             {v2.8h, v3.8h}, [x8], #32 // next 16 top pixels
        // Seed v20..v27 with (bottom+right)*256.
.irp acc, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s
        ushll           \acc,    v31.4h,  #8
.endr
        uxtl            v6.8h,   v7.8b            // widen weights_hor to 16 bit
        uxtl2           v7.8h,   v7.16b
        sub             v2.8h,   v2.8h,   v4.8h   // top - bottom
        sub             v3.8h,   v3.8h,   v4.8h
        // acc += (left - right) * weights_hor. v1 holds the left pixel of
        // the first row of the pair, v0 the second.
        smlal           v20.4s,  v1.4h,   v6.4h
        smlal2          v21.4s,  v1.8h,   v6.8h
        smlal           v22.4s,  v1.4h,   v7.4h
        smlal2          v23.4s,  v1.8h,   v7.8h
        smlal           v24.4s,  v0.4h,   v6.4h
        smlal2          v25.4s,  v0.8h,   v6.8h
        smlal           v26.4s,  v0.4h,   v7.4h
        smlal2          v27.4s,  v0.8h,   v7.8h
        // acc += (top - bottom) * weights_ver
        smlal           v20.4s,  v2.4h,   v16.4h
        smlal2          v21.4s,  v2.8h,   v16.8h
        smlal           v22.4s,  v3.4h,   v16.4h
        smlal2          v23.4s,  v3.8h,   v16.8h
        smlal           v24.4s,  v2.4h,   v17.4h
        smlal2          v25.4s,  v2.8h,   v17.8h
        smlal           v26.4s,  v3.4h,   v17.4h
        smlal2          v27.4s,  v3.8h,   v17.8h
        // Rounding shift right by 9 and narrow back to 16-bit pixels.
        rshrn           v20.4h,  v20.4s,  #9
        rshrn2          v20.8h,  v21.4s,  #9
        rshrn           v21.4h,  v22.4s,  #9
        rshrn2          v21.8h,  v23.4s,  #9
        rshrn           v22.4h,  v24.4s,  #9
        rshrn2          v22.8h,  v25.4s,  #9
        rshrn           v23.4h,  v26.4s,  #9
        rshrn2          v23.8h,  v27.4s,  #9
        subs            w3,  w3,  #16
        st1             {v20.8h, v21.8h}, [x0], #32
        st1             {v22.8h, v23.8h}, [x6], #32
        b.gt            2b
        subs            w4,  w4,  #2
        b.le            9f
        // Rewind the top and weights_hor pointers to the start of the row
        // and move both output pointers down by two rows.
        sub             x8,  x8,  w9, uxtw #1
        sub             x10, x10, w9, uxtw
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        mov             w3,  w9
        b               1b
9:
        ret
endfunc

// Width-dispatch offsets for ipred_smooth_16bpc_neon, indexed by
// clz(width) - 25 (width 64 first, width 4 last).
jumptable ipred_smooth_tbl
        .word 640b - ipred_smooth_tbl
        .word 320b - ipred_smooth_tbl
        .word 160b - ipred_smooth_tbl
        .word 80b  - ipred_smooth_tbl
        .word 40b  - ipred_smooth_tbl
endjumptable

// void ipred_smooth_v_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                                const pixel *const topleft,
//                                const int width, const int height, const int a,
//                                const int max_width, const int max_height);
//
// Vertical-only smooth predictor. Every output pixel is
//
//   bottom + (((top[x] - bottom) * weights_ver[y] + 128) >> 8)
//
// The scaled product comes from sqrdmulh with the weight pre-shifted left by
// 7: (2 * d * (w << 7) + 0x8000) >> 16 equals (d * w + 128) >> 8.
//
// Registers on entry (following the prototype): x0 = dst, x1 = stride,
// x2 = topleft, w3 = width, w4 = height. The remaining prototype arguments
// are not read by this routine; x5-x7 are reused as scratch.
//
// Register roles after the prologue:
//   x7 = sm_weights + height  (vertical weights, one byte each)
//   x2 = topleft + 2          (first top pixel)
//   x6 = dst + stride, x1 = 2 * stride
//   v4 = bottom (read from topleft - 2*height bytes), broadcast
//
// NOTE(review): every row loop steps height by 4 and exits on <= 0, so
// height is presumably always a multiple of 4.
function ipred_smooth_v_16bpc_neon, export=1
        movrel          x7,  X(sm_weights)
        add             x7,  x7,  w4, uxtw
        // clz(width) - 25 maps width 64/32/16/8/4 to table index 0/1/2/3/4.
        clz             w9,  w3
        movrel          x5,  ipred_smooth_v_tbl
        sub             x8,  x2,  w4, uxtw #1
        sub             w9,  w9,  #25
        ldrsw           x9,  [x5, w9, uxtw #2]
        ld1r            {v4.8h},  [x8]            // v4 = bottom
        add             x2,  x2,  #2
        add             x5,  x5,  x9
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        br              x5

        // ---- width 4: four rows per iteration, two rows per register ----
40:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v6.2d}, [x2]             // top[0..3], duplicated in both halves
        sub             v6.8h,   v6.8h,   v4.8h   // v6 = top - bottom
4:
        ld4r            {v16.8b, v17.8b, v18.8b, v19.8b}, [x7], #4 // four weights_ver, each broadcast
        zip1            v16.2s,  v16.2s,  v17.2s  // weights_ver for rows 0,1
        zip1            v18.2s,  v18.2s,  v19.2s  // weights_ver for rows 2,3
        ushll           v16.8h,  v16.8b,  #7      // weights_ver << 7
        ushll           v18.8h,  v18.8b,  #7
        // ((top-bottom)*weights_ver + 128) >> 8
        sqrdmulh        v20.8h,  v6.8h,   v16.8h
        sqrdmulh        v21.8h,  v6.8h,   v18.8h
        add             v20.8h,  v20.8h,  v4.8h   // + bottom
        add             v21.8h,  v21.8h,  v4.8h
        st1             {v20.d}[0], [x0], x1
        st1             {v20.d}[1], [x6], x1
        subs            w4,  w4,  #4
        st1             {v21.d}[0], [x0], x1
        st1             {v21.d}[1], [x6], x1
        b.gt            4b
        ret

        // ---- width 8: four rows per iteration, one row per register ----
80:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v6.8h}, [x2]             // top[0..7]
        sub             v6.8h,   v6.8h,   v4.8h   // v6 = top - bottom
8:
        ld4r            {v16.8b, v17.8b, v18.8b, v19.8b}, [x7], #4 // four weights_ver, each broadcast
        ushll           v16.8h,  v16.8b,  #7      // weights_ver << 7
        ushll           v17.8h,  v17.8b,  #7
        ushll           v18.8h,  v18.8b,  #7
        ushll           v19.8h,  v19.8b,  #7
        // ((top-bottom)*weights_ver + 128) >> 8
        sqrdmulh        v20.8h,  v6.8h,   v16.8h
        sqrdmulh        v21.8h,  v6.8h,   v17.8h
        sqrdmulh        v22.8h,  v6.8h,   v18.8h
        sqrdmulh        v23.8h,  v6.8h,   v19.8h
        // + bottom; the .irp expands to four add instructions in this order.
.irp px, v20.8h, v21.8h, v22.8h, v23.8h
        add             \px,     \px,     v4.8h
.endr
        st1             {v20.8h}, [x0], x1
        st1             {v21.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v22.8h}, [x0], x1
        st1             {v23.8h}, [x6], x1
        b.gt            8b
        ret

        // ---- width 16/32/64: four rows per outer pass, 16 pixels per inner pass ----
160:
320:
640:
        AARCH64_VALID_JUMP_TARGET
        // Set up pointers for four rows in parallel; x0, x6, x5, x8
        add             x5,  x0,  x1
        add             x8,  x6,  x1
        lsl             x1,  x1,  #1
        // The inner loop advances each row pointer by 2*width bytes, so
        // shrink the four-row step by that amount.
        sub             x1,  x1,  w3, uxtw #1
        mov             w9,  w3                   // keep width to restart each row group

1:
        ld4r            {v16.8b, v17.8b, v18.8b, v19.8b}, [x7], #4 // four weights_ver, each broadcast
        ushll           v16.8h,  v16.8b,  #7      // weights_ver << 7
        ushll           v17.8h,  v17.8b,  #7
        ushll           v18.8h,  v18.8b,  #7
        ushll           v19.8h,  v19.8b,  #7
2:
        ld1             {v2.8h, v3.8h}, [x2], #32 // next 16 top pixels
        sub             v2.8h,   v2.8h,   v4.8h   // top - bottom
        sub             v3.8h,   v3.8h,   v4.8h
        // ((top-bottom)*weights_ver + 128) >> 8, two registers per row
        sqrdmulh        v20.8h,  v2.8h,   v16.8h
        sqrdmulh        v21.8h,  v3.8h,   v16.8h
        sqrdmulh        v22.8h,  v2.8h,   v17.8h
        sqrdmulh        v23.8h,  v3.8h,   v17.8h
        sqrdmulh        v24.8h,  v2.8h,   v18.8h
        sqrdmulh        v25.8h,  v3.8h,   v18.8h
        sqrdmulh        v26.8h,  v2.8h,   v19.8h
        sqrdmulh        v27.8h,  v3.8h,   v19.8h
        // + bottom
.irp px, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h
        add             \px,     \px,     v4.8h
.endr
        subs            w3,  w3,  #16
        st1             {v20.8h, v21.8h}, [x0], #32
        st1             {v22.8h, v23.8h}, [x6], #32
        st1             {v24.8h, v25.8h}, [x5], #32
        st1             {v26.8h, v27.8h}, [x8], #32
        b.gt            2b
        subs            w4,  w4,  #4
        b.le            9f
        // Rewind the top pointer to the start of the row and move all four
        // output pointers down by four rows.
        sub             x2,  x2,  w9, uxtw #1
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        add             x5,  x5,  x1
        add             x8,  x8,  x1
        mov             w3,  w9
        b               1b
9:
        ret
endfunc
// Width-dispatch offsets for ipred_smooth_v_16bpc_neon, indexed by
// clz(width) - 25 (width 64 first, width 4 last). The entries are backward
// references to that function's numeric labels.
jumptable ipred_smooth_v_tbl
        .word 640b - ipred_smooth_v_tbl
        .word 320b - ipred_smooth_v_tbl
        .word 160b - ipred_smooth_v_tbl
        .word 80b  - ipred_smooth_v_tbl
        .word 40b  - ipred_smooth_v_tbl
endjumptable

// void ipred_smooth_h_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                                const pixel *const topleft,
//                                const int width, const int height, const int a,
//                                const int max_width, const int max_height);
//
// Horizontal-only smooth predictor. Every output pixel is
//
//   right + (((left[y] - right) * weights_hor[x] + 128) >> 8)
//
// The scaled product comes from sqrdmulh with the weight pre-shifted left by
// 7: (2 * d * (w << 7) + 0x8000) >> 16 equals (d * w + 128) >> 8.
//
// Registers on entry (following the prototype): x0 = dst, x1 = stride,
// x2 = topleft, w3 = width, w4 = height. The remaining prototype arguments
// are not read by this routine; x5-x7 are reused as scratch.
//
// Register roles after the prologue:
//   x8 = sm_weights + width  (horizontal weights, one byte each)
//   x6 = dst + stride, x1 = 2 * stride
//   v5 = right (read from topleft + 2*width bytes), broadcast
//
// NOTE(review): every row loop steps height by 4 and exits on <= 0, so
// height is presumably always a multiple of 4.
function ipred_smooth_h_16bpc_neon, export=1
        movrel          x8,  X(sm_weights)
        add             x8,  x8,  w3, uxtw
        // clz(width) - 25 maps width 64/32/16/8/4 to table index 0/1/2/3/4.
        clz             w9,  w3
        movrel          x5,  ipred_smooth_h_tbl
        add             x12, x2,  w3, uxtw #1
        sub             w9,  w9,  #25
        ldrsw           x9,  [x5, w9, uxtw #2]
        ld1r            {v5.8h},  [x12]           // v5 = right
        add             x5,  x5,  x9
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        br              x5

        // ---- width 4: four rows per iteration, two rows per register ----
40:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v7.2s}, [x8]             // weights_hor[0..3], duplicated
        sub             x2,  x2,  #8              // x2 -> the four left pixels nearest topleft
        mov             x7,  #-8                  // step to the next four left pixels
        ushll           v7.8h,   v7.8b,   #7      // weights_hor << 7
4:
        ld4r            {v0.4h, v1.4h, v2.4h, v3.4h},  [x2], x7 // four left pixels, each broadcast
        // Pack two rows per register; the left pixels were loaded in
        // reverse row order, so this also flips them.
        zip1            v1.2d,   v1.2d,   v0.2d
        zip1            v0.2d,   v3.2d,   v2.2d
        sub             v0.8h,   v0.8h,   v5.8h   // left - right
        sub             v1.8h,   v1.8h,   v5.8h
        // ((left-right)*weights_hor + 128) >> 8
        sqrdmulh        v20.8h,  v0.8h,   v7.8h
        sqrdmulh        v21.8h,  v1.8h,   v7.8h
        add             v20.8h,  v20.8h,  v5.8h   // + right
        add             v21.8h,  v21.8h,  v5.8h
        st1             {v20.d}[0], [x0], x1
        st1             {v20.d}[1], [x6], x1
        subs            w4,  w4,  #4
        st1             {v21.d}[0], [x0], x1
        st1             {v21.d}[1], [x6], x1
        b.gt            4b
        ret

        // ---- width 8: four rows per iteration, one row per register ----
80:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v7.8b}, [x8]             // weights_hor[0..7]
        sub             x2,  x2,  #8              // x2 -> the four left pixels nearest topleft
        mov             x7,  #-8
        ushll           v7.8h,   v7.8b,   #7      // weights_hor << 7
8:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},  [x2], x7 // four left pixels, each broadcast
        // left - right; the .irp expands to four sub instructions in this
        // (descending) register order.
.irp lft, v3.8h, v2.8h, v1.8h, v0.8h
        sub             \lft,    \lft,    v5.8h
.endr
        // ((left-right)*weights_hor + 128) >> 8. v3 holds the left pixel of
        // the first row, hence the descending register order.
        sqrdmulh        v20.8h,  v3.8h,   v7.8h
        sqrdmulh        v21.8h,  v2.8h,   v7.8h
        sqrdmulh        v22.8h,  v1.8h,   v7.8h
        sqrdmulh        v23.8h,  v0.8h,   v7.8h
        // + right
.irp px, v20.8h, v21.8h, v22.8h, v23.8h
        add             \px,     \px,     v5.8h
.endr
        st1             {v20.8h}, [x0], x1
        st1             {v21.8h}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v22.8h}, [x0], x1
        st1             {v23.8h}, [x6], x1
        b.gt            8b
        ret

        // ---- width 16/32/64: four rows per outer pass, 16 pixels per inner pass ----
160:
320:
640:
        AARCH64_VALID_JUMP_TARGET
        sub             x2,  x2,  #8              // x2 -> the four left pixels nearest topleft
        mov             x7,  #-8
        // Set up pointers for four rows in parallel; x0, x6, x5, x10
        add             x5,  x0,  x1
        add             x10, x6,  x1
        lsl             x1,  x1,  #1
        // The inner loop advances each row pointer by 2*width bytes, so
        // shrink the four-row step by that amount.
        sub             x1,  x1,  w3, uxtw #1
        mov             w9,  w3                   // keep width to restart each row group

1:
        ld4r            {v0.8h, v1.8h, v2.8h, v3.8h},   [x2],  x7 // four left pixels, each broadcast
        // left - right
.irp lft, v0.8h, v1.8h, v2.8h, v3.8h
        sub             \lft,    \lft,    v5.8h
.endr
2:
        ld1             {v7.16b}, [x8],   #16     // next 16 weights_hor
        ushll           v6.8h,   v7.8b,   #7      // weights_hor << 7
        ushll2          v7.8h,   v7.16b,  #7
        // ((left-right)*weights_hor + 128) >> 8, two registers per row.
        // v3 holds the left pixel of the first row, v0 the fourth.
        sqrdmulh        v20.8h,  v3.8h,   v6.8h
        sqrdmulh        v21.8h,  v3.8h,   v7.8h
        sqrdmulh        v22.8h,  v2.8h,   v6.8h
        sqrdmulh        v23.8h,  v2.8h,   v7.8h
        sqrdmulh        v24.8h,  v1.8h,   v6.8h
        sqrdmulh        v25.8h,  v1.8h,   v7.8h
        sqrdmulh        v26.8h,  v0.8h,   v6.8h
        sqrdmulh        v27.8h,  v0.8h,   v7.8h
        // + right
.irp px, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h
        add             \px,     \px,     v5.8h
.endr
        subs            w3,  w3,  #16
        st1             {v20.8h, v21.8h}, [x0],  #32
        st1             {v22.8h, v23.8h}, [x6],  #32
        st1             {v24.8h, v25.8h}, [x5],  #32
        st1             {v26.8h, v27.8h}, [x10], #32
        b.gt            2b
        subs            w4,  w4,  #4
        b.le            9f
        // Rewind the weights_hor pointer to the start of the row and move
        // all four output pointers down by four rows.
        sub             x8,  x8,  w9, uxtw
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        add             x5,  x5,  x1
        add             x10, x10, x1
        mov             w3,  w9
        b               1b
9:
        ret
endfunc

// Width-dispatch offsets for ipred_smooth_h_16bpc_neon, indexed by
// clz(width) - 25 (width 64 first, width 4 last).
jumptable ipred_smooth_h_tbl
        .word 640b - ipred_smooth_h_tbl
        .word 320b - ipred_smooth_h_tbl
        .word 160b - ipred_smooth_h_tbl
        .word 80b  - ipred_smooth_h_tbl
        .word 40b  - ipred_smooth_h_tbl
endjumptable

// The directive below opens a constant table; its name and data begin on the
// source line that follows this block.
const
padding_mask_buf 1428*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1429*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1430*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1431*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1432*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1433*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1434*c0909341SAndroid Build Coastguard Workerpadding_mask: 1435*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1436*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1437*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1438*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1439*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1440*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1441*c0909341SAndroid Build Coastguard Workerendconst 1442*c0909341SAndroid Build Coastguard Worker 1443*c0909341SAndroid Build Coastguard Worker// void ipred_z1_upsample_edge_16bpc_neon(pixel *out, const int hsz, 1444*c0909341SAndroid Build Coastguard Worker// const pixel *const in, const int end, 1445*c0909341SAndroid Build Coastguard Worker// const int bitdepth_max); 1446*c0909341SAndroid Build Coastguard Workerfunction ipred_z1_upsample_edge_16bpc_neon, export=1 1447*c0909341SAndroid Build Coastguard Worker dup v30.8h, w4 // bitdepth_max 1448*c0909341SAndroid Build Coastguard Worker movrel x4, padding_mask 1449*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h}, [x2] // in[] 1450*c0909341SAndroid Build Coastguard 
Worker add x5, x2, w3, uxtw #1 // in[end] 1451*c0909341SAndroid Build Coastguard Worker sub x4, x4, w3, uxtw #1 1452*c0909341SAndroid Build Coastguard Worker 1453*c0909341SAndroid Build Coastguard Worker ld1r {v2.8h}, [x5] // padding 1454*c0909341SAndroid Build Coastguard Worker ld1 {v3.8h, v4.8h}, [x4] // padding_mask 1455*c0909341SAndroid Build Coastguard Worker 1456*c0909341SAndroid Build Coastguard Worker movi v31.8h, #9 1457*c0909341SAndroid Build Coastguard Worker 1458*c0909341SAndroid Build Coastguard Worker bit v0.16b, v2.16b, v3.16b // padded in[] 1459*c0909341SAndroid Build Coastguard Worker bit v1.16b, v2.16b, v4.16b 1460*c0909341SAndroid Build Coastguard Worker 1461*c0909341SAndroid Build Coastguard Worker ext v4.16b, v0.16b, v1.16b, #2 1462*c0909341SAndroid Build Coastguard Worker ext v5.16b, v1.16b, v2.16b, #2 1463*c0909341SAndroid Build Coastguard Worker ext v6.16b, v0.16b, v1.16b, #4 1464*c0909341SAndroid Build Coastguard Worker ext v7.16b, v1.16b, v2.16b, #4 1465*c0909341SAndroid Build Coastguard Worker ext v16.16b, v0.16b, v1.16b, #6 1466*c0909341SAndroid Build Coastguard Worker ext v17.16b, v1.16b, v2.16b, #6 1467*c0909341SAndroid Build Coastguard Worker 1468*c0909341SAndroid Build Coastguard Worker add v18.8h, v4.8h, v6.8h // in[i+1] + in[i+2] 1469*c0909341SAndroid Build Coastguard Worker add v19.8h, v5.8h, v7.8h 1470*c0909341SAndroid Build Coastguard Worker add v20.8h, v0.8h, v16.8h 1471*c0909341SAndroid Build Coastguard Worker add v21.8h, v1.8h, v17.8h 1472*c0909341SAndroid Build Coastguard Worker umull v22.4s, v18.4h, v31.4h // 9*(in[i+1] + in[i+2]) 1473*c0909341SAndroid Build Coastguard Worker umull2 v23.4s, v18.8h, v31.8h 1474*c0909341SAndroid Build Coastguard Worker umull v24.4s, v19.4h, v31.4h 1475*c0909341SAndroid Build Coastguard Worker umull2 v25.4s, v19.8h, v31.8h 1476*c0909341SAndroid Build Coastguard Worker usubw v22.4s, v22.4s, v20.4h 1477*c0909341SAndroid Build Coastguard Worker usubw2 v23.4s, v23.4s, v20.8h 1478*c0909341SAndroid 
Build Coastguard Worker usubw v24.4s, v24.4s, v21.4h 1479*c0909341SAndroid Build Coastguard Worker usubw2 v25.4s, v25.4s, v21.8h 1480*c0909341SAndroid Build Coastguard Worker 1481*c0909341SAndroid Build Coastguard Worker sqrshrun v16.4h, v22.4s, #4 1482*c0909341SAndroid Build Coastguard Worker sqrshrun2 v16.8h, v23.4s, #4 1483*c0909341SAndroid Build Coastguard Worker sqrshrun v17.4h, v24.4s, #4 1484*c0909341SAndroid Build Coastguard Worker sqrshrun2 v17.8h, v25.4s, #4 1485*c0909341SAndroid Build Coastguard Worker 1486*c0909341SAndroid Build Coastguard Worker smin v16.8h, v16.8h, v30.8h 1487*c0909341SAndroid Build Coastguard Worker smin v17.8h, v17.8h, v30.8h 1488*c0909341SAndroid Build Coastguard Worker 1489*c0909341SAndroid Build Coastguard Worker zip1 v0.8h, v4.8h, v16.8h 1490*c0909341SAndroid Build Coastguard Worker zip2 v1.8h, v4.8h, v16.8h 1491*c0909341SAndroid Build Coastguard Worker zip1 v2.8h, v5.8h, v17.8h 1492*c0909341SAndroid Build Coastguard Worker zip2 v3.8h, v5.8h, v17.8h 1493*c0909341SAndroid Build Coastguard Worker 1494*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0] 1495*c0909341SAndroid Build Coastguard Worker 1496*c0909341SAndroid Build Coastguard Worker ret 1497*c0909341SAndroid Build Coastguard Workerendfunc 1498*c0909341SAndroid Build Coastguard Worker 1499*c0909341SAndroid Build Coastguard Worker// void ipred_z2_upsample_edge_16bpc_neon(pixel *out, const int sz, 1500*c0909341SAndroid Build Coastguard Worker// const pixel *const in, 1501*c0909341SAndroid Build Coastguard Worker// const int bitdepth_max); 1502*c0909341SAndroid Build Coastguard Workerfunction ipred_z2_upsample_edge_16bpc_neon, export=1 1503*c0909341SAndroid Build Coastguard Worker dup v30.8h, w3 // bitdepth_max 1504*c0909341SAndroid Build Coastguard Worker // Here, sz is 4 or 8, and we produce 2*sz+1 output elements. 
1505*c0909341SAndroid Build Coastguard Worker movrel x4, padding_mask 1506*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h}, [x2] // in[] 1507*c0909341SAndroid Build Coastguard Worker add x5, x2, w1, uxtw #1 // in[sz] 1508*c0909341SAndroid Build Coastguard Worker sub x4, x4, w1, uxtw #1 1509*c0909341SAndroid Build Coastguard Worker 1510*c0909341SAndroid Build Coastguard Worker ld1r {v3.8h}, [x2] // in[0] for padding 1511*c0909341SAndroid Build Coastguard Worker ld1r {v2.8h}, [x5] // padding 1512*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x4] // padding_mask 1513*c0909341SAndroid Build Coastguard Worker 1514*c0909341SAndroid Build Coastguard Worker movi v31.8h, #9 1515*c0909341SAndroid Build Coastguard Worker 1516*c0909341SAndroid Build Coastguard Worker bit v0.16b, v2.16b, v4.16b // padded in[] 1517*c0909341SAndroid Build Coastguard Worker bit v1.16b, v2.16b, v5.16b 1518*c0909341SAndroid Build Coastguard Worker 1519*c0909341SAndroid Build Coastguard Worker ext v4.16b, v3.16b, v0.16b, #14 1520*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #2 1521*c0909341SAndroid Build Coastguard Worker ext v6.16b, v0.16b, v1.16b, #4 1522*c0909341SAndroid Build Coastguard Worker 1523*c0909341SAndroid Build Coastguard Worker add v16.8h, v0.8h, v5.8h // in[i+0] + in[i+1] 1524*c0909341SAndroid Build Coastguard Worker add v17.8h, v4.8h, v6.8h // in[i-1] + in[i+2] 1525*c0909341SAndroid Build Coastguard Worker umull v18.4s, v16.4h, v31.4h // 9*(in[i+1] + in[i+2]) 1526*c0909341SAndroid Build Coastguard Worker umull2 v19.4s, v16.8h, v31.8h 1527*c0909341SAndroid Build Coastguard Worker usubw v18.4s, v18.4s, v17.4h 1528*c0909341SAndroid Build Coastguard Worker usubw2 v19.4s, v19.4s, v17.8h 1529*c0909341SAndroid Build Coastguard Worker 1530*c0909341SAndroid Build Coastguard Worker sqrshrun v16.4h, v18.4s, #4 1531*c0909341SAndroid Build Coastguard Worker sqrshrun2 v16.8h, v19.4s, #4 1532*c0909341SAndroid Build Coastguard Worker 
1533*c0909341SAndroid Build Coastguard Worker add x5, x0, #2*16 1534*c0909341SAndroid Build Coastguard Worker 1535*c0909341SAndroid Build Coastguard Worker smin v16.8h, v16.8h, v30.8h 1536*c0909341SAndroid Build Coastguard Worker 1537*c0909341SAndroid Build Coastguard Worker zip1 v4.8h, v0.8h, v16.8h 1538*c0909341SAndroid Build Coastguard Worker zip2 v5.8h, v0.8h, v16.8h 1539*c0909341SAndroid Build Coastguard Worker 1540*c0909341SAndroid Build Coastguard Worker st1 {v2.h}[0], [x5] 1541*c0909341SAndroid Build Coastguard Worker // In case sz=8, output one single pixel in out[16]. 1542*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h}, [x0] 1543*c0909341SAndroid Build Coastguard Worker 1544*c0909341SAndroid Build Coastguard Worker ret 1545*c0909341SAndroid Build Coastguard Workerendfunc 1546*c0909341SAndroid Build Coastguard Worker 1547*c0909341SAndroid Build Coastguard Workerconst edge_filter 1548*c0909341SAndroid Build Coastguard Worker .short 0, 4, 8, 0 1549*c0909341SAndroid Build Coastguard Worker .short 0, 5, 6, 0 1550*c0909341SAndroid Build Coastguard Worker// Leaving out the coeffs for strength=3 1551*c0909341SAndroid Build Coastguard Worker// .byte 2, 4, 4, 0 1552*c0909341SAndroid Build Coastguard Workerendconst 1553*c0909341SAndroid Build Coastguard Worker 1554*c0909341SAndroid Build Coastguard Worker// void ipred_z1_filter_edge_16bpc_neon(pixel *out, const int sz, 1555*c0909341SAndroid Build Coastguard Worker// const pixel *const in, const int end, 1556*c0909341SAndroid Build Coastguard Worker// const int strength); 1557*c0909341SAndroid Build Coastguard Workerfunction ipred_z1_filter_edge_16bpc_neon, export=1 1558*c0909341SAndroid Build Coastguard Worker cmp w4, #3 1559*c0909341SAndroid Build Coastguard Worker b.eq L(fivetap) // if (strength == 3) goto fivetap 1560*c0909341SAndroid Build Coastguard Worker 1561*c0909341SAndroid Build Coastguard Worker movrel x5, edge_filter, -6 1562*c0909341SAndroid Build Coastguard Worker add x5, x5, w4, uxtw #3 
// edge_filter + 2*((strength - 1)*4 + 1) 1563*c0909341SAndroid Build Coastguard Worker 1564*c0909341SAndroid Build Coastguard Worker ld1 {v31.s}[0], [x5] // kernel[1-2] 1565*c0909341SAndroid Build Coastguard Worker 1566*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h}, [x2], #16 1567*c0909341SAndroid Build Coastguard Worker 1568*c0909341SAndroid Build Coastguard Worker dup v30.8h, v31.h[0] 1569*c0909341SAndroid Build Coastguard Worker dup v31.8h, v31.h[1] 1570*c0909341SAndroid Build Coastguard Worker1: 1571*c0909341SAndroid Build Coastguard Worker // in[end], is the last valid pixel. We produce 16 pixels out by 1572*c0909341SAndroid Build Coastguard Worker // using 18 pixels in - the last pixel used is [17] of the ones 1573*c0909341SAndroid Build Coastguard Worker // read/buffered. 1574*c0909341SAndroid Build Coastguard Worker cmp w3, #17 1575*c0909341SAndroid Build Coastguard Worker ld1 {v1.8h, v2.8h}, [x2], #32 1576*c0909341SAndroid Build Coastguard Worker b.lt 2f 1577*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1578*c0909341SAndroid Build Coastguard Worker ext v4.16b, v1.16b, v2.16b, #2 1579*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #4 1580*c0909341SAndroid Build Coastguard Worker ext v6.16b, v1.16b, v2.16b, #4 1581*c0909341SAndroid Build Coastguard Worker mul v16.8h, v0.8h, v30.8h 1582*c0909341SAndroid Build Coastguard Worker mla v16.8h, v3.8h, v31.8h 1583*c0909341SAndroid Build Coastguard Worker mla v16.8h, v5.8h, v30.8h 1584*c0909341SAndroid Build Coastguard Worker mul v17.8h, v1.8h, v30.8h 1585*c0909341SAndroid Build Coastguard Worker mla v17.8h, v4.8h, v31.8h 1586*c0909341SAndroid Build Coastguard Worker mla v17.8h, v6.8h, v30.8h 1587*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1588*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 1589*c0909341SAndroid Build Coastguard Worker urshr v16.8h, v16.8h, #4 1590*c0909341SAndroid Build Coastguard Worker urshr v17.8h, v17.8h, #4 
1591*c0909341SAndroid Build Coastguard Worker sub w3, w3, #16 1592*c0909341SAndroid Build Coastguard Worker st1 {v16.8h, v17.8h}, [x0], #32 1593*c0909341SAndroid Build Coastguard Worker b.gt 1b 1594*c0909341SAndroid Build Coastguard Worker ret 1595*c0909341SAndroid Build Coastguard Worker2: 1596*c0909341SAndroid Build Coastguard Worker // Right padding 1597*c0909341SAndroid Build Coastguard Worker 1598*c0909341SAndroid Build Coastguard Worker // x2[w3-24] is the padding pixel (x2 points 24 pixels ahead) 1599*c0909341SAndroid Build Coastguard Worker movrel x5, padding_mask 1600*c0909341SAndroid Build Coastguard Worker sub w6, w3, #24 1601*c0909341SAndroid Build Coastguard Worker sub x5, x5, w3, uxtw #1 1602*c0909341SAndroid Build Coastguard Worker add x6, x2, w6, sxtw #1 1603*c0909341SAndroid Build Coastguard Worker 1604*c0909341SAndroid Build Coastguard Worker ld1 {v3.8h, v4.8h}, [x5] // padding_mask 1605*c0909341SAndroid Build Coastguard Worker 1606*c0909341SAndroid Build Coastguard Worker ld1r {v2.8h}, [x6] 1607*c0909341SAndroid Build Coastguard Worker bit v0.16b, v2.16b, v3.16b // Pad v0-v1 1608*c0909341SAndroid Build Coastguard Worker bit v1.16b, v2.16b, v4.16b 1609*c0909341SAndroid Build Coastguard Worker 1610*c0909341SAndroid Build Coastguard Worker // Filter one block 1611*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1612*c0909341SAndroid Build Coastguard Worker ext v4.16b, v1.16b, v2.16b, #2 1613*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #4 1614*c0909341SAndroid Build Coastguard Worker ext v6.16b, v1.16b, v2.16b, #4 1615*c0909341SAndroid Build Coastguard Worker mul v16.8h, v0.8h, v30.8h 1616*c0909341SAndroid Build Coastguard Worker mla v16.8h, v3.8h, v31.8h 1617*c0909341SAndroid Build Coastguard Worker mla v16.8h, v5.8h, v30.8h 1618*c0909341SAndroid Build Coastguard Worker mul v17.8h, v1.8h, v30.8h 1619*c0909341SAndroid Build Coastguard Worker mla v17.8h, v4.8h, v31.8h 1620*c0909341SAndroid Build 
Coastguard Worker mla v17.8h, v6.8h, v30.8h 1621*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1622*c0909341SAndroid Build Coastguard Worker urshr v16.8h, v16.8h, #4 1623*c0909341SAndroid Build Coastguard Worker urshr v17.8h, v17.8h, #4 1624*c0909341SAndroid Build Coastguard Worker st1 {v16.8h, v17.8h}, [x0], #32 1625*c0909341SAndroid Build Coastguard Worker b.le 9f 1626*c0909341SAndroid Build Coastguard Worker5: 1627*c0909341SAndroid Build Coastguard Worker // After one block, any remaining output would only be filtering 1628*c0909341SAndroid Build Coastguard Worker // padding - thus just store the padding. 1629*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1630*c0909341SAndroid Build Coastguard Worker st1 {v2.16b}, [x0], #16 1631*c0909341SAndroid Build Coastguard Worker b.gt 5b 1632*c0909341SAndroid Build Coastguard Worker9: 1633*c0909341SAndroid Build Coastguard Worker ret 1634*c0909341SAndroid Build Coastguard Worker 1635*c0909341SAndroid Build Coastguard WorkerL(fivetap): 1636*c0909341SAndroid Build Coastguard Worker sub x2, x2, #2 // topleft -= 1 pixel 1637*c0909341SAndroid Build Coastguard Worker movi v29.8h, #2 1638*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h}, [x2], #16 1639*c0909341SAndroid Build Coastguard Worker movi v30.8h, #4 1640*c0909341SAndroid Build Coastguard Worker movi v31.8h, #4 1641*c0909341SAndroid Build Coastguard Worker ins v0.h[0], v0.h[1] 1642*c0909341SAndroid Build Coastguard Worker1: 1643*c0909341SAndroid Build Coastguard Worker // in[end+1], is the last valid pixel. We produce 16 pixels out by 1644*c0909341SAndroid Build Coastguard Worker // using 20 pixels in - the last pixel used is [19] of the ones 1645*c0909341SAndroid Build Coastguard Worker // read/buffered. 
1646*c0909341SAndroid Build Coastguard Worker cmp w3, #18 1647*c0909341SAndroid Build Coastguard Worker ld1 {v1.8h, v2.8h}, [x2], #32 1648*c0909341SAndroid Build Coastguard Worker b.lt 2f // if (end + 1 < 19) 1649*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1650*c0909341SAndroid Build Coastguard Worker ext v4.16b, v1.16b, v2.16b, #2 1651*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #4 1652*c0909341SAndroid Build Coastguard Worker ext v6.16b, v1.16b, v2.16b, #4 1653*c0909341SAndroid Build Coastguard Worker ext v16.16b, v0.16b, v1.16b, #6 1654*c0909341SAndroid Build Coastguard Worker ext v17.16b, v1.16b, v2.16b, #6 1655*c0909341SAndroid Build Coastguard Worker ext v18.16b, v0.16b, v1.16b, #8 1656*c0909341SAndroid Build Coastguard Worker ext v19.16b, v1.16b, v2.16b, #8 1657*c0909341SAndroid Build Coastguard Worker mul v20.8h, v0.8h, v29.8h 1658*c0909341SAndroid Build Coastguard Worker mla v20.8h, v3.8h, v30.8h 1659*c0909341SAndroid Build Coastguard Worker mla v20.8h, v5.8h, v31.8h 1660*c0909341SAndroid Build Coastguard Worker mla v20.8h, v16.8h, v30.8h 1661*c0909341SAndroid Build Coastguard Worker mla v20.8h, v18.8h, v29.8h 1662*c0909341SAndroid Build Coastguard Worker mul v21.8h, v1.8h, v29.8h 1663*c0909341SAndroid Build Coastguard Worker mla v21.8h, v4.8h, v30.8h 1664*c0909341SAndroid Build Coastguard Worker mla v21.8h, v6.8h, v31.8h 1665*c0909341SAndroid Build Coastguard Worker mla v21.8h, v17.8h, v30.8h 1666*c0909341SAndroid Build Coastguard Worker mla v21.8h, v19.8h, v29.8h 1667*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1668*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 1669*c0909341SAndroid Build Coastguard Worker urshr v20.8h, v20.8h, #4 1670*c0909341SAndroid Build Coastguard Worker urshr v21.8h, v21.8h, #4 1671*c0909341SAndroid Build Coastguard Worker sub w3, w3, #16 1672*c0909341SAndroid Build Coastguard Worker st1 {v20.8h, v21.8h}, [x0], #32 1673*c0909341SAndroid Build 
Coastguard Worker b.gt 1b 1674*c0909341SAndroid Build Coastguard Worker ret 1675*c0909341SAndroid Build Coastguard Worker2: 1676*c0909341SAndroid Build Coastguard Worker // Right padding 1677*c0909341SAndroid Build Coastguard Worker 1678*c0909341SAndroid Build Coastguard Worker // x2[w3+1-24] is the padding pixel (x2 points 24 pixels ahead) 1679*c0909341SAndroid Build Coastguard Worker movrel x5, padding_mask, -2 1680*c0909341SAndroid Build Coastguard Worker sub w6, w3, #23 1681*c0909341SAndroid Build Coastguard Worker sub x5, x5, w3, uxtw #1 1682*c0909341SAndroid Build Coastguard Worker add x6, x2, w6, sxtw #1 1683*c0909341SAndroid Build Coastguard Worker 1684*c0909341SAndroid Build Coastguard Worker ld1 {v3.8h, v4.8h, v5.8h}, [x5] // padding_mask 1685*c0909341SAndroid Build Coastguard Worker 1686*c0909341SAndroid Build Coastguard Worker ld1r {v28.8h}, [x6] 1687*c0909341SAndroid Build Coastguard Worker bit v0.16b, v28.16b, v3.16b // Pad v0-v2 1688*c0909341SAndroid Build Coastguard Worker bit v1.16b, v28.16b, v4.16b 1689*c0909341SAndroid Build Coastguard Worker bit v2.16b, v28.16b, v5.16b 1690*c0909341SAndroid Build Coastguard Worker4: 1691*c0909341SAndroid Build Coastguard Worker // Filter one block 1692*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1693*c0909341SAndroid Build Coastguard Worker ext v4.16b, v1.16b, v2.16b, #2 1694*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #4 1695*c0909341SAndroid Build Coastguard Worker ext v6.16b, v1.16b, v2.16b, #4 1696*c0909341SAndroid Build Coastguard Worker ext v16.16b, v0.16b, v1.16b, #6 1697*c0909341SAndroid Build Coastguard Worker ext v17.16b, v1.16b, v2.16b, #6 1698*c0909341SAndroid Build Coastguard Worker ext v18.16b, v0.16b, v1.16b, #8 1699*c0909341SAndroid Build Coastguard Worker ext v19.16b, v1.16b, v2.16b, #8 1700*c0909341SAndroid Build Coastguard Worker mul v20.8h, v0.8h, v29.8h 1701*c0909341SAndroid Build Coastguard Worker mla v20.8h, v3.8h, v30.8h 
1702*c0909341SAndroid Build Coastguard Worker mla v20.8h, v5.8h, v31.8h 1703*c0909341SAndroid Build Coastguard Worker mla v20.8h, v16.8h, v30.8h 1704*c0909341SAndroid Build Coastguard Worker mla v20.8h, v18.8h, v29.8h 1705*c0909341SAndroid Build Coastguard Worker mul v21.8h, v1.8h, v29.8h 1706*c0909341SAndroid Build Coastguard Worker mla v21.8h, v4.8h, v30.8h 1707*c0909341SAndroid Build Coastguard Worker mla v21.8h, v6.8h, v31.8h 1708*c0909341SAndroid Build Coastguard Worker mla v21.8h, v17.8h, v30.8h 1709*c0909341SAndroid Build Coastguard Worker mla v21.8h, v19.8h, v29.8h 1710*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1711*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 1712*c0909341SAndroid Build Coastguard Worker mov v1.16b, v28.16b 1713*c0909341SAndroid Build Coastguard Worker mov v2.16b, v28.16b 1714*c0909341SAndroid Build Coastguard Worker urshr v20.8h, v20.8h, #4 1715*c0909341SAndroid Build Coastguard Worker urshr v21.8h, v21.8h, #4 1716*c0909341SAndroid Build Coastguard Worker sub w3, w3, #16 1717*c0909341SAndroid Build Coastguard Worker st1 {v20.8h, v21.8h}, [x0], #32 1718*c0909341SAndroid Build Coastguard Worker b.le 9f 1719*c0909341SAndroid Build Coastguard Worker // v0-v1[w3+1] is the last valid pixel; if (w3 + 1 > 0) we need to 1720*c0909341SAndroid Build Coastguard Worker // filter properly once more - aka (w3 >= 0). 1721*c0909341SAndroid Build Coastguard Worker cmp w3, #0 1722*c0909341SAndroid Build Coastguard Worker b.ge 4b 1723*c0909341SAndroid Build Coastguard Worker5: 1724*c0909341SAndroid Build Coastguard Worker // When w3 <= 0, all remaining pixels in v0-v1 are equal to the 1725*c0909341SAndroid Build Coastguard Worker // last valid pixel - thus just output that without filtering. 
1726*c0909341SAndroid Build Coastguard Worker subs w1, w1, #8 1727*c0909341SAndroid Build Coastguard Worker st1 {v28.8h}, [x0], #16 1728*c0909341SAndroid Build Coastguard Worker b.gt 5b 1729*c0909341SAndroid Build Coastguard Worker9: 1730*c0909341SAndroid Build Coastguard Worker ret 1731*c0909341SAndroid Build Coastguard Workerendfunc 1732*c0909341SAndroid Build Coastguard Worker 1733*c0909341SAndroid Build Coastguard Worker// void ipred_pixel_set_16bpc_neon(pixel *out, const pixel px, 1734*c0909341SAndroid Build Coastguard Worker// const int n); 1735*c0909341SAndroid Build Coastguard Workerfunction ipred_pixel_set_16bpc_neon, export=1 1736*c0909341SAndroid Build Coastguard Worker dup v0.8h, w1 1737*c0909341SAndroid Build Coastguard Worker1: 1738*c0909341SAndroid Build Coastguard Worker subs w2, w2, #8 1739*c0909341SAndroid Build Coastguard Worker st1 {v0.8h}, [x0], #16 1740*c0909341SAndroid Build Coastguard Worker b.gt 1b 1741*c0909341SAndroid Build Coastguard Worker ret 1742*c0909341SAndroid Build Coastguard Workerendfunc 1743*c0909341SAndroid Build Coastguard Worker 1744*c0909341SAndroid Build Coastguard Worker// void ipred_z1_fill1_16bpc_neon(pixel *dst, const ptrdiff_t stride, 1745*c0909341SAndroid Build Coastguard Worker// const pixel *const top, 1746*c0909341SAndroid Build Coastguard Worker// const int width, const int height, 1747*c0909341SAndroid Build Coastguard Worker// const int dx, const int max_base_x); 1748*c0909341SAndroid Build Coastguard Workerfunction ipred_z1_fill1_16bpc_neon, export=1 1749*c0909341SAndroid Build Coastguard Worker clz w9, w3 1750*c0909341SAndroid Build Coastguard Worker movrel x8, ipred_z1_fill1_tbl 1751*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 1752*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x8, w9, uxtw #2] 1753*c0909341SAndroid Build Coastguard Worker add x10, x2, w6, uxtw #1 // top[max_base_x] 1754*c0909341SAndroid Build Coastguard Worker add x8, x8, x9 1755*c0909341SAndroid Build Coastguard Worker 
ld1r {v31.8h}, [x10] // padding 1756*c0909341SAndroid Build Coastguard Worker mov w7, w5 1757*c0909341SAndroid Build Coastguard Worker mov w15, #64 1758*c0909341SAndroid Build Coastguard Worker br x8 1759*c0909341SAndroid Build Coastguard Worker40: 1760*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1761*c0909341SAndroid Build Coastguard Worker4: 1762*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 1763*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1764*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1765*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1766*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1767*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1768*c0909341SAndroid Build Coastguard Worker b.ge 49f 1769*c0909341SAndroid Build Coastguard Worker lsl w8, w8, #1 1770*c0909341SAndroid Build Coastguard Worker lsl w10, w10, #1 1771*c0909341SAndroid Build Coastguard Worker ldr q0, [x2, w8, uxtw] // top[base] 1772*c0909341SAndroid Build Coastguard Worker ldr q2, [x2, w10, uxtw] 1773*c0909341SAndroid Build Coastguard Worker dup v4.4h, w9 // frac 1774*c0909341SAndroid Build Coastguard Worker dup v5.4h, w11 1775*c0909341SAndroid Build Coastguard Worker ext v1.16b, v0.16b, v0.16b, #2 // top[base+1] 1776*c0909341SAndroid Build Coastguard Worker ext v3.16b, v2.16b, v2.16b, #2 1777*c0909341SAndroid Build Coastguard Worker sub v6.4h, v1.4h, v0.4h // top[base+1]-top[base] 1778*c0909341SAndroid Build Coastguard Worker sub v7.4h, v3.4h, v2.4h 1779*c0909341SAndroid Build Coastguard Worker ushll v16.4s, v0.4h, #6 // top[base]*64 1780*c0909341SAndroid Build Coastguard Worker ushll v17.4s, v2.4h, #6 1781*c0909341SAndroid Build Coastguard Worker smlal v16.4s, v6.4h, v4.4h // + top[base+1]*frac 1782*c0909341SAndroid Build Coastguard Worker smlal v17.4s, v7.4h, v5.4h 1783*c0909341SAndroid Build Coastguard Worker rshrn v16.4h, v16.4s, #6 
1784*c0909341SAndroid Build Coastguard Worker rshrn v17.4h, v17.4s, #6 1785*c0909341SAndroid Build Coastguard Worker st1 {v16.4h}, [x0], x1 1786*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1787*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1788*c0909341SAndroid Build Coastguard Worker st1 {v17.4h}, [x0], x1 1789*c0909341SAndroid Build Coastguard Worker b.gt 4b 1790*c0909341SAndroid Build Coastguard Worker ret 1791*c0909341SAndroid Build Coastguard Worker 1792*c0909341SAndroid Build Coastguard Worker49: 1793*c0909341SAndroid Build Coastguard Worker st1 {v31.4h}, [x0], x1 1794*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1795*c0909341SAndroid Build Coastguard Worker st1 {v31.4h}, [x0], x1 1796*c0909341SAndroid Build Coastguard Worker b.gt 49b 1797*c0909341SAndroid Build Coastguard Worker ret 1798*c0909341SAndroid Build Coastguard Worker 1799*c0909341SAndroid Build Coastguard Worker80: 1800*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1801*c0909341SAndroid Build Coastguard Worker8: 1802*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 1803*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1804*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1805*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1806*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1807*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1808*c0909341SAndroid Build Coastguard Worker b.ge 89f 1809*c0909341SAndroid Build Coastguard Worker add x8, x2, w8, uxtw #1 1810*c0909341SAndroid Build Coastguard Worker add x10, x2, w10, uxtw #1 1811*c0909341SAndroid Build Coastguard Worker dup v4.8h, w9 // frac 1812*c0909341SAndroid Build Coastguard Worker dup v5.8h, w11 1813*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h}, [x8] // top[base] 1814*c0909341SAndroid Build Coastguard Worker ld1 {v2.8h}, [x10] 1815*c0909341SAndroid Build 
Coastguard Worker sub w9, w15, w9 // 64 - frac 1816*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 1817*c0909341SAndroid Build Coastguard Worker ldr h1, [x8, #16] 1818*c0909341SAndroid Build Coastguard Worker ldr h3, [x10, #16] 1819*c0909341SAndroid Build Coastguard Worker dup v6.8h, w9 // 64 - frac 1820*c0909341SAndroid Build Coastguard Worker dup v7.8h, w11 1821*c0909341SAndroid Build Coastguard Worker ext v1.16b, v0.16b, v1.16b, #2 // top[base+1] 1822*c0909341SAndroid Build Coastguard Worker ext v3.16b, v2.16b, v3.16b, #2 1823*c0909341SAndroid Build Coastguard Worker umull v16.4s, v0.4h, v6.4h // top[base]*(64-frac) 1824*c0909341SAndroid Build Coastguard Worker umlal v16.4s, v1.4h, v4.4h // + top[base+1]*frac 1825*c0909341SAndroid Build Coastguard Worker umull2 v17.4s, v0.8h, v6.8h 1826*c0909341SAndroid Build Coastguard Worker umlal2 v17.4s, v1.8h, v4.8h 1827*c0909341SAndroid Build Coastguard Worker umull v18.4s, v2.4h, v7.4h 1828*c0909341SAndroid Build Coastguard Worker umlal v18.4s, v3.4h, v5.4h 1829*c0909341SAndroid Build Coastguard Worker umull2 v19.4s, v2.8h, v7.8h 1830*c0909341SAndroid Build Coastguard Worker umlal2 v19.4s, v3.8h, v5.8h 1831*c0909341SAndroid Build Coastguard Worker rshrn v16.4h, v16.4s, #6 1832*c0909341SAndroid Build Coastguard Worker rshrn2 v16.8h, v17.4s, #6 1833*c0909341SAndroid Build Coastguard Worker rshrn v17.4h, v18.4s, #6 1834*c0909341SAndroid Build Coastguard Worker rshrn2 v17.8h, v19.4s, #6 1835*c0909341SAndroid Build Coastguard Worker st1 {v16.8h}, [x0], x1 1836*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1837*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1838*c0909341SAndroid Build Coastguard Worker st1 {v17.8h}, [x0], x1 1839*c0909341SAndroid Build Coastguard Worker b.gt 8b 1840*c0909341SAndroid Build Coastguard Worker ret 1841*c0909341SAndroid Build Coastguard Worker 1842*c0909341SAndroid Build Coastguard Worker89: 1843*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, 
[x0], x1 1844*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1845*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, [x0], x1 1846*c0909341SAndroid Build Coastguard Worker b.gt 89b 1847*c0909341SAndroid Build Coastguard Worker ret 1848*c0909341SAndroid Build Coastguard Worker 1849*c0909341SAndroid Build Coastguard Worker160: 1850*c0909341SAndroid Build Coastguard Worker320: 1851*c0909341SAndroid Build Coastguard Worker640: 1852*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1853*c0909341SAndroid Build Coastguard Worker 1854*c0909341SAndroid Build Coastguard Worker mov w12, w3 1855*c0909341SAndroid Build Coastguard Worker 1856*c0909341SAndroid Build Coastguard Worker add x13, x0, x1 1857*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 1858*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw #1 1859*c0909341SAndroid Build Coastguard Worker1: 1860*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 1861*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1862*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1863*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1864*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1865*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1866*c0909341SAndroid Build Coastguard Worker b.ge 169f 1867*c0909341SAndroid Build Coastguard Worker add x8, x2, w8, uxtw #1 1868*c0909341SAndroid Build Coastguard Worker add x10, x2, w10, uxtw #1 1869*c0909341SAndroid Build Coastguard Worker dup v6.8h, w9 // frac 1870*c0909341SAndroid Build Coastguard Worker dup v7.8h, w11 1871*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h}, [x8], #48 // top[base] 1872*c0909341SAndroid Build Coastguard Worker ld1 {v3.8h, v4.8h, v5.8h}, [x10], #48 1873*c0909341SAndroid Build Coastguard Worker sub w9, w15, w9 // 64 - frac 1874*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 
1875*c0909341SAndroid Build Coastguard Worker dup v16.8h, w9 // 64 - frac 1876*c0909341SAndroid Build Coastguard Worker dup v17.8h, w11 1877*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1878*c0909341SAndroid Build Coastguard Worker2: 1879*c0909341SAndroid Build Coastguard Worker ext v18.16b, v0.16b, v1.16b, #2 // top[base+1] 1880*c0909341SAndroid Build Coastguard Worker ext v19.16b, v1.16b, v2.16b, #2 1881*c0909341SAndroid Build Coastguard Worker ext v20.16b, v3.16b, v4.16b, #2 1882*c0909341SAndroid Build Coastguard Worker ext v21.16b, v4.16b, v5.16b, #2 1883*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 1884*c0909341SAndroid Build Coastguard Worker umull v22.4s, v0.4h, v16.4h // top[base]*(64-frac) 1885*c0909341SAndroid Build Coastguard Worker umlal v22.4s, v18.4h, v6.4h // + top[base+1]*frac 1886*c0909341SAndroid Build Coastguard Worker umull2 v23.4s, v0.8h, v16.8h 1887*c0909341SAndroid Build Coastguard Worker umlal2 v23.4s, v18.8h, v6.8h 1888*c0909341SAndroid Build Coastguard Worker umull v24.4s, v1.4h, v16.4h 1889*c0909341SAndroid Build Coastguard Worker umlal v24.4s, v19.4h, v6.4h 1890*c0909341SAndroid Build Coastguard Worker umull2 v25.4s, v1.8h, v16.8h 1891*c0909341SAndroid Build Coastguard Worker umlal2 v25.4s, v19.8h, v6.8h 1892*c0909341SAndroid Build Coastguard Worker umull v26.4s, v3.4h, v17.4h 1893*c0909341SAndroid Build Coastguard Worker umlal v26.4s, v20.4h, v7.4h 1894*c0909341SAndroid Build Coastguard Worker umull2 v27.4s, v3.8h, v17.8h 1895*c0909341SAndroid Build Coastguard Worker umlal2 v27.4s, v20.8h, v7.8h 1896*c0909341SAndroid Build Coastguard Worker umull v28.4s, v4.4h, v17.4h 1897*c0909341SAndroid Build Coastguard Worker umlal v28.4s, v21.4h, v7.4h 1898*c0909341SAndroid Build Coastguard Worker umull2 v29.4s, v4.8h, v17.8h 1899*c0909341SAndroid Build Coastguard Worker umlal2 v29.4s, v21.8h, v7.8h 1900*c0909341SAndroid Build Coastguard Worker rshrn v22.4h, v22.4s, #6 1901*c0909341SAndroid Build Coastguard 
Worker rshrn2 v22.8h, v23.4s, #6 1902*c0909341SAndroid Build Coastguard Worker rshrn v23.4h, v24.4s, #6 1903*c0909341SAndroid Build Coastguard Worker rshrn2 v23.8h, v25.4s, #6 1904*c0909341SAndroid Build Coastguard Worker rshrn v24.4h, v26.4s, #6 1905*c0909341SAndroid Build Coastguard Worker rshrn2 v24.8h, v27.4s, #6 1906*c0909341SAndroid Build Coastguard Worker rshrn v25.4h, v28.4s, #6 1907*c0909341SAndroid Build Coastguard Worker rshrn2 v25.8h, v29.4s, #6 1908*c0909341SAndroid Build Coastguard Worker st1 {v22.8h, v23.8h}, [x0], #32 1909*c0909341SAndroid Build Coastguard Worker st1 {v24.8h, v25.8h}, [x13], #32 1910*c0909341SAndroid Build Coastguard Worker b.le 3f 1911*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 1912*c0909341SAndroid Build Coastguard Worker ld1 {v1.8h, v2.8h}, [x8], #32 // top[base] 1913*c0909341SAndroid Build Coastguard Worker mov v3.16b, v5.16b 1914*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x10], #32 1915*c0909341SAndroid Build Coastguard Worker b 2b 1916*c0909341SAndroid Build Coastguard Worker 1917*c0909341SAndroid Build Coastguard Worker3: 1918*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1919*c0909341SAndroid Build Coastguard Worker b.le 9f 1920*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 1921*c0909341SAndroid Build Coastguard Worker add x13, x13, x1 1922*c0909341SAndroid Build Coastguard Worker mov w3, w12 1923*c0909341SAndroid Build Coastguard Worker b 1b 1924*c0909341SAndroid Build Coastguard Worker9: 1925*c0909341SAndroid Build Coastguard Worker ret 1926*c0909341SAndroid Build Coastguard Worker 1927*c0909341SAndroid Build Coastguard Worker169: 1928*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, [x0], #16 1929*c0909341SAndroid Build Coastguard Worker subs w3, w3, #8 1930*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, [x13], #16 1931*c0909341SAndroid Build Coastguard Worker b.gt 169b 1932*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1933*c0909341SAndroid 
// (Continuation of the function that starts above this point: loop control
// of its 169: padding path. Instructions are unchanged.)
        b.le            9b
        add             x0,  x0,  x1
        add             x13, x13, x1
        mov             w3,  w12
        b               169b
endfunc

// Jump table for the function above. Each entry is the 32-bit offset of an
// entry label relative to the start of the table, listed for labels
// 640, 320, 160, 80 and 40 in that order.
// NOTE(review): presumably indexed by clz(width) - 25 like the other tables
// in this file -- confirm against the function prologue.
jumptable ipred_z1_fill1_tbl
        .word 640b - ipred_z1_fill1_tbl
        .word 320b - ipred_z1_fill1_tbl
        .word 160b - ipred_z1_fill1_tbl
        .word 80b  - ipred_z1_fill1_tbl
        .word 40b  - ipred_z1_fill1_tbl
endjumptable

// ipred_z1_fill2_16bpc_neon
//
// Fills a 4- or 8-pixel wide block of 16-bit pixels by interpolating along
// the top edge, where horizontally adjacent output pixels are two top[]
// elements apart (the even/odd elements are split with uzp1/uzp2).
// NOTE(review): presumably the variant used for an upsampled top edge.
//
// Register usage, as established by the code below:
//   x0  dst, advanced by x1 after every stored row
//   x1  stride in bytes
//   x2  top, 16-bit pixels
//   w3  width; 8 selects the 8-wide path, anything else takes the 4-wide path
//   w4  height; two rows are produced per loop iteration
//   w5  dx
//   w6  max_base_x, in pixels (it is compared against base = xpos >> 6)
//
// Per row: base = xpos >> 6, frac = xpos & 0x3e, and each output pixel is
//   (top[base]*(64 - frac) + top[base+1]*frac + 32) >> 6
// As soon as the first row of a pair has base >= max_base_x, all remaining
// rows are filled with the padding pixel top[max_base_x] (labels 49/89).
function ipred_z1_fill2_16bpc_neon, export=1
        cmp             w3,  #8
        // Pixels are 16 bit wide, so the pixel index max_base_x has to be
        // scaled by 2 to form the byte offset, and the padding value has to
        // be replicated as a halfword. (Previously this used an unscaled
        // offset and a byte-wise ld1r, which yields a bogus padding pixel.)
        add             x10, x2,  w6,  uxtw #1      // top[max_base_x]
        ld1r            {v31.8h}, [x10]             // padding
        mov             w7,  w5                     // xpos = dx
        mov             w15, #64
        b.eq            8f

4:      // w == 4
        lsr             w8,  w7,  #6                // base
        and             w9,  w7,  #0x3e             // frac
        add             w7,  w7,  w5                // xpos += dx
        cmp             w8,  w6                     // base >= max_base_x
        lsr             w10, w7,  #6                // base
        and             w11, w7,  #0x3e             // frac
        b.ge            49f
        lsl             w8,  w8,  #1                // pixel index -> byte offset
        lsl             w10, w10, #1
        ldr             q0,  [x2, w8, uxtw]         // top[base]
        ldr             q2,  [x2, w10, uxtw]
        dup             v4.4h,   w9                 // frac
        dup             v5.4h,   w11
        uzp2            v1.8h,   v0.8h,   v0.8h     // top[base+1]
        uzp1            v0.8h,   v0.8h,   v0.8h     // top[base]
        uzp2            v3.8h,   v2.8h,   v2.8h
        uzp1            v2.8h,   v2.8h,   v2.8h
        sub             v6.4h,   v1.4h,   v0.4h     // top[base+1]-top[base]
        sub             v7.4h,   v3.4h,   v2.4h
        ushll           v16.4s,  v0.4h,   #6        // top[base]*64
        ushll           v17.4s,  v2.4h,   #6
        smlal           v16.4s,  v6.4h,   v4.4h     // + (top[base+1]-top[base])*frac
        smlal           v17.4s,  v7.4h,   v5.4h
        rshrn           v16.4h,  v16.4s,  #6        // rounding shift back to 16 bit
        rshrn           v17.4h,  v17.4s,  #6
        st1             {v16.4h}, [x0], x1
        add             w7,  w7,  w5                // xpos += dx
        subs            w4,  w4,  #2
        st1             {v17.4h}, [x0], x1
        b.gt            4b
        ret

49:     // w == 4, remaining rows are padding only
        st1             {v31.4h}, [x0], x1
        subs            w4,  w4,  #2
        st1             {v31.4h}, [x0], x1
        b.gt            49b
        ret

8:      // w == 8
        lsr             w8,  w7,  #6                // base
        and             w9,  w7,  #0x3e             // frac
        add             w7,  w7,  w5                // xpos += dx
        cmp             w8,  w6                     // base >= max_base_x
        lsr             w10, w7,  #6                // base
        and             w11, w7,  #0x3e             // frac
        b.ge            89f
        add             x8,  x2,  w8,  uxtw #1
        add             x10, x2,  w10, uxtw #1
        dup             v4.8h,   w9                 // frac
        dup             v5.8h,   w11
        ld1             {v0.8h, v1.8h}, [x8]        // top[base]
        ld1             {v2.8h, v3.8h}, [x10]
        sub             w9,  w15, w9                // 64 - frac
        sub             w11, w15, w11
        dup             v6.8h,   w9                 // 64 - frac
        dup             v7.8h,   w11
        uzp2            v20.8h,  v0.8h,   v1.8h     // top[base+1]
        uzp1            v0.8h,   v0.8h,   v1.8h     // top[base]
        uzp2            v21.8h,  v2.8h,   v3.8h
        uzp1            v2.8h,   v2.8h,   v3.8h
        umull           v16.4s,  v0.4h,   v6.4h     // top[base]*(64-frac)
        umlal           v16.4s,  v20.4h,  v4.4h     // + top[base+1]*frac
        umull2          v17.4s,  v0.8h,   v6.8h
        umlal2          v17.4s,  v20.8h,  v4.8h
        umull           v18.4s,  v2.4h,   v7.4h
        umlal           v18.4s,  v21.4h,  v5.4h
        umull2          v19.4s,  v2.8h,   v7.8h
        umlal2          v19.4s,  v21.8h,  v5.8h
        rshrn           v16.4h,  v16.4s,  #6
        rshrn2          v16.8h,  v17.4s,  #6
        rshrn           v17.4h,  v18.4s,  #6
        rshrn2          v17.8h,  v19.4s,  #6
        st1             {v16.8h}, [x0], x1
        add             w7,  w7,  w5                // xpos += dx
        subs            w4,  w4,  #2
        st1             {v17.8h}, [x0], x1
        b.gt            8b
        ret

89:     // w == 8, remaining rows are padding only
        st1             {v31.8h}, [x0], x1
        subs            w4,  w4,  #2
        st1             {v31.8h}, [x0], x1
        b.gt            89b
        ret
endfunc

// void ipred_reverse_16bpc_neon(pixel *dst, const pixel *const src,
//                               const int n);
//
// Writes the pixels that precede src to dst in reverse order, i.e.
// dst[i] = src[-1 - i]. Works on 8 pixels per iteration, so n is effectively
// rounded up to a multiple of 8 (the loop runs until n - 8*k <= 0).
function ipred_reverse_16bpc_neon, export=1
        sub             x1,  x1,  #16               // last 8 pixels before src
        add             x3,  x0,  #8                // second half of each output group
        mov             x4,  #16                    // output advance per iteration
1:
        ld1             {v0.8h}, [x1]
        subs            w2,  w2,  #8
        rev64           v0.8h,  v0.8h               // reverse within each 64-bit half
        sub             x1,  x1,  #16
        st1             {v0.d}[1], [x0], x4         // high half goes first ...
        st1             {v0.d}[0], [x3], x4         // ... then the low half
        b.gt            1b
        ret
endfunc

// Lane indices 0..7 as 16-bit values; ipred_z2_fill1_16bpc_neon below loads
// them into v31.
const increments
        .short          0,  1,  2,  3,  4,  5,  6,  7
endconst

// void ipred_z2_fill1_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                                const pixel *const top,
//                                const pixel *const left,
//                                const int width, const int height,
//                                const int dx, const int dy);
function ipred_z2_fill1_16bpc_neon, export=1
        clz             w10, w4
        movrel          x9,  ipred_z2_fill1_tbl
        sub             w10, w10, #25
        ldrsw           x10, [x9, w10, uxtw #2]
        mov             w8,  #(1 << 6)              // xpos = 1 << 6
        add             x9,  x9,  x10
        sub             w8,  w8,  w6                // xpos -= dx

        movrel          x11,
increments 2081*c0909341SAndroid Build Coastguard Worker ld1 {v31.8h}, [x11] // increments 2082*c0909341SAndroid Build Coastguard Worker neg w7, w7 // -dy 2083*c0909341SAndroid Build Coastguard Worker 2084*c0909341SAndroid Build Coastguard Worker br x9 2085*c0909341SAndroid Build Coastguard Worker40: 2086*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2087*c0909341SAndroid Build Coastguard Worker 2088*c0909341SAndroid Build Coastguard Worker dup v30.4h, w7 // -dy 2089*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 2090*c0909341SAndroid Build Coastguard Worker 2091*c0909341SAndroid Build Coastguard Worker mul v16.4h, v31.4h, v30.4h // {0,1,2,3}* -dy 2092*c0909341SAndroid Build Coastguard Worker movi v25.8h, #0x3e 2093*c0909341SAndroid Build Coastguard Worker add v30.4h, v16.4h, v30.4h // -= dy 2094*c0909341SAndroid Build Coastguard Worker 2095*c0909341SAndroid Build Coastguard Worker // Worst case height for w=4 is 16, but we need at least h+1 elements 2096*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h}, [x3] // left[] 2097*c0909341SAndroid Build Coastguard Worker 2098*c0909341SAndroid Build Coastguard Worker movi v26.8h, #64 2099*c0909341SAndroid Build Coastguard Worker movi v19.16b, #4 2100*c0909341SAndroid Build Coastguard Worker 2101*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 2102*c0909341SAndroid Build Coastguard Worker and v27.8b, v30.8b, v25.8b // frac_y 2103*c0909341SAndroid Build Coastguard Worker 2104*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 1 2105*c0909341SAndroid Build Coastguard Worker 2106*c0909341SAndroid Build Coastguard Worker movi v23.4h, #1, lsl #8 2107*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 2108*c0909341SAndroid Build Coastguard Worker zip1 v29.8b, v29.8b, v29.8b // duplicate elements 2109*c0909341SAndroid Build Coastguard Worker movi v17.8b, #2 2110*c0909341SAndroid Build 
Coastguard Worker add v29.8b, v29.8b, v23.8b // 2*base, 2*base+1, ... 2111*c0909341SAndroid Build Coastguard Worker 2112*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v17.8b // base_y + 1 (*2) 2113*c0909341SAndroid Build Coastguard Worker add v28.8b, v29.8b, v19.8b // base_y + 2 (*2) 2114*c0909341SAndroid Build Coastguard Worker 2115*c0909341SAndroid Build Coastguard Worker tbl v18.8b, {v0.16b}, v29.8b // left[base_y] 2116*c0909341SAndroid Build Coastguard Worker 2117*c0909341SAndroid Build Coastguard Worker trn1 v30.2d, v30.2d, v28.2d // base_y + 1, base_y + 2 2118*c0909341SAndroid Build Coastguard Worker 2119*c0909341SAndroid Build Coastguard Worker sub v28.4h, v26.4h, v27.4h // 64 - frac_y 2120*c0909341SAndroid Build Coastguard Worker 2121*c0909341SAndroid Build Coastguard Worker trn1 v31.2d, v31.2d, v31.2d // {0,1,2,3,0,1,2,3} 2122*c0909341SAndroid Build Coastguard Worker 2123*c0909341SAndroid Build Coastguard Worker trn1 v27.2d, v27.2d, v27.2d // frac_y 2124*c0909341SAndroid Build Coastguard Worker trn1 v28.2d, v28.2d, v28.2d // 64 - frac_y 2125*c0909341SAndroid Build Coastguard Worker 2126*c0909341SAndroid Build Coastguard Worker movi v29.16b, #4 2127*c0909341SAndroid Build Coastguard Worker4: 2128*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2129*c0909341SAndroid Build Coastguard Worker dup v16.4h, w8 // xpos 2130*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2131*c0909341SAndroid Build Coastguard Worker cmp w9, #-4 // base_x <= -4 2132*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2133*c0909341SAndroid Build Coastguard Worker b.le 49f 2134*c0909341SAndroid Build Coastguard Worker 2135*c0909341SAndroid Build Coastguard Worker lsl w9, w9, #1 2136*c0909341SAndroid Build Coastguard Worker lsl w11, w11, #1 2137*c0909341SAndroid Build Coastguard Worker 2138*c0909341SAndroid Build Coastguard Worker dup v17.4h, w8 // xpos 2139*c0909341SAndroid Build Coastguard Worker 
2140*c0909341SAndroid Build Coastguard Worker ldr q4, [x2, w9, sxtw] // top[base_x] 2141*c0909341SAndroid Build Coastguard Worker ldr q6, [x2, w11, sxtw] 2142*c0909341SAndroid Build Coastguard Worker 2143*c0909341SAndroid Build Coastguard Worker trn1 v16.2d, v16.2d, v17.2d // xpos 2144*c0909341SAndroid Build Coastguard Worker 2145*c0909341SAndroid Build Coastguard Worker // Cut corners here; only doing tbl over v0-v1 here; we only 2146*c0909341SAndroid Build Coastguard Worker // seem to need the last pixel, from v2, after skipping to the 2147*c0909341SAndroid Build Coastguard Worker // left-only codepath below. 2148*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b}, v30.16b // left[base_y+1], left[base_y+2] 2149*c0909341SAndroid Build Coastguard Worker 2150*c0909341SAndroid Build Coastguard Worker sshr v20.8h, v16.8h, #6 // first base_x for each row 2151*c0909341SAndroid Build Coastguard Worker 2152*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v4.16b, #2 // top[base_x+1] 2153*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v6.16b, #2 2154*c0909341SAndroid Build Coastguard Worker 2155*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 2156*c0909341SAndroid Build Coastguard Worker 2157*c0909341SAndroid Build Coastguard Worker trn1 v18.2d, v18.2d, v19.2d // left[base_y], left[base_y+1] 2158*c0909341SAndroid Build Coastguard Worker 2159*c0909341SAndroid Build Coastguard Worker trn1 v4.2d, v4.2d, v6.2d // top[base_x] 2160*c0909341SAndroid Build Coastguard Worker trn1 v5.2d, v5.2d, v7.2d // top[base_x+1] 2161*c0909341SAndroid Build Coastguard Worker 2162*c0909341SAndroid Build Coastguard Worker sub v17.8h, v26.8h, v16.8h // 64 - frac_x 2163*c0909341SAndroid Build Coastguard Worker 2164*c0909341SAndroid Build Coastguard Worker add v20.8h, v20.8h, v31.8h // actual base_x 2165*c0909341SAndroid Build Coastguard Worker 2166*c0909341SAndroid Build Coastguard Worker umull v21.4s, v18.4h, v28.4h // 
left[base_y]*(64-frac_y) 2167*c0909341SAndroid Build Coastguard Worker umlal v21.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2168*c0909341SAndroid Build Coastguard Worker umull2 v22.4s, v18.8h, v28.8h 2169*c0909341SAndroid Build Coastguard Worker umlal2 v22.4s, v19.8h, v27.8h 2170*c0909341SAndroid Build Coastguard Worker 2171*c0909341SAndroid Build Coastguard Worker umull v23.4s, v4.4h, v17.4h // top[base_x]-*(64-frac_x) 2172*c0909341SAndroid Build Coastguard Worker umlal v23.4s, v5.4h, v16.4h // + top[base_x+1]*frac_x 2173*c0909341SAndroid Build Coastguard Worker umull2 v24.4s, v4.8h, v17.8h 2174*c0909341SAndroid Build Coastguard Worker umlal2 v24.4s, v5.8h, v16.8h 2175*c0909341SAndroid Build Coastguard Worker 2176*c0909341SAndroid Build Coastguard Worker cmge v20.8h, v20.8h, #0 2177*c0909341SAndroid Build Coastguard Worker 2178*c0909341SAndroid Build Coastguard Worker rshrn v21.4h, v21.4s, #6 2179*c0909341SAndroid Build Coastguard Worker rshrn2 v21.8h, v22.4s, #6 2180*c0909341SAndroid Build Coastguard Worker rshrn v22.4h, v23.4s, #6 2181*c0909341SAndroid Build Coastguard Worker rshrn2 v22.8h, v24.4s, #6 2182*c0909341SAndroid Build Coastguard Worker 2183*c0909341SAndroid Build Coastguard Worker bit v21.16b, v22.16b, v20.16b 2184*c0909341SAndroid Build Coastguard Worker 2185*c0909341SAndroid Build Coastguard Worker st1 {v21.d}[0], [x0], x1 2186*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2187*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2188*c0909341SAndroid Build Coastguard Worker st1 {v21.d}[1], [x0], x1 2189*c0909341SAndroid Build Coastguard Worker b.le 9f 2190*c0909341SAndroid Build Coastguard Worker 2191*c0909341SAndroid Build Coastguard Worker ext v18.16b, v19.16b, v19.16b, #8 2192*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v29.16b // base_y += 2 (*2) 2193*c0909341SAndroid Build Coastguard Worker b 4b 2194*c0909341SAndroid Build Coastguard Worker 2195*c0909341SAndroid Build Coastguard Worker49: 
2196*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+1], left[base_y+2] 2197*c0909341SAndroid Build Coastguard Worker 2198*c0909341SAndroid Build Coastguard Worker trn1 v18.2d, v18.2d, v19.2d // left[base_y], left[base_y+1] 2199*c0909341SAndroid Build Coastguard Worker 2200*c0909341SAndroid Build Coastguard Worker umull v20.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2201*c0909341SAndroid Build Coastguard Worker umlal v20.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2202*c0909341SAndroid Build Coastguard Worker umull2 v21.4s, v18.8h, v28.8h 2203*c0909341SAndroid Build Coastguard Worker umlal2 v21.4s, v19.8h, v27.8h 2204*c0909341SAndroid Build Coastguard Worker 2205*c0909341SAndroid Build Coastguard Worker rshrn v20.4h, v20.4s, #6 2206*c0909341SAndroid Build Coastguard Worker rshrn2 v20.8h, v21.4s, #6 2207*c0909341SAndroid Build Coastguard Worker 2208*c0909341SAndroid Build Coastguard Worker st1 {v20.d}[0], [x0], x1 2209*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2210*c0909341SAndroid Build Coastguard Worker st1 {v20.d}[1], [x0], x1 2211*c0909341SAndroid Build Coastguard Worker b.le 9f 2212*c0909341SAndroid Build Coastguard Worker 2213*c0909341SAndroid Build Coastguard Worker ext v18.16b, v19.16b, v19.16b, #8 2214*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v29.16b // base_y += 2 (*2) 2215*c0909341SAndroid Build Coastguard Worker b 49b 2216*c0909341SAndroid Build Coastguard Worker 2217*c0909341SAndroid Build Coastguard Worker9: 2218*c0909341SAndroid Build Coastguard Worker ret 2219*c0909341SAndroid Build Coastguard Worker 2220*c0909341SAndroid Build Coastguard Worker80: 2221*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2222*c0909341SAndroid Build Coastguard Worker 2223*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x40]! 
2224*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 2225*c0909341SAndroid Build Coastguard Worker stp d12, d13, [sp, #0x20] 2226*c0909341SAndroid Build Coastguard Worker stp d14, d15, [sp, #0x30] 2227*c0909341SAndroid Build Coastguard Worker 2228*c0909341SAndroid Build Coastguard Worker dup v18.8h, w7 // -dy 2229*c0909341SAndroid Build Coastguard Worker add x3, x3, #2 // Skip past left[0] 2230*c0909341SAndroid Build Coastguard Worker 2231*c0909341SAndroid Build Coastguard Worker mul v16.8h, v31.8h, v18.8h // {0,1,2,3,4,5,6,7}* -dy 2232*c0909341SAndroid Build Coastguard Worker movi v25.8h, #0x3e 2233*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v18.8h // -= dy 2234*c0909341SAndroid Build Coastguard Worker 2235*c0909341SAndroid Build Coastguard Worker // Worst case height for w=8 is 32. 2236*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x3] // left[] 2237*c0909341SAndroid Build Coastguard Worker ld1r {v15.8h}, [x2] // left[0] == top[0] 2238*c0909341SAndroid Build Coastguard Worker 2239*c0909341SAndroid Build Coastguard Worker movi v26.8h, #64 2240*c0909341SAndroid Build Coastguard Worker movi v19.16b, #4 2241*c0909341SAndroid Build Coastguard Worker 2242*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v16.8h, #6 // ypos >> 6 2243*c0909341SAndroid Build Coastguard Worker and v27.16b, v16.16b, v25.16b // frac_y 2244*c0909341SAndroid Build Coastguard Worker 2245*c0909341SAndroid Build Coastguard Worker movi v23.8h, #1, lsl #8 2246*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 2247*c0909341SAndroid Build Coastguard Worker mov v18.16b, v15.16b // left[0] 2248*c0909341SAndroid Build Coastguard Worker zip1 v29.16b, v29.16b, v29.16b // duplicate elements 2249*c0909341SAndroid Build Coastguard Worker movi v17.16b, #2 2250*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v23.16b // 2*base, 2*base+1, ... 
2251*c0909341SAndroid Build Coastguard Worker 2252*c0909341SAndroid Build Coastguard Worker // Cut corners here; for the first row we don't expect to need to 2253*c0909341SAndroid Build Coastguard Worker // read outside of v0. 2254*c0909341SAndroid Build Coastguard Worker tbx v18.16b, {v0.16b}, v29.16b // left[base_y] 2255*c0909341SAndroid Build Coastguard Worker 2256*c0909341SAndroid Build Coastguard Worker add v30.16b, v29.16b, v19.16b // base_y + 2 (*2) 2257*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v17.16b // base_y + 1 (*2) 2258*c0909341SAndroid Build Coastguard Worker 2259*c0909341SAndroid Build Coastguard Worker sub v28.8h, v26.8h, v27.8h // 64 - frac_y 2260*c0909341SAndroid Build Coastguard Worker 2261*c0909341SAndroid Build Coastguard Worker movi v24.16b, #4 2262*c0909341SAndroid Build Coastguard Worker8: 2263*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2264*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 2265*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2266*c0909341SAndroid Build Coastguard Worker cmp w9, #-16 // base_x <= -16 2267*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2268*c0909341SAndroid Build Coastguard Worker b.le 89f 2269*c0909341SAndroid Build Coastguard Worker 2270*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 2271*c0909341SAndroid Build Coastguard Worker 2272*c0909341SAndroid Build Coastguard Worker add x9, x2, w9, sxtw #1 2273*c0909341SAndroid Build Coastguard Worker add x11, x2, w11, sxtw #1 2274*c0909341SAndroid Build Coastguard Worker 2275*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x9] // top[base_x] 2276*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b // left[0] 2277*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h, v7.8h}, [x11] 2278*c0909341SAndroid Build Coastguard Worker 2279*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // 
left[base_y+1] 2280*c0909341SAndroid Build Coastguard Worker 2281*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b // left[0] 2282*c0909341SAndroid Build Coastguard Worker 2283*c0909341SAndroid Build Coastguard Worker sshr v21.8h, v16.8h, #6 // first base_x 2284*c0909341SAndroid Build Coastguard Worker sshr v22.8h, v17.8h, #6 2285*c0909341SAndroid Build Coastguard Worker 2286*c0909341SAndroid Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b // left[base_y+2] 2287*c0909341SAndroid Build Coastguard Worker 2288*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v5.16b, #2 // top[base_x+1] 2289*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v7.16b, #2 2290*c0909341SAndroid Build Coastguard Worker 2291*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 2292*c0909341SAndroid Build Coastguard Worker and v17.16b, v17.16b, v25.16b 2293*c0909341SAndroid Build Coastguard Worker 2294*c0909341SAndroid Build Coastguard Worker umull v10.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2295*c0909341SAndroid Build Coastguard Worker umlal v10.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2296*c0909341SAndroid Build Coastguard Worker 2297*c0909341SAndroid Build Coastguard Worker sub v8.8h, v26.8h, v16.8h // 64 - frac_x 2298*c0909341SAndroid Build Coastguard Worker sub v9.8h, v26.8h, v17.8h 2299*c0909341SAndroid Build Coastguard Worker 2300*c0909341SAndroid Build Coastguard Worker umull2 v11.4s, v18.8h, v28.8h 2301*c0909341SAndroid Build Coastguard Worker umlal2 v11.4s, v19.8h, v27.8h 2302*c0909341SAndroid Build Coastguard Worker 2303*c0909341SAndroid Build Coastguard Worker add v21.8h, v21.8h, v31.8h // actual base_x 2304*c0909341SAndroid Build Coastguard Worker add v22.8h, v22.8h, v31.8h 2305*c0909341SAndroid Build Coastguard Worker 2306*c0909341SAndroid Build Coastguard Worker umull v12.4s, v19.4h, v28.4h 2307*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v20.4h, v27.4h 
2308*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v19.8h, v28.8h 2309*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v20.8h, v27.8h 2310*c0909341SAndroid Build Coastguard Worker 2311*c0909341SAndroid Build Coastguard Worker rshrn v10.4h, v10.4s, #6 2312*c0909341SAndroid Build Coastguard Worker rshrn2 v10.8h, v11.4s, #6 2313*c0909341SAndroid Build Coastguard Worker rshrn v11.4h, v12.4s, #6 2314*c0909341SAndroid Build Coastguard Worker rshrn2 v11.8h, v13.4s, #6 2315*c0909341SAndroid Build Coastguard Worker 2316*c0909341SAndroid Build Coastguard Worker umull v12.4s, v4.4h, v8.4h // top[base_x]-*(64-frac_x) 2317*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v5.4h, v16.4h // + top[base_x+1]*frac_x 2318*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v4.8h, v8.8h 2319*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v5.8h, v16.8h 2320*c0909341SAndroid Build Coastguard Worker umull v14.4s, v6.4h, v9.4h 2321*c0909341SAndroid Build Coastguard Worker umlal v14.4s, v7.4h, v17.4h 2322*c0909341SAndroid Build Coastguard Worker umull2 v18.4s, v6.8h, v9.8h 2323*c0909341SAndroid Build Coastguard Worker umlal2 v18.4s, v7.8h, v17.8h 2324*c0909341SAndroid Build Coastguard Worker 2325*c0909341SAndroid Build Coastguard Worker cmge v21.8h, v21.8h, #0 2326*c0909341SAndroid Build Coastguard Worker cmge v22.8h, v22.8h, #0 2327*c0909341SAndroid Build Coastguard Worker 2328*c0909341SAndroid Build Coastguard Worker rshrn v12.4h, v12.4s, #6 2329*c0909341SAndroid Build Coastguard Worker rshrn2 v12.8h, v13.4s, #6 2330*c0909341SAndroid Build Coastguard Worker rshrn v13.4h, v14.4s, #6 2331*c0909341SAndroid Build Coastguard Worker rshrn2 v13.8h, v18.4s, #6 2332*c0909341SAndroid Build Coastguard Worker 2333*c0909341SAndroid Build Coastguard Worker bit v10.16b, v12.16b, v21.16b 2334*c0909341SAndroid Build Coastguard Worker bit v11.16b, v13.16b, v22.16b 2335*c0909341SAndroid Build Coastguard Worker 2336*c0909341SAndroid Build Coastguard Worker st1 {v10.8h}, 
[x0], x1 2337*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2338*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2339*c0909341SAndroid Build Coastguard Worker st1 {v11.8h}, [x0], x1 2340*c0909341SAndroid Build Coastguard Worker b.le 9f 2341*c0909341SAndroid Build Coastguard Worker 2342*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2343*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 2344*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 2 (*2) 2345*c0909341SAndroid Build Coastguard Worker b 8b 2346*c0909341SAndroid Build Coastguard Worker 2347*c0909341SAndroid Build Coastguard Worker89: 2348*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b 2349*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b 2350*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+1] 2351*c0909341SAndroid Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b // left[base_y+2] 2352*c0909341SAndroid Build Coastguard Worker 2353*c0909341SAndroid Build Coastguard Worker umull v4.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2354*c0909341SAndroid Build Coastguard Worker umlal v4.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2355*c0909341SAndroid Build Coastguard Worker umull2 v5.4s, v18.8h, v28.8h 2356*c0909341SAndroid Build Coastguard Worker umlal2 v5.4s, v19.8h, v27.8h 2357*c0909341SAndroid Build Coastguard Worker umull v6.4s, v19.4h, v28.4h 2358*c0909341SAndroid Build Coastguard Worker umlal v6.4s, v20.4h, v27.4h 2359*c0909341SAndroid Build Coastguard Worker umull2 v7.4s, v19.8h, v28.8h 2360*c0909341SAndroid Build Coastguard Worker umlal2 v7.4s, v20.8h, v27.8h 2361*c0909341SAndroid Build Coastguard Worker 2362*c0909341SAndroid Build Coastguard Worker rshrn v4.4h, v4.4s, #6 2363*c0909341SAndroid Build Coastguard Worker rshrn2 v4.8h, v5.4s, #6 2364*c0909341SAndroid 
Build Coastguard Worker rshrn v5.4h, v6.4s, #6 2365*c0909341SAndroid Build Coastguard Worker rshrn2 v5.8h, v7.4s, #6 2366*c0909341SAndroid Build Coastguard Worker 2367*c0909341SAndroid Build Coastguard Worker st1 {v4.8h}, [x0], x1 2368*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2369*c0909341SAndroid Build Coastguard Worker st1 {v5.8h}, [x0], x1 2370*c0909341SAndroid Build Coastguard Worker b.le 9f 2371*c0909341SAndroid Build Coastguard Worker 2372*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2373*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 2374*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 2 (*2) 2375*c0909341SAndroid Build Coastguard Worker b 89b 2376*c0909341SAndroid Build Coastguard Worker 2377*c0909341SAndroid Build Coastguard Worker9: 2378*c0909341SAndroid Build Coastguard Worker ldp d14, d15, [sp, #0x30] 2379*c0909341SAndroid Build Coastguard Worker ldp d12, d13, [sp, #0x20] 2380*c0909341SAndroid Build Coastguard Worker ldp d10, d11, [sp, #0x10] 2381*c0909341SAndroid Build Coastguard Worker ldp d8, d9, [sp], 0x40 2382*c0909341SAndroid Build Coastguard Worker ret 2383*c0909341SAndroid Build Coastguard Worker 2384*c0909341SAndroid Build Coastguard Worker160: 2385*c0909341SAndroid Build Coastguard Worker320: 2386*c0909341SAndroid Build Coastguard Worker640: 2387*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2388*c0909341SAndroid Build Coastguard Worker 2389*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x40]! 
2390*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 2391*c0909341SAndroid Build Coastguard Worker stp d12, d13, [sp, #0x20] 2392*c0909341SAndroid Build Coastguard Worker stp d14, d15, [sp, #0x30] 2393*c0909341SAndroid Build Coastguard Worker 2394*c0909341SAndroid Build Coastguard Worker dup v25.8h, w7 // -dy 2395*c0909341SAndroid Build Coastguard Worker add x3, x3, #2 // Skip past left[0] 2396*c0909341SAndroid Build Coastguard Worker 2397*c0909341SAndroid Build Coastguard Worker add x13, x0, x1 // alternating row 2398*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 // stride *= 2 2399*c0909341SAndroid Build Coastguard Worker sub x1, x1, w4, uxtw #1 // stride -= width 2400*c0909341SAndroid Build Coastguard Worker 2401*c0909341SAndroid Build Coastguard Worker movi v11.8h, #8 2402*c0909341SAndroid Build Coastguard Worker mul v26.8h, v31.8h, v25.8h // {0,1,2,3,4,5,6,7}* -dy 2403*c0909341SAndroid Build Coastguard Worker add v26.8h, v26.8h, v25.8h // -= dy 2404*c0909341SAndroid Build Coastguard Worker mul v25.8h, v25.8h, v11.8h // -8*dy 2405*c0909341SAndroid Build Coastguard Worker 2406*c0909341SAndroid Build Coastguard Worker // Worst case height is 64, but we can only fit 32 pixels into 2407*c0909341SAndroid Build Coastguard Worker // v0-v3 usable within one tbx instruction. As long as base_y is 2408*c0909341SAndroid Build Coastguard Worker // up to 32, we use tbx. 
2409*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x3] // left[] 2410*c0909341SAndroid Build Coastguard Worker ld1r {v15.8h}, [x2] // left[0] == top[0] 2411*c0909341SAndroid Build Coastguard Worker 2412*c0909341SAndroid Build Coastguard Worker mov w12, w4 // orig w 2413*c0909341SAndroid Build Coastguard Worker neg w14, w4 // -w 2414*c0909341SAndroid Build Coastguard Worker 2415*c0909341SAndroid Build Coastguard Worker1: 2416*c0909341SAndroid Build Coastguard Worker mov v23.16b, v26.16b // reset ypos 2417*c0909341SAndroid Build Coastguard Worker 2418*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2419*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 2420*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2421*c0909341SAndroid Build Coastguard Worker cmp w9, w14 // base_x <= -2*w 2422*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2423*c0909341SAndroid Build Coastguard Worker b.le 169f 2424*c0909341SAndroid Build Coastguard Worker 2425*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 2426*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2427*c0909341SAndroid Build Coastguard Worker 2428*c0909341SAndroid Build Coastguard Worker add x9, x2, w9, sxtw #1 2429*c0909341SAndroid Build Coastguard Worker add x11, x2, w11, sxtw #1 2430*c0909341SAndroid Build Coastguard Worker 2431*c0909341SAndroid Build Coastguard Worker sshr v21.8h, v16.8h, #6 // first base_x 2432*c0909341SAndroid Build Coastguard Worker sshr v22.8h, v17.8h, #6 2433*c0909341SAndroid Build Coastguard Worker 2434*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h}, [x9], #16 // top[base_x] 2435*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h}, [x11], #16 2436*c0909341SAndroid Build Coastguard Worker 2437*c0909341SAndroid Build Coastguard Worker movi v10.8h, #0x3e 2438*c0909341SAndroid Build Coastguard Worker movi v11.8h, #64 2439*c0909341SAndroid Build Coastguard Worker 
2440*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v10.16b // frac_x 2441*c0909341SAndroid Build Coastguard Worker and v17.16b, v17.16b, v10.16b 2442*c0909341SAndroid Build Coastguard Worker 2443*c0909341SAndroid Build Coastguard Worker sub v8.8h, v11.8h, v16.8h // 64 - frac_x 2444*c0909341SAndroid Build Coastguard Worker sub v9.8h, v11.8h, v17.8h 2445*c0909341SAndroid Build Coastguard Worker 2446*c0909341SAndroid Build Coastguard Worker add v21.8h, v21.8h, v31.8h // actual base_x 2447*c0909341SAndroid Build Coastguard Worker add v22.8h, v22.8h, v31.8h 2448*c0909341SAndroid Build Coastguard Worker 2449*c0909341SAndroid Build Coastguard Worker2: 2450*c0909341SAndroid Build Coastguard Worker smov w10, v22.h[0] 2451*c0909341SAndroid Build Coastguard Worker 2452*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v23.8h, #6 // ypos >> 6 2453*c0909341SAndroid Build Coastguard Worker movi v12.8h, #64 2454*c0909341SAndroid Build Coastguard Worker cmp w10, #0 // base_x (bottom left) >= 0 2455*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[0] // base_y[0] 2456*c0909341SAndroid Build Coastguard Worker movi v10.8h, #0x3e 2457*c0909341SAndroid Build Coastguard Worker 2458*c0909341SAndroid Build Coastguard Worker b.ge 4f 2459*c0909341SAndroid Build Coastguard Worker and v27.16b, v23.16b, v10.16b // frac_y 2460*c0909341SAndroid Build Coastguard Worker cmp w10, #(32-3) 2461*c0909341SAndroid Build Coastguard Worker 2462*c0909341SAndroid Build Coastguard Worker mov v18.16b, v15.16b // left[0] 2463*c0909341SAndroid Build Coastguard Worker sub v28.8h, v12.8h, v27.8h // 64 - frac_y 2464*c0909341SAndroid Build Coastguard Worker b.gt 22f 2465*c0909341SAndroid Build Coastguard Worker 2466*c0909341SAndroid Build Coastguard Worker21: 2467*c0909341SAndroid Build Coastguard Worker // base_y < 32, using tbx 2468*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 2469*c0909341SAndroid Build Coastguard Worker movi v11.8h, #1, lsl #8 
2470*c0909341SAndroid Build Coastguard Worker zip1 v29.16b, v29.16b, v29.16b // duplicate elements 2471*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v11.16b // 2*base, 2*base+1, ... 2472*c0909341SAndroid Build Coastguard Worker 2473*c0909341SAndroid Build Coastguard Worker movi v13.16b, #2 2474*c0909341SAndroid Build Coastguard Worker 2475*c0909341SAndroid Build Coastguard Worker tbx v18.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y] 2476*c0909341SAndroid Build Coastguard Worker 2477*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v13.16b // base_y + 1 (*2) 2478*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b // left[0] 2479*c0909341SAndroid Build Coastguard Worker 2480*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+1] 2481*c0909341SAndroid Build Coastguard Worker 2482*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v13.16b // base_y + 2 (*2) 2483*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b // left[0] 2484*c0909341SAndroid Build Coastguard Worker 2485*c0909341SAndroid Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+2] 2486*c0909341SAndroid Build Coastguard Worker 2487*c0909341SAndroid Build Coastguard Worker b 23f 2488*c0909341SAndroid Build Coastguard Worker 2489*c0909341SAndroid Build Coastguard Worker22: 2490*c0909341SAndroid Build Coastguard Worker // base_y >= 32, using separate loads. 
2491*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[1] 2492*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[2] 2493*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw #1 2494*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[3] 2495*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw #1 2496*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[0], [x10] 2497*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[4] 2498*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw #1 2499*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[1], [x15] 2500*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[5] 2501*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw #1 2502*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[2], [x16] 2503*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[6] 2504*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw #1 2505*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[3], [x17] 2506*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[7] 2507*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw #1 2508*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw #1 2509*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[4], [x10] 2510*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw #1 2511*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[5], [x15] 2512*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[6], [x16] 2513*c0909341SAndroid Build Coastguard Worker ld3 {v18.h, v19.h, v20.h}[7], [x17] 2514*c0909341SAndroid Build Coastguard Worker 2515*c0909341SAndroid Build Coastguard Worker23: 2516*c0909341SAndroid Build Coastguard Worker 2517*c0909341SAndroid Build Coastguard Worker ld1 {v5.8h}, [x9], #16 // top[base_x] 2518*c0909341SAndroid Build Coastguard Worker ld1 {v7.8h}, [x11], #16 
2519*c0909341SAndroid Build Coastguard Worker 2520*c0909341SAndroid Build Coastguard Worker add v23.8h, v23.8h, v25.8h // ypos -= 8*dy 2521*c0909341SAndroid Build Coastguard Worker 2522*c0909341SAndroid Build Coastguard Worker umull v10.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2523*c0909341SAndroid Build Coastguard Worker umlal v10.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2524*c0909341SAndroid Build Coastguard Worker umull2 v11.4s, v18.8h, v28.8h 2525*c0909341SAndroid Build Coastguard Worker umlal2 v11.4s, v19.8h, v27.8h 2526*c0909341SAndroid Build Coastguard Worker umull v12.4s, v19.4h, v28.4h 2527*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v20.4h, v27.4h 2528*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v19.8h, v28.8h 2529*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v20.8h, v27.8h 2530*c0909341SAndroid Build Coastguard Worker 2531*c0909341SAndroid Build Coastguard Worker ext v18.16b, v4.16b, v5.16b, #2 // top[base_x+1] 2532*c0909341SAndroid Build Coastguard Worker ext v19.16b, v6.16b, v7.16b, #2 2533*c0909341SAndroid Build Coastguard Worker 2534*c0909341SAndroid Build Coastguard Worker rshrn v10.4h, v10.4s, #6 2535*c0909341SAndroid Build Coastguard Worker rshrn2 v10.8h, v11.4s, #6 2536*c0909341SAndroid Build Coastguard Worker rshrn v11.4h, v12.4s, #6 2537*c0909341SAndroid Build Coastguard Worker rshrn2 v11.8h, v13.4s, #6 2538*c0909341SAndroid Build Coastguard Worker 2539*c0909341SAndroid Build Coastguard Worker umull v12.4s, v4.4h, v8.4h // top[base_x]-*(64-frac_x) 2540*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v18.4h, v16.4h // + top[base_x+1]*frac_x 2541*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v4.8h, v8.8h 2542*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v18.8h, v16.8h 2543*c0909341SAndroid Build Coastguard Worker umull v14.4s, v6.4h, v9.4h 2544*c0909341SAndroid Build Coastguard Worker umlal v14.4s, v19.4h, v17.4h 2545*c0909341SAndroid Build Coastguard Worker umull2 v20.4s, 
v6.8h, v9.8h 2546*c0909341SAndroid Build Coastguard Worker umlal2 v20.4s, v19.8h, v17.8h 2547*c0909341SAndroid Build Coastguard Worker 2548*c0909341SAndroid Build Coastguard Worker cmge v18.8h, v21.8h, #0 2549*c0909341SAndroid Build Coastguard Worker cmge v19.8h, v22.8h, #0 2550*c0909341SAndroid Build Coastguard Worker 2551*c0909341SAndroid Build Coastguard Worker rshrn v12.4h, v12.4s, #6 2552*c0909341SAndroid Build Coastguard Worker rshrn2 v12.8h, v13.4s, #6 2553*c0909341SAndroid Build Coastguard Worker rshrn v13.4h, v14.4s, #6 2554*c0909341SAndroid Build Coastguard Worker rshrn2 v13.8h, v20.4s, #6 2555*c0909341SAndroid Build Coastguard Worker 2556*c0909341SAndroid Build Coastguard Worker bit v10.16b, v12.16b, v18.16b 2557*c0909341SAndroid Build Coastguard Worker bit v11.16b, v13.16b, v19.16b 2558*c0909341SAndroid Build Coastguard Worker 2559*c0909341SAndroid Build Coastguard Worker st1 {v10.8h}, [x0], #16 2560*c0909341SAndroid Build Coastguard Worker subs w4, w4, #8 2561*c0909341SAndroid Build Coastguard Worker st1 {v11.8h}, [x13], #16 2562*c0909341SAndroid Build Coastguard Worker b.le 3f 2563*c0909341SAndroid Build Coastguard Worker 2564*c0909341SAndroid Build Coastguard Worker movi v10.8h, #8 2565*c0909341SAndroid Build Coastguard Worker mov v4.16b, v5.16b 2566*c0909341SAndroid Build Coastguard Worker mov v6.16b, v7.16b 2567*c0909341SAndroid Build Coastguard Worker add v21.8h, v21.8h, v10.8h // base_x += 8 2568*c0909341SAndroid Build Coastguard Worker add v22.8h, v22.8h, v10.8h 2569*c0909341SAndroid Build Coastguard Worker b 2b 2570*c0909341SAndroid Build Coastguard Worker 2571*c0909341SAndroid Build Coastguard Worker3: 2572*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2573*c0909341SAndroid Build Coastguard Worker b.le 9f 2574*c0909341SAndroid Build Coastguard Worker movi v10.8h, #128 2575*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 2576*c0909341SAndroid Build Coastguard Worker add x13, x13, x1 2577*c0909341SAndroid Build Coastguard 
Worker mov w4, w12 // reset w 2578*c0909341SAndroid Build Coastguard Worker add v26.8h, v26.8h, v10.8h // ypos += 2*(1<<6) 2579*c0909341SAndroid Build Coastguard Worker b 1b 2580*c0909341SAndroid Build Coastguard Worker 2581*c0909341SAndroid Build Coastguard Worker4: // The rest of the row only predicted from top[] 2582*c0909341SAndroid Build Coastguard Worker ld1 {v5.8h}, [x9], #16 // top[base_x] 2583*c0909341SAndroid Build Coastguard Worker ld1 {v7.8h}, [x11], #16 2584*c0909341SAndroid Build Coastguard Worker 2585*c0909341SAndroid Build Coastguard Worker ext v18.16b, v4.16b, v5.16b, #2 // top[base_x+1] 2586*c0909341SAndroid Build Coastguard Worker ext v19.16b, v6.16b, v7.16b, #2 2587*c0909341SAndroid Build Coastguard Worker 2588*c0909341SAndroid Build Coastguard Worker umull v12.4s, v4.4h, v8.4h // top[base_x]-*(64-frac_x) 2589*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v18.4h, v16.4h // + top[base_x+1]*frac_x 2590*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v4.8h, v8.8h 2591*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v18.8h, v16.8h 2592*c0909341SAndroid Build Coastguard Worker umull v14.4s, v6.4h, v9.4h 2593*c0909341SAndroid Build Coastguard Worker umlal v14.4s, v19.4h, v17.4h 2594*c0909341SAndroid Build Coastguard Worker umull2 v20.4s, v6.8h, v9.8h 2595*c0909341SAndroid Build Coastguard Worker umlal2 v20.4s, v19.8h, v17.8h 2596*c0909341SAndroid Build Coastguard Worker 2597*c0909341SAndroid Build Coastguard Worker rshrn v12.4h, v12.4s, #6 2598*c0909341SAndroid Build Coastguard Worker rshrn2 v12.8h, v13.4s, #6 2599*c0909341SAndroid Build Coastguard Worker rshrn v13.4h, v14.4s, #6 2600*c0909341SAndroid Build Coastguard Worker rshrn2 v13.8h, v20.4s, #6 2601*c0909341SAndroid Build Coastguard Worker 2602*c0909341SAndroid Build Coastguard Worker st1 {v12.8h}, [x0], #16 2603*c0909341SAndroid Build Coastguard Worker subs w4, w4, #8 2604*c0909341SAndroid Build Coastguard Worker st1 {v13.8h}, [x13], #16 2605*c0909341SAndroid Build 
Coastguard Worker b.le 3b 2606*c0909341SAndroid Build Coastguard Worker 2607*c0909341SAndroid Build Coastguard Worker mov v4.16b, v5.16b 2608*c0909341SAndroid Build Coastguard Worker mov v6.16b, v7.16b 2609*c0909341SAndroid Build Coastguard Worker b 4b 2610*c0909341SAndroid Build Coastguard Worker 2611*c0909341SAndroid Build Coastguard Worker169: // The rest of the block only predicted from left[] 2612*c0909341SAndroid Build Coastguard Worker add x1, x1, w4, uxtw #1 // restore stride 2613*c0909341SAndroid Build Coastguard Worker mov w12, w5 // orig remaining h 2614*c0909341SAndroid Build Coastguard Worker1: 2615*c0909341SAndroid Build Coastguard Worker movi v12.8h, #64 2616*c0909341SAndroid Build Coastguard Worker movi v10.8h, #0x3e 2617*c0909341SAndroid Build Coastguard Worker 2618*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v23.8h, #6 // ypos >> 6 2619*c0909341SAndroid Build Coastguard Worker and v27.16b, v23.16b, v10.16b // frac_y 2620*c0909341SAndroid Build Coastguard Worker 2621*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[0] // base_y[0] 2622*c0909341SAndroid Build Coastguard Worker 2623*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 2624*c0909341SAndroid Build Coastguard Worker movi v11.8h, #1, lsl #8 2625*c0909341SAndroid Build Coastguard Worker zip1 v29.16b, v29.16b, v29.16b // duplicate elements 2626*c0909341SAndroid Build Coastguard Worker add v23.8h, v23.8h, v25.8h // ypos -= 8*dy 2627*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v11.16b // 2*base, 2*base+1, ... 
2628*c0909341SAndroid Build Coastguard Worker 2629*c0909341SAndroid Build Coastguard Worker cmp w10, #(32-1) 2630*c0909341SAndroid Build Coastguard Worker 2631*c0909341SAndroid Build Coastguard Worker mov v18.16b, v15.16b // left[0] 2632*c0909341SAndroid Build Coastguard Worker movi v21.16b, #2 2633*c0909341SAndroid Build Coastguard Worker 2634*c0909341SAndroid Build Coastguard Worker sub v28.8h, v12.8h, v27.8h // 64 - frac_y 2635*c0909341SAndroid Build Coastguard Worker 2636*c0909341SAndroid Build Coastguard Worker b.gt 31f 2637*c0909341SAndroid Build Coastguard Worker 2638*c0909341SAndroid Build Coastguard Worker tbx v18.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y] 2639*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // base_y + 1 (*2) 2640*c0909341SAndroid Build Coastguard Worker 2641*c0909341SAndroid Build Coastguard Worker2: 2642*c0909341SAndroid Build Coastguard Worker // base_y < 32, using tbx. 2643*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[0] // base_y[0] 2644*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b // left[0] 2645*c0909341SAndroid Build Coastguard Worker cmp w10, #(64-4) 2646*c0909341SAndroid Build Coastguard Worker b.gt 32f 2647*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+1] 2648*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // base_y + 2 (*2) 2649*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b // left[0] 2650*c0909341SAndroid Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+2] 2651*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // next base_y 2652*c0909341SAndroid Build Coastguard Worker 2653*c0909341SAndroid Build Coastguard Worker umull v10.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2654*c0909341SAndroid Build Coastguard Worker umlal v10.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 
2655*c0909341SAndroid Build Coastguard Worker umull2 v11.4s, v18.8h, v28.8h 2656*c0909341SAndroid Build Coastguard Worker umlal2 v11.4s, v19.8h, v27.8h 2657*c0909341SAndroid Build Coastguard Worker umull v12.4s, v19.4h, v28.4h 2658*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v20.4h, v27.4h 2659*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v19.8h, v28.8h 2660*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v20.8h, v27.8h 2661*c0909341SAndroid Build Coastguard Worker 2662*c0909341SAndroid Build Coastguard Worker rshrn v10.4h, v10.4s, #6 2663*c0909341SAndroid Build Coastguard Worker rshrn2 v10.8h, v11.4s, #6 2664*c0909341SAndroid Build Coastguard Worker rshrn v11.4h, v12.4s, #6 2665*c0909341SAndroid Build Coastguard Worker rshrn2 v11.8h, v13.4s, #6 2666*c0909341SAndroid Build Coastguard Worker 2667*c0909341SAndroid Build Coastguard Worker st1 {v10.8h}, [x0], x1 2668*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2669*c0909341SAndroid Build Coastguard Worker st1 {v11.8h}, [x13], x1 2670*c0909341SAndroid Build Coastguard Worker b.le 4f 2671*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2672*c0909341SAndroid Build Coastguard Worker b 2b 2673*c0909341SAndroid Build Coastguard Worker 2674*c0909341SAndroid Build Coastguard Worker31: // base_y >= 32, using separate loads, loading v18 if we had to bail 2675*c0909341SAndroid Build Coastguard Worker // in the prologue. 
2676*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[0] 2677*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[2] 2678*c0909341SAndroid Build Coastguard Worker movi v21.16b, #2 2679*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[4] 2680*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw 2681*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[6] 2682*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw 2683*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[0], [x10] 2684*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[8] 2685*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw 2686*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[1], [x15] 2687*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[10] 2688*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw 2689*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[2], [x16] 2690*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[12] 2691*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw 2692*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[3], [x17] 2693*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[14] 2694*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw 2695*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw 2696*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[4], [x10] 2697*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw 2698*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[5], [x15] 2699*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // next base_y 2700*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[6], [x16] 2701*c0909341SAndroid Build Coastguard Worker ld1 {v18.h}[7], [x17] 2702*c0909341SAndroid Build Coastguard Worker 2703*c0909341SAndroid Build Coastguard Worker32: // base_y >= 32, using separate loads. 
2704*c0909341SAndroid Build Coastguard Worker cmp w5, #4 2705*c0909341SAndroid Build Coastguard Worker b.lt 34f 2706*c0909341SAndroid Build Coastguard Worker33: // h >= 4, preserving v18 from the previous round, loading v19-v22. 2707*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[0] 2708*c0909341SAndroid Build Coastguard Worker subs w5, w5, #4 2709*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[2] 2710*c0909341SAndroid Build Coastguard Worker movi v10.16b, #8 2711*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[4] 2712*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw 2713*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[6] 2714*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw 2715*c0909341SAndroid Build Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[0], [x10] 2716*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[8] 2717*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw 2718*c0909341SAndroid Build Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[1], [x15] 2719*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[10] 2720*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw 2721*c0909341SAndroid Build Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[2], [x16] 2722*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[12] 2723*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw 2724*c0909341SAndroid Build Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[3], [x17] 2725*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[14] 2726*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw 2727*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw 2728*c0909341SAndroid Build Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[4], [x10] 2729*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw 2730*c0909341SAndroid Build Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[5], [x15] 2731*c0909341SAndroid Build 
Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[6], [x16] 2732*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v10.16b // next base_y 2733*c0909341SAndroid Build Coastguard Worker ld4 {v19.h, v20.h, v21.h, v22.h}[7], [x17] 2734*c0909341SAndroid Build Coastguard Worker 2735*c0909341SAndroid Build Coastguard Worker umull v10.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2736*c0909341SAndroid Build Coastguard Worker umlal v10.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2737*c0909341SAndroid Build Coastguard Worker umull2 v11.4s, v18.8h, v28.8h 2738*c0909341SAndroid Build Coastguard Worker umlal2 v11.4s, v19.8h, v27.8h 2739*c0909341SAndroid Build Coastguard Worker umull v12.4s, v19.4h, v28.4h 2740*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v20.4h, v27.4h 2741*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v19.8h, v28.8h 2742*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v20.8h, v27.8h 2743*c0909341SAndroid Build Coastguard Worker 2744*c0909341SAndroid Build Coastguard Worker rshrn v10.4h, v10.4s, #6 2745*c0909341SAndroid Build Coastguard Worker rshrn2 v10.8h, v11.4s, #6 2746*c0909341SAndroid Build Coastguard Worker rshrn v11.4h, v12.4s, #6 2747*c0909341SAndroid Build Coastguard Worker rshrn2 v11.8h, v13.4s, #6 2748*c0909341SAndroid Build Coastguard Worker 2749*c0909341SAndroid Build Coastguard Worker umull v12.4s, v20.4h, v28.4h // left[base_y]*(64-frac_y) 2750*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v21.4h, v27.4h // + left[base_y+1]*frac_y 2751*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v20.8h, v28.8h 2752*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v21.8h, v27.8h 2753*c0909341SAndroid Build Coastguard Worker umull v14.4s, v21.4h, v28.4h 2754*c0909341SAndroid Build Coastguard Worker umlal v14.4s, v22.4h, v27.4h 2755*c0909341SAndroid Build Coastguard Worker umull2 v18.4s, v21.8h, v28.8h 2756*c0909341SAndroid Build Coastguard Worker umlal2 v18.4s, v22.8h, v27.8h 
2757*c0909341SAndroid Build Coastguard Worker 2758*c0909341SAndroid Build Coastguard Worker rshrn v12.4h, v12.4s, #6 2759*c0909341SAndroid Build Coastguard Worker rshrn2 v12.8h, v13.4s, #6 2760*c0909341SAndroid Build Coastguard Worker rshrn v13.4h, v14.4s, #6 2761*c0909341SAndroid Build Coastguard Worker rshrn2 v13.8h, v18.4s, #6 2762*c0909341SAndroid Build Coastguard Worker 2763*c0909341SAndroid Build Coastguard Worker st1 {v10.8h}, [x0], x1 2764*c0909341SAndroid Build Coastguard Worker cmp w5, #2 2765*c0909341SAndroid Build Coastguard Worker st1 {v11.8h}, [x13], x1 2766*c0909341SAndroid Build Coastguard Worker st1 {v12.8h}, [x0], x1 2767*c0909341SAndroid Build Coastguard Worker st1 {v13.8h}, [x13], x1 2768*c0909341SAndroid Build Coastguard Worker b.lt 4f 2769*c0909341SAndroid Build Coastguard Worker mov v18.16b, v22.16b 2770*c0909341SAndroid Build Coastguard Worker b.gt 33b 2771*c0909341SAndroid Build Coastguard Worker 2772*c0909341SAndroid Build Coastguard Worker34: // h == 2, preserving v18 from the previous round, loading v19-v20. 
2773*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[0] 2774*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[2] 2775*c0909341SAndroid Build Coastguard Worker movi v21.16b, #4 2776*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[4] 2777*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw 2778*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[6] 2779*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw 2780*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[0], [x10] 2781*c0909341SAndroid Build Coastguard Worker smov w10, v29.b[8] 2782*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw 2783*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[1], [x15] 2784*c0909341SAndroid Build Coastguard Worker smov w15, v29.b[10] 2785*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw 2786*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[2], [x16] 2787*c0909341SAndroid Build Coastguard Worker smov w16, v29.b[12] 2788*c0909341SAndroid Build Coastguard Worker add x10, x3, w10, sxtw 2789*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[3], [x17] 2790*c0909341SAndroid Build Coastguard Worker smov w17, v29.b[14] 2791*c0909341SAndroid Build Coastguard Worker add x15, x3, w15, sxtw 2792*c0909341SAndroid Build Coastguard Worker add x16, x3, w16, sxtw 2793*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[4], [x10] 2794*c0909341SAndroid Build Coastguard Worker add x17, x3, w17, sxtw 2795*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[5], [x15] 2796*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[6], [x16] 2797*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // next base_y 2798*c0909341SAndroid Build Coastguard Worker ld2 {v19.h, v20.h}[7], [x17] 2799*c0909341SAndroid Build Coastguard Worker 2800*c0909341SAndroid Build Coastguard Worker umull v10.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2801*c0909341SAndroid 
Build Coastguard Worker umlal v10.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2802*c0909341SAndroid Build Coastguard Worker umull2 v11.4s, v18.8h, v28.8h 2803*c0909341SAndroid Build Coastguard Worker umlal2 v11.4s, v19.8h, v27.8h 2804*c0909341SAndroid Build Coastguard Worker umull v12.4s, v19.4h, v28.4h 2805*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v20.4h, v27.4h 2806*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v19.8h, v28.8h 2807*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v20.8h, v27.8h 2808*c0909341SAndroid Build Coastguard Worker 2809*c0909341SAndroid Build Coastguard Worker rshrn v10.4h, v10.4s, #6 2810*c0909341SAndroid Build Coastguard Worker rshrn2 v10.8h, v11.4s, #6 2811*c0909341SAndroid Build Coastguard Worker rshrn v11.4h, v12.4s, #6 2812*c0909341SAndroid Build Coastguard Worker rshrn2 v11.8h, v13.4s, #6 2813*c0909341SAndroid Build Coastguard Worker 2814*c0909341SAndroid Build Coastguard Worker st1 {v10.8h}, [x0], x1 2815*c0909341SAndroid Build Coastguard Worker st1 {v11.8h}, [x13], x1 2816*c0909341SAndroid Build Coastguard Worker // The h==2 case only happens once at the end, if at all. 
2817*c0909341SAndroid Build Coastguard Worker 2818*c0909341SAndroid Build Coastguard Worker4: 2819*c0909341SAndroid Build Coastguard Worker subs w4, w4, #8 2820*c0909341SAndroid Build Coastguard Worker b.le 9f 2821*c0909341SAndroid Build Coastguard Worker 2822*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 2823*c0909341SAndroid Build Coastguard Worker msub x0, x1, x12, x0 // ptr -= h * stride 2824*c0909341SAndroid Build Coastguard Worker msub x13, x1, x12, x13 2825*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 2826*c0909341SAndroid Build Coastguard Worker add x0, x0, #16 2827*c0909341SAndroid Build Coastguard Worker add x13, x13, #16 2828*c0909341SAndroid Build Coastguard Worker mov w5, w12 // reset h 2829*c0909341SAndroid Build Coastguard Worker b 1b 2830*c0909341SAndroid Build Coastguard Worker 2831*c0909341SAndroid Build Coastguard Worker9: 2832*c0909341SAndroid Build Coastguard Worker ldp d14, d15, [sp, #0x30] 2833*c0909341SAndroid Build Coastguard Worker ldp d12, d13, [sp, #0x20] 2834*c0909341SAndroid Build Coastguard Worker ldp d10, d11, [sp, #0x10] 2835*c0909341SAndroid Build Coastguard Worker ldp d8, d9, [sp], 0x40 2836*c0909341SAndroid Build Coastguard Worker ret 2837*c0909341SAndroid Build Coastguard Workerendfunc 2838*c0909341SAndroid Build Coastguard Worker // Dispatch table for the fill1 function that ends just above: each .word entry holds the offset of one numbered local label (640, 320, 160, 80, 40, in that order) measured from the start of ipred_z2_fill1_tbl itself. NOTE(review): the label names suggest block widths 64 down to 4, and the table is presumably indexed the same way as the ipred_dc_128 table at the top of the file (clz of the width minus 25); the code that indexes this table is outside the visible chunk, so confirm against the function prologue. 2839*c0909341SAndroid Build Coastguard Workerjumptable ipred_z2_fill1_tbl 2840*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_z2_fill1_tbl 2841*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_z2_fill1_tbl 2842*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_z2_fill1_tbl 2843*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_z2_fill1_tbl 2844*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_z2_fill1_tbl 2845*c0909341SAndroid Build Coastguard Workerendjumptable 2846*c0909341SAndroid Build Coastguard Worker 2847*c0909341SAndroid Build Coastguard Workerfunction ipred_z2_fill2_16bpc_neon, export=1 2848*c0909341SAndroid Build
Coastguard Worker cmp w4, #8 2849*c0909341SAndroid Build Coastguard Worker mov w8, #(2 << 6) // xpos = 2 << 6 2850*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2851*c0909341SAndroid Build Coastguard Worker 2852*c0909341SAndroid Build Coastguard Worker movrel x11, increments 2853*c0909341SAndroid Build Coastguard Worker ld1 {v31.8h}, [x11] // increments 2854*c0909341SAndroid Build Coastguard Worker neg w7, w7 // -dy 2855*c0909341SAndroid Build Coastguard Worker b.eq 80f 2856*c0909341SAndroid Build Coastguard Worker 2857*c0909341SAndroid Build Coastguard Worker40: 2858*c0909341SAndroid Build Coastguard Worker dup v30.4h, w7 // -dy 2859*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 2860*c0909341SAndroid Build Coastguard Worker 2861*c0909341SAndroid Build Coastguard Worker mul v16.4h, v31.4h, v30.4h // {0,1,2,3}* -dy 2862*c0909341SAndroid Build Coastguard Worker movi v25.8h, #0x3e 2863*c0909341SAndroid Build Coastguard Worker add v30.4h, v16.4h, v30.4h // -= dy 2864*c0909341SAndroid Build Coastguard Worker 2865*c0909341SAndroid Build Coastguard Worker // For upsample_top, w <= 8 and h <= 8; we may need up to h+1 elements 2866*c0909341SAndroid Build Coastguard Worker // from left. 
2867*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h}, [x3] // left[] 2868*c0909341SAndroid Build Coastguard Worker 2869*c0909341SAndroid Build Coastguard Worker movi v26.8h, #64 2870*c0909341SAndroid Build Coastguard Worker movi v19.16b, #4 2871*c0909341SAndroid Build Coastguard Worker 2872*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 2873*c0909341SAndroid Build Coastguard Worker and v27.8b, v30.8b, v25.8b // frac_y 2874*c0909341SAndroid Build Coastguard Worker 2875*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 1 2876*c0909341SAndroid Build Coastguard Worker 2877*c0909341SAndroid Build Coastguard Worker movi v23.4h, #1, lsl #8 2878*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 2879*c0909341SAndroid Build Coastguard Worker zip1 v29.8b, v29.8b, v29.8b // duplicate elements 2880*c0909341SAndroid Build Coastguard Worker movi v17.8b, #2 2881*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v23.8b // 2*base, 2*base+1, ... 
2882*c0909341SAndroid Build Coastguard Worker 2883*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v17.8b // base_y + 1 (*2) 2884*c0909341SAndroid Build Coastguard Worker add v28.8b, v29.8b, v19.8b // base_y + 2 (*2) 2885*c0909341SAndroid Build Coastguard Worker 2886*c0909341SAndroid Build Coastguard Worker tbl v18.8b, {v0.16b}, v29.8b // left[base_y] 2887*c0909341SAndroid Build Coastguard Worker 2888*c0909341SAndroid Build Coastguard Worker trn1 v30.2d, v30.2d, v28.2d // base_y + 1, base_y + 2 2889*c0909341SAndroid Build Coastguard Worker 2890*c0909341SAndroid Build Coastguard Worker sub v28.4h, v26.4h, v27.4h // 64 - frac_y 2891*c0909341SAndroid Build Coastguard Worker 2892*c0909341SAndroid Build Coastguard Worker trn1 v31.2d, v31.2d, v31.2d // {0,1,2,3,0,1,2,3} 2893*c0909341SAndroid Build Coastguard Worker 2894*c0909341SAndroid Build Coastguard Worker trn1 v27.2d, v27.2d, v27.2d // frac_y 2895*c0909341SAndroid Build Coastguard Worker trn1 v28.2d, v28.2d, v28.2d // 64 - frac_y 2896*c0909341SAndroid Build Coastguard Worker 2897*c0909341SAndroid Build Coastguard Worker movi v29.16b, #4 2898*c0909341SAndroid Build Coastguard Worker add v31.8h, v31.8h, v31.8h // {0,2,4,6,0,2,4,6} 2899*c0909341SAndroid Build Coastguard Worker4: 2900*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2901*c0909341SAndroid Build Coastguard Worker dup v16.4h, w8 // xpos 2902*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2903*c0909341SAndroid Build Coastguard Worker cmp w9, #-8 // base_x <= -8 2904*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2905*c0909341SAndroid Build Coastguard Worker b.le 49f 2906*c0909341SAndroid Build Coastguard Worker 2907*c0909341SAndroid Build Coastguard Worker lsl w9, w9, #1 2908*c0909341SAndroid Build Coastguard Worker lsl w11, w11, #1 2909*c0909341SAndroid Build Coastguard Worker 2910*c0909341SAndroid Build Coastguard Worker dup v17.4h, w8 // xpos 2911*c0909341SAndroid Build Coastguard 
Worker 2912*c0909341SAndroid Build Coastguard Worker ldr q4, [x2, w9, sxtw] // top[base_x] 2913*c0909341SAndroid Build Coastguard Worker ldr q6, [x2, w11, sxtw] 2914*c0909341SAndroid Build Coastguard Worker 2915*c0909341SAndroid Build Coastguard Worker trn1 v16.2d, v16.2d, v17.2d // xpos 2916*c0909341SAndroid Build Coastguard Worker 2917*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b}, v30.16b // left[base_y+1], left[base_y+2] 2918*c0909341SAndroid Build Coastguard Worker 2919*c0909341SAndroid Build Coastguard Worker sshr v20.8h, v16.8h, #6 // first base_x for each row 2920*c0909341SAndroid Build Coastguard Worker 2921*c0909341SAndroid Build Coastguard Worker uzp2 v5.8h, v4.8h, v6.8h // top[base_x+1] 2922*c0909341SAndroid Build Coastguard Worker uzp1 v4.8h, v4.8h, v6.8h // top[base_x] 2923*c0909341SAndroid Build Coastguard Worker 2924*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 2925*c0909341SAndroid Build Coastguard Worker 2926*c0909341SAndroid Build Coastguard Worker trn1 v18.2d, v18.2d, v19.2d // left[base_y], left[base_y+1] 2927*c0909341SAndroid Build Coastguard Worker 2928*c0909341SAndroid Build Coastguard Worker sub v17.8h, v26.8h, v16.8h // 64 - frac_x 2929*c0909341SAndroid Build Coastguard Worker 2930*c0909341SAndroid Build Coastguard Worker add v20.8h, v20.8h, v31.8h // actual base_x 2931*c0909341SAndroid Build Coastguard Worker 2932*c0909341SAndroid Build Coastguard Worker umull v21.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2933*c0909341SAndroid Build Coastguard Worker umlal v21.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 2934*c0909341SAndroid Build Coastguard Worker umull2 v22.4s, v18.8h, v28.8h 2935*c0909341SAndroid Build Coastguard Worker umlal2 v22.4s, v19.8h, v27.8h 2936*c0909341SAndroid Build Coastguard Worker 2937*c0909341SAndroid Build Coastguard Worker umull v23.4s, v4.4h, v17.4h // top[base_x]-*(64-frac_x) 2938*c0909341SAndroid Build Coastguard Worker umlal v23.4s, v5.4h, v16.4h 
// + top[base_x+1]*frac_x 2939*c0909341SAndroid Build Coastguard Worker umull2 v24.4s, v4.8h, v17.8h 2940*c0909341SAndroid Build Coastguard Worker umlal2 v24.4s, v5.8h, v16.8h 2941*c0909341SAndroid Build Coastguard Worker 2942*c0909341SAndroid Build Coastguard Worker cmge v20.8h, v20.8h, #0 2943*c0909341SAndroid Build Coastguard Worker 2944*c0909341SAndroid Build Coastguard Worker rshrn v21.4h, v21.4s, #6 2945*c0909341SAndroid Build Coastguard Worker rshrn2 v21.8h, v22.4s, #6 2946*c0909341SAndroid Build Coastguard Worker rshrn v22.4h, v23.4s, #6 2947*c0909341SAndroid Build Coastguard Worker rshrn2 v22.8h, v24.4s, #6 2948*c0909341SAndroid Build Coastguard Worker 2949*c0909341SAndroid Build Coastguard Worker bit v21.16b, v22.16b, v20.16b 2950*c0909341SAndroid Build Coastguard Worker 2951*c0909341SAndroid Build Coastguard Worker st1 {v21.d}[0], [x0], x1 2952*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2953*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2954*c0909341SAndroid Build Coastguard Worker st1 {v21.d}[1], [x0], x1 2955*c0909341SAndroid Build Coastguard Worker b.le 9f 2956*c0909341SAndroid Build Coastguard Worker 2957*c0909341SAndroid Build Coastguard Worker ext v18.16b, v19.16b, v19.16b, #8 2958*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v29.16b // base_y += 2 (*2) 2959*c0909341SAndroid Build Coastguard Worker b 4b 2960*c0909341SAndroid Build Coastguard Worker 2961*c0909341SAndroid Build Coastguard Worker49: 2962*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b}, v30.16b // left[base_y+1], left[base_y+2] 2963*c0909341SAndroid Build Coastguard Worker 2964*c0909341SAndroid Build Coastguard Worker trn1 v18.2d, v18.2d, v19.2d // left[base_y], left[base_y+1] 2965*c0909341SAndroid Build Coastguard Worker 2966*c0909341SAndroid Build Coastguard Worker umull v20.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 2967*c0909341SAndroid Build Coastguard Worker umlal v20.4s, v19.4h, v27.4h // + 
left[base_y+1]*frac_y 2968*c0909341SAndroid Build Coastguard Worker umull2 v21.4s, v18.8h, v28.8h 2969*c0909341SAndroid Build Coastguard Worker umlal2 v21.4s, v19.8h, v27.8h 2970*c0909341SAndroid Build Coastguard Worker 2971*c0909341SAndroid Build Coastguard Worker rshrn v20.4h, v20.4s, #6 2972*c0909341SAndroid Build Coastguard Worker rshrn2 v20.8h, v21.4s, #6 2973*c0909341SAndroid Build Coastguard Worker 2974*c0909341SAndroid Build Coastguard Worker st1 {v20.d}[0], [x0], x1 2975*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2976*c0909341SAndroid Build Coastguard Worker st1 {v20.d}[1], [x0], x1 2977*c0909341SAndroid Build Coastguard Worker b.le 9f 2978*c0909341SAndroid Build Coastguard Worker 2979*c0909341SAndroid Build Coastguard Worker ext v18.16b, v19.16b, v19.16b, #8 2980*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v29.16b // base_y += 2 (*2) 2981*c0909341SAndroid Build Coastguard Worker b 49b 2982*c0909341SAndroid Build Coastguard Worker 2983*c0909341SAndroid Build Coastguard Worker9: 2984*c0909341SAndroid Build Coastguard Worker ret 2985*c0909341SAndroid Build Coastguard Worker 2986*c0909341SAndroid Build Coastguard Worker80: 2987*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x40]! 
2988*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 2989*c0909341SAndroid Build Coastguard Worker stp d12, d13, [sp, #0x20] 2990*c0909341SAndroid Build Coastguard Worker stp d14, d15, [sp, #0x30] 2991*c0909341SAndroid Build Coastguard Worker 2992*c0909341SAndroid Build Coastguard Worker dup v18.8h, w7 // -dy 2993*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 2994*c0909341SAndroid Build Coastguard Worker 2995*c0909341SAndroid Build Coastguard Worker mul v16.8h, v31.8h, v18.8h // {0,1,2,3,4,5,6,7}* -dy 2996*c0909341SAndroid Build Coastguard Worker movi v25.8h, #0x3e 2997*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v18.8h // -= dy 2998*c0909341SAndroid Build Coastguard Worker 2999*c0909341SAndroid Build Coastguard Worker // For upsample_top, w <= 8 and h <= 8; we may need up to h+1 elements 3000*c0909341SAndroid Build Coastguard Worker // from left. 3001*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h}, [x3] // left[] 3002*c0909341SAndroid Build Coastguard Worker 3003*c0909341SAndroid Build Coastguard Worker movi v26.8h, #64 3004*c0909341SAndroid Build Coastguard Worker movi v19.16b, #4 3005*c0909341SAndroid Build Coastguard Worker 3006*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v16.8h, #6 // ypos >> 6 3007*c0909341SAndroid Build Coastguard Worker and v27.16b, v16.16b, v25.16b // frac_y 3008*c0909341SAndroid Build Coastguard Worker 3009*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 1 3010*c0909341SAndroid Build Coastguard Worker 3011*c0909341SAndroid Build Coastguard Worker movi v23.8h, #1, lsl #8 3012*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 3013*c0909341SAndroid Build Coastguard Worker zip1 v29.16b, v29.16b, v29.16b // duplicate elements 3014*c0909341SAndroid Build Coastguard Worker movi v17.16b, #2 3015*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v23.16b // 2*base, 2*base+1, ... 
3016*c0909341SAndroid Build Coastguard Worker 3017*c0909341SAndroid Build Coastguard Worker // Cut corners here; for the first row we don't expect to need to 3018*c0909341SAndroid Build Coastguard Worker // read outside of v0. 3019*c0909341SAndroid Build Coastguard Worker tbl v18.16b, {v0.16b}, v29.16b // left[base_y] 3020*c0909341SAndroid Build Coastguard Worker 3021*c0909341SAndroid Build Coastguard Worker add v30.16b, v29.16b, v19.16b // base_y + 2 (*2) 3022*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v17.16b // base_y + 1 (*2) 3023*c0909341SAndroid Build Coastguard Worker 3024*c0909341SAndroid Build Coastguard Worker sub v28.8h, v26.8h, v27.8h // 64 - frac_y 3025*c0909341SAndroid Build Coastguard Worker 3026*c0909341SAndroid Build Coastguard Worker movi v24.16b, #4 3027*c0909341SAndroid Build Coastguard Worker add v31.16b, v31.16b, v31.16b // {0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14} 3028*c0909341SAndroid Build Coastguard Worker8: 3029*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 3030*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 3031*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3032*c0909341SAndroid Build Coastguard Worker cmp w9, #-16 // base_x <= -16 3033*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 3034*c0909341SAndroid Build Coastguard Worker b.le 89f 3035*c0909341SAndroid Build Coastguard Worker 3036*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 3037*c0909341SAndroid Build Coastguard Worker 3038*c0909341SAndroid Build Coastguard Worker add x9, x2, w9, sxtw #1 3039*c0909341SAndroid Build Coastguard Worker add x11, x2, w11, sxtw #1 3040*c0909341SAndroid Build Coastguard Worker 3041*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x9] // top[base_x] 3042*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h, v7.8h}, [x11] 3043*c0909341SAndroid Build Coastguard Worker 3044*c0909341SAndroid Build Coastguard Worker tbl v19.16b, 
{v0.16b, v1.16b}, v29.16b // left[base_y+1] 3045*c0909341SAndroid Build Coastguard Worker 3046*c0909341SAndroid Build Coastguard Worker sshr v21.8h, v16.8h, #6 // first base_x 3047*c0909341SAndroid Build Coastguard Worker sshr v22.8h, v17.8h, #6 3048*c0909341SAndroid Build Coastguard Worker 3049*c0909341SAndroid Build Coastguard Worker tbl v20.16b, {v0.16b, v1.16b}, v30.16b // left[base_y+2] 3050*c0909341SAndroid Build Coastguard Worker 3051*c0909341SAndroid Build Coastguard Worker uzp2 v2.8h, v4.8h, v5.8h // top[base_x+1] 3052*c0909341SAndroid Build Coastguard Worker uzp1 v4.8h, v4.8h, v5.8h // top[base_x] 3053*c0909341SAndroid Build Coastguard Worker uzp2 v3.8h, v6.8h, v7.8h 3054*c0909341SAndroid Build Coastguard Worker uzp1 v6.8h, v6.8h, v7.8h 3055*c0909341SAndroid Build Coastguard Worker mov v5.16b, v2.16b 3056*c0909341SAndroid Build Coastguard Worker mov v7.16b, v3.16b 3057*c0909341SAndroid Build Coastguard Worker 3058*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 3059*c0909341SAndroid Build Coastguard Worker and v17.16b, v17.16b, v25.16b 3060*c0909341SAndroid Build Coastguard Worker 3061*c0909341SAndroid Build Coastguard Worker umull v10.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 3062*c0909341SAndroid Build Coastguard Worker umlal v10.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 3063*c0909341SAndroid Build Coastguard Worker 3064*c0909341SAndroid Build Coastguard Worker sub v8.8h, v26.8h, v16.8h // 64 - frac_x 3065*c0909341SAndroid Build Coastguard Worker sub v9.8h, v26.8h, v17.8h 3066*c0909341SAndroid Build Coastguard Worker 3067*c0909341SAndroid Build Coastguard Worker umull2 v11.4s, v18.8h, v28.8h 3068*c0909341SAndroid Build Coastguard Worker umlal2 v11.4s, v19.8h, v27.8h 3069*c0909341SAndroid Build Coastguard Worker 3070*c0909341SAndroid Build Coastguard Worker add v21.8h, v21.8h, v31.8h // actual base_x 3071*c0909341SAndroid Build Coastguard Worker add v22.8h, v22.8h, v31.8h 3072*c0909341SAndroid Build Coastguard 
Worker 3073*c0909341SAndroid Build Coastguard Worker umull v12.4s, v19.4h, v28.4h 3074*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v20.4h, v27.4h 3075*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v19.8h, v28.8h 3076*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v20.8h, v27.8h 3077*c0909341SAndroid Build Coastguard Worker 3078*c0909341SAndroid Build Coastguard Worker rshrn v10.4h, v10.4s, #6 3079*c0909341SAndroid Build Coastguard Worker rshrn2 v10.8h, v11.4s, #6 3080*c0909341SAndroid Build Coastguard Worker rshrn v11.4h, v12.4s, #6 3081*c0909341SAndroid Build Coastguard Worker rshrn2 v11.8h, v13.4s, #6 3082*c0909341SAndroid Build Coastguard Worker 3083*c0909341SAndroid Build Coastguard Worker umull v12.4s, v4.4h, v8.4h // top[base_x]-*(64-frac_x) 3084*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v5.4h, v16.4h // + top[base_x+1]*frac_x 3085*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v4.8h, v8.8h 3086*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v5.8h, v16.8h 3087*c0909341SAndroid Build Coastguard Worker umull v14.4s, v6.4h, v9.4h 3088*c0909341SAndroid Build Coastguard Worker umlal v14.4s, v7.4h, v17.4h 3089*c0909341SAndroid Build Coastguard Worker umull2 v18.4s, v6.8h, v9.8h 3090*c0909341SAndroid Build Coastguard Worker umlal2 v18.4s, v7.8h, v17.8h 3091*c0909341SAndroid Build Coastguard Worker 3092*c0909341SAndroid Build Coastguard Worker cmge v21.8h, v21.8h, #0 3093*c0909341SAndroid Build Coastguard Worker cmge v22.8h, v22.8h, #0 3094*c0909341SAndroid Build Coastguard Worker 3095*c0909341SAndroid Build Coastguard Worker rshrn v12.4h, v12.4s, #6 3096*c0909341SAndroid Build Coastguard Worker rshrn2 v12.8h, v13.4s, #6 3097*c0909341SAndroid Build Coastguard Worker rshrn v13.4h, v14.4s, #6 3098*c0909341SAndroid Build Coastguard Worker rshrn2 v13.8h, v18.4s, #6 3099*c0909341SAndroid Build Coastguard Worker 3100*c0909341SAndroid Build Coastguard Worker bit v10.16b, v12.16b, v21.16b 3101*c0909341SAndroid 
Build Coastguard Worker bit v11.16b, v13.16b, v22.16b 3102*c0909341SAndroid Build Coastguard Worker 3103*c0909341SAndroid Build Coastguard Worker st1 {v10.8h}, [x0], x1 3104*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3105*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3106*c0909341SAndroid Build Coastguard Worker st1 {v11.8h}, [x0], x1 3107*c0909341SAndroid Build Coastguard Worker b.le 9f 3108*c0909341SAndroid Build Coastguard Worker 3109*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 3110*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 3111*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 2 (*2) 3112*c0909341SAndroid Build Coastguard Worker b 8b 3113*c0909341SAndroid Build Coastguard Worker 3114*c0909341SAndroid Build Coastguard Worker89: 3115*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b}, v29.16b // left[base_y+1] 3116*c0909341SAndroid Build Coastguard Worker tbl v20.16b, {v0.16b, v1.16b}, v30.16b // left[base_y+2] 3117*c0909341SAndroid Build Coastguard Worker 3118*c0909341SAndroid Build Coastguard Worker umull v4.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 3119*c0909341SAndroid Build Coastguard Worker umlal v4.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 3120*c0909341SAndroid Build Coastguard Worker umull2 v5.4s, v18.8h, v28.8h 3121*c0909341SAndroid Build Coastguard Worker umlal2 v5.4s, v19.8h, v27.8h 3122*c0909341SAndroid Build Coastguard Worker umull v6.4s, v19.4h, v28.4h 3123*c0909341SAndroid Build Coastguard Worker umlal v6.4s, v20.4h, v27.4h 3124*c0909341SAndroid Build Coastguard Worker umull2 v7.4s, v19.8h, v28.8h 3125*c0909341SAndroid Build Coastguard Worker umlal2 v7.4s, v20.8h, v27.8h 3126*c0909341SAndroid Build Coastguard Worker 3127*c0909341SAndroid Build Coastguard Worker rshrn v4.4h, v4.4s, #6 3128*c0909341SAndroid Build Coastguard Worker rshrn2 v4.8h, v5.4s, #6 3129*c0909341SAndroid Build 
Coastguard Worker rshrn v5.4h, v6.4s, #6 3130*c0909341SAndroid Build Coastguard Worker rshrn2 v5.8h, v7.4s, #6 3131*c0909341SAndroid Build Coastguard Worker 3132*c0909341SAndroid Build Coastguard Worker st1 {v4.8h}, [x0], x1 3133*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3134*c0909341SAndroid Build Coastguard Worker st1 {v5.8h}, [x0], x1 3135*c0909341SAndroid Build Coastguard Worker b.le 9f 3136*c0909341SAndroid Build Coastguard Worker 3137*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 3138*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 3139*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 2 (*2) 3140*c0909341SAndroid Build Coastguard Worker b 89b 3141*c0909341SAndroid Build Coastguard Worker 3142*c0909341SAndroid Build Coastguard Worker9: 3143*c0909341SAndroid Build Coastguard Worker ldp d14, d15, [sp, #0x30] 3144*c0909341SAndroid Build Coastguard Worker ldp d12, d13, [sp, #0x20] 3145*c0909341SAndroid Build Coastguard Worker ldp d10, d11, [sp, #0x10] 3146*c0909341SAndroid Build Coastguard Worker ldp d8, d9, [sp], 0x40 3147*c0909341SAndroid Build Coastguard Worker ret 3148*c0909341SAndroid Build Coastguard Workerendfunc 3149*c0909341SAndroid Build Coastguard Worker 3150*c0909341SAndroid Build Coastguard Workerfunction ipred_z2_fill3_16bpc_neon, export=1 3151*c0909341SAndroid Build Coastguard Worker cmp w4, #8 3152*c0909341SAndroid Build Coastguard Worker mov w8, #(1 << 6) // xpos = 1 << 6 3153*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3154*c0909341SAndroid Build Coastguard Worker 3155*c0909341SAndroid Build Coastguard Worker movrel x11, increments 3156*c0909341SAndroid Build Coastguard Worker ld1 {v31.8h}, [x11] // increments 3157*c0909341SAndroid Build Coastguard Worker neg w7, w7 // -dy 3158*c0909341SAndroid Build Coastguard Worker b.eq 80f 3159*c0909341SAndroid Build Coastguard Worker 3160*c0909341SAndroid Build Coastguard 
Worker40: 3161*c0909341SAndroid Build Coastguard Worker dup v30.4h, w7 // -dy 3162*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 3163*c0909341SAndroid Build Coastguard Worker 3164*c0909341SAndroid Build Coastguard Worker mul v16.4h, v31.4h, v30.4h // {0,1,2,3}* -dy 3165*c0909341SAndroid Build Coastguard Worker movi v25.8h, #0x3e 3166*c0909341SAndroid Build Coastguard Worker add v30.4h, v16.4h, v30.4h // -= dy 3167*c0909341SAndroid Build Coastguard Worker 3168*c0909341SAndroid Build Coastguard Worker // For upsample_left, w <= 8 and h <= 8; we may need up to 2*h+1 elements. 3169*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h}, [x3] // left[] 3170*c0909341SAndroid Build Coastguard Worker 3171*c0909341SAndroid Build Coastguard Worker movi v26.8h, #64 3172*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 3173*c0909341SAndroid Build Coastguard Worker 3174*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 3175*c0909341SAndroid Build Coastguard Worker and v27.8b, v30.8b, v25.8b // frac_y 3176*c0909341SAndroid Build Coastguard Worker 3177*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v19.8b // base_y = (ypos >> 6) + 2 3178*c0909341SAndroid Build Coastguard Worker 3179*c0909341SAndroid Build Coastguard Worker movi v23.4h, #1, lsl #8 3180*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 3181*c0909341SAndroid Build Coastguard Worker movi v19.16b, #4 3182*c0909341SAndroid Build Coastguard Worker zip1 v29.8b, v29.8b, v29.8b // duplicate elements 3183*c0909341SAndroid Build Coastguard Worker movi v17.8b, #2 3184*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v23.8b // 2*base, 2*base+1, ... 
3185*c0909341SAndroid Build Coastguard Worker 3186*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v17.8b // base_y + 1 (*2) 3187*c0909341SAndroid Build Coastguard Worker add v28.8b, v29.8b, v19.8b // base_y + 2 (*2) 3188*c0909341SAndroid Build Coastguard Worker 3189*c0909341SAndroid Build Coastguard Worker trn1 v31.2d, v31.2d, v31.2d // {0,1,2,3,0,1,2,3} 3190*c0909341SAndroid Build Coastguard Worker 3191*c0909341SAndroid Build Coastguard Worker add v24.8b, v30.8b, v19.8b // base_y + 3 (*2) 3192*c0909341SAndroid Build Coastguard Worker 3193*c0909341SAndroid Build Coastguard Worker trn1 v29.2d, v29.2d, v28.2d // base_y + 0, base_y + 2 3194*c0909341SAndroid Build Coastguard Worker trn1 v30.2d, v30.2d, v24.2d // base_y + 1, base_y + 3 3195*c0909341SAndroid Build Coastguard Worker 3196*c0909341SAndroid Build Coastguard Worker sub v28.4h, v26.4h, v27.4h // 64 - frac_y 3197*c0909341SAndroid Build Coastguard Worker 3198*c0909341SAndroid Build Coastguard Worker trn1 v27.2d, v27.2d, v27.2d // frac_y 3199*c0909341SAndroid Build Coastguard Worker trn1 v28.2d, v28.2d, v28.2d // 64 - frac_y 3200*c0909341SAndroid Build Coastguard Worker 3201*c0909341SAndroid Build Coastguard Worker movi v24.16b, #8 3202*c0909341SAndroid Build Coastguard Worker4: 3203*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 3204*c0909341SAndroid Build Coastguard Worker dup v16.4h, w8 // xpos 3205*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3206*c0909341SAndroid Build Coastguard Worker cmp w9, #-4 // base_x <= -4 3207*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 3208*c0909341SAndroid Build Coastguard Worker b.le 49f 3209*c0909341SAndroid Build Coastguard Worker 3210*c0909341SAndroid Build Coastguard Worker lsl w9, w9, #1 3211*c0909341SAndroid Build Coastguard Worker lsl w11, w11, #1 3212*c0909341SAndroid Build Coastguard Worker 3213*c0909341SAndroid Build Coastguard Worker dup v17.4h, w8 // xpos 3214*c0909341SAndroid Build 
Coastguard Worker 3215*c0909341SAndroid Build Coastguard Worker ldr q4, [x2, w9, sxtw] // top[base_x] 3216*c0909341SAndroid Build Coastguard Worker ldr q6, [x2, w11, sxtw] 3217*c0909341SAndroid Build Coastguard Worker 3218*c0909341SAndroid Build Coastguard Worker trn1 v16.2d, v16.2d, v17.2d // xpos 3219*c0909341SAndroid Build Coastguard Worker 3220*c0909341SAndroid Build Coastguard Worker tbl v18.16b, {v0.16b, v1.16b, v2.16b}, v29.16b // left[base_y+0], left[base_y+2] 3221*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+1], left[base_y+3] 3222*c0909341SAndroid Build Coastguard Worker 3223*c0909341SAndroid Build Coastguard Worker sshr v20.8h, v16.8h, #6 // first base_x for each row 3224*c0909341SAndroid Build Coastguard Worker 3225*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v4.16b, #2 // top[base_x+1] 3226*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v6.16b, #2 3227*c0909341SAndroid Build Coastguard Worker 3228*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 3229*c0909341SAndroid Build Coastguard Worker 3230*c0909341SAndroid Build Coastguard Worker trn1 v4.2d, v4.2d, v6.2d // top[base_x] 3231*c0909341SAndroid Build Coastguard Worker trn1 v5.2d, v5.2d, v7.2d // top[base_x+1] 3232*c0909341SAndroid Build Coastguard Worker 3233*c0909341SAndroid Build Coastguard Worker sub v17.8h, v26.8h, v16.8h // 64 - frac_x 3234*c0909341SAndroid Build Coastguard Worker 3235*c0909341SAndroid Build Coastguard Worker add v20.8h, v20.8h, v31.8h // actual base_x 3236*c0909341SAndroid Build Coastguard Worker 3237*c0909341SAndroid Build Coastguard Worker umull v21.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 3238*c0909341SAndroid Build Coastguard Worker umlal v21.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 3239*c0909341SAndroid Build Coastguard Worker umull2 v22.4s, v18.8h, v28.8h 3240*c0909341SAndroid Build Coastguard Worker umlal2 v22.4s, v19.8h, v27.8h 
3241*c0909341SAndroid Build Coastguard Worker 3242*c0909341SAndroid Build Coastguard Worker umull v23.4s, v4.4h, v17.4h // top[base_x]-*(64-frac_x) 3243*c0909341SAndroid Build Coastguard Worker umlal v23.4s, v5.4h, v16.4h // + top[base_x+1]*frac_x 3244*c0909341SAndroid Build Coastguard Worker umull2 v24.4s, v4.8h, v17.8h 3245*c0909341SAndroid Build Coastguard Worker umlal2 v24.4s, v5.8h, v16.8h 3246*c0909341SAndroid Build Coastguard Worker 3247*c0909341SAndroid Build Coastguard Worker cmge v20.8h, v20.8h, #0 3248*c0909341SAndroid Build Coastguard Worker 3249*c0909341SAndroid Build Coastguard Worker rshrn v21.4h, v21.4s, #6 3250*c0909341SAndroid Build Coastguard Worker rshrn2 v21.8h, v22.4s, #6 3251*c0909341SAndroid Build Coastguard Worker rshrn v22.4h, v23.4s, #6 3252*c0909341SAndroid Build Coastguard Worker rshrn2 v22.8h, v24.4s, #6 3253*c0909341SAndroid Build Coastguard Worker 3254*c0909341SAndroid Build Coastguard Worker movi v24.16b, #8 3255*c0909341SAndroid Build Coastguard Worker 3256*c0909341SAndroid Build Coastguard Worker bit v21.16b, v22.16b, v20.16b 3257*c0909341SAndroid Build Coastguard Worker 3258*c0909341SAndroid Build Coastguard Worker st1 {v21.d}[0], [x0], x1 3259*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3260*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3261*c0909341SAndroid Build Coastguard Worker st1 {v21.d}[1], [x0], x1 3262*c0909341SAndroid Build Coastguard Worker b.le 9f 3263*c0909341SAndroid Build Coastguard Worker 3264*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 4 (*2) 3265*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 4 (*2) 3266*c0909341SAndroid Build Coastguard Worker b 4b 3267*c0909341SAndroid Build Coastguard Worker 3268*c0909341SAndroid Build Coastguard Worker49: 3269*c0909341SAndroid Build Coastguard Worker tbl v18.16b, {v0.16b, v1.16b, v2.16b}, v29.16b // left[base_y+0], left[base_y+2] 3270*c0909341SAndroid Build 
Coastguard Worker tbl v19.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+1], left[base_y+3] 3271*c0909341SAndroid Build Coastguard Worker 3272*c0909341SAndroid Build Coastguard Worker umull v20.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 3273*c0909341SAndroid Build Coastguard Worker umlal v20.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 3274*c0909341SAndroid Build Coastguard Worker umull2 v21.4s, v18.8h, v28.8h 3275*c0909341SAndroid Build Coastguard Worker umlal2 v21.4s, v19.8h, v27.8h 3276*c0909341SAndroid Build Coastguard Worker 3277*c0909341SAndroid Build Coastguard Worker rshrn v20.4h, v20.4s, #6 3278*c0909341SAndroid Build Coastguard Worker rshrn2 v20.8h, v21.4s, #6 3279*c0909341SAndroid Build Coastguard Worker 3280*c0909341SAndroid Build Coastguard Worker st1 {v20.d}[0], [x0], x1 3281*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3282*c0909341SAndroid Build Coastguard Worker st1 {v20.d}[1], [x0], x1 3283*c0909341SAndroid Build Coastguard Worker b.le 9f 3284*c0909341SAndroid Build Coastguard Worker 3285*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 4 (*2) 3286*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 4 (*2) 3287*c0909341SAndroid Build Coastguard Worker b 49b 3288*c0909341SAndroid Build Coastguard Worker 3289*c0909341SAndroid Build Coastguard Worker9: 3290*c0909341SAndroid Build Coastguard Worker ret 3291*c0909341SAndroid Build Coastguard Worker 3292*c0909341SAndroid Build Coastguard Worker80: 3293*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x40]! 
3294*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 3295*c0909341SAndroid Build Coastguard Worker stp d12, d13, [sp, #0x20] 3296*c0909341SAndroid Build Coastguard Worker stp d14, d15, [sp, #0x30] 3297*c0909341SAndroid Build Coastguard Worker 3298*c0909341SAndroid Build Coastguard Worker dup v18.8h, w7 // -dy 3299*c0909341SAndroid Build Coastguard Worker movi v17.16b, #2 3300*c0909341SAndroid Build Coastguard Worker 3301*c0909341SAndroid Build Coastguard Worker mul v16.8h, v31.8h, v18.8h // {0,1,2,3,4,5,6,7}* -dy 3302*c0909341SAndroid Build Coastguard Worker movi v25.8h, #0x3e 3303*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v18.8h // -= dy 3304*c0909341SAndroid Build Coastguard Worker 3305*c0909341SAndroid Build Coastguard Worker // For upsample_left, w <= 8 and h <= 8; we may need up to 2*h+1 elements. 3306*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h}, [x3] // left[] 3307*c0909341SAndroid Build Coastguard Worker 3308*c0909341SAndroid Build Coastguard Worker movi v26.8h, #64 3309*c0909341SAndroid Build Coastguard Worker movi v19.16b, #4 3310*c0909341SAndroid Build Coastguard Worker 3311*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v16.8h, #6 // ypos >> 6 3312*c0909341SAndroid Build Coastguard Worker and v27.16b, v16.16b, v25.16b // frac_y 3313*c0909341SAndroid Build Coastguard Worker 3314*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 2 3315*c0909341SAndroid Build Coastguard Worker 3316*c0909341SAndroid Build Coastguard Worker movi v23.8h, #1, lsl #8 3317*c0909341SAndroid Build Coastguard Worker shl v29.8b, v29.8b, #1 // 2*base_y 3318*c0909341SAndroid Build Coastguard Worker mov v18.16b, v15.16b // left[0] 3319*c0909341SAndroid Build Coastguard Worker zip1 v29.16b, v29.16b, v29.16b // duplicate elements 3320*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v23.16b // 2*base, 2*base+1, ... 
3321*c0909341SAndroid Build Coastguard Worker 3322*c0909341SAndroid Build Coastguard Worker add v30.16b, v29.16b, v17.16b // base_y + 1 (*2) 3323*c0909341SAndroid Build Coastguard Worker 3324*c0909341SAndroid Build Coastguard Worker sub v28.8h, v26.8h, v27.8h // 64 - frac_y 3325*c0909341SAndroid Build Coastguard Worker 3326*c0909341SAndroid Build Coastguard Worker movi v24.16b, #4 3327*c0909341SAndroid Build Coastguard Worker8: 3328*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 3329*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 3330*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3331*c0909341SAndroid Build Coastguard Worker cmp w9, #-16 // base_x <= -16 3332*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 3333*c0909341SAndroid Build Coastguard Worker b.le 89f 3334*c0909341SAndroid Build Coastguard Worker 3335*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 3336*c0909341SAndroid Build Coastguard Worker 3337*c0909341SAndroid Build Coastguard Worker add x9, x2, w9, sxtw #1 3338*c0909341SAndroid Build Coastguard Worker add x11, x2, w11, sxtw #1 3339*c0909341SAndroid Build Coastguard Worker 3340*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x9] // top[base_x] 3341*c0909341SAndroid Build Coastguard Worker ld1 {v6.8h, v7.8h}, [x11] 3342*c0909341SAndroid Build Coastguard Worker 3343*c0909341SAndroid Build Coastguard Worker tbl v18.16b, {v0.16b, v1.16b, v2.16b}, v29.16b // left[base_y+0] 3344*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 3345*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+1] 3346*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b 3347*c0909341SAndroid Build Coastguard Worker 3348*c0909341SAndroid Build Coastguard Worker sshr v22.8h, v16.8h, #6 // first base_x 3349*c0909341SAndroid Build Coastguard Worker tbl v20.16b, {v0.16b, 
v1.16b, v2.16b}, v29.16b // left[base_y+2] 3350*c0909341SAndroid Build Coastguard Worker sshr v23.8h, v17.8h, #6 3351*c0909341SAndroid Build Coastguard Worker tbl v21.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+3] 3352*c0909341SAndroid Build Coastguard Worker 3353*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v5.16b, #2 // top[base_x+1] 3354*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v7.16b, #2 3355*c0909341SAndroid Build Coastguard Worker 3356*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 3357*c0909341SAndroid Build Coastguard Worker and v17.16b, v17.16b, v25.16b 3358*c0909341SAndroid Build Coastguard Worker 3359*c0909341SAndroid Build Coastguard Worker umull v10.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 3360*c0909341SAndroid Build Coastguard Worker umlal v10.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 3361*c0909341SAndroid Build Coastguard Worker 3362*c0909341SAndroid Build Coastguard Worker sub v8.8h, v26.8h, v16.8h // 64 - frac_x 3363*c0909341SAndroid Build Coastguard Worker sub v9.8h, v26.8h, v17.8h 3364*c0909341SAndroid Build Coastguard Worker 3365*c0909341SAndroid Build Coastguard Worker umull2 v11.4s, v18.8h, v28.8h 3366*c0909341SAndroid Build Coastguard Worker umlal2 v11.4s, v19.8h, v27.8h 3367*c0909341SAndroid Build Coastguard Worker 3368*c0909341SAndroid Build Coastguard Worker add v22.8h, v22.8h, v31.8h // actual base_x 3369*c0909341SAndroid Build Coastguard Worker add v23.8h, v23.8h, v31.8h 3370*c0909341SAndroid Build Coastguard Worker 3371*c0909341SAndroid Build Coastguard Worker umull v12.4s, v20.4h, v28.4h 3372*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v21.4h, v27.4h 3373*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v20.8h, v28.8h 3374*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v21.8h, v27.8h 3375*c0909341SAndroid Build Coastguard Worker 3376*c0909341SAndroid Build Coastguard Worker rshrn v10.4h, v10.4s, #6 3377*c0909341SAndroid 
Build Coastguard Worker rshrn2 v10.8h, v11.4s, #6 3378*c0909341SAndroid Build Coastguard Worker rshrn v11.4h, v12.4s, #6 3379*c0909341SAndroid Build Coastguard Worker rshrn2 v11.8h, v13.4s, #6 3380*c0909341SAndroid Build Coastguard Worker 3381*c0909341SAndroid Build Coastguard Worker umull v12.4s, v4.4h, v8.4h // top[base_x]-*(64-frac_x) 3382*c0909341SAndroid Build Coastguard Worker umlal v12.4s, v5.4h, v16.4h // + top[base_x+1]*frac_x 3383*c0909341SAndroid Build Coastguard Worker umull2 v13.4s, v4.8h, v8.8h 3384*c0909341SAndroid Build Coastguard Worker umlal2 v13.4s, v5.8h, v16.8h 3385*c0909341SAndroid Build Coastguard Worker umull v14.4s, v6.4h, v9.4h 3386*c0909341SAndroid Build Coastguard Worker umlal v14.4s, v7.4h, v17.4h 3387*c0909341SAndroid Build Coastguard Worker umull2 v18.4s, v6.8h, v9.8h 3388*c0909341SAndroid Build Coastguard Worker umlal2 v18.4s, v7.8h, v17.8h 3389*c0909341SAndroid Build Coastguard Worker 3390*c0909341SAndroid Build Coastguard Worker cmge v22.8h, v22.8h, #0 3391*c0909341SAndroid Build Coastguard Worker cmge v23.8h, v23.8h, #0 3392*c0909341SAndroid Build Coastguard Worker 3393*c0909341SAndroid Build Coastguard Worker rshrn v12.4h, v12.4s, #6 3394*c0909341SAndroid Build Coastguard Worker rshrn2 v12.8h, v13.4s, #6 3395*c0909341SAndroid Build Coastguard Worker rshrn v13.4h, v14.4s, #6 3396*c0909341SAndroid Build Coastguard Worker rshrn2 v13.8h, v18.4s, #6 3397*c0909341SAndroid Build Coastguard Worker 3398*c0909341SAndroid Build Coastguard Worker bit v10.16b, v12.16b, v22.16b 3399*c0909341SAndroid Build Coastguard Worker bit v11.16b, v13.16b, v23.16b 3400*c0909341SAndroid Build Coastguard Worker 3401*c0909341SAndroid Build Coastguard Worker st1 {v10.8h}, [x0], x1 3402*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3403*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3404*c0909341SAndroid Build Coastguard Worker st1 {v11.8h}, [x0], x1 3405*c0909341SAndroid Build Coastguard Worker b.le 9f 3406*c0909341SAndroid 
Build Coastguard Worker 3407*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 3408*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b 3409*c0909341SAndroid Build Coastguard Worker b 8b 3410*c0909341SAndroid Build Coastguard Worker 3411*c0909341SAndroid Build Coastguard Worker89: 3412*c0909341SAndroid Build Coastguard Worker tbl v18.16b, {v0.16b, v1.16b, v2.16b}, v29.16b // left[base_y+0] 3413*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 3414*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+1] 3415*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b 3416*c0909341SAndroid Build Coastguard Worker tbl v20.16b, {v0.16b, v1.16b, v2.16b}, v29.16b // left[base_y+2] 3417*c0909341SAndroid Build Coastguard Worker tbl v21.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+3] 3418*c0909341SAndroid Build Coastguard Worker 3419*c0909341SAndroid Build Coastguard Worker umull v4.4s, v18.4h, v28.4h // left[base_y]*(64-frac_y) 3420*c0909341SAndroid Build Coastguard Worker umlal v4.4s, v19.4h, v27.4h // + left[base_y+1]*frac_y 3421*c0909341SAndroid Build Coastguard Worker umull2 v5.4s, v18.8h, v28.8h 3422*c0909341SAndroid Build Coastguard Worker umlal2 v5.4s, v19.8h, v27.8h 3423*c0909341SAndroid Build Coastguard Worker umull v6.4s, v20.4h, v28.4h 3424*c0909341SAndroid Build Coastguard Worker umlal v6.4s, v21.4h, v27.4h 3425*c0909341SAndroid Build Coastguard Worker umull2 v7.4s, v20.8h, v28.8h 3426*c0909341SAndroid Build Coastguard Worker umlal2 v7.4s, v21.8h, v27.8h 3427*c0909341SAndroid Build Coastguard Worker 3428*c0909341SAndroid Build Coastguard Worker rshrn v4.4h, v4.4s, #6 3429*c0909341SAndroid Build Coastguard Worker rshrn2 v4.8h, v5.4s, #6 3430*c0909341SAndroid Build Coastguard Worker rshrn v5.4h, v6.4s, #6 3431*c0909341SAndroid Build Coastguard Worker rshrn2 v5.8h, v7.4s, #6 
3432*c0909341SAndroid Build Coastguard Worker 3433*c0909341SAndroid Build Coastguard Worker st1 {v4.8h}, [x0], x1 3434*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3435*c0909341SAndroid Build Coastguard Worker st1 {v5.8h}, [x0], x1 3436*c0909341SAndroid Build Coastguard Worker b.le 9f 3437*c0909341SAndroid Build Coastguard Worker 3438*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 (*2) 3439*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b 3440*c0909341SAndroid Build Coastguard Worker b 89b 3441*c0909341SAndroid Build Coastguard Worker 3442*c0909341SAndroid Build Coastguard Worker9: 3443*c0909341SAndroid Build Coastguard Worker ldp d14, d15, [sp, #0x30] 3444*c0909341SAndroid Build Coastguard Worker ldp d12, d13, [sp, #0x20] 3445*c0909341SAndroid Build Coastguard Worker ldp d10, d11, [sp, #0x10] 3446*c0909341SAndroid Build Coastguard Worker ldp d8, d9, [sp], 0x40 3447*c0909341SAndroid Build Coastguard Worker ret 3448*c0909341SAndroid Build Coastguard Workerendfunc 3449*c0909341SAndroid Build Coastguard Worker 3450*c0909341SAndroid Build Coastguard Worker// void ipred_z3_fill1_16bpc_neon(pixel *dst, const ptrdiff_t stride, 3451*c0909341SAndroid Build Coastguard Worker// const pixel *const left, 3452*c0909341SAndroid Build Coastguard Worker// const int width, const int height, 3453*c0909341SAndroid Build Coastguard Worker// const int dy, const int max_base_y); 3454*c0909341SAndroid Build Coastguard Workerfunction ipred_z3_fill1_16bpc_neon, export=1 3455*c0909341SAndroid Build Coastguard Worker clz w9, w4 3456*c0909341SAndroid Build Coastguard Worker movrel x8, ipred_z3_fill1_tbl 3457*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 3458*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x8, w9, uxtw #2] 3459*c0909341SAndroid Build Coastguard Worker add x10, x2, w6, uxtw #1 // left[max_base_y] 3460*c0909341SAndroid Build Coastguard Worker add x8, x8, x9 3461*c0909341SAndroid Build 
Coastguard Worker ld1r {v31.8h}, [x10] // padding 3462*c0909341SAndroid Build Coastguard Worker mov w7, w5 3463*c0909341SAndroid Build Coastguard Worker mov w15, #64 3464*c0909341SAndroid Build Coastguard Worker add x13, x0, x1 3465*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3466*c0909341SAndroid Build Coastguard Worker br x8 3467*c0909341SAndroid Build Coastguard Worker 3468*c0909341SAndroid Build Coastguard Worker40: 3469*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3470*c0909341SAndroid Build Coastguard Worker4: 3471*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 3472*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 3473*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 3474*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 3475*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 3476*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 3477*c0909341SAndroid Build Coastguard Worker b.ge ipred_z3_fill_padding_neon 3478*c0909341SAndroid Build Coastguard Worker lsl w8, w8, #1 3479*c0909341SAndroid Build Coastguard Worker lsl w10, w10, #1 3480*c0909341SAndroid Build Coastguard Worker ldr q0, [x2, w8, uxtw] // left[base] 3481*c0909341SAndroid Build Coastguard Worker ldr q2, [x2, w10, uxtw] 3482*c0909341SAndroid Build Coastguard Worker dup v4.8h, w9 // frac 3483*c0909341SAndroid Build Coastguard Worker dup v5.8h, w11 3484*c0909341SAndroid Build Coastguard Worker ext v1.16b, v0.16b, v0.16b, #2 // left[base+1] 3485*c0909341SAndroid Build Coastguard Worker ext v3.16b, v2.16b, v2.16b, #2 3486*c0909341SAndroid Build Coastguard Worker sub v6.4h, v1.4h, v0.4h // top[base+1]-top[base] 3487*c0909341SAndroid Build Coastguard Worker sub v7.4h, v3.4h, v2.4h 3488*c0909341SAndroid Build Coastguard Worker ushll v16.4s, v0.4h, #6 // top[base]*64 3489*c0909341SAndroid Build Coastguard Worker ushll v17.4s, v2.4h, #6 3490*c0909341SAndroid Build 
Coastguard Worker smlal v16.4s, v6.4h, v4.4h // + top[base+1]*frac 3491*c0909341SAndroid Build Coastguard Worker smlal v17.4s, v7.4h, v5.4h 3492*c0909341SAndroid Build Coastguard Worker rshrn v16.4h, v16.4s, #6 3493*c0909341SAndroid Build Coastguard Worker rshrn v17.4h, v17.4s, #6 3494*c0909341SAndroid Build Coastguard Worker subs w3, w3, #2 3495*c0909341SAndroid Build Coastguard Worker zip1 v18.8h, v16.8h, v17.8h 3496*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[0], [x0], x1 3497*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[1], [x13], x1 3498*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 3499*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[2], [x0] 3500*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[3], [x13] 3501*c0909341SAndroid Build Coastguard Worker b.le 9f 3502*c0909341SAndroid Build Coastguard Worker sub x0, x0, x1 // ptr -= 4 * (2*stride) 3503*c0909341SAndroid Build Coastguard Worker sub x13, x13, x1 3504*c0909341SAndroid Build Coastguard Worker add x0, x0, #4 3505*c0909341SAndroid Build Coastguard Worker add x13, x13, #4 3506*c0909341SAndroid Build Coastguard Worker b 4b 3507*c0909341SAndroid Build Coastguard Worker9: 3508*c0909341SAndroid Build Coastguard Worker ret 3509*c0909341SAndroid Build Coastguard Worker 3510*c0909341SAndroid Build Coastguard Worker80: 3511*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3512*c0909341SAndroid Build Coastguard Worker8: 3513*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 3514*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 3515*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 3516*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 3517*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 3518*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 3519*c0909341SAndroid Build Coastguard Worker b.ge ipred_z3_fill_padding_neon 3520*c0909341SAndroid 
Build Coastguard Worker add x8, x2, w8, uxtw #1 3521*c0909341SAndroid Build Coastguard Worker add x10, x2, w10, uxtw #1 3522*c0909341SAndroid Build Coastguard Worker dup v4.8h, w9 // frac 3523*c0909341SAndroid Build Coastguard Worker dup v5.8h, w11 3524*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h}, [x8] // left[base] 3525*c0909341SAndroid Build Coastguard Worker ld1 {v2.8h}, [x10] 3526*c0909341SAndroid Build Coastguard Worker sub w9, w15, w9 // 64 - frac 3527*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 3528*c0909341SAndroid Build Coastguard Worker ldr h1, [x8, #16] 3529*c0909341SAndroid Build Coastguard Worker ldr h3, [x10, #16] 3530*c0909341SAndroid Build Coastguard Worker dup v6.8h, w9 // 64 - frac 3531*c0909341SAndroid Build Coastguard Worker dup v7.8h, w11 3532*c0909341SAndroid Build Coastguard Worker ext v1.16b, v0.16b, v1.16b, #2 // left[base+1] 3533*c0909341SAndroid Build Coastguard Worker ext v3.16b, v2.16b, v3.16b, #2 3534*c0909341SAndroid Build Coastguard Worker umull v16.4s, v0.4h, v6.4h // left[base]*(64-frac) 3535*c0909341SAndroid Build Coastguard Worker umlal v16.4s, v1.4h, v4.4h // + left[base+1]*frac 3536*c0909341SAndroid Build Coastguard Worker umull2 v17.4s, v0.8h, v6.8h 3537*c0909341SAndroid Build Coastguard Worker umlal2 v17.4s, v1.8h, v4.8h 3538*c0909341SAndroid Build Coastguard Worker umull v18.4s, v2.4h, v7.4h 3539*c0909341SAndroid Build Coastguard Worker umlal v18.4s, v3.4h, v5.4h 3540*c0909341SAndroid Build Coastguard Worker umull2 v19.4s, v2.8h, v7.8h 3541*c0909341SAndroid Build Coastguard Worker umlal2 v19.4s, v3.8h, v5.8h 3542*c0909341SAndroid Build Coastguard Worker rshrn v16.4h, v16.4s, #6 3543*c0909341SAndroid Build Coastguard Worker rshrn2 v16.8h, v17.4s, #6 3544*c0909341SAndroid Build Coastguard Worker rshrn v17.4h, v18.4s, #6 3545*c0909341SAndroid Build Coastguard Worker rshrn2 v17.8h, v19.4s, #6 3546*c0909341SAndroid Build Coastguard Worker subs w3, w3, #2 3547*c0909341SAndroid Build Coastguard Worker zip1 
v18.8h, v16.8h, v17.8h 3548*c0909341SAndroid Build Coastguard Worker zip2 v19.8h, v16.8h, v17.8h 3549*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 3550*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[0], [x0], x1 3551*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[1], [x13], x1 3552*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[2], [x0], x1 3553*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[3], [x13], x1 3554*c0909341SAndroid Build Coastguard Worker st1 {v19.s}[0], [x0], x1 3555*c0909341SAndroid Build Coastguard Worker st1 {v19.s}[1], [x13], x1 3556*c0909341SAndroid Build Coastguard Worker st1 {v19.s}[2], [x0], x1 3557*c0909341SAndroid Build Coastguard Worker st1 {v19.s}[3], [x13], x1 3558*c0909341SAndroid Build Coastguard Worker b.le 9f 3559*c0909341SAndroid Build Coastguard Worker sub x0, x0, x1, lsl #2 // ptr -= 4 * (2*stride) 3560*c0909341SAndroid Build Coastguard Worker sub x13, x13, x1, lsl #2 3561*c0909341SAndroid Build Coastguard Worker add x0, x0, #4 3562*c0909341SAndroid Build Coastguard Worker add x13, x13, #4 3563*c0909341SAndroid Build Coastguard Worker b 8b 3564*c0909341SAndroid Build Coastguard Worker9: 3565*c0909341SAndroid Build Coastguard Worker ret 3566*c0909341SAndroid Build Coastguard Worker 3567*c0909341SAndroid Build Coastguard Worker160: 3568*c0909341SAndroid Build Coastguard Worker320: 3569*c0909341SAndroid Build Coastguard Worker640: 3570*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3571*c0909341SAndroid Build Coastguard Worker mov w12, w4 3572*c0909341SAndroid Build Coastguard Worker1: 3573*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 3574*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 3575*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // ypos += dy 3576*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_y 3577*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 3578*c0909341SAndroid 
Build Coastguard Worker and w11, w7, #0x3e // frac 3579*c0909341SAndroid Build Coastguard Worker b.ge ipred_z3_fill_padding_neon 3580*c0909341SAndroid Build Coastguard Worker add x8, x2, w8, uxtw #1 3581*c0909341SAndroid Build Coastguard Worker add x10, x2, w10, uxtw #1 3582*c0909341SAndroid Build Coastguard Worker dup v6.8h, w9 // frac 3583*c0909341SAndroid Build Coastguard Worker dup v7.8h, w11 3584*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h}, [x8], #48 // left[base] 3585*c0909341SAndroid Build Coastguard Worker ld1 {v3.8h, v4.8h, v5.8h}, [x10], #48 3586*c0909341SAndroid Build Coastguard Worker sub w9, w15, w9 // 64 - frac 3587*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 3588*c0909341SAndroid Build Coastguard Worker dup v16.8h, w9 // 64 - frac 3589*c0909341SAndroid Build Coastguard Worker dup v17.8h, w11 3590*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // ypos += dy 3591*c0909341SAndroid Build Coastguard Worker2: 3592*c0909341SAndroid Build Coastguard Worker ext v18.16b, v0.16b, v1.16b, #2 // left[base+1] 3593*c0909341SAndroid Build Coastguard Worker ext v19.16b, v1.16b, v2.16b, #2 3594*c0909341SAndroid Build Coastguard Worker ext v20.16b, v3.16b, v4.16b, #2 3595*c0909341SAndroid Build Coastguard Worker ext v21.16b, v4.16b, v5.16b, #2 3596*c0909341SAndroid Build Coastguard Worker subs w4, w4, #16 3597*c0909341SAndroid Build Coastguard Worker umull v22.4s, v0.4h, v16.4h // left[base]*(64-frac) 3598*c0909341SAndroid Build Coastguard Worker umlal v22.4s, v18.4h, v6.4h // + left[base+1]*frac 3599*c0909341SAndroid Build Coastguard Worker umull2 v23.4s, v0.8h, v16.8h 3600*c0909341SAndroid Build Coastguard Worker umlal2 v23.4s, v18.8h, v6.8h 3601*c0909341SAndroid Build Coastguard Worker umull v24.4s, v1.4h, v16.4h 3602*c0909341SAndroid Build Coastguard Worker umlal v24.4s, v19.4h, v6.4h 3603*c0909341SAndroid Build Coastguard Worker umull2 v25.4s, v1.8h, v16.8h 3604*c0909341SAndroid Build Coastguard Worker umlal2 v25.4s, 
v19.8h, v6.8h 3605*c0909341SAndroid Build Coastguard Worker umull v26.4s, v3.4h, v17.4h 3606*c0909341SAndroid Build Coastguard Worker umlal v26.4s, v20.4h, v7.4h 3607*c0909341SAndroid Build Coastguard Worker umull2 v27.4s, v3.8h, v17.8h 3608*c0909341SAndroid Build Coastguard Worker umlal2 v27.4s, v20.8h, v7.8h 3609*c0909341SAndroid Build Coastguard Worker umull v28.4s, v4.4h, v17.4h 3610*c0909341SAndroid Build Coastguard Worker umlal v28.4s, v21.4h, v7.4h 3611*c0909341SAndroid Build Coastguard Worker umull2 v29.4s, v4.8h, v17.8h 3612*c0909341SAndroid Build Coastguard Worker umlal2 v29.4s, v21.8h, v7.8h 3613*c0909341SAndroid Build Coastguard Worker rshrn v22.4h, v22.4s, #6 3614*c0909341SAndroid Build Coastguard Worker rshrn2 v22.8h, v23.4s, #6 3615*c0909341SAndroid Build Coastguard Worker rshrn v23.4h, v24.4s, #6 3616*c0909341SAndroid Build Coastguard Worker rshrn2 v23.8h, v25.4s, #6 3617*c0909341SAndroid Build Coastguard Worker rshrn v24.4h, v26.4s, #6 3618*c0909341SAndroid Build Coastguard Worker rshrn2 v24.8h, v27.4s, #6 3619*c0909341SAndroid Build Coastguard Worker rshrn v25.4h, v28.4s, #6 3620*c0909341SAndroid Build Coastguard Worker rshrn2 v25.8h, v29.4s, #6 3621*c0909341SAndroid Build Coastguard Worker zip1 v18.8h, v22.8h, v24.8h 3622*c0909341SAndroid Build Coastguard Worker zip2 v19.8h, v22.8h, v24.8h 3623*c0909341SAndroid Build Coastguard Worker zip1 v20.8h, v23.8h, v25.8h 3624*c0909341SAndroid Build Coastguard Worker zip2 v21.8h, v23.8h, v25.8h 3625*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[0], [x0], x1 3626*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[1], [x13], x1 3627*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[2], [x0], x1 3628*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[3], [x13], x1 3629*c0909341SAndroid Build Coastguard Worker st1 {v19.s}[0], [x0], x1 3630*c0909341SAndroid Build Coastguard Worker st1 {v19.s}[1], [x13], x1 3631*c0909341SAndroid Build Coastguard Worker st1 {v19.s}[2], [x0], x1 3632*c0909341SAndroid 
Build Coastguard Worker st1 {v19.s}[3], [x13], x1 3633*c0909341SAndroid Build Coastguard Worker st1 {v20.s}[0], [x0], x1 3634*c0909341SAndroid Build Coastguard Worker st1 {v20.s}[1], [x13], x1 3635*c0909341SAndroid Build Coastguard Worker st1 {v20.s}[2], [x0], x1 3636*c0909341SAndroid Build Coastguard Worker st1 {v20.s}[3], [x13], x1 3637*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[0], [x0], x1 3638*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[1], [x13], x1 3639*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[2], [x0], x1 3640*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[3], [x13], x1 3641*c0909341SAndroid Build Coastguard Worker b.le 3f 3642*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 3643*c0909341SAndroid Build Coastguard Worker ld1 {v1.8h, v2.8h}, [x8], #32 // left[base] 3644*c0909341SAndroid Build Coastguard Worker mov v3.16b, v5.16b 3645*c0909341SAndroid Build Coastguard Worker ld1 {v4.8h, v5.8h}, [x10], #32 3646*c0909341SAndroid Build Coastguard Worker b 2b 3647*c0909341SAndroid Build Coastguard Worker 3648*c0909341SAndroid Build Coastguard Worker3: 3649*c0909341SAndroid Build Coastguard Worker subs w3, w3, #2 3650*c0909341SAndroid Build Coastguard Worker b.le 9f 3651*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 3652*c0909341SAndroid Build Coastguard Worker msub x0, x1, x12, x0 // ptr -= h * stride 3653*c0909341SAndroid Build Coastguard Worker msub x13, x1, x12, x13 3654*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3655*c0909341SAndroid Build Coastguard Worker add x0, x0, #4 3656*c0909341SAndroid Build Coastguard Worker add x13, x13, #4 3657*c0909341SAndroid Build Coastguard Worker mov w4, w12 3658*c0909341SAndroid Build Coastguard Worker b 1b 3659*c0909341SAndroid Build Coastguard Worker9: 3660*c0909341SAndroid Build Coastguard Worker ret 3661*c0909341SAndroid Build Coastguard Workerendfunc 3662*c0909341SAndroid Build Coastguard Worker 3663*c0909341SAndroid Build Coastguard Workerjumptable 
ipred_z3_fill1_tbl 3664*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_z3_fill1_tbl 3665*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_z3_fill1_tbl 3666*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_z3_fill1_tbl 3667*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_z3_fill1_tbl 3668*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_z3_fill1_tbl 3669*c0909341SAndroid Build Coastguard Workerendjumptable 3670*c0909341SAndroid Build Coastguard Worker 3671*c0909341SAndroid Build Coastguard Workerfunction ipred_z3_fill_padding_neon, export=0 3672*c0909341SAndroid Build Coastguard Worker cmp w3, #8 3673*c0909341SAndroid Build Coastguard Worker movrel x8, ipred_z3_fill_padding_tbl 3674*c0909341SAndroid Build Coastguard Worker b.gt ipred_z3_fill_padding_wide 3675*c0909341SAndroid Build Coastguard Worker // w3 = remaining width, w4 = constant height 3676*c0909341SAndroid Build Coastguard Worker mov w12, w4 3677*c0909341SAndroid Build Coastguard Worker 3678*c0909341SAndroid Build Coastguard Worker1: 3679*c0909341SAndroid Build Coastguard Worker // Fill a WxH rectangle with padding. W can be any number; 3680*c0909341SAndroid Build Coastguard Worker // this fills the exact width by filling in the largest 3681*c0909341SAndroid Build Coastguard Worker // power of two in the remaining width, and repeating. 
3682*c0909341SAndroid Build Coastguard Worker clz w9, w3 3683*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 3684*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x8, w9, uxtw #2] 3685*c0909341SAndroid Build Coastguard Worker add x9, x8, x9 3686*c0909341SAndroid Build Coastguard Worker br x9 3687*c0909341SAndroid Build Coastguard Worker 3688*c0909341SAndroid Build Coastguard Worker20: 3689*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3690*c0909341SAndroid Build Coastguard Worker2: 3691*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x0], x1 3692*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 3693*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x13], x1 3694*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x0], x1 3695*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x13], x1 3696*c0909341SAndroid Build Coastguard Worker b.gt 2b 3697*c0909341SAndroid Build Coastguard Worker subs w3, w3, #2 3698*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 3699*c0909341SAndroid Build Coastguard Worker msub x0, x1, x12, x0 // ptr -= h * stride 3700*c0909341SAndroid Build Coastguard Worker msub x13, x1, x12, x13 3701*c0909341SAndroid Build Coastguard Worker b.le 9f 3702*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3703*c0909341SAndroid Build Coastguard Worker add x0, x0, #4 3704*c0909341SAndroid Build Coastguard Worker add x13, x13, #4 3705*c0909341SAndroid Build Coastguard Worker mov w4, w12 3706*c0909341SAndroid Build Coastguard Worker b 1b 3707*c0909341SAndroid Build Coastguard Worker 3708*c0909341SAndroid Build Coastguard Worker40: 3709*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3710*c0909341SAndroid Build Coastguard Worker4: 3711*c0909341SAndroid Build Coastguard Worker st1 {v31.4h}, [x0], x1 3712*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 3713*c0909341SAndroid Build Coastguard Worker st1 {v31.4h}, [x13], x1 3714*c0909341SAndroid Build 
Coastguard Worker st1 {v31.4h}, [x0], x1 3715*c0909341SAndroid Build Coastguard Worker st1 {v31.4h}, [x13], x1 3716*c0909341SAndroid Build Coastguard Worker b.gt 4b 3717*c0909341SAndroid Build Coastguard Worker subs w3, w3, #4 3718*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 3719*c0909341SAndroid Build Coastguard Worker msub x0, x1, x12, x0 // ptr -= h * stride 3720*c0909341SAndroid Build Coastguard Worker msub x13, x1, x12, x13 3721*c0909341SAndroid Build Coastguard Worker b.le 9f 3722*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3723*c0909341SAndroid Build Coastguard Worker add x0, x0, #8 3724*c0909341SAndroid Build Coastguard Worker add x13, x13, #8 3725*c0909341SAndroid Build Coastguard Worker mov w4, w12 3726*c0909341SAndroid Build Coastguard Worker b 1b 3727*c0909341SAndroid Build Coastguard Worker 3728*c0909341SAndroid Build Coastguard Worker80: 3729*c0909341SAndroid Build Coastguard Worker160: 3730*c0909341SAndroid Build Coastguard Worker320: 3731*c0909341SAndroid Build Coastguard Worker640: 3732*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3733*c0909341SAndroid Build Coastguard Worker8: 3734*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, [x0], x1 3735*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 3736*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, [x13], x1 3737*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, [x0], x1 3738*c0909341SAndroid Build Coastguard Worker st1 {v31.8h}, [x13], x1 3739*c0909341SAndroid Build Coastguard Worker b.gt 8b 3740*c0909341SAndroid Build Coastguard Worker subs w3, w3, #8 3741*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 3742*c0909341SAndroid Build Coastguard Worker msub x0, x1, x12, x0 // ptr -= h * stride 3743*c0909341SAndroid Build Coastguard Worker msub x13, x1, x12, x13 3744*c0909341SAndroid Build Coastguard Worker b.le 9f 3745*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3746*c0909341SAndroid Build Coastguard Worker add 
x0, x0, #16 3747*c0909341SAndroid Build Coastguard Worker add x13, x13, #16 3748*c0909341SAndroid Build Coastguard Worker mov w4, w12 3749*c0909341SAndroid Build Coastguard Worker b 1b 3750*c0909341SAndroid Build Coastguard Worker 3751*c0909341SAndroid Build Coastguard Worker9: 3752*c0909341SAndroid Build Coastguard Worker ret 3753*c0909341SAndroid Build Coastguard Workerendfunc 3754*c0909341SAndroid Build Coastguard Worker 3755*c0909341SAndroid Build Coastguard Workerjumptable ipred_z3_fill_padding_tbl 3756*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_z3_fill_padding_tbl 3757*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_z3_fill_padding_tbl 3758*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_z3_fill_padding_tbl 3759*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_z3_fill_padding_tbl 3760*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_z3_fill_padding_tbl 3761*c0909341SAndroid Build Coastguard Worker .word 20b - ipred_z3_fill_padding_tbl 3762*c0909341SAndroid Build Coastguard Workerendjumptable 3763*c0909341SAndroid Build Coastguard Worker 3764*c0909341SAndroid Build Coastguard Workerfunction ipred_z3_fill_padding_wide 3765*c0909341SAndroid Build Coastguard Worker // Fill a WxH rectangle with padding, with W > 8. 3766*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 3767*c0909341SAndroid Build Coastguard Worker mov w12, w3 3768*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw #1 3769*c0909341SAndroid Build Coastguard Worker1: 3770*c0909341SAndroid Build Coastguard Worker ands w5, w3, #7 3771*c0909341SAndroid Build Coastguard Worker b.eq 2f 3772*c0909341SAndroid Build Coastguard Worker // If the width isn't aligned to 8, first do one 8 pixel write 3773*c0909341SAndroid Build Coastguard Worker // and align the start pointer. 
// Tail of the padding-fill routine that precedes ipred_z3_fill2_16bpc_neon.
// NOTE(review): the head of this routine lies outside the reviewed range;
// the instructions below are carried over unchanged.
        sub             w3,  w3,  w5
        st1             {v31.8h}, [x0]
        add             x0,  x0,  w5,  uxtw #1
2:
        // Fill the rest of the line with aligned 8 pixel writes.
        subs            w3,  w3,  #8
        st1             {v31.8h}, [x0], #16
        b.gt            2b
        subs            w4,  w4,  #1
        add             x0,  x0,  x1
        b.le            9f
        mov             w3,  w12
        b               1b
9:
        ret
endfunc

// ipred_z3_fill2_16bpc_neon
//
// Interpolates between neighbouring edge samples, stepping through the edge
// two samples at a time (the uzp1/uzp2 pairs split every load into even and
// odd samples). Two output columns are produced per loop iteration.
//
// Register use as read from the code (the C prototype is not visible here,
// so the argument names are assumptions to be confirmed against the caller):
//   x0 = dst, x1 = stride in bytes, x2 = edge buffer of 16-bit pixels,
//   w3 = width, w4 = height (only the 4 and 8 cases are handled),
//   w5 = dy (position increment per column, 6 fractional bits),
//   w6 = max_base_y, a pixel index (it is compared against xpos >> 6).
//
// When the base index reaches w6 the routine tail-branches to
// ipred_z3_fill_padding_neon, which expects the padding pixel replicated in
// v31.8h.
function ipred_z3_fill2_16bpc_neon, export=1
        cmp             w4,  #8
        // w6 counts 16-bit pixels, so it is scaled by 2 to form the byte
        // address, and the pixel is replicated as a halfword. The earlier
        // unscaled byte load picked up a single byte from the wrong address.
        add             x10, x2,  w6,  uxtw #1      // left[max_base_y]
        ld1r            {v31.8h}, [x10]             // padding
        mov             w7,  w5                     // xpos = dy
        mov             w15, #64
        add             x13, x0,  x1                // x13 = second row
        lsl             x1,  x1,  #1                // step two rows at a time
        b.eq            8f

4:      // h == 4
        lsr             w8,  w7,  #6                // base
        and             w9,  w7,  #0x3e             // frac
        add             w7,  w7,  w5                // xpos += dx
        cmp             w8,  w6                     // base >= max_base_x
        lsr             w10, w7,  #6                // base
        and             w11, w7,  #0x3e             // frac
        b.ge            ipred_z3_fill_padding_neon
        lsl             w8,  w8,  #1                // pixel index -> byte offset
        lsl             w10, w10, #1
        ldr             q0,  [x2, w8, uxtw]         // top[base]
        ldr             q2,  [x2, w10, uxtw]
        dup             v4.4h,   w9                 // frac
        dup             v5.4h,   w11
        uzp2            v1.8h,   v0.8h,   v0.8h     // top[base+1]
        uzp1            v0.8h,   v0.8h,   v0.8h     // top[base]
        uzp2            v3.8h,   v2.8h,   v2.8h
        uzp1            v2.8h,   v2.8h,   v2.8h
        sub             v6.4h,   v1.4h,   v0.4h     // top[base+1]-top[base]
        sub             v7.4h,   v3.4h,   v2.4h
        ushll           v16.4s,  v0.4h,   #6        // top[base]*64
        ushll           v17.4s,  v2.4h,   #6
        smlal           v16.4s,  v6.4h,   v4.4h     // + top[base+1]*frac
        smlal           v17.4s,  v7.4h,   v5.4h
        rshrn           v16.4h,  v16.4s,  #6
        rshrn           v17.4h,  v17.4s,  #6
        subs            w3,  w3,  #2
        // Interleave the two columns so that each 32-bit lane holds one
        // two-pixel row segment.
        zip1            v18.8h,  v16.8h,  v17.8h
        st1             {v18.s}[0], [x0],  x1
        st1             {v18.s}[1], [x13], x1
        add             w7,  w7,  w5                // xpos += dx
        st1             {v18.s}[2], [x0]
        st1             {v18.s}[3], [x13]
        b.le            9f
        sub             x0,  x0,  x1                // ptr -= 1 * (2*stride)
        sub             x13, x13, x1
        add             x0,  x0,  #4                // advance two 16-bit pixels
        add             x13, x13, #4
        b               4b
9:
        ret

8:      // h == 8
        lsr             w8,  w7,  #6                // base
        and             w9,  w7,  #0x3e             // frac
        add             w7,  w7,  w5                // xpos += dx
        cmp             w8,  w6                     // base >= max_base_x
        lsr             w10, w7,  #6                // base
        and             w11, w7,  #0x3e             // frac
        b.ge            ipred_z3_fill_padding_neon
        add             x8,  x2,  w8,  uxtw #1
        add             x10, x2,  w10, uxtw #1
        dup             v4.8h,   w9                 // frac
        dup             v5.8h,   w11
        ld1             {v0.8h, v1.8h}, [x8]        // top[base]
        ld1             {v2.8h, v3.8h}, [x10]
        sub             w9,  w15, w9                // 64 - frac
        sub             w11, w15, w11
        dup             v6.8h,   w9                 // 64 - frac
        dup             v7.8h,   w11
        uzp2            v20.8h,  v0.8h,   v1.8h     // top[base+1]
        uzp1            v0.8h,   v0.8h,   v1.8h     // top[base]
        uzp2            v21.8h,  v2.8h,   v3.8h
        uzp1            v2.8h,   v2.8h,   v3.8h
        umull           v16.4s,  v0.4h,   v6.4h     // top[base]*(64-frac)
        umlal           v16.4s,  v20.4h,  v4.4h     // + top[base+1]*frac
        umull2          v17.4s,  v0.8h,   v6.8h
        umlal2          v17.4s,  v20.8h,  v4.8h
        umull           v18.4s,  v2.4h,   v7.4h
        umlal           v18.4s,  v21.4h,  v5.4h
        umull2          v19.4s,  v2.8h,   v7.8h
        umlal2          v19.4s,  v21.8h,  v5.8h
        rshrn           v16.4h,  v16.4s,  #6
        rshrn2          v16.8h,  v17.4s,  #6
        rshrn           v17.4h,  v18.4s,  #6
        rshrn2          v17.8h,  v19.4s,  #6
        subs            w3,  w3,  #2
        zip1            v18.8h,  v16.8h,  v17.8h
        zip2            v19.8h,  v16.8h,  v17.8h
        add             w7,  w7,  w5                // xpos += dx
        st1             {v18.s}[0], [x0],  x1
        st1             {v18.s}[1], [x13], x1
        st1             {v18.s}[2], [x0],  x1
        st1             {v18.s}[3], [x13], x1
        st1             {v19.s}[0], [x0],  x1
        st1             {v19.s}[1], [x13], x1
        st1             {v19.s}[2], [x0],  x1
        st1             {v19.s}[3], [x13], x1
        b.le            9f
        sub             x0,  x0,  x1, lsl #2        // ptr -= 4 * (2*stride)
        sub             x13, x13, x1, lsl #2
        add             x0,  x0,  #4                // advance two 16-bit pixels
        add             x13, x13, #4
        b               8b
9:
        ret
endfunc


// void ipred_filter_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int filt_idx,
//                              const int max_width, const int max_height,
//                              const int bitdepth_max);
//
// filter_fn emits one non-exported implementation per bit depth together
// with its width jump table. Both variants expect bitdepth_max in w8; the
// exported ipred_filter_16bpc_neon entry point loads it from the stack
// before branching here.
//
// Common setup:
//   - (filt_idx & 511) << 6 selects a 64 byte entry in filter_intra_taps;
//     seven rows of eight signed 8-bit taps are loaded and sign-extended
//     into v16-v22 (filter(0) .. filter(6)).
//   - clz(width) - 26 indexes the jump table (32 -> 0, 16 -> 1, 8 -> 2,
//     4 -> 3).
//   - x0/x6 address two consecutive rows and x1 is doubled, so every
//     iteration writes a block that is two rows high.
//   - v31 = bitdepth_max (upper clamp). The 10 bpc variant also keeps
//     v30 = 0 as the lower clamp.
//
// Arithmetic differs per bit depth: the 10 bpc variant accumulates in 16-bit
// lanes (mul/mla, srshr, smax), the other variant widens to 32 bits
// (smull/smlal) and narrows with sqrshrun, which already saturates at zero.
.macro filter_fn bpc
function ipred_filter_\bpc\()bpc_neon
        and             w5,  w5,  #511
        movrel          x6,  X(filter_intra_taps)
        lsl             w5,  w5,  #6
        add             x6,  x6,  w5, uxtw
        ld1             {v16.8b, v17.8b, v18.8b, v19.8b}, [x6], #32
        clz             w9,  w3
        movrel          x5,  ipred_filter\bpc\()_tbl
        ld1             {v20.8b, v21.8b, v22.8b}, [x6]
        sub             w9,  w9,  #26
        ldrsw           x9,  [x5, w9, uxtw #2]
        sxtl            v16.8h,  v16.8b
        sxtl            v17.8h,  v17.8b
        add             x5,  x5,  x9
        sxtl            v18.8h,  v18.8b
        sxtl            v19.8h,  v19.8b
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        sxtl            v20.8h,  v20.8b
        sxtl            v21.8h,  v21.8b
        sxtl            v22.8h,  v22.8b
        dup             v31.8h,  w8
.if \bpc == 10
        movi            v30.8h,  #0
.endif
        br              x5
40:
        // Width 4: one 4x2 block per iteration.
        AARCH64_VALID_JUMP_TARGET
        ldur            d0,  [x2, #2]               // top (0-3)
        sub             x2,  x2,  #4
        mov             x7,  #-4                    // walk the left edge downwards
4:
        ld1             {v1.4h}, [x2], x7           // left (0-1) + topleft (2)
.if \bpc == 10
        mul             v2.8h,   v17.8h,  v0.h[0]   // p1(top[0]) * filter(1)
        mla             v2.8h,   v18.8h,  v0.h[1]   // p2(top[1]) * filter(2)
        mla             v2.8h,   v19.8h,  v0.h[2]   // p3(top[2]) * filter(3)
        mla             v2.8h,   v20.8h,  v0.h[3]   // p4(top[3]) * filter(4)
        mla             v2.8h,   v16.8h,  v1.h[2]   // p0(topleft) * filter(0)
        mla             v2.8h,   v21.8h,  v1.h[1]   // p5(left[0]) * filter(5)
        mla             v2.8h,   v22.8h,  v1.h[0]   // p6(left[1]) * filter(6)
        srshr           v2.8h,   v2.8h,   #4
        smax            v2.8h,   v2.8h,   v30.8h
.else
        smull           v2.4s,   v17.4h,  v0.h[0]   // p1(top[0]) * filter(1)
        smlal           v2.4s,   v18.4h,  v0.h[1]   // p2(top[1]) * filter(2)
        smlal           v2.4s,   v19.4h,  v0.h[2]   // p3(top[2]) * filter(3)
        smlal           v2.4s,   v20.4h,  v0.h[3]   // p4(top[3]) * filter(4)
        smlal           v2.4s,   v16.4h,  v1.h[2]   // p0(topleft) * filter(0)
        smlal           v2.4s,   v21.4h,  v1.h[1]   // p5(left[0]) * filter(5)
        smlal           v2.4s,   v22.4h,  v1.h[0]   // p6(left[1]) * filter(6)
        smull2          v3.4s,   v17.8h,  v0.h[0]   // p1(top[0]) * filter(1)
        smlal2          v3.4s,   v18.8h,  v0.h[1]   // p2(top[1]) * filter(2)
        smlal2          v3.4s,   v19.8h,  v0.h[2]   // p3(top[2]) * filter(3)
        smlal2          v3.4s,   v20.8h,  v0.h[3]   // p4(top[3]) * filter(4)
        smlal2          v3.4s,   v16.8h,  v1.h[2]   // p0(topleft) * filter(0)
        smlal2          v3.4s,   v21.8h,  v1.h[1]   // p5(left[0]) * filter(5)
        smlal2          v3.4s,   v22.8h,  v1.h[0]   // p6(left[1]) * filter(6)
        sqrshrun        v2.4h,   v2.4s,   #4
        sqrshrun2       v2.8h,   v3.4s,   #4
.endif
        smin            v2.8h,   v2.8h,   v31.8h
        subs            w4,  w4,  #2
        st1             {v2.d}[0], [x0], x1
        ext             v0.16b,  v2.16b,  v2.16b,  #8 // move top from [4-7] to [0-3]
        st1             {v2.d}[1], [x6], x1
        b.gt            4b
        ret
80:
        // Width 8: two 4x2 blocks per iteration. The second block takes its
        // left neighbours from the clamped output of the first one
        // (v2.h[3] and v2.h[7]).
        AARCH64_VALID_JUMP_TARGET
        ldur            q0,  [x2, #2]               // top (0-7)
        sub             x2,  x2,  #4
        mov             x7,  #-4
8:
        ld1             {v1.4h}, [x2], x7           // left (0-1) + topleft (2)
.if \bpc == 10
        mul             v2.8h,   v17.8h,  v0.h[0]   // p1(top[0]) * filter(1)
        mla             v2.8h,   v18.8h,  v0.h[1]   // p2(top[1]) * filter(2)
        mla             v2.8h,   v19.8h,  v0.h[2]   // p3(top[2]) * filter(3)
        mla             v2.8h,   v20.8h,  v0.h[3]   // p4(top[3]) * filter(4)
        mla             v2.8h,   v16.8h,  v1.h[2]   // p0(topleft) * filter(0)
        mla             v2.8h,   v21.8h,  v1.h[1]   // p5(left[0]) * filter(5)
        mla             v2.8h,   v22.8h,  v1.h[0]   // p6(left[1]) * filter(6)
        mul             v3.8h,   v17.8h,  v0.h[4]   // p1(top[0]) * filter(1)
        mla             v3.8h,   v18.8h,  v0.h[5]   // p2(top[1]) * filter(2)
        mla             v3.8h,   v19.8h,  v0.h[6]   // p3(top[2]) * filter(3)
        srshr           v2.8h,   v2.8h,   #4
        smax            v2.8h,   v2.8h,   v30.8h
        smin            v2.8h,   v2.8h,   v31.8h
        mla             v3.8h,   v20.8h,  v0.h[7]   // p4(top[3]) * filter(4)
        mla             v3.8h,   v16.8h,  v0.h[3]   // p0(topleft) * filter(0)
        mla             v3.8h,   v21.8h,  v2.h[3]   // p5(left[0]) * filter(5)
        mla             v3.8h,   v22.8h,  v2.h[7]   // p6(left[1]) * filter(6)
        srshr           v3.8h,   v3.8h,   #4
        smax            v3.8h,   v3.8h,   v30.8h
.else
        smull           v2.4s,   v17.4h,  v0.h[0]   // p1(top[0]) * filter(1)
        smlal           v2.4s,   v18.4h,  v0.h[1]   // p2(top[1]) * filter(2)
        smlal           v2.4s,   v19.4h,  v0.h[2]   // p3(top[2]) * filter(3)
        smlal           v2.4s,   v20.4h,  v0.h[3]   // p4(top[3]) * filter(4)
        smlal           v2.4s,   v16.4h,  v1.h[2]   // p0(topleft) * filter(0)
        smlal           v2.4s,   v21.4h,  v1.h[1]   // p5(left[0]) * filter(5)
        smlal           v2.4s,   v22.4h,  v1.h[0]   // p6(left[1]) * filter(6)
        smull2          v3.4s,   v17.8h,  v0.h[0]   // p1(top[0]) * filter(1)
        smlal2          v3.4s,   v18.8h,  v0.h[1]   // p2(top[1]) * filter(2)
        smlal2          v3.4s,   v19.8h,  v0.h[2]   // p3(top[2]) * filter(3)
        smlal2          v3.4s,   v20.8h,  v0.h[3]   // p4(top[3]) * filter(4)
        smlal2          v3.4s,   v16.8h,  v1.h[2]   // p0(topleft) * filter(0)
        smlal2          v3.4s,   v21.8h,  v1.h[1]   // p5(left[0]) * filter(5)
        smlal2          v3.4s,   v22.8h,  v1.h[0]   // p6(left[1]) * filter(6)
        smull           v4.4s,   v17.4h,  v0.h[4]   // p1(top[0]) * filter(1)
        smlal           v4.4s,   v18.4h,  v0.h[5]   // p2(top[1]) * filter(2)
        smlal           v4.4s,   v19.4h,  v0.h[6]   // p3(top[2]) * filter(3)
        sqrshrun        v2.4h,   v2.4s,   #4
        sqrshrun2       v2.8h,   v3.4s,   #4
        smin            v2.8h,   v2.8h,   v31.8h
        smlal           v4.4s,   v20.4h,  v0.h[7]   // p4(top[3]) * filter(4)
        smlal           v4.4s,   v16.4h,  v0.h[3]   // p0(topleft) * filter(0)
        smlal           v4.4s,   v21.4h,  v2.h[3]   // p5(left[0]) * filter(5)
        smlal           v4.4s,   v22.4h,  v2.h[7]   // p6(left[1]) * filter(6)
        smull2          v5.4s,   v17.8h,  v0.h[4]   // p1(top[0]) * filter(1)
        smlal2          v5.4s,   v18.8h,  v0.h[5]   // p2(top[1]) * filter(2)
        smlal2          v5.4s,   v19.8h,  v0.h[6]   // p3(top[2]) * filter(3)
        smlal2          v5.4s,   v20.8h,  v0.h[7]   // p4(top[3]) * filter(4)
        smlal2          v5.4s,   v16.8h,  v0.h[3]   // p0(topleft) * filter(0)
        smlal2          v5.4s,   v21.8h,  v2.h[3]   // p5(left[0]) * filter(5)
        smlal2          v5.4s,   v22.8h,  v2.h[7]   // p6(left[1]) * filter(6)
        sqrshrun        v3.4h,   v4.4s,   #4
        sqrshrun2       v3.8h,   v5.4s,   #4
.endif
        smin            v3.8h,   v3.8h,   v31.8h
        subs            w4,  w4,  #2
        st2             {v2.d, v3.d}[0], [x0], x1
        zip2            v0.2d,   v2.2d,   v3.2d     // second output row is the next top
        st2             {v2.d, v3.d}[1], [x6], x1
        b.gt            8b
        ret
160:
320:
        // Widths 16 and 32: four chained 4x2 blocks (16 pixels) per inner
        // iteration. The top row is read through x8; from the second row
        // pair on, x8 points at the previously written second output row.
        AARCH64_VALID_JUMP_TARGET
        add             x8,  x2,  #2
        sub             x2,  x2,  #4
        mov             x7,  #-4
        sub             x1,  x1,  w3, uxtw #1       // stride minus the bytes written per row
        mov             w9,  w3                     // keep the width for later rows

1:
        ld1             {v0.4h}, [x2], x7           // left (0-1) + topleft (2)
2:
        ld1             {v1.8h, v2.8h}, [x8], #32   // top(0-15)
.if \bpc == 10
        mul             v3.8h,   v16.8h,  v0.h[2]   // p0(topleft) * filter(0)
        mla             v3.8h,   v21.8h,  v0.h[1]   // p5(left[0]) * filter(5)
        mla             v3.8h,   v22.8h,  v0.h[0]   // p6(left[1]) * filter(6)
        mla             v3.8h,   v17.8h,  v1.h[0]   // p1(top[0]) * filter(1)
        mla             v3.8h,   v18.8h,  v1.h[1]   // p2(top[1]) * filter(2)
        mla             v3.8h,   v19.8h,  v1.h[2]   // p3(top[2]) * filter(3)
        mla             v3.8h,   v20.8h,  v1.h[3]   // p4(top[3]) * filter(4)

        mul             v4.8h,   v17.8h,  v1.h[4]   // p1(top[0]) * filter(1)
        mla             v4.8h,   v18.8h,  v1.h[5]   // p2(top[1]) * filter(2)
        mla             v4.8h,   v19.8h,  v1.h[6]   // p3(top[2]) * filter(3)
        srshr           v3.8h,   v3.8h,   #4
        smax            v3.8h,   v3.8h,   v30.8h
        smin            v3.8h,   v3.8h,   v31.8h
        mla             v4.8h,   v20.8h,  v1.h[7]   // p4(top[3]) * filter(4)
        mla             v4.8h,   v16.8h,  v1.h[3]   // p0(topleft) * filter(0)
        mla             v4.8h,   v21.8h,  v3.h[3]   // p5(left[0]) * filter(5)
        mla             v4.8h,   v22.8h,  v3.h[7]   // p6(left[1]) * filter(6)

        mul             v5.8h,   v17.8h,  v2.h[0]   // p1(top[0]) * filter(1)
        mla             v5.8h,   v18.8h,  v2.h[1]   // p2(top[1]) * filter(2)
        mla             v5.8h,   v19.8h,  v2.h[2]   // p3(top[2]) * filter(3)
        srshr           v4.8h,   v4.8h,   #4
        smax            v4.8h,   v4.8h,   v30.8h
        smin            v4.8h,   v4.8h,   v31.8h
        mla             v5.8h,   v20.8h,  v2.h[3]   // p4(top[3]) * filter(4)
        mla             v5.8h,   v16.8h,  v1.h[7]   // p0(topleft) * filter(0)
        mla             v5.8h,   v21.8h,  v4.h[3]   // p5(left[0]) * filter(5)
        mla             v5.8h,   v22.8h,  v4.h[7]   // p6(left[1]) * filter(6)

        mul             v6.8h,   v17.8h,  v2.h[4]   // p1(top[0]) * filter(1)
        mla             v6.8h,   v18.8h,  v2.h[5]   // p2(top[1]) * filter(2)
        mla             v6.8h,   v19.8h,  v2.h[6]   // p3(top[2]) * filter(3)
        srshr           v5.8h,   v5.8h,   #4
        smax            v5.8h,   v5.8h,   v30.8h
        smin            v5.8h,   v5.8h,   v31.8h
        mla             v6.8h,   v20.8h,  v2.h[7]   // p4(top[3]) * filter(4)
        mla             v6.8h,   v16.8h,  v2.h[3]   // p0(topleft) * filter(0)
        mla             v6.8h,   v21.8h,  v5.h[3]   // p5(left[0]) * filter(5)
        mla             v6.8h,   v22.8h,  v5.h[7]   // p6(left[1]) * filter(6)

        subs            w3,  w3,  #16
        srshr           v6.8h,   v6.8h,   #4
        smax            v6.8h,   v6.8h,   v30.8h
.else
        smull           v3.4s,   v16.4h,  v0.h[2]   // p0(topleft) * filter(0)
        smlal           v3.4s,   v21.4h,  v0.h[1]   // p5(left[0]) * filter(5)
        smlal           v3.4s,   v22.4h,  v0.h[0]   // p6(left[1]) * filter(6)
        smlal           v3.4s,   v17.4h,  v1.h[0]   // p1(top[0]) * filter(1)
        smlal           v3.4s,   v18.4h,  v1.h[1]   // p2(top[1]) * filter(2)
        smlal           v3.4s,   v19.4h,  v1.h[2]   // p3(top[2]) * filter(3)
        smlal           v3.4s,   v20.4h,  v1.h[3]   // p4(top[3]) * filter(4)
        smull2          v4.4s,   v16.8h,  v0.h[2]   // p0(topleft) * filter(0)
        smlal2          v4.4s,   v21.8h,  v0.h[1]   // p5(left[0]) * filter(5)
        smlal2          v4.4s,   v22.8h,  v0.h[0]   // p6(left[1]) * filter(6)
        smlal2          v4.4s,   v17.8h,  v1.h[0]   // p1(top[0]) * filter(1)
        smlal2          v4.4s,   v18.8h,  v1.h[1]   // p2(top[1]) * filter(2)
        smlal2          v4.4s,   v19.8h,  v1.h[2]   // p3(top[2]) * filter(3)
        smlal2          v4.4s,   v20.8h,  v1.h[3]   // p4(top[3]) * filter(4)

        smull           v5.4s,   v17.4h,  v1.h[4]   // p1(top[0]) * filter(1)
        smlal           v5.4s,   v18.4h,  v1.h[5]   // p2(top[1]) * filter(2)
        smlal           v5.4s,   v19.4h,  v1.h[6]   // p3(top[2]) * filter(3)
        sqrshrun        v3.4h,   v3.4s,   #4
        sqrshrun2       v3.8h,   v4.4s,   #4
        smin            v3.8h,   v3.8h,   v31.8h
        smlal           v5.4s,   v20.4h,  v1.h[7]   // p4(top[3]) * filter(4)
        smlal           v5.4s,   v16.4h,  v1.h[3]   // p0(topleft) * filter(0)
        smlal           v5.4s,   v21.4h,  v3.h[3]   // p5(left[0]) * filter(5)
        smlal           v5.4s,   v22.4h,  v3.h[7]   // p6(left[1]) * filter(6)
        smull2          v6.4s,   v17.8h,  v1.h[4]   // p1(top[0]) * filter(1)
        smlal2          v6.4s,   v18.8h,  v1.h[5]   // p2(top[1]) * filter(2)
        smlal2          v6.4s,   v19.8h,  v1.h[6]   // p3(top[2]) * filter(3)
        smlal2          v6.4s,   v20.8h,  v1.h[7]   // p4(top[3]) * filter(4)
        smlal2          v6.4s,   v16.8h,  v1.h[3]   // p0(topleft) * filter(0)
        smlal2          v6.4s,   v21.8h,  v3.h[3]   // p5(left[0]) * filter(5)
        smlal2          v6.4s,   v22.8h,  v3.h[7]   // p6(left[1]) * filter(6)

        smull           v24.4s,  v17.4h,  v2.h[0]   // p1(top[0]) * filter(1)
        smlal           v24.4s,  v18.4h,  v2.h[1]   // p2(top[1]) * filter(2)
        smlal           v24.4s,  v19.4h,  v2.h[2]   // p3(top[2]) * filter(3)
        sqrshrun        v4.4h,   v5.4s,   #4
        sqrshrun2       v4.8h,   v6.4s,   #4
        smin            v4.8h,   v4.8h,   v31.8h
        smlal           v24.4s,  v20.4h,  v2.h[3]   // p4(top[3]) * filter(4)
        smlal           v24.4s,  v16.4h,  v1.h[7]   // p0(topleft) * filter(0)
        smlal           v24.4s,  v21.4h,  v4.h[3]   // p5(left[0]) * filter(5)
        smlal           v24.4s,  v22.4h,  v4.h[7]   // p6(left[1]) * filter(6)
        smull2          v25.4s,  v17.8h,  v2.h[0]   // p1(top[0]) * filter(1)
        smlal2          v25.4s,  v18.8h,  v2.h[1]   // p2(top[1]) * filter(2)
        smlal2          v25.4s,  v19.8h,  v2.h[2]   // p3(top[2]) * filter(3)
        smlal2          v25.4s,  v20.8h,  v2.h[3]   // p4(top[3]) * filter(4)
        smlal2          v25.4s,  v16.8h,  v1.h[7]   // p0(topleft) * filter(0)
        smlal2          v25.4s,  v21.8h,  v4.h[3]   // p5(left[0]) * filter(5)
        smlal2          v25.4s,  v22.8h,  v4.h[7]   // p6(left[1]) * filter(6)

        smull           v26.4s,  v17.4h,  v2.h[4]   // p1(top[0]) * filter(1)
        smlal           v26.4s,  v18.4h,  v2.h[5]   // p2(top[1]) * filter(2)
        smlal           v26.4s,  v19.4h,  v2.h[6]   // p3(top[2]) * filter(3)
        sqrshrun        v5.4h,   v24.4s,  #4
        sqrshrun2       v5.8h,   v25.4s,  #4
        smin            v5.8h,   v5.8h,   v31.8h
        smlal           v26.4s,  v20.4h,  v2.h[7]   // p4(top[3]) * filter(4)
        smlal           v26.4s,  v16.4h,  v2.h[3]   // p0(topleft) * filter(0)
        smlal           v26.4s,  v21.4h,  v5.h[3]   // p5(left[0]) * filter(5)
        smlal           v26.4s,  v22.4h,  v5.h[7]   // p6(left[1]) * filter(6)
        smull2          v27.4s,  v17.8h,  v2.h[4]   // p1(top[0]) * filter(1)
        smlal2          v27.4s,  v18.8h,  v2.h[5]   // p2(top[1]) * filter(2)
        smlal2          v27.4s,  v19.8h,  v2.h[6]   // p3(top[2]) * filter(3)
        smlal2          v27.4s,  v20.8h,  v2.h[7]   // p4(top[3]) * filter(4)
        smlal2          v27.4s,  v16.8h,  v2.h[3]   // p0(topleft) * filter(0)
        smlal2          v27.4s,  v21.8h,  v5.h[3]   // p5(left[0]) * filter(5)
        smlal2          v27.4s,  v22.8h,  v5.h[7]   // p6(left[1]) * filter(6)

        subs            w3,  w3,  #16
        sqrshrun        v6.4h,   v26.4s,  #4
        sqrshrun2       v6.8h,   v27.4s,  #4
.endif
        smin            v6.8h,   v6.8h,   v31.8h

        // Carry the neighbours for the next 16 pixel group: the last top
        // pixel becomes topleft, and the last column of the block just
        // produced becomes left.
        ins             v0.h[2], v2.h[7]
        st4             {v3.d, v4.d, v5.d, v6.d}[0], [x0], #32
        ins             v0.h[0], v6.h[7]
        st4             {v3.d, v4.d, v5.d, v6.d}[1], [x6], #32
        ins             v0.h[1], v6.h[3]
        b.gt            2b
        subs            w4,  w4,  #2
        b.le            9f
        sub             x8,  x6,  w9, uxtw #1       // next top = start of the row just written via x6
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        mov             w3,  w9
        b               1b
9:
        ret
endfunc

jumptable ipred_filter\bpc\()_tbl
        .word 320b - ipred_filter\bpc\()_tbl
        .word 160b - ipred_filter\bpc\()_tbl
        .word 80b  - ipred_filter\bpc\()_tbl
        .word 40b  - ipred_filter\bpc\()_tbl
endjumptable
.endm

filter_fn 10
filter_fn 12
// Exported entry point for the recursive intra filter. bitdepth_max is read
// from the stack into w8, where both variants expect it, and the call is
// forwarded to the variant that matches the bit depth: values above 0x3ff
// go to the 12 bpc code, everything else to the 10 bpc code.
function ipred_filter_16bpc_neon, export=1
        ldr             w8,  [sp]
        cmp             w8,  #0x3ff
        b.gt            ipred_filter_12bpc_neon
        b               ipred_filter_10bpc_neon
endfunc

// void pal_pred_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                          const pixel *const pal, const uint8_t *idx,
//                          const int w, const int h);
function pal_pred_16bpc_neon, export=1
        ld1             {v30.8h}, [x2]
        clz             w9,  w4
        movrel          x6,  pal_pred_tbl
        sub             w9,  w9,  #25
        movi            v29.16b, #7
        ldrsw           x9,  [x6, w9, uxtw #2]
        movi            v31.8h,  #1, lsl #8
        add             x6,  x6,  x9
        br              x6
40:
        AARCH64_VALID_JUMP_TARGET
        add             x2,  x0,  x1
        lsl             x1,  x1,  #1
4:
        ld1             {v1.8b}, [x3], #8
        subs            w5,  w5,  #4
        ushr            v3.8b,   v1.8b,   #4
        and             v2.8b,   v1.8b,   v29.8b
        zip1            v1.16b,  v2.16b,  v3.16b
4235*c0909341SAndroid Build Coastguard Worker // Restructure v1 from a, b, c, ... into 2*a, 2*a+1, 2*b, 2*b+1, 2*c, 2*c+1, ... 4236*c0909341SAndroid Build Coastguard Worker add v1.16b, v1.16b, v1.16b 4237*c0909341SAndroid Build Coastguard Worker zip1 v0.16b, v1.16b, v1.16b 4238*c0909341SAndroid Build Coastguard Worker zip2 v1.16b, v1.16b, v1.16b 4239*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v31.8h 4240*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v31.8h 4241*c0909341SAndroid Build Coastguard Worker tbl v0.16b, {v30.16b}, v0.16b 4242*c0909341SAndroid Build Coastguard Worker st1 {v0.d}[0], [x0], x1 4243*c0909341SAndroid Build Coastguard Worker tbl v1.16b, {v30.16b}, v1.16b 4244*c0909341SAndroid Build Coastguard Worker st1 {v0.d}[1], [x2], x1 4245*c0909341SAndroid Build Coastguard Worker st1 {v1.d}[0], [x0], x1 4246*c0909341SAndroid Build Coastguard Worker st1 {v1.d}[1], [x2], x1 4247*c0909341SAndroid Build Coastguard Worker b.gt 4b 4248*c0909341SAndroid Build Coastguard Worker ret 4249*c0909341SAndroid Build Coastguard Worker80: 4250*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4251*c0909341SAndroid Build Coastguard Worker add x2, x0, x1 4252*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 4253*c0909341SAndroid Build Coastguard Worker8: 4254*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b}, [x3], #16 4255*c0909341SAndroid Build Coastguard Worker subs w5, w5, #4 4256*c0909341SAndroid Build Coastguard Worker ushr v4.16b, v2.16b, #4 4257*c0909341SAndroid Build Coastguard Worker and v3.16b, v2.16b, v29.16b 4258*c0909341SAndroid Build Coastguard Worker zip1 v2.16b, v3.16b, v4.16b 4259*c0909341SAndroid Build Coastguard Worker zip2 v3.16b, v3.16b, v4.16b 4260*c0909341SAndroid Build Coastguard Worker add v2.16b, v2.16b, v2.16b 4261*c0909341SAndroid Build Coastguard Worker add v3.16b, v3.16b, v3.16b 4262*c0909341SAndroid Build Coastguard Worker zip1 v0.16b, v2.16b, v2.16b 4263*c0909341SAndroid Build Coastguard 
Worker zip2 v1.16b, v2.16b, v2.16b 4264*c0909341SAndroid Build Coastguard Worker zip1 v2.16b, v3.16b, v3.16b 4265*c0909341SAndroid Build Coastguard Worker zip2 v3.16b, v3.16b, v3.16b 4266*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v31.8h 4267*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v31.8h 4268*c0909341SAndroid Build Coastguard Worker add v2.8h, v2.8h, v31.8h 4269*c0909341SAndroid Build Coastguard Worker add v3.8h, v3.8h, v31.8h 4270*c0909341SAndroid Build Coastguard Worker tbl v0.16b, {v30.16b}, v0.16b 4271*c0909341SAndroid Build Coastguard Worker tbl v1.16b, {v30.16b}, v1.16b 4272*c0909341SAndroid Build Coastguard Worker st1 {v0.8h}, [x0], x1 4273*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v30.16b}, v2.16b 4274*c0909341SAndroid Build Coastguard Worker st1 {v1.8h}, [x2], x1 4275*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v30.16b}, v3.16b 4276*c0909341SAndroid Build Coastguard Worker st1 {v2.8h}, [x0], x1 4277*c0909341SAndroid Build Coastguard Worker st1 {v3.8h}, [x2], x1 4278*c0909341SAndroid Build Coastguard Worker b.gt 8b 4279*c0909341SAndroid Build Coastguard Worker ret 4280*c0909341SAndroid Build Coastguard Worker160: 4281*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4282*c0909341SAndroid Build Coastguard Worker add x2, x0, x1 4283*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 4284*c0909341SAndroid Build Coastguard Worker16: 4285*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b, v5.16b}, [x3], #32 4286*c0909341SAndroid Build Coastguard Worker subs w5, w5, #4 4287*c0909341SAndroid Build Coastguard Worker ushr v7.16b, v4.16b, #4 4288*c0909341SAndroid Build Coastguard Worker and v6.16b, v4.16b, v29.16b 4289*c0909341SAndroid Build Coastguard Worker ushr v3.16b, v5.16b, #4 4290*c0909341SAndroid Build Coastguard Worker and v2.16b, v5.16b, v29.16b 4291*c0909341SAndroid Build Coastguard Worker zip1 v4.16b, v6.16b, v7.16b 4292*c0909341SAndroid Build Coastguard Worker zip2 
v5.16b, v6.16b, v7.16b 4293*c0909341SAndroid Build Coastguard Worker zip1 v6.16b, v2.16b, v3.16b 4294*c0909341SAndroid Build Coastguard Worker zip2 v7.16b, v2.16b, v3.16b 4295*c0909341SAndroid Build Coastguard Worker add v4.16b, v4.16b, v4.16b 4296*c0909341SAndroid Build Coastguard Worker add v5.16b, v5.16b, v5.16b 4297*c0909341SAndroid Build Coastguard Worker add v6.16b, v6.16b, v6.16b 4298*c0909341SAndroid Build Coastguard Worker add v7.16b, v7.16b, v7.16b 4299*c0909341SAndroid Build Coastguard Worker zip1 v0.16b, v4.16b, v4.16b 4300*c0909341SAndroid Build Coastguard Worker zip2 v1.16b, v4.16b, v4.16b 4301*c0909341SAndroid Build Coastguard Worker zip1 v2.16b, v5.16b, v5.16b 4302*c0909341SAndroid Build Coastguard Worker zip2 v3.16b, v5.16b, v5.16b 4303*c0909341SAndroid Build Coastguard Worker zip1 v4.16b, v6.16b, v6.16b 4304*c0909341SAndroid Build Coastguard Worker zip2 v5.16b, v6.16b, v6.16b 4305*c0909341SAndroid Build Coastguard Worker zip1 v6.16b, v7.16b, v7.16b 4306*c0909341SAndroid Build Coastguard Worker zip2 v7.16b, v7.16b, v7.16b 4307*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v31.8h 4308*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v31.8h 4309*c0909341SAndroid Build Coastguard Worker add v2.8h, v2.8h, v31.8h 4310*c0909341SAndroid Build Coastguard Worker add v3.8h, v3.8h, v31.8h 4311*c0909341SAndroid Build Coastguard Worker add v4.8h, v4.8h, v31.8h 4312*c0909341SAndroid Build Coastguard Worker tbl v0.16b, {v30.16b}, v0.16b 4313*c0909341SAndroid Build Coastguard Worker add v5.8h, v5.8h, v31.8h 4314*c0909341SAndroid Build Coastguard Worker tbl v1.16b, {v30.16b}, v1.16b 4315*c0909341SAndroid Build Coastguard Worker add v6.8h, v6.8h, v31.8h 4316*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v30.16b}, v2.16b 4317*c0909341SAndroid Build Coastguard Worker add v7.8h, v7.8h, v31.8h 4318*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v30.16b}, v3.16b 4319*c0909341SAndroid Build Coastguard Worker tbl v4.16b, {v30.16b}, 
v4.16b 4320*c0909341SAndroid Build Coastguard Worker tbl v5.16b, {v30.16b}, v5.16b 4321*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h}, [x0], x1 4322*c0909341SAndroid Build Coastguard Worker tbl v6.16b, {v30.16b}, v6.16b 4323*c0909341SAndroid Build Coastguard Worker st1 {v2.8h, v3.8h}, [x2], x1 4324*c0909341SAndroid Build Coastguard Worker tbl v7.16b, {v30.16b}, v7.16b 4325*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h}, [x0], x1 4326*c0909341SAndroid Build Coastguard Worker st1 {v6.8h, v7.8h}, [x2], x1 4327*c0909341SAndroid Build Coastguard Worker b.gt 16b 4328*c0909341SAndroid Build Coastguard Worker ret 4329*c0909341SAndroid Build Coastguard Worker320: 4330*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4331*c0909341SAndroid Build Coastguard Worker add x2, x0, x1 4332*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 4333*c0909341SAndroid Build Coastguard Worker32: 4334*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b, v5.16b}, [x3], #32 4335*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 4336*c0909341SAndroid Build Coastguard Worker ushr v7.16b, v4.16b, #4 4337*c0909341SAndroid Build Coastguard Worker and v6.16b, v4.16b, v29.16b 4338*c0909341SAndroid Build Coastguard Worker ushr v3.16b, v5.16b, #4 4339*c0909341SAndroid Build Coastguard Worker and v2.16b, v5.16b, v29.16b 4340*c0909341SAndroid Build Coastguard Worker zip1 v4.16b, v6.16b, v7.16b 4341*c0909341SAndroid Build Coastguard Worker zip2 v5.16b, v6.16b, v7.16b 4342*c0909341SAndroid Build Coastguard Worker zip1 v6.16b, v2.16b, v3.16b 4343*c0909341SAndroid Build Coastguard Worker zip2 v7.16b, v2.16b, v3.16b 4344*c0909341SAndroid Build Coastguard Worker add v4.16b, v4.16b, v4.16b 4345*c0909341SAndroid Build Coastguard Worker add v5.16b, v5.16b, v5.16b 4346*c0909341SAndroid Build Coastguard Worker add v6.16b, v6.16b, v6.16b 4347*c0909341SAndroid Build Coastguard Worker add v7.16b, v7.16b, v7.16b 4348*c0909341SAndroid Build Coastguard Worker zip1 
v0.16b, v4.16b, v4.16b 4349*c0909341SAndroid Build Coastguard Worker zip2 v1.16b, v4.16b, v4.16b 4350*c0909341SAndroid Build Coastguard Worker zip1 v2.16b, v5.16b, v5.16b 4351*c0909341SAndroid Build Coastguard Worker zip2 v3.16b, v5.16b, v5.16b 4352*c0909341SAndroid Build Coastguard Worker zip1 v4.16b, v6.16b, v6.16b 4353*c0909341SAndroid Build Coastguard Worker zip2 v5.16b, v6.16b, v6.16b 4354*c0909341SAndroid Build Coastguard Worker zip1 v6.16b, v7.16b, v7.16b 4355*c0909341SAndroid Build Coastguard Worker zip2 v7.16b, v7.16b, v7.16b 4356*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v31.8h 4357*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v31.8h 4358*c0909341SAndroid Build Coastguard Worker add v2.8h, v2.8h, v31.8h 4359*c0909341SAndroid Build Coastguard Worker add v3.8h, v3.8h, v31.8h 4360*c0909341SAndroid Build Coastguard Worker add v4.8h, v4.8h, v31.8h 4361*c0909341SAndroid Build Coastguard Worker tbl v0.16b, {v30.16b}, v0.16b 4362*c0909341SAndroid Build Coastguard Worker add v5.8h, v5.8h, v31.8h 4363*c0909341SAndroid Build Coastguard Worker tbl v1.16b, {v30.16b}, v1.16b 4364*c0909341SAndroid Build Coastguard Worker add v6.8h, v6.8h, v31.8h 4365*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v30.16b}, v2.16b 4366*c0909341SAndroid Build Coastguard Worker add v7.8h, v7.8h, v31.8h 4367*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v30.16b}, v3.16b 4368*c0909341SAndroid Build Coastguard Worker tbl v4.16b, {v30.16b}, v4.16b 4369*c0909341SAndroid Build Coastguard Worker tbl v5.16b, {v30.16b}, v5.16b 4370*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1 4371*c0909341SAndroid Build Coastguard Worker tbl v6.16b, {v30.16b}, v6.16b 4372*c0909341SAndroid Build Coastguard Worker tbl v7.16b, {v30.16b}, v7.16b 4373*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x2], x1 4374*c0909341SAndroid Build Coastguard Worker b.gt 32b 4375*c0909341SAndroid Build Coastguard Worker ret 
4376*c0909341SAndroid Build Coastguard Worker640: 4377*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4378*c0909341SAndroid Build Coastguard Worker add x2, x0, #64 4379*c0909341SAndroid Build Coastguard Worker64: 4380*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b, v5.16b}, [x3], #32 4381*c0909341SAndroid Build Coastguard Worker subs w5, w5, #1 4382*c0909341SAndroid Build Coastguard Worker ushr v7.16b, v4.16b, #4 4383*c0909341SAndroid Build Coastguard Worker and v6.16b, v4.16b, v29.16b 4384*c0909341SAndroid Build Coastguard Worker ushr v3.16b, v5.16b, #4 4385*c0909341SAndroid Build Coastguard Worker and v2.16b, v5.16b, v29.16b 4386*c0909341SAndroid Build Coastguard Worker zip1 v4.16b, v6.16b, v7.16b 4387*c0909341SAndroid Build Coastguard Worker zip2 v5.16b, v6.16b, v7.16b 4388*c0909341SAndroid Build Coastguard Worker zip1 v6.16b, v2.16b, v3.16b 4389*c0909341SAndroid Build Coastguard Worker zip2 v7.16b, v2.16b, v3.16b 4390*c0909341SAndroid Build Coastguard Worker add v4.16b, v4.16b, v4.16b 4391*c0909341SAndroid Build Coastguard Worker add v5.16b, v5.16b, v5.16b 4392*c0909341SAndroid Build Coastguard Worker add v6.16b, v6.16b, v6.16b 4393*c0909341SAndroid Build Coastguard Worker add v7.16b, v7.16b, v7.16b 4394*c0909341SAndroid Build Coastguard Worker zip1 v0.16b, v4.16b, v4.16b 4395*c0909341SAndroid Build Coastguard Worker zip2 v1.16b, v4.16b, v4.16b 4396*c0909341SAndroid Build Coastguard Worker zip1 v2.16b, v5.16b, v5.16b 4397*c0909341SAndroid Build Coastguard Worker zip2 v3.16b, v5.16b, v5.16b 4398*c0909341SAndroid Build Coastguard Worker zip1 v4.16b, v6.16b, v6.16b 4399*c0909341SAndroid Build Coastguard Worker zip2 v5.16b, v6.16b, v6.16b 4400*c0909341SAndroid Build Coastguard Worker zip1 v6.16b, v7.16b, v7.16b 4401*c0909341SAndroid Build Coastguard Worker zip2 v7.16b, v7.16b, v7.16b 4402*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v31.8h 4403*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v31.8h 
4404*c0909341SAndroid Build Coastguard Worker add v2.8h, v2.8h, v31.8h 4405*c0909341SAndroid Build Coastguard Worker add v3.8h, v3.8h, v31.8h 4406*c0909341SAndroid Build Coastguard Worker add v4.8h, v4.8h, v31.8h 4407*c0909341SAndroid Build Coastguard Worker tbl v0.16b, {v30.16b}, v0.16b 4408*c0909341SAndroid Build Coastguard Worker add v5.8h, v5.8h, v31.8h 4409*c0909341SAndroid Build Coastguard Worker tbl v1.16b, {v30.16b}, v1.16b 4410*c0909341SAndroid Build Coastguard Worker add v6.8h, v6.8h, v31.8h 4411*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v30.16b}, v2.16b 4412*c0909341SAndroid Build Coastguard Worker add v7.8h, v7.8h, v31.8h 4413*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v30.16b}, v3.16b 4414*c0909341SAndroid Build Coastguard Worker tbl v4.16b, {v30.16b}, v4.16b 4415*c0909341SAndroid Build Coastguard Worker tbl v5.16b, {v30.16b}, v5.16b 4416*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1 4417*c0909341SAndroid Build Coastguard Worker tbl v6.16b, {v30.16b}, v6.16b 4418*c0909341SAndroid Build Coastguard Worker tbl v7.16b, {v30.16b}, v7.16b 4419*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x2], x1 4420*c0909341SAndroid Build Coastguard Worker b.gt 64b 4421*c0909341SAndroid Build Coastguard Worker ret 4422*c0909341SAndroid Build Coastguard Workerendfunc 4423*c0909341SAndroid Build Coastguard Worker 4424*c0909341SAndroid Build Coastguard Workerjumptable pal_pred_tbl 4425*c0909341SAndroid Build Coastguard Worker .word 640b - pal_pred_tbl 4426*c0909341SAndroid Build Coastguard Worker .word 320b - pal_pred_tbl 4427*c0909341SAndroid Build Coastguard Worker .word 160b - pal_pred_tbl 4428*c0909341SAndroid Build Coastguard Worker .word 80b - pal_pred_tbl 4429*c0909341SAndroid Build Coastguard Worker .word 40b - pal_pred_tbl 4430*c0909341SAndroid Build Coastguard Workerendjumptable 4431*c0909341SAndroid Build Coastguard Worker 4432*c0909341SAndroid Build Coastguard Worker// void 
//      ipred_cfl_128_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height,
//                               const int16_t *ac, const int alpha,
//                               const int bitdepth_max);
//
// CfL prediction using a fixed DC of (bitdepth_max + 1) >> 1.
//
// Register use, in prototype order: x0 = dst, x1 = stride, w3 = width,
// w4 = height, x5 = ac, w6 = alpha, w7 = bitdepth_max. x2 (topleft) is not
// read by this entry point.
//
// The L(ipred_cfl_splat_w*) labels below are shared tails that the other
// ipred_cfl_* entry points jump into as well. On entry to any of them:
//   v0  = dc replicated in all eight halfword lanes
//   v1  = alpha replicated in all eight halfword lanes
//   v30 = 0, v31 = bitdepth_max (clamp bounds)
//   x0  = pointer to output row 0, x6 = pointer to output row 1
//   x1  = 2 * stride, w3 = width, w4 = height, x5 = ac
function ipred_cfl_128_16bpc_neon, export=1
        dup             v31.8h,  w7   // bitdepth_max
        clz             w9,  w3
        movrel          x7,  ipred_cfl_128_tbl
        // Table index = clz(width) - 26: width 32 -> 0, 16 -> 1, 8 -> 2, 4 -> 3.
        sub             w9,  w9,  #26
        ldrsw           x9,  [x7, w9, uxtw #2]
        urshr           v0.8h,   v31.8h,  #1     // dc = (bitdepth_max + 1) >> 1
        dup             v1.8h,   w6   // alpha
        add             x7,  x7,  x9
        add             x6,  x0,  x1             // x6 = second output row
        lsl             x1,  x1,  #1             // x0 and x6 each step two rows
        movi            v30.8h,  #0
        br              x7
L(ipred_cfl_splat_w4):
        AARCH64_VALID_JUMP_TARGET
1:
        // 16 ac coefficients per iteration = 4 rows of 4 pixels.
        ld1             {v4.8h, v5.8h}, [x5], #32
        subs            w4,  w4,  #4
        smull           v2.4s,   v4.4h,   v1.4h  // diff = ac * alpha
        smull2          v3.4s,   v4.8h,   v1.8h
        smull           v4.4s,   v5.4h,   v1.4h
        smull2          v5.4s,   v5.8h,   v1.8h
        cmlt            v16.4s,  v2.4s,   #0     // sign
        cmlt            v17.4s,  v3.4s,   #0
        cmlt            v18.4s,  v4.4s,   #0
        cmlt            v19.4s,  v5.4s,   #0
        add             v2.4s,   v2.4s,   v16.4s // diff + sign
        add             v3.4s,   v3.4s,   v17.4s
        add             v4.4s,   v4.4s,   v18.4s
        add             v5.4s,   v5.4s,   v19.4s
        rshrn           v2.4h,   v2.4s,   #6     // (diff + sign + 32) >> 6 = apply_sign()
        rshrn2          v2.8h,   v3.4s,   #6
        rshrn           v3.4h,   v4.4s,   #6
        rshrn2          v3.8h,   v5.4s,   #6
        add             v2.8h,   v2.8h,   v0.8h  // dc + apply_sign()
        add             v3.8h,   v3.8h,   v0.8h
        smax            v2.8h,   v2.8h,   v30.8h // clamp to [0, bitdepth_max]
        smax            v3.8h,   v3.8h,   v30.8h
        smin            v2.8h,   v2.8h,   v31.8h
        smin            v3.8h,   v3.8h,   v31.8h
        // Each 64-bit half is one 4-pixel row; rows alternate between x0 and x6.
        st1             {v2.d}[0],  [x0], x1
        st1             {v2.d}[1],  [x6], x1
        st1             {v3.d}[0],  [x0], x1
        st1             {v3.d}[1],  [x6], x1
        b.gt            1b
        ret
L(ipred_cfl_splat_w8):
        AARCH64_VALID_JUMP_TARGET
1:
        // 16 ac coefficients per iteration = 2 rows of 8 pixels.
        ld1             {v4.8h, v5.8h}, [x5], #32
        subs            w4,  w4,  #2
        smull           v2.4s,   v4.4h,   v1.4h  // diff = ac * alpha
        smull2          v3.4s,   v4.8h,   v1.8h
        smull           v4.4s,   v5.4h,   v1.4h
        smull2          v5.4s,   v5.8h,   v1.8h
        cmlt            v16.4s,  v2.4s,   #0     // sign
        cmlt            v17.4s,  v3.4s,   #0
        cmlt            v18.4s,  v4.4s,   #0
        cmlt            v19.4s,  v5.4s,   #0
        add             v2.4s,   v2.4s,   v16.4s // diff + sign
        add             v3.4s,   v3.4s,   v17.4s
        add             v4.4s,   v4.4s,   v18.4s
        add             v5.4s,   v5.4s,   v19.4s
        rshrn           v2.4h,   v2.4s,   #6     // (diff + sign + 32) >> 6 = apply_sign()
        rshrn2          v2.8h,   v3.4s,   #6
        rshrn           v3.4h,   v4.4s,   #6
        rshrn2          v3.8h,   v5.4s,   #6
        add             v2.8h,   v2.8h,   v0.8h  // dc + apply_sign()
        add             v3.8h,   v3.8h,   v0.8h
        smax            v2.8h,   v2.8h,   v30.8h // clamp to [0, bitdepth_max]
        smax            v3.8h,   v3.8h,   v30.8h
        smin            v2.8h,   v2.8h,   v31.8h
        smin            v3.8h,   v3.8h,   v31.8h
        st1             {v2.8h},  [x0], x1
        st1             {v3.8h},  [x6], x1
        b.gt            1b
        ret
L(ipred_cfl_splat_w16):
        AARCH64_VALID_JUMP_TARGET
        // Also used for width 32: the inner loop walks a row pair in chunks of
        // 16 pixels, the outer loop advances by two rows.
        add             x7,  x5,  w3, uxtw #1    // x7 = ac for the second row
        sub             x1,  x1,  w3, uxtw #1    // 2 * stride minus the bytes written per row
        mov             w9,  w3                  // keep width for the outer loop
1:
        ld1             {v2.8h, v3.8h}, [x5], #32
        ld1             {v4.8h, v5.8h}, [x7], #32
        subs            w3,  w3,  #16
        smull           v16.4s,  v2.4h,   v1.4h  // diff = ac * alpha
        smull2          v17.4s,  v2.8h,   v1.8h
        smull           v18.4s,  v3.4h,   v1.4h
        smull2          v19.4s,  v3.8h,   v1.8h
        smull           v2.4s,   v4.4h,   v1.4h
        smull2          v3.4s,   v4.8h,   v1.8h
        smull           v4.4s,   v5.4h,   v1.4h
        smull2          v5.4s,   v5.8h,   v1.8h
        cmlt            v20.4s,  v16.4s,  #0     // sign
        cmlt            v21.4s,  v17.4s,  #0
        cmlt            v22.4s,  v18.4s,  #0
        cmlt            v23.4s,  v19.4s,  #0
        cmlt            v24.4s,  v2.4s,   #0
        cmlt            v25.4s,  v3.4s,   #0
        cmlt            v26.4s,  v4.4s,   #0
        cmlt            v27.4s,  v5.4s,   #0
        add             v16.4s,  v16.4s,  v20.4s // diff + sign
        add             v17.4s,  v17.4s,  v21.4s
        add             v18.4s,  v18.4s,  v22.4s
        add             v19.4s,  v19.4s,  v23.4s
        add             v2.4s,   v2.4s,   v24.4s
        add             v3.4s,   v3.4s,   v25.4s
        add             v4.4s,   v4.4s,   v26.4s
        add             v5.4s,   v5.4s,   v27.4s
        rshrn           v16.4h,  v16.4s,  #6     // (diff + sign + 32) >> 6 = apply_sign()
        rshrn2          v16.8h,  v17.4s,  #6
        rshrn           v17.4h,  v18.4s,  #6
        rshrn2          v17.8h,  v19.4s,  #6
        rshrn           v6.4h,   v2.4s,   #6
        rshrn2          v6.8h,   v3.4s,   #6
        rshrn           v7.4h,   v4.4s,   #6
        rshrn2          v7.8h,   v5.4s,   #6
        add             v2.8h,   v16.8h,  v0.8h  // dc + apply_sign()
        add             v3.8h,   v17.8h,  v0.8h
        add             v4.8h,   v6.8h,   v0.8h
        add             v5.8h,   v7.8h,   v0.8h
        smax            v2.8h,   v2.8h,   v30.8h // clamp to [0, bitdepth_max]
        smax            v3.8h,   v3.8h,   v30.8h
        smax            v4.8h,   v4.8h,   v30.8h
        smax            v5.8h,   v5.8h,   v30.8h
        smin            v2.8h,   v2.8h,   v31.8h
        smin            v3.8h,   v3.8h,   v31.8h
        smin            v4.8h,   v4.8h,   v31.8h
        smin            v5.8h,   v5.8h,   v31.8h
        st1             {v2.8h, v3.8h}, [x0], #32
        st1             {v4.8h, v5.8h}, [x6], #32
        b.gt            1b
        subs            w4,  w4,  #2
        // Each ac pointer has consumed its own row; skip the row the other
        // pointer just handled.
        add             x5,  x5,  w9, uxtw #1
        add             x7,  x7,  w9, uxtw #1
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        mov             w3,  w9                  // reload width; mov leaves the flags from subs intact
        b.gt            1b
        ret
endfunc

// Indexed by clz(width) - 26. Widths 32 and 16 share the w16 loop.
// ipred_cfl_splat_tbl names the same table for ipred_cfl_left_16bpc_neon;
// the entries are relative to ipred_cfl_128_tbl, so the two labels must
// resolve to the same address.
jumptable ipred_cfl_128_tbl
ipred_cfl_splat_tbl:
        .word L(ipred_cfl_splat_w16) - ipred_cfl_128_tbl
        .word L(ipred_cfl_splat_w16) - ipred_cfl_128_tbl
        .word L(ipred_cfl_splat_w8)  - ipred_cfl_128_tbl
        .word L(ipred_cfl_splat_w4)  - ipred_cfl_128_tbl
endjumptable

// void ipred_cfl_top_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height,
//                               const int16_t *ac, const int alpha,
//                               const int bitdepth_max);
//
// CfL prediction where dc is the rounded average of `width` pixels read
// starting 2 bytes past topleft. Each width case computes dc into v0 and
// then branches to the matching L(ipred_cfl_splat_w*) tail.
function ipred_cfl_top_16bpc_neon, export=1
        dup             v31.8h,  w7   // bitdepth_max
        clz             w9,  w3
        movrel          x7,  ipred_cfl_top_tbl
        sub             w9,  w9,  #26            // width 32 -> 0, 16 -> 1, 8 -> 2, 4 -> 3
        ldrsw           x9,  [x7, w9, uxtw #2]
        dup             v1.8h,   w6   // alpha
        add             x2,  x2,  #2             // presumably the first pixel of the top row
        add             x7,  x7,  x9
        add             x6,  x0,  x1             // x6 = second output row
        lsl             x1,  x1,  #1
        movi            v30.8h,  #0
        br              x7
4:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.4h},  [x2]
        addv            h0,      v0.4h
        urshr           v0.4h,   v0.4h,   #2     // (sum + 2) >> 2
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w4)
8:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h},  [x2]
        addv            h0,      v0.8h
        urshr           v0.4h,   v0.4h,   #3     // (sum + 4) >> 3
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w8)
16:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h}, [x2]
        addp            v0.8h,   v2.8h,   v3.8h
        addv            h0,      v0.8h
        urshr           v0.4h,   v0.4h,   #4     // (sum + 8) >> 4
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w16)
32:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h, v4.8h, v5.8h}, [x2]
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v4.8h,   v4.8h,   v5.8h
        addp            v0.8h,   v2.8h,   v4.8h
        uaddlv          s0,      v0.8h           // final across-vector sum widened to 32 bits
        rshrn           v0.4h,   v0.4s,   #5     // (sum + 16) >> 5, narrowed back to 16 bits
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w16)   // the w16 tail also handles width 32
endfunc

// Indexed by clz(width) - 26.
jumptable ipred_cfl_top_tbl
        .word 32b - ipred_cfl_top_tbl
        .word 16b - ipred_cfl_top_tbl
        .word 8b  - ipred_cfl_top_tbl
        .word 4b  - ipred_cfl_top_tbl
endjumptable

// void ipred_cfl_left_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                                const pixel *const topleft,
//                                const int width, const int height,
//                                const int16_t *ac, const int alpha,
//                                const int bitdepth_max);
//
// CfL prediction where dc is the rounded average of `height` pixels read
// from topleft - 2 * height bytes. Two table lookups are done up front:
// x7 selects the height-specific dc computation, x9 the width-specific
// L(ipred_cfl_splat_w*) tail that the dc code jumps to afterwards.
function ipred_cfl_left_16bpc_neon, export=1
        dup             v31.8h,  w7   // bitdepth_max
        sub             x2,  x2,  w4, uxtw #1    // presumably the start of the left column
        clz             w9,  w3
        clz             w8,  w4
        movrel          x10, ipred_cfl_splat_tbl
        movrel          x7,  ipred_cfl_left_tbl
        sub             w9,  w9,  #26            // index by width
        sub             w8,  w8,  #26            // index by height
        ldrsw           x9,  [x10, w9, uxtw #2]
        ldrsw           x8,  [x7,  w8, uxtw #2]
        dup             v1.8h,   w6   // alpha
        add             x9,  x10, x9             // x9 = splat tail for this width
        add             x7,  x7,  x8             // x7 = dc code for this height
        add             x6,  x0,  x1             // x6 = second output row
        lsl             x1,  x1,  #1
        movi            v30.8h,  #0
        br              x7

L(ipred_cfl_left_h4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.4h},  [x2]
        addv            h0,      v0.4h
        urshr           v0.4h,   v0.4h,   #2     // (sum + 2) >> 2
        dup             v0.8h,   v0.h[0]
        br              x9

L(ipred_cfl_left_h8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h},  [x2]
        addv            h0,      v0.8h
        urshr           v0.4h,   v0.4h,   #3     // (sum + 4) >> 3
        dup             v0.8h,   v0.h[0]
        br              x9

L(ipred_cfl_left_h16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h}, [x2]
        addp            v0.8h,   v2.8h,   v3.8h
        addv            h0,      v0.8h
        urshr           v0.4h,   v0.4h,   #4     // (sum + 8) >> 4
        dup             v0.8h,   v0.h[0]
        br              x9

L(ipred_cfl_left_h32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h, v4.8h, v5.8h}, [x2]
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v4.8h,   v4.8h,   v5.8h
        addp            v0.8h,   v2.8h,   v4.8h
        uaddlv          s0,      v0.8h           // final across-vector sum widened to 32 bits
        rshrn           v0.4h,   v0.4s,   #5     // (sum + 16) >> 5, narrowed back to 16 bits
        dup             v0.8h,   v0.h[0]
        br              x9
endfunc

// Indexed by clz(height) - 26.
jumptable ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h32) - ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h16) - ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h8)  - ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h4)  - ipred_cfl_left_tbl
endjumptable

// void ipred_cfl_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                           const pixel *const topleft,
//                           const int width, const int
//                           height,
//                           const int16_t *ac, const int alpha,
//                           const int bitdepth_max);
//
// Register use on entry (AAPCS64 order of the prototype):
//   x0 = dst, x1 = stride, x2 = topleft, w3 = width, w4 = height,
//   x5 = ac (not touched in this part), w6 = alpha, w7 = bitdepth_max.
//
// This part of the function computes the DC value and then branches to the
// shared L(ipred_cfl_splat_w*) code, which lies outside this section.
//
// DC computation:
//   sum  = (height pixels read from topleft - height)      // presumably the
//        + (width pixels read from topleft + 1)            // left and top edges
//   dc   = (sum + ((width + height) >> 1)) >> ctz(width + height)
//   if width != height, width + height is 3 << n or 5 << n, so the remaining
//   division by 3 or 5 is done as a multiply and shift:
//        dc = (dc * 0xAAAB) >> 17        // 0xAAAB / 2^17 ~= 1/3
//        dc = (dc * 0x6667) >> 17        // 0x6667 / 2^17 ~= 1/5
//
// Dispatch goes through ipred_cfl_tbl twice. x7 receives the address of the
// height handler L(ipred_cfl_h*) (index clz(height) - 26, entries 0..3) and
// x9 the address of the width handler L(ipred_cfl_w*) (index
// clz(width) - 22, entries 4..7). The height handler runs first and leaves
// through "br x9".
//
// State handed to the splat code:
//   v0  = dc in all lanes     v1  = alpha
//   v30 = 0                   v31 = bitdepth_max
//   x6  = dst + stride        x1  = 2 * stride
function ipred_cfl_16bpc_neon, export=1
        dup             v31.8h,  w7              // bitdepth_max
        sub             x2,  x2,  w4, uxtw #1    // topleft - height (2 bytes per pixel)
        add             w8,  w3,  w4             // width + height
        dup             v1.8h,   w6              // alpha
        clz             w9,  w3
        clz             w6,  w4
        dup             v16.4s,  w8              // width + height
        movrel          x7,  ipred_cfl_tbl
        rbit            w8,  w8                  // rbit(width + height)
        sub             w9,  w9,  #22            // 26 leading bits, minus table offset 4
        sub             w6,  w6,  #26
        clz             w8,  w8                  // ctz(width + height)
        ldrsw           x9,  [x7, w9, uxtw #2]   // offset of the width handler
        ldrsw           x6,  [x7, w6, uxtw #2]   // offset of the height handler
        neg             w8,  w8                  // -ctz(width + height)
        add             x9,  x7,  x9             // x9 = width handler, entered second
        add             x7,  x7,  x6             // x7 = height handler, entered first
        ushr            v16.4s,  v16.4s,  #1     // (width + height) >> 1
        dup             v17.4s,  w8              // -ctz(width + height)
        add             x6,  x0,  x1             // second row pointer: dst + stride
        lsl             x1,  x1,  #1             // stride for two rows at a time
        movi            v30.8h,  #0
        br              x7

L(ipred_cfl_h4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.4h},  [x2], #8
        uaddlv          s0,  v0.4h               // s0 = sum of the 4 pixels
        add             x2,  x2,  #2             // skip one pixel (the one at topleft)
        br              x9
L(ipred_cfl_w4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.4h},  [x2]
        add             v0.2s,   v0.2s,   v16.2s // add the rounding bias
        uaddlv          s2,  v2.4h
        cmp             w4,  #4
        add             v0.2s,   v0.2s,   v2.2s
        ushl            v0.2s,   v0.2s,   v17.2s // negative count: shift right by ctz(w + h)
        b.eq            1f                       // square block: the shift was the full division
        // h = 8/16
        cmp             w4,  #16
        mov             w16, #0x6667
        mov             w17, #0xAAAB
        csel            w16, w16, w17, eq        // h == 16: /5, h == 8: /3
        dup             v16.2s,  w16
        mul             v0.2s,   v0.2s,   v16.2s
        ushr            v0.2s,   v0.2s,   #17
1:
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w4)

L(ipred_cfl_h8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8h},  [x2], #16
        uaddlv          s0,  v0.8h
        add             x2,  x2,  #2             // skip one pixel (the one at topleft)
        br              x9
L(ipred_cfl_w8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h},  [x2]
        add             v0.2s,   v0.2s,   v16.2s
        uaddlv          s2,  v2.8h
        cmp             w4,  #8
        add             v0.2s,   v0.2s,   v2.2s
        ushl            v0.2s,   v0.2s,   v17.2s
        b.eq            1f
        // h = 4/16/32
        cmp             w4,  #32
        mov             w16, #0x6667
        mov             w17, #0xAAAB
        csel            w16, w16, w17, eq        // h == 32: /5, h == 4 or 16: /3
        dup             v16.2s,  w16
        mul             v0.2s,   v0.2s,   v16.2s
        ushr            v0.2s,   v0.2s,   #17
1:
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w8)

L(ipred_cfl_h16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h}, [x2], #32
        addp            v0.8h,   v2.8h,   v3.8h  // pairwise add in 16 bit before widening
        add             x2,  x2,  #2             // skip one pixel (the one at topleft)
        uaddlv          s0,  v0.8h
        br              x9
L(ipred_cfl_w16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h}, [x2]
        add             v0.2s,   v0.2s,   v16.2s
        addp            v2.8h,   v2.8h,   v3.8h
        uaddlv          s2,  v2.8h
        cmp             w4,  #16
        add             v0.2s,   v0.2s,   v2.2s
        ushl            v0.2s,   v0.2s,   v17.2s
        b.eq            1f
        // h = 4/8/32
        tst             w4,  #(32+16+8)          // 16 added to make a consecutive bitmask
        mov             w16, #0x6667
        mov             w17, #0xAAAB
        csel            w16, w16, w17, eq        // h == 4: /5, h == 8 or 32: /3
        dup             v16.2s,  w16
        mul             v0.2s,   v0.2s,   v16.2s
        ushr            v0.2s,   v0.2s,   #17
1:
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w16)

L(ipred_cfl_h32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h, v4.8h, v5.8h}, [x2], #64
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v4.8h,   v4.8h,   v5.8h
        addp            v0.8h,   v2.8h,   v4.8h
        add             x2,  x2,  #2             // skip one pixel (the one at topleft)
        uaddlv          s0,  v0.8h
        br              x9
L(ipred_cfl_w32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.8h, v3.8h, v4.8h, v5.8h}, [x2]
        add             v0.4s,   v0.4s,   v16.4s
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v4.8h,   v4.8h,   v5.8h
        addp            v2.8h,   v2.8h,   v4.8h
        cmp             w4,  #32
        uaddlv          s2,  v2.8h
        add             v0.2s,   v0.2s,   v2.2s
        ushl            v0.2s,   v0.2s,   v17.2s
        b.eq            1f
        // h = 8/16
        cmp             w4,  #8
        mov             w16, #0x6667
        mov             w17, #0xAAAB
        csel            w16, w16, w17, eq        // h == 8: /5, h == 16: /3
        dup             v16.2s,  w16
        mul             v0.2s,   v0.2s,   v16.2s
        ushr            v0.2s,   v0.2s,   #17
1:
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w16)   // width 32 enters the w16 splat code
endfunc

// Offsets relative to the table start. Entries 0..3 are the height handlers,
// indexed by clz(height) - 26.
jumptable ipred_cfl_tbl
        .word L(ipred_cfl_h32) - ipred_cfl_tbl
        .word L(ipred_cfl_h16) - ipred_cfl_tbl
        .word L(ipred_cfl_h8)  - ipred_cfl_tbl
        .word L(ipred_cfl_h4)  - ipred_cfl_tbl
// ipred_cfl_tbl, entries 4..7: the width handlers, indexed by
// clz(width) - 22.
        .word L(ipred_cfl_w32) - ipred_cfl_tbl
        .word L(ipred_cfl_w16) - ipred_cfl_tbl
        .word L(ipred_cfl_w8)  - ipred_cfl_tbl
        .word L(ipred_cfl_w4)  - ipred_cfl_tbl
endjumptable

// void ipred_cfl_ac_420_16bpc_neon(int16_t *const ac, const pixel *const ypx,
//                                  const ptrdiff_t stride, const int w_pad,
//                                  const int h_pad, const int cw, const int ch);
//
// Register use on entry (AAPCS64 order of the prototype):
//   x0 = ac, x1 = ypx, x2 = stride, w3 = w_pad, w4 = h_pad, w5 = cw, w6 = ch.
//
// Produces the cw x ch block of AC coefficients for 4:2:0 input:
//   1. Every output value is the sum of a 2x2 group of input pixels, shifted
//      left by 1. Rows are read two at a time through x1 (even rows) and
//      x10 (odd rows), both advancing by 2 * stride.
//   2. Columns covered by w_pad are filled by repeating the last computed
//      value of the row; rows covered by h_pad are filled by repeating the
//      last computed row. Both pads are counted in units of 4 outputs.
//   3. The rounded average of the whole block,
//      (sum + (1 << (log2sz - 1))) >> log2sz with
//      log2sz = ctz(cw) + ctz(ch), is subtracted from every value.
//
// Registers live across the whole function:
//   w8       = ch - 4 * h_pad, rows still to be computed
//   w4       = 4 * h_pad, rows still to be padded
//   v24-v27  = 32 bit partial sums of everything stored so far
//   v31      = -log2sz in every lane, used as the count for urshl
//
// The *_hpad and *_calc_subtract_dc labels are also entered from
// ipred_cfl_ac_422_16bpc_neon, so the register state at those labels is part
// of their contract.
function ipred_cfl_ac_420_16bpc_neon, export=1
        clz             w8,  w5
        lsl             w4,  w4,  #2             // h_pad in rows
        movrel          x7,  ipred_cfl_ac_420_tbl
        sub             w8,  w8,  #27            // table index: cw 16 -> 0, 8 -> 1, 4 -> 2
        ldrsw           x8,  [x7, w8, uxtw #2]
        movi            v24.4s,  #0
        movi            v25.4s,  #0
        movi            v26.4s,  #0
        movi            v27.4s,  #0
        add             x7,  x7,  x8
        sub             w8,  w6,  w4             // height - h_pad
        rbit            w9,  w5                  // rbit(width)
        rbit            w10, w6                  // rbit(height)
        clz             w9,  w9                  // ctz(width)
        clz             w10, w10                 // ctz(height)
        add             w9,  w9,  w10            // log2sz
        add             x10, x1,  x2             // x10 = second input row
        dup             v31.4s,  w9
        lsl             x2,  x2,  #1             // both row pointers step two rows
        neg             v31.4s,  v31.4s          // -log2sz
        br              x7

L(ipred_cfl_ac_420_w4):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input
        ld1             {v0.8h}, [x1],  x2
        ld1             {v1.8h}, [x10], x2
        ld1             {v2.8h}, [x1],  x2
        ld1             {v3.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v2.8h    // horizontal pairs, even input rows
        addp            v1.8h,  v1.8h,  v3.8h    // horizontal pairs, odd input rows
        add             v0.8h,  v0.8h,  v1.8h    // 2x2 sums, two output rows of 4
        shl             v0.8h,  v0.8h,  #1
        subs            w8,  w8,  #2
        st1             {v0.8h}, [x0], #16
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        b.gt            1b
        // Replicate the last output row (upper half of v0) into v0 and v1.
        trn2            v1.2d,  v0.2d,  v0.2d
        trn2            v0.2d,  v0.2d,  v0.2d
L(ipred_cfl_ac_420_w4_hpad):
        cbz             w4,  3f
2:      // Vertical padding (h_pad > 0)
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h}, [x0], #32 // 4 rows of 4
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        b.gt            2b
3:
L(ipred_cfl_ac_420_w4_calc_subtract_dc):
        // Aggregate the sums
        add             v24.4s, v24.4s, v25.4s
        add             v26.4s, v26.4s, v27.4s
        add             v0.4s,  v24.4s, v26.4s
        addv            s0,  v0.4s               // sum
        sub             x0,  x0,  w6, uxtw #3    // rewind ac by w6 * 8 bytes; wider paths scale w6 first
        urshl           v4.2s,  v0.2s,  v31.2s   // (sum + (1 << (log2sz - 1))) >>= log2sz
        dup             v4.8h,  v4.h[0]
6:      // Subtract dc from ac
        ld1             {v0.8h, v1.8h}, [x0]
        subs            w6,  w6,  #4
        sub             v0.8h,  v0.8h,  v4.8h
        sub             v1.8h,  v1.8h,  v4.8h
        st1             {v0.8h, v1.8h}, [x0], #32
        b.gt            6b
        ret

L(ipred_cfl_ac_420_w8):
        AARCH64_VALID_JUMP_TARGET
        cbnz            w3,  L(ipred_cfl_ac_420_w8_wpad)
1:      // Copy and subsample input, without padding
        ld1             {v0.8h, v1.8h}, [x1],  x2
        ld1             {v2.8h, v3.8h}, [x10], x2
        ld1             {v4.8h, v5.8h}, [x1],  x2
        addp            v0.8h,  v0.8h,  v1.8h
        ld1             {v6.8h, v7.8h}, [x10], x2
        addp            v2.8h,  v2.8h,  v3.8h
        addp            v4.8h,  v4.8h,  v5.8h
        addp            v6.8h,  v6.8h,  v7.8h
        add             v0.8h,  v0.8h,  v2.8h
        add             v4.8h,  v4.8h,  v6.8h
        shl             v0.8h,  v0.8h,  #1
        shl             v1.8h,  v4.8h,  #1
        subs            w8,  w8,  #2
        st1             {v0.8h, v1.8h}, [x0], #32
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        b.gt            1b
        mov             v0.16b, v1.16b           // v0 = v1 = last output row
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_420_w8_wpad):
1:      // Copy and subsample input, padding 4
        ld1             {v0.8h}, [x1],  x2
        ld1             {v1.8h}, [x10], x2
        ld1             {v2.8h}, [x1],  x2
        ld1             {v3.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v2.8h
        addp            v1.8h,  v1.8h,  v3.8h
        add             v0.8h,  v0.8h,  v1.8h
        shl             v0.8h,  v0.8h,  #1
        dup             v1.4h,  v0.h[3]          // padding for the first output row
        dup             v3.4h,  v0.h[7]          // padding for the second output row
        trn2            v2.2d,  v0.2d,  v0.2d    // second output row, computed half
        subs            w8,  w8,  #2
        st1             {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw           v25.4s, v25.4s, v1.4h
        uaddw           v26.4s, v26.4s, v2.4h
        uaddw           v27.4s, v27.4s, v3.4h
        b.gt            1b
        trn1            v0.2d,  v2.2d,  v3.2d    // v0 = v1 = last output row
        trn1            v1.2d,  v2.2d,  v3.2d

L(ipred_cfl_ac_420_w8_hpad):
        cbz             w4,  3f
2:      // Vertical padding (h_pad > 0)
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h}, [x0], #32
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        st1             {v0.8h, v1.8h}, [x0], #32
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        b.gt            2b
3:

        // Double the height and reuse the w4 summing/subtracting
        lsl             w6,  w6,  #1
        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)

L(ipred_cfl_ac_420_w16):
        AARCH64_VALID_JUMP_TARGET
        movrel          x7,  ipred_cfl_ac_420_w16_tbl
        ldrsw           x3,  [x7, w3, uxtw #2]   // second level dispatch on w_pad
        add             x7,  x7,  x3
        br              x7

L(ipred_cfl_ac_420_w16_wpad0):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, without padding
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x1],  x2
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v1.8h
        addp            v2.8h,  v2.8h,  v3.8h
        addp            v4.8h,  v4.8h,  v5.8h
        addp            v6.8h,  v6.8h,  v7.8h
        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x1],  x2
        add             v0.8h,  v0.8h,  v4.8h
        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x10], x2
        add             v2.8h,  v2.8h,  v6.8h
        addp            v16.8h, v16.8h, v17.8h
        addp            v18.8h, v18.8h, v19.8h
        addp            v20.8h, v20.8h, v21.8h
        addp            v22.8h, v22.8h, v23.8h
        add             v16.8h, v16.8h, v20.8h
        add             v18.8h, v18.8h, v22.8h
        shl             v0.8h,  v0.8h,  #1
        shl             v1.8h,  v2.8h,  #1
        shl             v2.8h,  v16.8h, #1
        shl             v3.8h,  v18.8h, #1
        subs            w8,  w8,  #2
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        uaddw           v24.4s, v24.4s, v2.4h
        uaddw2          v25.4s, v25.4s, v2.8h
        uaddw           v26.4s, v26.4s, v3.4h
        uaddw2          v27.4s, v27.4s, v3.8h
        b.gt            1b
        mov             v0.16b, v2.16b           // v0:v1 = v2:v3 = last output row
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_420_w16_wpad1):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, padding 4
        ldr             q2,  [x1,  #32]
        ld1             {v0.8h, v1.8h}, [x1],  x2
        ldr             q5,  [x10, #32]
        ld1             {v3.8h, v4.8h}, [x10], x2
        addp            v2.8h,  v2.8h,  v2.8h
        addp            v0.8h,  v0.8h,  v1.8h
        addp            v5.8h,  v5.8h,  v5.8h
        addp            v3.8h,  v3.8h,  v4.8h
        ldr             q18, [x1,  #32]
        add             v2.4h,  v2.4h,  v5.4h
        ld1             {v16.8h, v17.8h}, [x1],  x2
        add             v0.8h,  v0.8h,  v3.8h
        ldr             q21, [x10, #32]
        ld1             {v19.8h, v20.8h}, [x10], x2
        addp            v18.8h, v18.8h, v18.8h
        addp            v16.8h, v16.8h, v17.8h
        addp            v21.8h, v21.8h, v21.8h
        addp            v19.8h, v19.8h, v20.8h
        add             v18.4h, v18.4h, v21.4h
        add             v16.8h, v16.8h, v19.8h
        shl             v1.4h,  v2.4h,  #1
        shl             v0.8h,  v0.8h,  #1
        shl             v3.4h,  v18.4h, #1
        shl             v2.8h,  v16.8h, #1
        dup             v4.4h,  v1.h[3]          // repeat the last computed value
        dup             v5.4h,  v3.h[3]
        trn1            v1.2d,  v1.2d,  v4.2d    // 4 computed + 4 padded values
        trn1            v3.2d,  v3.2d,  v5.2d
        subs            w8,  w8,  #2
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        uaddw           v24.4s, v24.4s, v2.4h
        uaddw2          v25.4s, v25.4s, v2.8h
        uaddw           v26.4s, v26.4s, v3.4h
        uaddw2          v27.4s, v27.4s, v3.8h
        b.gt            1b
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_420_w16_wpad2):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, padding 8
        ld1             {v0.8h, v1.8h}, [x1],  x2
        ld1             {v2.8h, v3.8h}, [x10], x2
        ld1             {v4.8h, v5.8h}, [x1],  x2
        addp            v0.8h,  v0.8h,  v1.8h
        ld1             {v6.8h, v7.8h}, [x10], x2
        addp            v2.8h,  v2.8h,  v3.8h
        addp            v4.8h,  v4.8h,  v5.8h
        addp            v6.8h,  v6.8h,  v7.8h
        add             v0.8h,  v0.8h,  v2.8h
        add             v4.8h,  v4.8h,  v6.8h
        shl             v0.8h,  v0.8h,  #1
        shl             v2.8h,  v4.8h,  #1
        dup             v1.8h,  v0.h[7]          // right half is all padding
        dup             v3.8h,  v2.h[7]
        subs            w8,  w8,  #2
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        uaddw           v24.4s, v24.4s, v2.4h
        uaddw2          v25.4s, v25.4s, v2.8h
        uaddw           v26.4s, v26.4s, v3.4h
        uaddw2          v27.4s, v27.4s, v3.8h
        b.gt            1b
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_420_w16_wpad3):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, padding 12
        ld1             {v0.8h}, [x1],  x2
        ld1             {v2.8h}, [x10], x2
        ld1             {v4.8h}, [x1],  x2
        ld1             {v6.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v4.8h
        addp            v2.8h,  v2.8h,  v6.8h
        add             v0.8h,  v0.8h,  v2.8h
        shl             v0.8h,  v0.8h,  #1
        dup             v1.8h,  v0.h[3]          // padding value, first output row
        dup             v3.8h,  v0.h[7]          // padding value, second output row
        trn2            v2.2d,  v0.2d,  v3.2d    // second row: 4 computed + 4 padded
        trn1            v0.2d,  v0.2d,  v1.2d    // first row: 4 computed + 4 padded
        subs            w8,  w8,  #2
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        uaddw           v24.4s, v24.4s, v2.4h
        uaddw2          v25.4s, v25.4s, v2.8h
        uaddw           v26.4s, v26.4s, v3.4h
        uaddw2          v27.4s, v27.4s, v3.8h
        b.gt            1b
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b

L(ipred_cfl_ac_420_w16_hpad):
        cbz             w4,  3f
2:      // Vertical padding (h_pad > 0)
        subs            w4,  w4,  #4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        uaddw           v24.4s, v24.4s, v2.4h
        uaddw2          v25.4s, v25.4s, v2.8h
        uaddw           v26.4s, v26.4s, v3.4h
        uaddw2          v27.4s, v27.4s, v3.8h
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        uaddw           v24.4s, v24.4s, v0.4h
        uaddw2          v25.4s, v25.4s, v0.8h
        uaddw           v26.4s, v26.4s, v1.4h
        uaddw2          v27.4s, v27.4s, v1.8h
        uaddw           v24.4s, v24.4s, v2.4h
        uaddw2          v25.4s, v25.4s, v2.8h
        uaddw           v26.4s, v26.4s, v3.4h
        uaddw2          v27.4s, v27.4s, v3.8h
        b.gt            2b
3:

        // Quadruple the height and reuse the w4 summing/subtracting
        lsl             w6,  w6,  #2
        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
endfunc

// Offsets relative to the table start, indexed by clz(cw) - 27.
jumptable ipred_cfl_ac_420_tbl
        .word L(ipred_cfl_ac_420_w16) - ipred_cfl_ac_420_tbl
        .word L(ipred_cfl_ac_420_w8)  - ipred_cfl_ac_420_tbl
        .word L(ipred_cfl_ac_420_w4)  - ipred_cfl_ac_420_tbl
endjumptable
// Entry points of the 4:2:0 w16 code, one per w_pad value, stored as offsets
// relative to the start of the table.
// NOTE(review): the code that reads this table lies outside this section;
// presumably it dispatches the same way as the 4:2:2 w16 code below.
jumptable ipred_cfl_ac_420_w16_tbl
        .word L(ipred_cfl_ac_420_w16_wpad0) - ipred_cfl_ac_420_w16_tbl
        .word L(ipred_cfl_ac_420_w16_wpad1) - ipred_cfl_ac_420_w16_tbl
        .word L(ipred_cfl_ac_420_w16_wpad2) - ipred_cfl_ac_420_w16_tbl
        .word L(ipred_cfl_ac_420_w16_wpad3) - ipred_cfl_ac_420_w16_tbl
endjumptable

// Widen the eight halfwords of \ra and of \rb to 32 bit and add them onto
// the running sums: \ra is folded into v24/v25, \rb into v26/v27.
.macro cfl_ac_acc_pair ra, rb
        uaddw           v24.4s, v24.4s, \ra\().4h
        uaddw2          v25.4s, v25.4s, \ra\().8h
        uaddw           v26.4s, v26.4s, \rb\().4h
        uaddw2          v27.4s, v27.4s, \rb\().8h
.endm

// Write v0-v3 (32 halfwords) to the output at x0, advance x0 by 64 bytes,
// then fold all four registers into the sums in v24-v27.
.macro cfl_ac_store_acc4
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        cfl_ac_acc_pair v0, v1
        cfl_ac_acc_pair v2, v3
.endm

// void ipred_cfl_ac_422_16bpc_neon(int16_t *const ac, const pixel *const ypx,
//                                  const ptrdiff_t stride, const int w_pad,
//                                  const int h_pad, const int cw, const int ch);
//
// Register use on entry: x0 = ac, x1 = ypx, x2 = stride, w3 = w_pad,
// w4 = h_pad, w5 = cw (width), w6 = ch (height).
//
// Each output value is the sum of two horizontally adjacent input pixels,
// shifted left by 2. Every value written is also accumulated into v24-v27.
// Once the non-padded rows are done, control passes to labels of the 4:2:0
// routine, which are defined outside this section.
// NOTE(review): those labels presumably do the vertical padding and the
// DC subtraction using v24-v27 and v31 - confirm against the 4:2:0 code.
function ipred_cfl_ac_422_16bpc_neon, export=1
        clz             w8,  w5
        lsl             w4,  w4,  #2              // h_pad * 4
        movrel          x7,  ipred_cfl_ac_422_tbl
        sub             w8,  w8,  #27             // clz(16) == 27 -> index 0
        ldrsw           x8,  [x7, w8, uxtw #2]    // offset of the width handler
        movi            v24.4s,  #0               // clear the four accumulators
        movi            v25.4s,  #0
        movi            v26.4s,  #0
        movi            v27.4s,  #0
        add             x7,  x7,  x8              // address of the width handler
        sub             w8,  w6,  w4              // rows to read: height - h_pad
        rbit            w9,  w5
        rbit            w10, w6
        clz             w9,  w9                   // ctz(width)
        clz             w10, w10                  // ctz(height)
        add             w9,  w9,  w10             // log2sz
        add             x10, x1,  x2              // x10 walks the odd rows
        dup             v31.4s,  w9
        lsl             x2,  x2,  #1              // both pointers step two rows
        neg             v31.4s,  v31.4s           // -log2sz in every lane
        br              x7

L(ipred_cfl_ac_422_w4):
        AARCH64_VALID_JUMP_TARGET
1:      // Four rows per pass, 8 input pixels -> 4 outputs per row
        ld1             {v0.8h}, [x1],  x2
        ld1             {v1.8h}, [x10], x2
        ld1             {v2.8h}, [x1],  x2
        ld1             {v3.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v1.8h     // rows 0 and 1
        addp            v2.8h,  v2.8h,  v3.8h     // rows 2 and 3
        shl             v0.8h,  v0.8h,  #2
        shl             v1.8h,  v2.8h,  #2
        subs            w8,  w8,  #4
        st1             {v0.8h, v1.8h}, [x0], #32
        cfl_ac_acc_pair v0, v1
        b.gt            1b
        // Replicate the upper half of v1 (the last row) into v0 and v1
        trn2            v0.2d,  v1.2d,  v1.2d
        trn2            v1.2d,  v1.2d,  v1.2d
        b               L(ipred_cfl_ac_420_w4_hpad)

L(ipred_cfl_ac_422_w8):
        AARCH64_VALID_JUMP_TARGET
        cbnz            w3,  L(ipred_cfl_ac_422_w8_wpad)
1:      // No horizontal padding: four rows per pass, 16 pixels -> 8 outputs
        ld1             {v0.8h, v1.8h}, [x1],  x2
        ld1             {v2.8h, v3.8h}, [x10], x2
        ld1             {v4.8h, v5.8h}, [x1],  x2
        addp            v0.8h,  v0.8h,  v1.8h
        ld1             {v6.8h, v7.8h}, [x10], x2
        addp            v2.8h,  v2.8h,  v3.8h
        addp            v4.8h,  v4.8h,  v5.8h
        addp            v6.8h,  v6.8h,  v7.8h
        shl             v0.8h,  v0.8h,  #2
        shl             v1.8h,  v2.8h,  #2
        shl             v2.8h,  v4.8h,  #2
        shl             v3.8h,  v6.8h,  #2
        subs            w8,  w8,  #4
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in both v0 and v1
        mov             v0.16b, v3.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_422_w8_wpad):
1:      // Four padded outputs per row: 8 pixels -> 4 outputs, last one repeated
        ld1             {v0.8h}, [x1],  x2
        ld1             {v1.8h}, [x10], x2
        ld1             {v2.8h}, [x1],  x2
        ld1             {v3.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v1.8h     // rows 0 and 1
        addp            v2.8h,  v2.8h,  v3.8h     // rows 2 and 3
        shl             v0.8h,  v0.8h,  #2
        shl             v2.8h,  v2.8h,  #2
        dup             v4.4h,  v0.h[3]           // last output of row 0
        dup             v5.8h,  v0.h[7]           // last output of row 1
        dup             v6.4h,  v2.h[3]           // last output of row 2
        dup             v7.8h,  v2.h[7]           // last output of row 3
        // The upper halves must be extracted before v0/v2 are overwritten
        trn2            v1.2d,  v0.2d,  v5.2d
        trn1            v0.2d,  v0.2d,  v4.2d
        trn2            v3.2d,  v2.2d,  v7.2d
        trn1            v2.2d,  v2.2d,  v6.2d
        subs            w8,  w8,  #4
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in both v0 and v1
        mov             v0.16b, v3.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_422_w16):
        AARCH64_VALID_JUMP_TARGET
        // Second-level dispatch, indexed directly by w_pad
        movrel          x7,  ipred_cfl_ac_422_w16_tbl
        ldrsw           x3,  [x7, w3, uxtw #2]
        add             x7,  x7,  x3
        br              x7

L(ipred_cfl_ac_422_w16_wpad0):
        AARCH64_VALID_JUMP_TARGET
1:      // No horizontal padding: two rows per pass, 32 pixels -> 16 outputs
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x1],  x2
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v1.8h
        addp            v2.8h,  v2.8h,  v3.8h
        addp            v4.8h,  v4.8h,  v5.8h
        addp            v6.8h,  v6.8h,  v7.8h
        shl             v0.8h,  v0.8h,  #2
        shl             v1.8h,  v2.8h,  #2
        shl             v2.8h,  v4.8h,  #2
        shl             v3.8h,  v6.8h,  #2
        subs            w8,  w8,  #2
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in v0/v1
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad1):
        AARCH64_VALID_JUMP_TARGET
1:      // Four padded outputs per row: 24 pixels -> 12 outputs, last repeated
        ldr             q2,  [x1,  #32]           // third group of 8 pixels
        ld1             {v0.8h, v1.8h}, [x1],  x2
        ldr             q6,  [x10, #32]
        ld1             {v4.8h, v5.8h}, [x10], x2
        addp            v2.8h,  v2.8h,  v2.8h
        addp            v0.8h,  v0.8h,  v1.8h
        addp            v6.8h,  v6.8h,  v6.8h
        addp            v4.8h,  v4.8h,  v5.8h
        shl             v1.4h,  v2.4h,  #2
        shl             v0.8h,  v0.8h,  #2
        shl             v3.4h,  v6.4h,  #2
        shl             v2.8h,  v4.8h,  #2
        dup             v4.4h,  v1.h[3]
        dup             v5.4h,  v3.h[3]
        trn1            v1.2d,  v1.2d,  v4.2d     // 4 real outputs + 4 padded
        trn1            v3.2d,  v3.2d,  v5.2d
        subs            w8,  w8,  #2
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in v0/v1
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad2):
        AARCH64_VALID_JUMP_TARGET
1:      // Eight padded outputs per row: 16 pixels -> 8 outputs, last repeated
        ld1             {v0.8h, v1.8h}, [x1],  x2
        ld1             {v2.8h, v3.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v1.8h
        addp            v2.8h,  v2.8h,  v3.8h
        shl             v0.8h,  v0.8h,  #2
        shl             v2.8h,  v2.8h,  #2
        dup             v1.8h,  v0.h[7]
        dup             v3.8h,  v2.h[7]
        subs            w8,  w8,  #2
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in v0/v1
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_422_w16_wpad3):
        AARCH64_VALID_JUMP_TARGET
1:      // Twelve padded outputs per row: 8 pixels -> 4 outputs, last repeated
        ld1             {v0.8h}, [x1],  x2
        ld1             {v2.8h}, [x10], x2
        addp            v0.8h,  v0.8h,  v0.8h
        addp            v2.8h,  v2.8h,  v2.8h
        shl             v0.4h,  v0.4h,  #2
        shl             v2.4h,  v2.4h,  #2
        dup             v1.8h,  v0.h[3]
        dup             v3.8h,  v2.h[3]
        trn1            v0.2d,  v0.2d,  v1.2d     // 4 real outputs + 4 padded
        trn1            v2.2d,  v2.2d,  v3.2d
        subs            w8,  w8,  #2
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in v0/v1
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)
endfunc

// Width dispatch for the 4:2:2 routine; the index is clz(width) - 27,
// so width 16 selects entry 0, width 8 entry 1 and width 4 entry 2.
jumptable ipred_cfl_ac_422_tbl
        .word L(ipred_cfl_ac_422_w16) - ipred_cfl_ac_422_tbl
        .word L(ipred_cfl_ac_422_w8)  - ipred_cfl_ac_422_tbl
        .word L(ipred_cfl_ac_422_w4)  - ipred_cfl_ac_422_tbl
endjumptable

// w_pad dispatch for the 4:2:2 w16 case; indexed directly by w_pad.
jumptable ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad0) - ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad1) - ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad2) - ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad3) - ipred_cfl_ac_422_w16_tbl
endjumptable

// void ipred_cfl_ac_444_16bpc_neon(int16_t *const ac, const pixel *const ypx,
//                                  const ptrdiff_t stride, const int w_pad,
//                                  const int h_pad, const int cw, const int ch);
//
// Register use on entry: x0 = ac, x1 = ypx, x2 = stride, w3 = w_pad,
// w4 = h_pad, w5 = cw (width), w6 = ch (height).
//
// Each output value is one input pixel shifted left by 3. Every value
// written is also accumulated into v24-v27. Widths 4, 8 and 16 hand over to
// labels of the 4:2:0 routine (defined outside this section) after the
// non-padded rows; width 32 does its own vertical padding first.
// NOTE(review): the 4:2:0 labels presumably do the vertical padding and the
// DC subtraction using v24-v27 and v31 - confirm against the 4:2:0 code.
function ipred_cfl_ac_444_16bpc_neon, export=1
        clz             w8,  w5
        lsl             w4,  w4,  #2              // h_pad * 4
        movrel          x7,  ipred_cfl_ac_444_tbl
        sub             w8,  w8,  #26             // clz(32) == 26 -> index 0
        ldrsw           x8,  [x7, w8, uxtw #2]    // offset of the width handler
        movi            v24.4s,  #0               // clear the four accumulators
        movi            v25.4s,  #0
        movi            v26.4s,  #0
        movi            v27.4s,  #0
        add             x7,  x7,  x8              // address of the width handler
        sub             w8,  w6,  w4              // rows to read: height - h_pad
        rbit            w9,  w5
        rbit            w10, w6
        clz             w9,  w9                   // ctz(width)
        clz             w10, w10                  // ctz(height)
        add             w9,  w9,  w10             // log2sz
        add             x10, x1,  x2              // x10 walks the odd rows
        dup             v31.4s,  w9
        lsl             x2,  x2,  #1              // both pointers step two rows
        neg             v31.4s,  v31.4s           // -log2sz in every lane
        br              x7

L(ipred_cfl_ac_444_w4):
        AARCH64_VALID_JUMP_TARGET
1:      // Four rows per pass, two rows packed into each register
        ld1             {v0.4h},   [x1],  x2
        ld1             {v0.d}[1], [x10], x2
        ld1             {v1.4h},   [x1],  x2
        ld1             {v1.d}[1], [x10], x2
        shl             v0.8h,  v0.8h,  #3
        shl             v1.8h,  v1.8h,  #3
        subs            w8,  w8,  #4
        st1             {v0.8h, v1.8h}, [x0], #32
        cfl_ac_acc_pair v0, v1
        b.gt            1b
        // Replicate the upper half of v1 (the last row) into v0 and v1
        trn2            v0.2d,  v1.2d,  v1.2d
        trn2            v1.2d,  v1.2d,  v1.2d
        b               L(ipred_cfl_ac_420_w4_hpad)

L(ipred_cfl_ac_444_w8):
        AARCH64_VALID_JUMP_TARGET
1:      // Four rows per pass, one register per row
        ld1             {v0.8h}, [x1],  x2
        ld1             {v1.8h}, [x10], x2
        ld1             {v2.8h}, [x1],  x2
        shl             v0.8h,  v0.8h,  #3
        ld1             {v3.8h}, [x10], x2
        shl             v1.8h,  v1.8h,  #3
        shl             v2.8h,  v2.8h,  #3
        shl             v3.8h,  v3.8h,  #3
        subs            w8,  w8,  #4
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in both v0 and v1
        mov             v0.16b, v3.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_444_w16):
        AARCH64_VALID_JUMP_TARGET
        cbnz            w3,  L(ipred_cfl_ac_444_w16_wpad)
1:      // No horizontal padding: two rows per pass, two registers per row
        ld1             {v0.8h, v1.8h}, [x1],  x2
        ld1             {v2.8h, v3.8h}, [x10], x2
        shl             v0.8h,  v0.8h,  #3
        shl             v1.8h,  v1.8h,  #3
        shl             v2.8h,  v2.8h,  #3
        shl             v3.8h,  v3.8h,  #3
        subs            w8,  w8,  #2
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in v0/v1
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_444_w16_wpad):
1:      // Eight padded outputs per row: 8 pixels read, last one repeated
        ld1             {v0.8h}, [x1],  x2
        ld1             {v2.8h}, [x10], x2
        shl             v0.8h,  v0.8h,  #3
        shl             v2.8h,  v2.8h,  #3
        dup             v1.8h,  v0.h[7]
        dup             v3.8h,  v2.h[7]
        subs            w8,  w8,  #2
        cfl_ac_store_acc4
        b.gt            1b
        // Leave the last row in v0/v1
        mov             v0.16b, v2.16b
        mov             v1.16b, v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)

L(ipred_cfl_ac_444_w32):
        AARCH64_VALID_JUMP_TARGET
        // Second-level dispatch, indexed by w_pad / 2
        movrel          x7,  ipred_cfl_ac_444_w32_tbl
        lsr             w3,  w3,  #1
        ldrsw           x3,  [x7, w3, uxtw #2]
        lsr             x2,  x2,  #1              // back to a one-row stride
        add             x7,  x7,  x3
        br              x7

L(ipred_cfl_ac_444_w32_wpad0):
        AARCH64_VALID_JUMP_TARGET
1:      // No horizontal padding: one row of 32 pixels per pass
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x1],  x2
        shl             v0.8h,  v0.8h,  #3
        shl             v1.8h,  v1.8h,  #3
        shl             v2.8h,  v2.8h,  #3
        shl             v3.8h,  v3.8h,  #3
        subs            w8,  w8,  #1
        cfl_ac_store_acc4
        b.gt            1b
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad2):
        AARCH64_VALID_JUMP_TARGET
1:      // Eight padded outputs: 24 pixels read, last one repeated
        ld1             {v0.8h, v1.8h, v2.8h}, [x1],  x2
        shl             v2.8h,  v2.8h,  #3
        shl             v0.8h,  v0.8h,  #3
        shl             v1.8h,  v1.8h,  #3
        dup             v3.8h,  v2.h[7]
        subs            w8,  w8,  #1
        cfl_ac_store_acc4
        b.gt            1b
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad4):
        AARCH64_VALID_JUMP_TARGET
1:      // Sixteen padded outputs: 16 pixels read, last one repeated
        ld1             {v0.8h, v1.8h}, [x1],  x2
        shl             v1.8h,  v1.8h,  #3
        shl             v0.8h,  v0.8h,  #3
        dup             v2.8h,  v1.h[7]
        dup             v3.8h,  v1.h[7]
        subs            w8,  w8,  #1
        cfl_ac_store_acc4
        b.gt            1b
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad6):
        AARCH64_VALID_JUMP_TARGET
1:      // Twenty-four padded outputs: 8 pixels read, last one repeated
        ld1             {v0.8h}, [x1],  x2
        shl             v0.8h,  v0.8h,  #3
        dup             v1.8h,  v0.h[7]
        dup             v2.8h,  v0.h[7]
        dup             v3.8h,  v0.h[7]
        subs            w8,  w8,  #1
        cfl_ac_store_acc4
        b.gt            1b
        // Falls through into the vertical padding below

L(ipred_cfl_ac_444_w32_hpad):
        cbz             w4,  3f                   // nothing to pad
2:      // Vertical padding: repeat the last row (still in v0-v3), two rows
        // per pass
        subs            w4,  w4,  #2
        cfl_ac_store_acc4
        cfl_ac_store_acc4
        b.gt            2b
3:

        // Scale the height by eight so the w4 DC-subtract code can be reused
        lsl             w6,  w6,  #3
        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
endfunc

// Width dispatch for the 4:4:4 routine; the index is clz(width) - 26,
// so width 32 selects entry 0 and width 4 selects entry 3.
jumptable ipred_cfl_ac_444_tbl
        .word L(ipred_cfl_ac_444_w32) - ipred_cfl_ac_444_tbl
        .word L(ipred_cfl_ac_444_w16) - ipred_cfl_ac_444_tbl
        .word L(ipred_cfl_ac_444_w8)  - ipred_cfl_ac_444_tbl
        .word L(ipred_cfl_ac_444_w4)  - ipred_cfl_ac_444_tbl
endjumptable

// w_pad dispatch for the 4:4:4 w32 case; indexed by w_pad / 2.
jumptable ipred_cfl_ac_444_w32_tbl
        .word L(ipred_cfl_ac_444_w32_wpad0) - ipred_cfl_ac_444_w32_tbl
        .word L(ipred_cfl_ac_444_w32_wpad2) - ipred_cfl_ac_444_w32_tbl
        .word L(ipred_cfl_ac_444_w32_wpad4) - ipred_cfl_ac_444_w32_tbl
        .word L(ipred_cfl_ac_444_w32_wpad6) - ipred_cfl_ac_444_w32_tbl
endjumptable