/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2019, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                             const pixel *const topleft,
//                             const int width, const int height, const int a,
//                             const int max_width, const int max_height);
//
// Fills a width x height block at dst with the constant byte 128.
// Registers on entry (per the prototype): x0 = dst, x1 = stride,
// w3 = width, w4 = height. topleft (x2) is not read here.
//
// Dispatch: clz(width) - 25 gives 0 for width 64 up to 4 for width 4, which
// indexes the five offsets in ipred_dc_128_tbl. Every store loop writes four
// rows per pass (x0 walks rows 0, 2, ...; x6 walks rows 1, 3, ...; both step
// by 2 * stride), so height is assumed to be a multiple of 4.
function ipred_dc_128_8bpc_neon, export=1
        clz             w3,  w3
        movrel          x5,  ipred_dc_128_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]   // signed offset relative to the table
        movi            v0.16b,  #128            // fill value
        add             x5,  x5,  x3             // absolute address of the width handler
        add             x6,  x0,  x1             // x6 = second row pointer
        lsl             x1,  x1,  #1             // both pointers advance two rows at a time
        br              x5
40:                                              // width 4
        AARCH64_VALID_JUMP_TARGET
1:
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        b.gt            1b
        ret
80:                                              // width 8
        AARCH64_VALID_JUMP_TARGET
1:
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        b.gt            1b
        ret
160:                                             // width 16
        AARCH64_VALID_JUMP_TARGET
1:
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        b.gt            1b
        ret
320:                                             // width 32
        AARCH64_VALID_JUMP_TARGET
        movi            v1.16b,  #128            // second half of the 32 byte row
1:
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        b.gt            1b
        ret
640:                                             // width 64
        AARCH64_VALID_JUMP_TARGET
        movi            v1.16b,  #128
        movi            v2.16b,  #128
        movi            v3.16b,  #128
1:
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        b.gt            1b
        ret
endfunc

// Width handlers for ipred_dc_128_8bpc_neon, widest first, stored as offsets
// from the start of the table.
jumptable ipred_dc_128_tbl
        .word 640b - ipred_dc_128_tbl
        .word 320b - ipred_dc_128_tbl
        .word 160b - ipred_dc_128_tbl
        .word 80b  - ipred_dc_128_tbl
        .word 40b  - ipred_dc_128_tbl
endjumptable
// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                        const pixel *const topleft,
//                        const int width, const int height, const int a,
//                        const int max_width, const int max_height);
//
// Copies the width bytes starting at topleft + 1 into every row of the
// width x height block at dst.
// Registers on entry (per the prototype): x0 = dst, x1 = stride,
// x2 = topleft, w3 = width, w4 = height.
//
// Dispatch: clz(width) - 25 indexes ipred_v_tbl (0 for width 64 .. 4 for
// width 4). Each handler loads the source row once, then stores it four rows
// per pass through x0 (even rows) and x6 (odd rows), so height is assumed to
// be a multiple of 4.
function ipred_v_8bpc_neon, export=1
        clz             w3,  w3
        movrel          x5,  ipred_v_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]   // signed offset relative to the table
        add             x2,  x2,  #1             // skip the topleft byte itself
        add             x5,  x5,  x3
        add             x6,  x0,  x1             // x6 = second row pointer
        lsl             x1,  x1,  #1             // step two rows at a time
        br              x5
40:                                              // width 4
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.s}[0],  [x2]
1:
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        b.gt            1b
        ret
80:                                              // width 8
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8b},  [x2]
1:
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        b.gt            1b
        ret
160:                                             // width 16
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b}, [x2]
1:
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        b.gt            1b
        ret
320:                                             // width 32
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b, v1.16b}, [x2]
1:
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        b.gt            1b
        ret
640:                                             // width 64
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2]
1:
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        b.gt            1b
        ret
endfunc

// Width handlers for ipred_v_8bpc_neon, widest first, as table-relative offsets.
jumptable ipred_v_tbl
        .word 640b - ipred_v_tbl
        .word 320b - ipred_v_tbl
        .word 160b - ipred_v_tbl
        .word 80b  - ipred_v_tbl
        .word 40b  - ipred_v_tbl
endjumptable

// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                        const pixel *const topleft,
//                        const int width, const int height, const int a,
//                        const int max_width, const int max_height);
//
// Fills each output row with a single byte read from just below topleft in
// memory: row 0 gets topleft[-1], row 1 gets topleft[-2], and so on.
// Registers on entry (per the prototype): x0 = dst, x1 = stride,
// x2 = topleft, w3 = width, w4 = height.
//
// Each pass issues one ld4r at x2, which broadcasts four consecutive bytes
// into v0..v3, then moves x2 back by 4 (x7 = -4). v3 holds the byte nearest
// to topleft, so the registers are stored in the order v3, v2, v1, v0 for
// rows 0..3 of the pass. Height is assumed to be a multiple of 4.
function ipred_h_8bpc_neon, export=1
        clz             w3,  w3
        movrel          x5,  ipred_h_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]
        sub             x2,  x2,  #4             // first group of four source bytes
        add             x5,  x5,  x3
        mov             x7,  #-4                 // post-index step for the ld4r
        add             x6,  x0,  x1             // x6 = second row pointer
        lsl             x1,  x1,  #1             // step two rows at a time
        br              x5
40:                                              // width 4
        AARCH64_VALID_JUMP_TARGET
1:
        ld4r            {v0.8b, v1.8b, v2.8b, v3.8b},  [x2],  x7
        st1             {v3.s}[0],  [x0], x1
        st1             {v2.s}[0],  [x6], x1
        subs            w4,  w4,  #4
        st1             {v1.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        b.gt            1b
        ret
80:                                              // width 8
        AARCH64_VALID_JUMP_TARGET
1:
        ld4r            {v0.8b, v1.8b, v2.8b, v3.8b},  [x2],  x7
        st1             {v3.8b},  [x0], x1
        st1             {v2.8b},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v1.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        b.gt            1b
        ret
160:                                             // width 16
        AARCH64_VALID_JUMP_TARGET
1:
        ld4r            {v0.16b, v1.16b, v2.16b, v3.16b},  [x2],  x7
        st1             {v3.16b}, [x0], x1
        st1             {v2.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v1.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        b.gt            1b
        ret
320:                                             // width 32
        AARCH64_VALID_JUMP_TARGET
1:
        ld4r            {v0.16b, v1.16b, v2.16b, v3.16b},  [x2],  x7
        // Bytes 16..31 of the row are written first with a plain store; the
        // post-indexed st1 then writes bytes 0..15 and advances the pointer.
        str             q3,  [x0, #16]
        str             q2,  [x6, #16]
        st1             {v3.16b}, [x0], x1
        st1             {v2.16b}, [x6], x1
        subs            w4,  w4,  #4
        str             q1,  [x0, #16]
        str             q0,  [x6, #16]
        st1             {v1.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        b.gt            1b
        ret
640:                                             // width 64
        AARCH64_VALID_JUMP_TARGET
1:
        ld4r            {v0.16b, v1.16b, v2.16b, v3.16b},  [x2],  x7
        // Bytes 16..63 go out via str/stp, then the post-indexed st1 writes
        // bytes 0..15 and advances the pointer.
        str             q3,  [x0, #16]
        str             q2,  [x6, #16]
        stp             q3,  q3,  [x0, #32]
        stp             q2,  q2,  [x6, #32]
        st1             {v3.16b}, [x0], x1
        st1             {v2.16b}, [x6], x1
        subs            w4,  w4,  #4
        str             q1,  [x0, #16]
        str             q0,  [x6, #16]
        stp             q1,  q1,  [x0, #32]
        stp             q0,  q0,  [x6, #32]
        st1             {v1.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        b.gt            1b
        ret
endfunc

// Width handlers for ipred_h_8bpc_neon, widest first, as table-relative offsets.
jumptable ipred_h_tbl
        .word 640b - ipred_h_tbl
        .word 320b - ipred_h_tbl
        .word 160b - ipred_h_tbl
        .word 80b  - ipred_h_tbl
        .word 40b  - ipred_h_tbl
endjumptable

// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                             const pixel *const topleft,
//                             const int width, const int height, const int a,
//                             const int max_width, const int max_height);
//
// Fills the block with the rounded average of the width bytes starting at
// topleft + 1.
// Registers on entry (per the prototype): x0 = dst, x1 = stride,
// x2 = topleft, w3 = width, w4 = height.
//
// Each width handler sums its source bytes with uaddlv, applies a rounding
// right shift (rshrn) by log2 of the number of bytes summed, broadcasts the
// result, and then runs the same four-rows-per-pass store loop used by the
// other predictors in this file. Height is assumed to be a multiple of 4.
function ipred_dc_top_8bpc_neon, export=1
        clz             w3,  w3
        movrel          x5,  ipred_dc_top_tbl
        sub             w3,  w3,  #25            // table index from width
        ldrsw           x3,  [x5, w3, uxtw #2]
        add             x2,  x2,  #1             // skip the topleft byte itself
        add             x5,  x5,  x3
        add             x6,  x0,  x1             // x6 = second row pointer
        lsl             x1,  x1,  #1             // step two rows at a time
        br              x5
40:                                              // width 4
        AARCH64_VALID_JUMP_TARGET
        // ld1r duplicates the four source bytes into both 32 bit lanes, so the
        // eight byte sum is twice the real sum and the shift is 3 instead of 2.
        ld1r            {v0.2s},  [x2]
        uaddlv          h0,  v0.8b
        rshrn           v0.8b,  v0.8h,  #3
        dup             v0.8b,  v0.b[0]
1:
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        b.gt            1b
        ret
80:                                              // width 8
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8b},  [x2]
        uaddlv          h0,  v0.8b
        rshrn           v0.8b,  v0.8h,  #3
        dup             v0.8b,  v0.b[0]
1:
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        b.gt            1b
        ret
160:                                             // width 16
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b}, [x2]
        uaddlv          h0,  v0.16b
        rshrn           v0.8b,  v0.8h,  #4
        dup             v0.16b, v0.b[0]
1:
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        b.gt            1b
        ret
320:                                             // width 32
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b, v1.16b}, [x2]
        uaddlv          h0,  v0.16b
        uaddlv          h1,  v1.16b
        add             v2.4h,  v0.4h,  v1.4h    // combine the two partial sums
        rshrn           v2.8b,  v2.8h,  #5
        dup             v0.16b, v2.b[0]
        dup             v1.16b, v2.b[0]
1:
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        b.gt            1b
        ret
640:                                             // width 64
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2]
        uaddlv          h0,  v0.16b
        uaddlv          h1,  v1.16b
        uaddlv          h2,  v2.16b
        uaddlv          h3,  v3.16b
        add             v4.4h,  v0.4h,  v1.4h    // combine the four partial sums
        add             v5.4h,  v2.4h,  v3.4h
        add             v4.4h,  v4.4h,  v5.4h
        rshrn           v4.8b,  v4.8h,  #6
        dup             v0.16b, v4.b[0]
        dup             v1.16b, v4.b[0]
        dup             v2.16b, v4.b[0]
        dup             v3.16b, v4.b[0]
1:
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        b.gt            1b
        ret
endfunc

// Width handlers for ipred_dc_top_8bpc_neon, widest first, as table-relative
// offsets.
jumptable ipred_dc_top_tbl
        .word 640b - ipred_dc_top_tbl
        .word 320b - ipred_dc_top_tbl
        .word 160b - ipred_dc_top_tbl
        .word 80b  - ipred_dc_top_tbl
        .word 40b  - ipred_dc_top_tbl
endjumptable
Coastguard Worker 386*c0909341SAndroid Build Coastguard Worker// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride, 387*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 388*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 389*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 390*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_left_8bpc_neon, export=1 391*c0909341SAndroid Build Coastguard Worker sub x2, x2, w4, uxtw 392*c0909341SAndroid Build Coastguard Worker clz w3, w3 393*c0909341SAndroid Build Coastguard Worker clz w7, w4 394*c0909341SAndroid Build Coastguard Worker movrel x5, ipred_dc_left_tbl 395*c0909341SAndroid Build Coastguard Worker sub w3, w3, #20 // 25 leading bits, minus table offset 5 396*c0909341SAndroid Build Coastguard Worker sub w7, w7, #25 397*c0909341SAndroid Build Coastguard Worker ldrsw x3, [x5, w3, uxtw #2] 398*c0909341SAndroid Build Coastguard Worker ldrsw x7, [x5, w7, uxtw #2] 399*c0909341SAndroid Build Coastguard Worker add x3, x5, x3 400*c0909341SAndroid Build Coastguard Worker add x5, x5, x7 401*c0909341SAndroid Build Coastguard Worker add x6, x0, x1 402*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 403*c0909341SAndroid Build Coastguard Worker br x5 404*c0909341SAndroid Build Coastguard Worker 405*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h4): 406*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 407*c0909341SAndroid Build Coastguard Worker ld1r {v0.2s}, [x2] 408*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.8b 409*c0909341SAndroid Build Coastguard Worker rshrn v0.8b, v0.8h, #3 410*c0909341SAndroid Build Coastguard Worker dup v0.16b, v0.b[0] 411*c0909341SAndroid Build Coastguard Worker br x3 412*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w4): 413*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 414*c0909341SAndroid Build Coastguard 
Worker1: 415*c0909341SAndroid Build Coastguard Worker st1 {v0.s}[0], [x0], x1 416*c0909341SAndroid Build Coastguard Worker st1 {v0.s}[0], [x6], x1 417*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 418*c0909341SAndroid Build Coastguard Worker st1 {v0.s}[0], [x0], x1 419*c0909341SAndroid Build Coastguard Worker st1 {v0.s}[0], [x6], x1 420*c0909341SAndroid Build Coastguard Worker b.gt 1b 421*c0909341SAndroid Build Coastguard Worker ret 422*c0909341SAndroid Build Coastguard Worker 423*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h8): 424*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 425*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [x2] 426*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.8b 427*c0909341SAndroid Build Coastguard Worker rshrn v0.8b, v0.8h, #3 428*c0909341SAndroid Build Coastguard Worker dup v0.16b, v0.b[0] 429*c0909341SAndroid Build Coastguard Worker br x3 430*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w8): 431*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 432*c0909341SAndroid Build Coastguard Worker1: 433*c0909341SAndroid Build Coastguard Worker st1 {v0.8b}, [x0], x1 434*c0909341SAndroid Build Coastguard Worker st1 {v0.8b}, [x6], x1 435*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 436*c0909341SAndroid Build Coastguard Worker st1 {v0.8b}, [x0], x1 437*c0909341SAndroid Build Coastguard Worker st1 {v0.8b}, [x6], x1 438*c0909341SAndroid Build Coastguard Worker b.gt 1b 439*c0909341SAndroid Build Coastguard Worker ret 440*c0909341SAndroid Build Coastguard Worker 441*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h16): 442*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 443*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x2] 444*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.16b 445*c0909341SAndroid Build Coastguard Worker rshrn v0.8b, v0.8h, #4 446*c0909341SAndroid Build Coastguard Worker dup v0.16b, v0.b[0] 
447*c0909341SAndroid Build Coastguard Worker br x3 448*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w16): 449*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 450*c0909341SAndroid Build Coastguard Worker1: 451*c0909341SAndroid Build Coastguard Worker st1 {v0.16b}, [x0], x1 452*c0909341SAndroid Build Coastguard Worker st1 {v0.16b}, [x6], x1 453*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 454*c0909341SAndroid Build Coastguard Worker st1 {v0.16b}, [x0], x1 455*c0909341SAndroid Build Coastguard Worker st1 {v0.16b}, [x6], x1 456*c0909341SAndroid Build Coastguard Worker b.gt 1b 457*c0909341SAndroid Build Coastguard Worker ret 458*c0909341SAndroid Build Coastguard Worker 459*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h32): 460*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 461*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b}, [x2] 462*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.16b 463*c0909341SAndroid Build Coastguard Worker uaddlv h1, v1.16b 464*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v1.4h 465*c0909341SAndroid Build Coastguard Worker rshrn v0.8b, v0.8h, #5 466*c0909341SAndroid Build Coastguard Worker dup v0.16b, v0.b[0] 467*c0909341SAndroid Build Coastguard Worker br x3 468*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w32): 469*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 470*c0909341SAndroid Build Coastguard Worker mov v1.16b, v0.16b 471*c0909341SAndroid Build Coastguard Worker1: 472*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b}, [x0], x1 473*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b}, [x6], x1 474*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 475*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b}, [x0], x1 476*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b}, [x6], x1 477*c0909341SAndroid Build Coastguard Worker b.gt 1b 478*c0909341SAndroid Build 
Coastguard Worker ret 479*c0909341SAndroid Build Coastguard Worker 480*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h64): 481*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 482*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2] 483*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.16b 484*c0909341SAndroid Build Coastguard Worker uaddlv h1, v1.16b 485*c0909341SAndroid Build Coastguard Worker uaddlv h2, v2.16b 486*c0909341SAndroid Build Coastguard Worker uaddlv h3, v3.16b 487*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v1.4h 488*c0909341SAndroid Build Coastguard Worker add v2.4h, v2.4h, v3.4h 489*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v2.4h 490*c0909341SAndroid Build Coastguard Worker rshrn v0.8b, v0.8h, #6 491*c0909341SAndroid Build Coastguard Worker dup v0.16b, v0.b[0] 492*c0909341SAndroid Build Coastguard Worker br x3 493*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w64): 494*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 495*c0909341SAndroid Build Coastguard Worker mov v1.16b, v0.16b 496*c0909341SAndroid Build Coastguard Worker mov v2.16b, v0.16b 497*c0909341SAndroid Build Coastguard Worker mov v3.16b, v0.16b 498*c0909341SAndroid Build Coastguard Worker1: 499*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 500*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1 501*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 502*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 503*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1 504*c0909341SAndroid Build Coastguard Worker b.gt 1b 505*c0909341SAndroid Build Coastguard Worker ret 506*c0909341SAndroid Build Coastguard Workerendfunc 507*c0909341SAndroid Build Coastguard Worker 508*c0909341SAndroid Build Coastguard Workerjumptable 
ipred_dc_left_tbl
        // Entries are 32-bit offsets relative to the table itself: the five
        // "h" (summing) stages first, then the five "w" (store) stages,
        // each ordered from 64 down to 4.
        .word L(ipred_dc_left_h64) - ipred_dc_left_tbl
        .word L(ipred_dc_left_h32) - ipred_dc_left_tbl
        .word L(ipred_dc_left_h16) - ipred_dc_left_tbl
        .word L(ipred_dc_left_h8)  - ipred_dc_left_tbl
        .word L(ipred_dc_left_h4)  - ipred_dc_left_tbl
        .word L(ipred_dc_left_w64) - ipred_dc_left_tbl
        .word L(ipred_dc_left_w32) - ipred_dc_left_tbl
        .word L(ipred_dc_left_w16) - ipred_dc_left_tbl
        .word L(ipred_dc_left_w8)  - ipred_dc_left_tbl
        .word L(ipred_dc_left_w4)  - ipred_dc_left_tbl
endjumptable

// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const topleft,
//                         const int width, const int height, const int a,
//                         const int max_width, const int max_height);
//
// Fills the width x height block at dst with a single value:
//   (sum of `height` bytes before topleft + sum of `width` bytes after it
//    + ((width + height) >> 1)) / (width + height)
// The division is a right shift by ctz(width + height); for non-square
// blocks that is followed by a fixed-point multiply by ~1/3 or ~1/5.
//
// Register use (arguments per the prototype above):
//   x0 = dst (rows 0, 2, ...)   x6 = dst + stride (rows 1, 3, ...)
//   x1 = stride, doubled before dispatch
//   x2 = edge pointer           w3/w4 = width/height
//   x5 = address of the height-specific summing stage (entered first)
//   x3 = address of the width-specific stage (entered from the first)
//   v16 = rounding term         v17 = -ctz(width + height)
function ipred_dc_8bpc_neon, export=1
        sub             x2,  x2,  w4, uxtw        // x2 = topleft - height
        add             w7,  w3,  w4              // w7 = width + height
        clz             w3,  w3
        clz             w6,  w4
        dup             v16.8h,  w7               // v16 = width + height
        movrel          x5,  ipred_dc_tbl
        rbit            w7,  w7                   // bit-reverse, so that ...
        sub             w3,  w3,  #20             // clz(width) - 25 + 5: index into the "w" half of the table
        sub             w6,  w6,  #25             // clz(height) - 25: index into the "h" half
        clz             w7,  w7                   // ... clz yields ctz(width + height)
        ldrsw           x3,  [x5, w3, uxtw #2]
        ldrsw           x6,  [x5, w6, uxtw #2]
        neg             w7,  w7                   // negative count: ushl then shifts right
        add             x3,  x5,  x3              // x3 = width stage
        add             x5,  x5,  x6              // x5 = height stage
        ushr            v16.8h,  v16.8h,  #1      // v16 = (width + height) >> 1
        dup             v17.8h,  w7               // v17 = -ctz(width + height)
        add             x6,  x0,  x1              // x6 = second row
        lsl             x1,  x1,  #1              // each pointer advances two rows per store
        br              x5

// ---- height 4 / width 4 ----
L(ipred_dc_h4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.s}[0],  [x2], #4      // 4 bytes preceding topleft
        ins             v0.s[1], wzr              // clear the lanes that were not loaded
        uaddlv          h0,      v0.8b            // h0 = their sum
        add             x2,  x2,  #1              // step over the topleft byte itself
        br              x3
L(ipred_dc_w4):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.s}[0],  [x2]          // 4 bytes following topleft
        ins             v1.s[1], wzr
        add             v0.4h,   v0.4h,   v16.4h  // add the rounding term
        uaddlv          h1,      v1.8b
        cmp             w4,  #4
        add             v0.4h,   v0.4h,   v1.4h   // total sum + rounding
        ushl            v0.4h,   v0.4h,   v17.4h  // >> ctz(width + height)
        b.eq            1f                        // square block: the shift was the whole division
        // height is 8 or 16 here; pick the multiplier with one shift
        mov             w16, #(0x3334/2)          // ~1/5, low halfword
        movk            w16, #(0x5556/2), lsl #16 // ~1/3, high halfword
        add             w17, w4,  w4              // 2*height = 16 or 32
        lsr             w16, w16, w17             // by 16: high half moves down; by 32 (taken mod 32): unchanged
        dup             v16.4h,  w16              // only the low 16 bits are broadcast
        sqdmulh         v0.4h,   v0.4h,   v16.4h  // doubling multiply-high cancels the /2 in the constant
1:
        dup             v0.8b,   v0.b[0]          // broadcast the DC byte
2:
        // four rows per iteration
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.s}[0],  [x0], x1
        st1             {v0.s}[0],  [x6], x1
        b.gt            2b
        ret

// ---- height 8 / width 8 ----
L(ipred_dc_h8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8b},  [x2], #8
        uaddlv          h0,      v0.8b
        add             x2,  x2,  #1              // step over topleft
        br              x3
L(ipred_dc_w8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.8b},  [x2]
        add             v0.4h,   v0.4h,   v16.4h
        uaddlv          h1,      v1.8b
        cmp             w4,  #8
        add             v0.4h,   v0.4h,   v1.4h
        ushl            v0.4h,   v0.4h,   v17.4h
        b.eq            1f
        // height is 4, 16 or 32: only 8x32 needs the 1/5 multiplier
        cmp             w4,  #32
        mov             w16, #(0x3334/2)
        mov             w17, #(0x5556/2)
        csel            w16, w16, w17, eq
        dup             v16.4h,  w16
        sqdmulh         v0.4h,   v0.4h,   v16.4h
1:
        dup             v0.8b,   v0.b[0]
2:
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.8b},  [x0], x1
        st1             {v0.8b},  [x6], x1
        b.gt            2b
        ret

// ---- height 16 / width 16 ----
L(ipred_dc_h16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b}, [x2], #16
        uaddlv          h0,      v0.16b
        add             x2,  x2,  #1              // step over topleft
        br              x3
L(ipred_dc_w16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.16b}, [x2]
        add             v0.4h,   v0.4h,   v16.4h
        uaddlv          h1,      v1.16b
        cmp             w4,  #16
        add             v0.4h,   v0.4h,   v1.4h
        ushl            v0.4h,   v0.4h,   v17.4h
        b.eq            1f
        // height is 4, 8, 32 or 64: heights 4 and 64 (no bit of the mask
        // set) take 1/5, heights 8 and 32 take 1/3.
        tst             w4,  #(32+16+8)           // 16 is included only so the mask is contiguous
        mov             w16, #(0x3334/2)
        mov             w17, #(0x5556/2)
        csel            w16, w16, w17, eq
        dup             v16.4h,  w16
        sqdmulh         v0.4h,   v0.4h,   v16.4h
1:
        dup             v0.16b,  v0.b[0]
2:
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b}, [x0], x1
        st1             {v0.16b}, [x6], x1
        b.gt            2b
        ret

// ---- height 32 / width 32 ----
L(ipred_dc_h32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b, v1.16b}, [x2], #32
        uaddlv          h0,      v0.16b
        uaddlv          h1,      v1.16b
        add             x2,  x2,  #1              // step over topleft
        add             v0.4h,   v0.4h,   v1.4h   // combine the two partial sums
        br              x3
L(ipred_dc_w32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.16b, v2.16b}, [x2]
        add             v0.4h,   v0.4h,   v16.4h
        uaddlv          h1,      v1.16b
        uaddlv          h2,      v2.16b
        cmp             w4,  #32
        add             v0.4h,   v0.4h,   v1.4h
        add             v0.4h,   v0.4h,   v2.4h
        ushl            v4.4h,   v0.4h,   v17.4h  // result kept in v4; v0/v1 become the store registers
        b.eq            1f
        // height is 8, 16 or 64: only 32x8 needs the 1/5 multiplier
        cmp             w4,  #8
        mov             w16, #(0x3334/2)
        mov             w17, #(0x5556/2)
        csel            w16, w16, w17, eq
        dup             v16.4h,  w16
        sqdmulh         v4.4h,   v4.4h,   v16.4h
1:
        dup             v0.16b,  v4.b[0]
        dup             v1.16b,  v4.b[0]
2:
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b}, [x0], x1
        st1             {v0.16b, v1.16b}, [x6], x1
        b.gt            2b
        ret

// ---- height 64 / width 64 ----
L(ipred_dc_h64):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], #64
        uaddlv          h0,      v0.16b
        uaddlv          h1,      v1.16b
        uaddlv          h2,      v2.16b
        uaddlv          h3,      v3.16b
        add             v0.4h,   v0.4h,   v1.4h
        add             v2.4h,   v2.4h,   v3.4h
        add             x2,  x2,  #1              // step over topleft
        add             v0.4h,   v0.4h,   v2.4h   // sum of all 64 bytes
        br              x3
L(ipred_dc_w64):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v1.16b, v2.16b, v3.16b, v4.16b}, [x2]
        add             v0.4h,   v0.4h,   v16.4h
        uaddlv          h1,      v1.16b
        uaddlv          h2,      v2.16b
        uaddlv          h3,      v3.16b
        uaddlv          h4,      v4.16b
        add             v1.4h,   v1.4h,   v2.4h
        add             v3.4h,   v3.4h,   v4.4h
        cmp             w4,  #64
        add             v0.4h,   v0.4h,   v1.4h
        add             v0.4h,   v0.4h,   v3.4h
        ushl            v4.4h,   v0.4h,   v17.4h
        b.eq            1f
        // height is 16 or 32; same packed-constant trick as the width-4
        // case, with the halves swapped and height itself as shift count
        mov             w16, #(0x5556/2)          // ~1/3, low halfword (height 32: shift taken mod 32 is 0)
        movk            w16, #(0x3334/2), lsl #16 // ~1/5, high halfword (height 16)
        lsr             w16, w16, w4
        dup             v16.4h,  w16
        sqdmulh         v4.4h,   v4.4h,   v16.4h
1:
        dup             v0.16b,  v4.b[0]
        dup             v1.16b,  v4.b[0]
        dup             v2.16b,  v4.b[0]
        dup             v3.16b,  v4.b[0]
2:
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        subs            w4,  w4,  #4
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], x1
        b.gt            2b
        ret
endfunc

// Dispatch table for ipred_dc_8bpc_neon: indices 0-4 are the height stages
// (clz(height) - 25), indices 5-9 the width stages (clz(width) - 20).
jumptable ipred_dc_tbl
        .word L(ipred_dc_h64) - ipred_dc_tbl
        .word L(ipred_dc_h32) - ipred_dc_tbl
        .word L(ipred_dc_h16) - ipred_dc_tbl
        .word L(ipred_dc_h8)  - ipred_dc_tbl
        .word L(ipred_dc_h4)  - ipred_dc_tbl
        .word L(ipred_dc_w64) - ipred_dc_tbl
        .word L(ipred_dc_w32) - ipred_dc_tbl
        .word L(ipred_dc_w16) - ipred_dc_tbl
        .word L(ipred_dc_w8)  - ipred_dc_tbl
        .word L(ipred_dc_w4)  - ipred_dc_tbl
endjumptable

// void ipred_paeth_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                            const pixel *const topleft,
//                            const int width, const int height, const int a,
//                            const int max_width, const int max_height);
//
// For every output pixel: base = left + top - topleft (saturated to a
// byte), then whichever of left / top / topleft is closest to base is
// written, with ties resolved in that order.
//
// Register use (arguments per the prototype above):
//   x0/x6 = dst rows 0/1, x1 = 2*stride (re-derived in the >=16 path)
//   x2 = topleft - 4, walked backwards 4 bytes (x7 = -4) per 4 rows
//   x8 = topleft + 1 (top row)   v4 = topleft byte in every lane
//   w3/w4 = width/height
function ipred_paeth_8bpc_neon, export=1
        clz             w9,  w3
        movrel          x5,  ipred_paeth_tbl
        sub             w9,  w9,  #25             // clz(width) - 25: 0 for width 64 ... 4 for width 4
        ldrsw           x9,  [x5, w9, uxtw #2]
        ld1r            {v4.16b},  [x2]           // v4 = topleft, broadcast
        add             x8,  x2,  #1              // x8 -> first byte after topleft
        sub             x2,  x2,  #4              // x2 -> the 4 bytes just before topleft
        add             x5,  x5,  x9
        mov             x7,  #-4                  // post-index step for the left loads
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        br              x5

// ---- width 4: one 16-byte vector holds four rows ----
40:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v5.4s},  [x8]            // the 4 top bytes, repeated four times
        usubl           v6.8h,   v5.8b,   v4.8b   // v6 = top - topleft (16 bit)
4:
        ld4r            {v0.8b, v1.8b, v2.8b, v3.8b},  [x2], x7 // 4 left bytes, each broadcast
        zip1            v0.2s,   v0.2s,   v1.2s   // v0 = {byte0 x4, byte1 x4}
        zip1            v2.2s,   v2.2s,   v3.2s   // v2 = {byte2 x4, byte3 x4}
        uaddw           v16.8h,  v6.8h,   v0.8b   // left + top - topleft
        uaddw           v17.8h,  v6.8h,   v2.8b
        sqxtun          v16.8b,  v16.8h           // v16 = base, saturated to bytes
        sqxtun2         v16.16b, v17.8h
        zip1            v0.2d,   v0.2d,   v2.2d   // v0 = left for all four rows
        uabd            v20.16b, v5.16b,  v16.16b // |top - base|      (tdiff)
        uabd            v22.16b, v4.16b,  v16.16b // |topleft - base|  (tldiff)
        uabd            v16.16b, v0.16b,  v16.16b // |left - base|     (ldiff)
        umin            v18.16b, v20.16b, v22.16b // min(tdiff, tldiff)
        cmhs            v20.16b, v22.16b, v20.16b // mask: tldiff >= tdiff
        cmhs            v16.16b, v18.16b, v16.16b // mask: min(tdiff, tldiff) >= ldiff
        bsl             v20.16b, v5.16b,  v4.16b  // top where tdiff <= tldiff, else topleft
        bit             v20.16b, v0.16b,  v16.16b // left overrides where ldiff <= min
        // the highest-addressed left byte (lane 3) belongs to the first row
        st1             {v20.s}[3], [x0], x1
        st1             {v20.s}[2], [x6], x1
        subs            w4,  w4,  #4
        st1             {v20.s}[1], [x0], x1
        st1             {v20.s}[0], [x6], x1
        b.gt            4b
        ret

// ---- width 8: two vectors, two rows each ----
80:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v5.2d},  [x8]            // the 8 top bytes in both halves
        usubl           v6.8h,   v5.8b,   v4.8b   // v6 = top - topleft
8:
        ld4r            {v0.8b, v1.8b, v2.8b, v3.8b},  [x2], x7 // 4 left bytes, each broadcast
        uaddw           v16.8h,  v6.8h,   v0.8b   // left + top - topleft, one row per register
        uaddw           v17.8h,  v6.8h,   v1.8b
        uaddw           v18.8h,  v6.8h,   v2.8b
        uaddw           v19.8h,  v6.8h,   v3.8b
        sqxtun          v16.8b,  v16.8h           // base, rows packed in pairs
        sqxtun2         v16.16b, v17.8h
        sqxtun          v18.8b,  v18.8h
        sqxtun2         v18.16b, v19.8h
        zip1            v2.2d,   v2.2d,   v3.2d   // left, packed the same way
        zip1            v0.2d,   v0.2d,   v1.2d
        uabd            v21.16b, v5.16b,  v18.16b // tdiff
        uabd            v20.16b, v5.16b,  v16.16b
        uabd            v23.16b, v4.16b,  v18.16b // tldiff
        uabd            v22.16b, v4.16b,  v16.16b
        uabd            v17.16b, v2.16b,  v18.16b // ldiff
        uabd            v16.16b, v0.16b,  v16.16b
        umin            v19.16b, v21.16b, v23.16b // min(tdiff, tldiff)
        umin            v18.16b, v20.16b, v22.16b
        cmhs            v21.16b, v23.16b, v21.16b // mask: tldiff >= tdiff
        cmhs            v20.16b, v22.16b, v20.16b
        cmhs            v17.16b, v19.16b, v17.16b // mask: min(tdiff, tldiff) >= ldiff
        cmhs            v16.16b, v18.16b, v16.16b
        bsl             v21.16b, v5.16b,  v4.16b  // top where tdiff <= tldiff, else topleft
        bsl             v20.16b, v5.16b,  v4.16b
        bit             v21.16b, v2.16b,  v17.16b // left overrides where ldiff <= min
        bit             v20.16b, v0.16b,  v16.16b
        st1             {v21.d}[1], [x0], x1
        st1             {v21.d}[0], [x6], x1
        subs            w4,  w4,  #4
        st1             {v20.d}[1], [x0], x1
        st1             {v20.d}[0], [x6], x1
        b.gt            8b
        ret

// ---- width 16/32/64: four rows at a time, 16 columns per inner pass ----
160:
320:
640:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v5.16b},  [x8], #16      // first 16 top bytes
        mov             w9,  w3                   // keep width; w3 is the column counter
        // Four row pointers: x0, x6, x5, x10 = rows 0, 1, 2, 3
        // (x1 is still 2*stride here; x5 no longer holds the jump target)
        add             x5,  x0,  x1
        add             x10, x6,  x1
        lsl             x1,  x1,  #1              // 4*stride ...
        sub             x1,  x1,  w3, uxtw        // ... minus the width already advanced per row
1:
        ld4r            {v0.16b, v1.16b, v2.16b, v3.16b},  [x2], x7 // left bytes of the next 4 rows
2:
        usubl           v6.8h,   v5.8b,   v4.8b   // top - topleft for these 16 columns
        usubl2          v7.8h,   v5.16b,  v4.16b
        uaddw           v24.8h,  v6.8h,   v0.8b   // left + top - topleft, two registers per row
        uaddw           v25.8h,  v7.8h,   v0.8b
        uaddw           v26.8h,  v6.8h,   v1.8b
        uaddw           v27.8h,  v7.8h,   v1.8b
        uaddw           v28.8h,  v6.8h,   v2.8b
        uaddw           v29.8h,  v7.8h,   v2.8b
        uaddw           v30.8h,  v6.8h,   v3.8b
        uaddw           v31.8h,  v7.8h,   v3.8b
        sqxtun          v17.8b,  v26.8h           // base, saturated to bytes (v16..v19)
        sqxtun2         v17.16b, v27.8h
        sqxtun          v16.8b,  v24.8h
        sqxtun2         v16.16b, v25.8h
        sqxtun          v19.8b,  v30.8h
        sqxtun2         v19.16b, v31.8h
        sqxtun          v18.8b,  v28.8h
        sqxtun2         v18.16b, v29.8h
        uabd            v23.16b, v5.16b,  v19.16b // tdiff
        uabd            v22.16b, v5.16b,  v18.16b
        uabd            v21.16b, v5.16b,  v17.16b
        uabd            v20.16b, v5.16b,  v16.16b
        uabd            v27.16b, v4.16b,  v19.16b // tldiff
        uabd            v26.16b, v4.16b,  v18.16b
        uabd            v25.16b, v4.16b,  v17.16b
        uabd            v24.16b, v4.16b,  v16.16b
        uabd            v19.16b, v3.16b,  v19.16b // ldiff
        uabd            v18.16b, v2.16b,  v18.16b
        uabd            v17.16b, v1.16b,  v17.16b
        uabd            v16.16b, v0.16b,  v16.16b
        umin            v31.16b, v23.16b, v27.16b // min(tdiff, tldiff)
        umin            v30.16b, v22.16b, v26.16b
        umin            v29.16b, v21.16b, v25.16b
        umin            v28.16b, v20.16b, v24.16b
        cmhs            v23.16b, v27.16b, v23.16b // mask: tldiff >= tdiff
        cmhs            v22.16b, v26.16b, v22.16b
        cmhs            v21.16b, v25.16b, v21.16b
        cmhs            v20.16b, v24.16b, v20.16b
        cmhs            v19.16b, v31.16b, v19.16b // mask: min(tdiff, tldiff) >= ldiff
        cmhs            v18.16b, v30.16b, v18.16b
        cmhs            v17.16b, v29.16b, v17.16b
        cmhs            v16.16b, v28.16b, v16.16b
        bsl             v23.16b, v5.16b,  v4.16b  // top where tdiff <= tldiff, else topleft
        bsl             v22.16b, v5.16b,  v4.16b
        bsl             v21.16b, v5.16b,  v4.16b
        bsl             v20.16b, v5.16b,  v4.16b
        bit             v23.16b, v3.16b,  v19.16b // left overrides where ldiff <= min
        bit             v22.16b, v2.16b,  v18.16b
        bit             v21.16b, v1.16b,  v17.16b
        bit             v20.16b, v0.16b,  v16.16b
        subs            w3,  w3,  #16             // 16 columns done
        st1             {v23.16b}, [x0],  #16     // row 0 (highest-addressed left byte)
        st1             {v22.16b}, [x6],  #16     // row 1
        st1             {v21.16b}, [x5],  #16     // row 2
        st1             {v20.16b}, [x10], #16     // row 3
        b.le            8f
        ld1             {v5.16b},  [x8], #16      // next 16 top bytes
        b               2b
8:
        subs            w4,  w4,  #4              // four rows done
        b.le            9f
        // Row group finished: rewind the top pointer and step all four
        // row pointers down to the next group.
        sub             x8,  x8,  w9, uxtw
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        // Issue the top-row load early, ahead of the remaining bookkeeping
        ld1             {v5.16b},  [x8], #16
        add             x5,  x5,  x1
        add             x10, x10, x1
        mov             w3,  w9                   // reset the column counter
        b               1b
9:
        ret
endfunc

// Dispatch table for ipred_paeth_8bpc_neon, indexed by clz(width) - 25.
jumptable ipred_paeth_tbl
        .word 640b - ipred_paeth_tbl
        .word 320b - ipred_paeth_tbl
923*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_paeth_tbl 924*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_paeth_tbl 925*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_paeth_tbl 926*c0909341SAndroid Build Coastguard Workerendjumptable 927*c0909341SAndroid Build Coastguard Worker 928*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_8bpc_neon(pixel *dst, const ptrdiff_t stride, 929*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 930*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 931*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 932*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_8bpc_neon, export=1 933*c0909341SAndroid Build Coastguard Worker movrel x10, X(sm_weights) 934*c0909341SAndroid Build Coastguard Worker add x11, x10, w4, uxtw 935*c0909341SAndroid Build Coastguard Worker add x10, x10, w3, uxtw 936*c0909341SAndroid Build Coastguard Worker clz w9, w3 937*c0909341SAndroid Build Coastguard Worker movrel x5, ipred_smooth_tbl 938*c0909341SAndroid Build Coastguard Worker sub x12, x2, w4, uxtw 939*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 940*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x5, w9, uxtw #2] 941*c0909341SAndroid Build Coastguard Worker ld1r {v4.16b}, [x12] // bottom 942*c0909341SAndroid Build Coastguard Worker add x8, x2, #1 943*c0909341SAndroid Build Coastguard Worker add x5, x5, x9 944*c0909341SAndroid Build Coastguard Worker add x6, x0, x1 945*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 946*c0909341SAndroid Build Coastguard Worker br x5 947*c0909341SAndroid Build Coastguard Worker40: 948*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 949*c0909341SAndroid Build Coastguard Worker ld1r {v6.2s}, [x8] // top 950*c0909341SAndroid Build Coastguard Worker ld1r {v7.2s}, [x10] // weights_hor 951*c0909341SAndroid Build Coastguard Worker 
sub x2, x2, #4 952*c0909341SAndroid Build Coastguard Worker mov x7, #-4 953*c0909341SAndroid Build Coastguard Worker dup v5.16b, v6.b[3] // right 954*c0909341SAndroid Build Coastguard Worker usubl v6.8h, v6.8b, v4.8b // top-bottom 955*c0909341SAndroid Build Coastguard Worker uxtl v7.8h, v7.8b // weights_hor 956*c0909341SAndroid Build Coastguard Worker4: 957*c0909341SAndroid Build Coastguard Worker ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], x7 // left 958*c0909341SAndroid Build Coastguard Worker ld4r {v16.8b, v17.8b, v18.8b, v19.8b}, [x11], #4 // weights_ver 959*c0909341SAndroid Build Coastguard Worker shll v20.8h, v5.8b, #8 // right*256 960*c0909341SAndroid Build Coastguard Worker shll v21.8h, v5.8b, #8 961*c0909341SAndroid Build Coastguard Worker zip1 v1.2s, v1.2s, v0.2s // left, flipped 962*c0909341SAndroid Build Coastguard Worker zip1 v0.2s, v3.2s, v2.2s 963*c0909341SAndroid Build Coastguard Worker zip1 v16.2s, v16.2s, v17.2s // weights_ver 964*c0909341SAndroid Build Coastguard Worker zip1 v18.2s, v18.2s, v19.2s 965*c0909341SAndroid Build Coastguard Worker shll v22.8h, v4.8b, #8 // bottom*256 966*c0909341SAndroid Build Coastguard Worker shll v23.8h, v4.8b, #8 967*c0909341SAndroid Build Coastguard Worker usubl v0.8h, v0.8b, v5.8b // left-right 968*c0909341SAndroid Build Coastguard Worker usubl v1.8h, v1.8b, v5.8b 969*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b // weights_ver 970*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v18.8b 971*c0909341SAndroid Build Coastguard Worker mla v20.8h, v0.8h, v7.8h // right*256 + (left-right)*weights_hor 972*c0909341SAndroid Build Coastguard Worker mla v21.8h, v1.8h, v7.8h 973*c0909341SAndroid Build Coastguard Worker mla v22.8h, v6.8h, v16.8h // bottom*256 + (top-bottom)*weights_ver 974*c0909341SAndroid Build Coastguard Worker mla v23.8h, v6.8h, v18.8h 975*c0909341SAndroid Build Coastguard Worker uhadd v20.8h, v20.8h, v22.8h 976*c0909341SAndroid Build Coastguard Worker uhadd v21.8h, v21.8h, v23.8h 
977*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #8 978*c0909341SAndroid Build Coastguard Worker rshrn v21.8b, v21.8h, #8 979*c0909341SAndroid Build Coastguard Worker st1 {v20.s}[0], [x0], x1 980*c0909341SAndroid Build Coastguard Worker st1 {v20.s}[1], [x6], x1 981*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 982*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[0], [x0], x1 983*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[1], [x6], x1 984*c0909341SAndroid Build Coastguard Worker b.gt 4b 985*c0909341SAndroid Build Coastguard Worker ret 986*c0909341SAndroid Build Coastguard Worker80: 987*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 988*c0909341SAndroid Build Coastguard Worker ld1 {v6.8b}, [x8] // top 989*c0909341SAndroid Build Coastguard Worker ld1 {v7.8b}, [x10] // weights_hor 990*c0909341SAndroid Build Coastguard Worker sub x2, x2, #4 991*c0909341SAndroid Build Coastguard Worker mov x7, #-4 992*c0909341SAndroid Build Coastguard Worker dup v5.16b, v6.b[7] // right 993*c0909341SAndroid Build Coastguard Worker usubl v6.8h, v6.8b, v4.8b // top-bottom 994*c0909341SAndroid Build Coastguard Worker uxtl v7.8h, v7.8b // weights_hor 995*c0909341SAndroid Build Coastguard Worker8: 996*c0909341SAndroid Build Coastguard Worker ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], x7 // left 997*c0909341SAndroid Build Coastguard Worker ld4r {v16.8b, v17.8b, v18.8b, v19.8b}, [x11], #4 // weights_ver 998*c0909341SAndroid Build Coastguard Worker shll v20.8h, v5.8b, #8 // right*256 999*c0909341SAndroid Build Coastguard Worker shll v21.8h, v5.8b, #8 1000*c0909341SAndroid Build Coastguard Worker shll v22.8h, v5.8b, #8 1001*c0909341SAndroid Build Coastguard Worker shll v23.8h, v5.8b, #8 1002*c0909341SAndroid Build Coastguard Worker usubl v0.8h, v0.8b, v5.8b // left-right 1003*c0909341SAndroid Build Coastguard Worker usubl v1.8h, v1.8b, v5.8b 1004*c0909341SAndroid Build Coastguard Worker usubl v2.8h, v2.8b, v5.8b 1005*c0909341SAndroid 
Build Coastguard Worker usubl v3.8h, v3.8b, v5.8b 1006*c0909341SAndroid Build Coastguard Worker shll v24.8h, v4.8b, #8 // bottom*256 1007*c0909341SAndroid Build Coastguard Worker shll v25.8h, v4.8b, #8 1008*c0909341SAndroid Build Coastguard Worker shll v26.8h, v4.8b, #8 1009*c0909341SAndroid Build Coastguard Worker shll v27.8h, v4.8b, #8 1010*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b // weights_ver 1011*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v17.8b 1012*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v18.8b 1013*c0909341SAndroid Build Coastguard Worker uxtl v19.8h, v19.8b 1014*c0909341SAndroid Build Coastguard Worker mla v20.8h, v3.8h, v7.8h // right*256 + (left-right)*weights_hor 1015*c0909341SAndroid Build Coastguard Worker mla v21.8h, v2.8h, v7.8h // (left flipped) 1016*c0909341SAndroid Build Coastguard Worker mla v22.8h, v1.8h, v7.8h 1017*c0909341SAndroid Build Coastguard Worker mla v23.8h, v0.8h, v7.8h 1018*c0909341SAndroid Build Coastguard Worker mla v24.8h, v6.8h, v16.8h // bottom*256 + (top-bottom)*weights_ver 1019*c0909341SAndroid Build Coastguard Worker mla v25.8h, v6.8h, v17.8h 1020*c0909341SAndroid Build Coastguard Worker mla v26.8h, v6.8h, v18.8h 1021*c0909341SAndroid Build Coastguard Worker mla v27.8h, v6.8h, v19.8h 1022*c0909341SAndroid Build Coastguard Worker uhadd v20.8h, v20.8h, v24.8h 1023*c0909341SAndroid Build Coastguard Worker uhadd v21.8h, v21.8h, v25.8h 1024*c0909341SAndroid Build Coastguard Worker uhadd v22.8h, v22.8h, v26.8h 1025*c0909341SAndroid Build Coastguard Worker uhadd v23.8h, v23.8h, v27.8h 1026*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #8 1027*c0909341SAndroid Build Coastguard Worker rshrn v21.8b, v21.8h, #8 1028*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #8 1029*c0909341SAndroid Build Coastguard Worker rshrn v23.8b, v23.8h, #8 1030*c0909341SAndroid Build Coastguard Worker st1 {v20.8b}, [x0], x1 1031*c0909341SAndroid Build Coastguard Worker st1 
{v21.8b}, [x6], x1 1032*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 1033*c0909341SAndroid Build Coastguard Worker st1 {v22.8b}, [x0], x1 1034*c0909341SAndroid Build Coastguard Worker st1 {v23.8b}, [x6], x1 1035*c0909341SAndroid Build Coastguard Worker b.gt 8b 1036*c0909341SAndroid Build Coastguard Worker ret 1037*c0909341SAndroid Build Coastguard Worker160: 1038*c0909341SAndroid Build Coastguard Worker320: 1039*c0909341SAndroid Build Coastguard Worker640: 1040*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1041*c0909341SAndroid Build Coastguard Worker add x12, x2, w3, uxtw 1042*c0909341SAndroid Build Coastguard Worker sub x2, x2, #2 1043*c0909341SAndroid Build Coastguard Worker mov x7, #-2 1044*c0909341SAndroid Build Coastguard Worker ld1r {v5.16b}, [x12] // right 1045*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw 1046*c0909341SAndroid Build Coastguard Worker mov w9, w3 1047*c0909341SAndroid Build Coastguard Worker 1048*c0909341SAndroid Build Coastguard Worker1: 1049*c0909341SAndroid Build Coastguard Worker ld2r {v0.8b, v1.8b}, [x2], x7 // left 1050*c0909341SAndroid Build Coastguard Worker ld2r {v16.8b, v17.8b}, [x11], #2 // weights_ver 1051*c0909341SAndroid Build Coastguard Worker usubl v0.8h, v0.8b, v5.8b // left-right 1052*c0909341SAndroid Build Coastguard Worker usubl v1.8h, v1.8b, v5.8b 1053*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b // weights_ver 1054*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v17.8b 1055*c0909341SAndroid Build Coastguard Worker2: 1056*c0909341SAndroid Build Coastguard Worker ld1 {v7.16b}, [x10], #16 // weights_hor 1057*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [x8], #16 // top 1058*c0909341SAndroid Build Coastguard Worker shll v20.8h, v5.8b, #8 // right*256 1059*c0909341SAndroid Build Coastguard Worker shll v21.8h, v5.8b, #8 1060*c0909341SAndroid Build Coastguard Worker shll v22.8h, v5.8b, #8 1061*c0909341SAndroid Build Coastguard Worker shll v23.8h, 
v5.8b, #8 1062*c0909341SAndroid Build Coastguard Worker uxtl v6.8h, v7.8b // weights_hor 1063*c0909341SAndroid Build Coastguard Worker uxtl2 v7.8h, v7.16b 1064*c0909341SAndroid Build Coastguard Worker usubl v2.8h, v3.8b, v4.8b // top-bottom 1065*c0909341SAndroid Build Coastguard Worker usubl2 v3.8h, v3.16b, v4.16b 1066*c0909341SAndroid Build Coastguard Worker mla v20.8h, v1.8h, v6.8h // right*256 + (left-right)*weights_hor 1067*c0909341SAndroid Build Coastguard Worker mla v21.8h, v1.8h, v7.8h // (left flipped) 1068*c0909341SAndroid Build Coastguard Worker mla v22.8h, v0.8h, v6.8h 1069*c0909341SAndroid Build Coastguard Worker mla v23.8h, v0.8h, v7.8h 1070*c0909341SAndroid Build Coastguard Worker shll v24.8h, v4.8b, #8 // bottom*256 1071*c0909341SAndroid Build Coastguard Worker shll v25.8h, v4.8b, #8 1072*c0909341SAndroid Build Coastguard Worker shll v26.8h, v4.8b, #8 1073*c0909341SAndroid Build Coastguard Worker shll v27.8h, v4.8b, #8 1074*c0909341SAndroid Build Coastguard Worker mla v24.8h, v2.8h, v16.8h // bottom*256 + (top-bottom)*weights_ver 1075*c0909341SAndroid Build Coastguard Worker mla v25.8h, v3.8h, v16.8h 1076*c0909341SAndroid Build Coastguard Worker mla v26.8h, v2.8h, v17.8h 1077*c0909341SAndroid Build Coastguard Worker mla v27.8h, v3.8h, v17.8h 1078*c0909341SAndroid Build Coastguard Worker uhadd v20.8h, v20.8h, v24.8h 1079*c0909341SAndroid Build Coastguard Worker uhadd v21.8h, v21.8h, v25.8h 1080*c0909341SAndroid Build Coastguard Worker uhadd v22.8h, v22.8h, v26.8h 1081*c0909341SAndroid Build Coastguard Worker uhadd v23.8h, v23.8h, v27.8h 1082*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #8 1083*c0909341SAndroid Build Coastguard Worker rshrn2 v20.16b, v21.8h, #8 1084*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #8 1085*c0909341SAndroid Build Coastguard Worker rshrn2 v22.16b, v23.8h, #8 1086*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 1087*c0909341SAndroid Build Coastguard Worker st1 {v20.16b}, [x0], #16 
1088*c0909341SAndroid Build Coastguard Worker st1 {v22.16b}, [x6], #16 1089*c0909341SAndroid Build Coastguard Worker b.gt 2b 1090*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1091*c0909341SAndroid Build Coastguard Worker b.le 9f 1092*c0909341SAndroid Build Coastguard Worker sub x8, x8, w9, uxtw 1093*c0909341SAndroid Build Coastguard Worker sub x10, x10, w9, uxtw 1094*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 1095*c0909341SAndroid Build Coastguard Worker add x6, x6, x1 1096*c0909341SAndroid Build Coastguard Worker mov w3, w9 1097*c0909341SAndroid Build Coastguard Worker b 1b 1098*c0909341SAndroid Build Coastguard Worker9: 1099*c0909341SAndroid Build Coastguard Worker ret 1100*c0909341SAndroid Build Coastguard Workerendfunc 1101*c0909341SAndroid Build Coastguard Worker 1102*c0909341SAndroid Build Coastguard Workerjumptable ipred_smooth_tbl 1103*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_smooth_tbl 1104*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_smooth_tbl 1105*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_smooth_tbl 1106*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_smooth_tbl 1107*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_smooth_tbl 1108*c0909341SAndroid Build Coastguard Workerendjumptable 1109*c0909341SAndroid Build Coastguard Worker 1110*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_v_8bpc_neon(pixel *dst, const ptrdiff_t stride, 1111*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1112*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 1113*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 1114*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_v_8bpc_neon, export=1 1115*c0909341SAndroid Build Coastguard Worker movrel x7, X(sm_weights) 1116*c0909341SAndroid Build Coastguard Worker add x7, x7, w4, uxtw 1117*c0909341SAndroid Build 
Coastguard Worker clz w9, w3 1118*c0909341SAndroid Build Coastguard Worker movrel x5, ipred_smooth_v_tbl 1119*c0909341SAndroid Build Coastguard Worker sub x8, x2, w4, uxtw 1120*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 1121*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x5, w9, uxtw #2] 1122*c0909341SAndroid Build Coastguard Worker ld1r {v4.16b}, [x8] // bottom 1123*c0909341SAndroid Build Coastguard Worker add x2, x2, #1 1124*c0909341SAndroid Build Coastguard Worker add x5, x5, x9 1125*c0909341SAndroid Build Coastguard Worker add x6, x0, x1 1126*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 1127*c0909341SAndroid Build Coastguard Worker br x5 1128*c0909341SAndroid Build Coastguard Worker40: 1129*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1130*c0909341SAndroid Build Coastguard Worker ld1r {v6.2s}, [x2] // top 1131*c0909341SAndroid Build Coastguard Worker usubl v6.8h, v6.8b, v4.8b // top-bottom 1132*c0909341SAndroid Build Coastguard Worker4: 1133*c0909341SAndroid Build Coastguard Worker ld4r {v16.8b, v17.8b, v18.8b, v19.8b}, [x7], #4 // weights_ver 1134*c0909341SAndroid Build Coastguard Worker shll v22.8h, v4.8b, #8 // bottom*256 1135*c0909341SAndroid Build Coastguard Worker shll v23.8h, v4.8b, #8 1136*c0909341SAndroid Build Coastguard Worker zip1 v16.2s, v16.2s, v17.2s // weights_ver 1137*c0909341SAndroid Build Coastguard Worker zip1 v18.2s, v18.2s, v19.2s 1138*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b // weights_ver 1139*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v18.8b 1140*c0909341SAndroid Build Coastguard Worker mla v22.8h, v6.8h, v16.8h // bottom*256 + (top-bottom)*weights_ver 1141*c0909341SAndroid Build Coastguard Worker mla v23.8h, v6.8h, v18.8h 1142*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #8 1143*c0909341SAndroid Build Coastguard Worker rshrn v23.8b, v23.8h, #8 1144*c0909341SAndroid Build Coastguard Worker st1 {v22.s}[0], [x0], x1 1145*c0909341SAndroid 
Build Coastguard Worker st1 {v22.s}[1], [x6], x1 1146*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 1147*c0909341SAndroid Build Coastguard Worker st1 {v23.s}[0], [x0], x1 1148*c0909341SAndroid Build Coastguard Worker st1 {v23.s}[1], [x6], x1 1149*c0909341SAndroid Build Coastguard Worker b.gt 4b 1150*c0909341SAndroid Build Coastguard Worker ret 1151*c0909341SAndroid Build Coastguard Worker80: 1152*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1153*c0909341SAndroid Build Coastguard Worker ld1 {v6.8b}, [x2] // top 1154*c0909341SAndroid Build Coastguard Worker usubl v6.8h, v6.8b, v4.8b // top-bottom 1155*c0909341SAndroid Build Coastguard Worker8: 1156*c0909341SAndroid Build Coastguard Worker ld4r {v16.8b, v17.8b, v18.8b, v19.8b}, [x7], #4 // weights_ver 1157*c0909341SAndroid Build Coastguard Worker shll v24.8h, v4.8b, #8 // bottom*256 1158*c0909341SAndroid Build Coastguard Worker shll v25.8h, v4.8b, #8 1159*c0909341SAndroid Build Coastguard Worker shll v26.8h, v4.8b, #8 1160*c0909341SAndroid Build Coastguard Worker shll v27.8h, v4.8b, #8 1161*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b // weights_ver 1162*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v17.8b 1163*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v18.8b 1164*c0909341SAndroid Build Coastguard Worker uxtl v19.8h, v19.8b 1165*c0909341SAndroid Build Coastguard Worker mla v24.8h, v6.8h, v16.8h // bottom*256 + (top-bottom)*weights_ver 1166*c0909341SAndroid Build Coastguard Worker mla v25.8h, v6.8h, v17.8h 1167*c0909341SAndroid Build Coastguard Worker mla v26.8h, v6.8h, v18.8h 1168*c0909341SAndroid Build Coastguard Worker mla v27.8h, v6.8h, v19.8h 1169*c0909341SAndroid Build Coastguard Worker rshrn v24.8b, v24.8h, #8 1170*c0909341SAndroid Build Coastguard Worker rshrn v25.8b, v25.8h, #8 1171*c0909341SAndroid Build Coastguard Worker rshrn v26.8b, v26.8h, #8 1172*c0909341SAndroid Build Coastguard Worker rshrn v27.8b, v27.8h, #8 1173*c0909341SAndroid 
Build Coastguard Worker st1 {v24.8b}, [x0], x1 1174*c0909341SAndroid Build Coastguard Worker st1 {v25.8b}, [x6], x1 1175*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 1176*c0909341SAndroid Build Coastguard Worker st1 {v26.8b}, [x0], x1 1177*c0909341SAndroid Build Coastguard Worker st1 {v27.8b}, [x6], x1 1178*c0909341SAndroid Build Coastguard Worker b.gt 8b 1179*c0909341SAndroid Build Coastguard Worker ret 1180*c0909341SAndroid Build Coastguard Worker160: 1181*c0909341SAndroid Build Coastguard Worker320: 1182*c0909341SAndroid Build Coastguard Worker640: 1183*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1184*c0909341SAndroid Build Coastguard Worker // Set up pointers for four rows in parallel; x0, x6, x5, x8 1185*c0909341SAndroid Build Coastguard Worker add x5, x0, x1 1186*c0909341SAndroid Build Coastguard Worker add x8, x6, x1 1187*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 1188*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw 1189*c0909341SAndroid Build Coastguard Worker mov w9, w3 1190*c0909341SAndroid Build Coastguard Worker 1191*c0909341SAndroid Build Coastguard Worker1: 1192*c0909341SAndroid Build Coastguard Worker ld4r {v16.8b, v17.8b, v18.8b, v19.8b}, [x7], #4 // weights_ver 1193*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b // weights_ver 1194*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v17.8b 1195*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v18.8b 1196*c0909341SAndroid Build Coastguard Worker uxtl v19.8h, v19.8b 1197*c0909341SAndroid Build Coastguard Worker2: 1198*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [x2], #16 // top 1199*c0909341SAndroid Build Coastguard Worker shll v20.8h, v4.8b, #8 // bottom*256 1200*c0909341SAndroid Build Coastguard Worker shll v21.8h, v4.8b, #8 1201*c0909341SAndroid Build Coastguard Worker shll v22.8h, v4.8b, #8 1202*c0909341SAndroid Build Coastguard Worker shll v23.8h, v4.8b, #8 1203*c0909341SAndroid Build Coastguard Worker 
shll v24.8h, v4.8b, #8 1204*c0909341SAndroid Build Coastguard Worker shll v25.8h, v4.8b, #8 1205*c0909341SAndroid Build Coastguard Worker shll v26.8h, v4.8b, #8 1206*c0909341SAndroid Build Coastguard Worker shll v27.8h, v4.8b, #8 1207*c0909341SAndroid Build Coastguard Worker usubl v2.8h, v3.8b, v4.8b // top-bottom 1208*c0909341SAndroid Build Coastguard Worker usubl2 v3.8h, v3.16b, v4.16b 1209*c0909341SAndroid Build Coastguard Worker mla v20.8h, v2.8h, v16.8h // bottom*256 + (top-bottom)*weights_ver 1210*c0909341SAndroid Build Coastguard Worker mla v21.8h, v3.8h, v16.8h 1211*c0909341SAndroid Build Coastguard Worker mla v22.8h, v2.8h, v17.8h 1212*c0909341SAndroid Build Coastguard Worker mla v23.8h, v3.8h, v17.8h 1213*c0909341SAndroid Build Coastguard Worker mla v24.8h, v2.8h, v18.8h 1214*c0909341SAndroid Build Coastguard Worker mla v25.8h, v3.8h, v18.8h 1215*c0909341SAndroid Build Coastguard Worker mla v26.8h, v2.8h, v19.8h 1216*c0909341SAndroid Build Coastguard Worker mla v27.8h, v3.8h, v19.8h 1217*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #8 1218*c0909341SAndroid Build Coastguard Worker rshrn2 v20.16b, v21.8h, #8 1219*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #8 1220*c0909341SAndroid Build Coastguard Worker rshrn2 v22.16b, v23.8h, #8 1221*c0909341SAndroid Build Coastguard Worker rshrn v24.8b, v24.8h, #8 1222*c0909341SAndroid Build Coastguard Worker rshrn2 v24.16b, v25.8h, #8 1223*c0909341SAndroid Build Coastguard Worker rshrn v26.8b, v26.8h, #8 1224*c0909341SAndroid Build Coastguard Worker rshrn2 v26.16b, v27.8h, #8 1225*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 1226*c0909341SAndroid Build Coastguard Worker st1 {v20.16b}, [x0], #16 1227*c0909341SAndroid Build Coastguard Worker st1 {v22.16b}, [x6], #16 1228*c0909341SAndroid Build Coastguard Worker st1 {v24.16b}, [x5], #16 1229*c0909341SAndroid Build Coastguard Worker st1 {v26.16b}, [x8], #16 1230*c0909341SAndroid Build Coastguard Worker b.gt 2b 
1231*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 1232*c0909341SAndroid Build Coastguard Worker b.le 9f 1233*c0909341SAndroid Build Coastguard Worker sub x2, x2, w9, uxtw 1234*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 1235*c0909341SAndroid Build Coastguard Worker add x6, x6, x1 1236*c0909341SAndroid Build Coastguard Worker add x5, x5, x1 1237*c0909341SAndroid Build Coastguard Worker add x8, x8, x1 1238*c0909341SAndroid Build Coastguard Worker mov w3, w9 1239*c0909341SAndroid Build Coastguard Worker b 1b 1240*c0909341SAndroid Build Coastguard Worker9: 1241*c0909341SAndroid Build Coastguard Worker ret 1242*c0909341SAndroid Build Coastguard Workerendfunc 1243*c0909341SAndroid Build Coastguard Worker 1244*c0909341SAndroid Build Coastguard Workerjumptable ipred_smooth_v_tbl 1245*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_smooth_v_tbl 1246*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_smooth_v_tbl 1247*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_smooth_v_tbl 1248*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_smooth_v_tbl 1249*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_smooth_v_tbl 1250*c0909341SAndroid Build Coastguard Workerendjumptable 1251*c0909341SAndroid Build Coastguard Worker 1252*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_h_8bpc_neon(pixel *dst, const ptrdiff_t stride, 1253*c0909341SAndroid Build Coastguard Worker// const pixel *const topleft, 1254*c0909341SAndroid Build Coastguard Worker// const int width, const int height, const int a, 1255*c0909341SAndroid Build Coastguard Worker// const int max_width, const int max_height); 1256*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_h_8bpc_neon, export=1 1257*c0909341SAndroid Build Coastguard Worker movrel x8, X(sm_weights) 1258*c0909341SAndroid Build Coastguard Worker add x8, x8, w3, uxtw 1259*c0909341SAndroid Build Coastguard Worker clz w9, w3 1260*c0909341SAndroid Build Coastguard 
Worker movrel x5, ipred_smooth_h_tbl 1261*c0909341SAndroid Build Coastguard Worker add x12, x2, w3, uxtw 1262*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 1263*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x5, w9, uxtw #2] 1264*c0909341SAndroid Build Coastguard Worker ld1r {v5.16b}, [x12] // right 1265*c0909341SAndroid Build Coastguard Worker add x5, x5, x9 1266*c0909341SAndroid Build Coastguard Worker add x6, x0, x1 1267*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 1268*c0909341SAndroid Build Coastguard Worker br x5 1269*c0909341SAndroid Build Coastguard Worker40: 1270*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1271*c0909341SAndroid Build Coastguard Worker ld1r {v7.2s}, [x8] // weights_hor 1272*c0909341SAndroid Build Coastguard Worker sub x2, x2, #4 1273*c0909341SAndroid Build Coastguard Worker mov x7, #-4 1274*c0909341SAndroid Build Coastguard Worker uxtl v7.8h, v7.8b // weights_hor 1275*c0909341SAndroid Build Coastguard Worker4: 1276*c0909341SAndroid Build Coastguard Worker ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], x7 // left 1277*c0909341SAndroid Build Coastguard Worker shll v20.8h, v5.8b, #8 // right*256 1278*c0909341SAndroid Build Coastguard Worker shll v21.8h, v5.8b, #8 1279*c0909341SAndroid Build Coastguard Worker zip1 v1.2s, v1.2s, v0.2s // left, flipped 1280*c0909341SAndroid Build Coastguard Worker zip1 v0.2s, v3.2s, v2.2s 1281*c0909341SAndroid Build Coastguard Worker usubl v0.8h, v0.8b, v5.8b // left-right 1282*c0909341SAndroid Build Coastguard Worker usubl v1.8h, v1.8b, v5.8b 1283*c0909341SAndroid Build Coastguard Worker mla v20.8h, v0.8h, v7.8h // right*256 + (left-right)*weights_hor 1284*c0909341SAndroid Build Coastguard Worker mla v21.8h, v1.8h, v7.8h 1285*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #8 1286*c0909341SAndroid Build Coastguard Worker rshrn v21.8b, v21.8h, #8 1287*c0909341SAndroid Build Coastguard Worker st1 {v20.s}[0], [x0], x1 1288*c0909341SAndroid Build Coastguard 
Worker st1 {v20.s}[1], [x6], x1 1289*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 1290*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[0], [x0], x1 1291*c0909341SAndroid Build Coastguard Worker st1 {v21.s}[1], [x6], x1 1292*c0909341SAndroid Build Coastguard Worker b.gt 4b 1293*c0909341SAndroid Build Coastguard Worker ret 1294*c0909341SAndroid Build Coastguard Worker80: 1295*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1296*c0909341SAndroid Build Coastguard Worker ld1 {v7.8b}, [x8] // weights_hor 1297*c0909341SAndroid Build Coastguard Worker sub x2, x2, #4 1298*c0909341SAndroid Build Coastguard Worker mov x7, #-4 1299*c0909341SAndroid Build Coastguard Worker uxtl v7.8h, v7.8b // weights_hor 1300*c0909341SAndroid Build Coastguard Worker8: 1301*c0909341SAndroid Build Coastguard Worker ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], x7 // left 1302*c0909341SAndroid Build Coastguard Worker shll v20.8h, v5.8b, #8 // right*256 1303*c0909341SAndroid Build Coastguard Worker shll v21.8h, v5.8b, #8 1304*c0909341SAndroid Build Coastguard Worker shll v22.8h, v5.8b, #8 1305*c0909341SAndroid Build Coastguard Worker shll v23.8h, v5.8b, #8 1306*c0909341SAndroid Build Coastguard Worker usubl v3.8h, v3.8b, v5.8b // left-right 1307*c0909341SAndroid Build Coastguard Worker usubl v2.8h, v2.8b, v5.8b 1308*c0909341SAndroid Build Coastguard Worker usubl v1.8h, v1.8b, v5.8b 1309*c0909341SAndroid Build Coastguard Worker usubl v0.8h, v0.8b, v5.8b 1310*c0909341SAndroid Build Coastguard Worker mla v20.8h, v3.8h, v7.8h // right*256 + (left-right)*weights_hor 1311*c0909341SAndroid Build Coastguard Worker mla v21.8h, v2.8h, v7.8h // (left flipped) 1312*c0909341SAndroid Build Coastguard Worker mla v22.8h, v1.8h, v7.8h 1313*c0909341SAndroid Build Coastguard Worker mla v23.8h, v0.8h, v7.8h 1314*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #8 1315*c0909341SAndroid Build Coastguard Worker rshrn v21.8b, v21.8h, #8 1316*c0909341SAndroid Build Coastguard 
Worker rshrn v22.8b, v22.8h, #8 1317*c0909341SAndroid Build Coastguard Worker rshrn v23.8b, v23.8h, #8 1318*c0909341SAndroid Build Coastguard Worker st1 {v20.8b}, [x0], x1 1319*c0909341SAndroid Build Coastguard Worker st1 {v21.8b}, [x6], x1 1320*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 1321*c0909341SAndroid Build Coastguard Worker st1 {v22.8b}, [x0], x1 1322*c0909341SAndroid Build Coastguard Worker st1 {v23.8b}, [x6], x1 1323*c0909341SAndroid Build Coastguard Worker b.gt 8b 1324*c0909341SAndroid Build Coastguard Worker ret 1325*c0909341SAndroid Build Coastguard Worker160: 1326*c0909341SAndroid Build Coastguard Worker320: 1327*c0909341SAndroid Build Coastguard Worker640: 1328*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1329*c0909341SAndroid Build Coastguard Worker sub x2, x2, #4 1330*c0909341SAndroid Build Coastguard Worker mov x7, #-4 1331*c0909341SAndroid Build Coastguard Worker // Set up pointers for four rows in parallel; x0, x6, x5, x10 1332*c0909341SAndroid Build Coastguard Worker add x5, x0, x1 1333*c0909341SAndroid Build Coastguard Worker add x10, x6, x1 1334*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 1335*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw 1336*c0909341SAndroid Build Coastguard Worker mov w9, w3 1337*c0909341SAndroid Build Coastguard Worker 1338*c0909341SAndroid Build Coastguard Worker1: 1339*c0909341SAndroid Build Coastguard Worker ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x2], x7 // left 1340*c0909341SAndroid Build Coastguard Worker usubl v0.8h, v0.8b, v5.8b // left-right 1341*c0909341SAndroid Build Coastguard Worker usubl v1.8h, v1.8b, v5.8b 1342*c0909341SAndroid Build Coastguard Worker usubl v2.8h, v2.8b, v5.8b 1343*c0909341SAndroid Build Coastguard Worker usubl v3.8h, v3.8b, v5.8b 1344*c0909341SAndroid Build Coastguard Worker2: 1345*c0909341SAndroid Build Coastguard Worker ld1 {v7.16b}, [x8], #16 // weights_hor 1346*c0909341SAndroid Build Coastguard Worker shll v20.8h, v5.8b, #8 
// right*256 1347*c0909341SAndroid Build Coastguard Worker shll v21.8h, v5.8b, #8 1348*c0909341SAndroid Build Coastguard Worker shll v22.8h, v5.8b, #8 1349*c0909341SAndroid Build Coastguard Worker shll v23.8h, v5.8b, #8 1350*c0909341SAndroid Build Coastguard Worker shll v24.8h, v5.8b, #8 1351*c0909341SAndroid Build Coastguard Worker shll v25.8h, v5.8b, #8 1352*c0909341SAndroid Build Coastguard Worker shll v26.8h, v5.8b, #8 1353*c0909341SAndroid Build Coastguard Worker shll v27.8h, v5.8b, #8 1354*c0909341SAndroid Build Coastguard Worker uxtl v6.8h, v7.8b // weights_hor 1355*c0909341SAndroid Build Coastguard Worker uxtl2 v7.8h, v7.16b 1356*c0909341SAndroid Build Coastguard Worker mla v20.8h, v3.8h, v6.8h // right*256 + (left-right)*weights_hor 1357*c0909341SAndroid Build Coastguard Worker mla v21.8h, v3.8h, v7.8h // (left flipped) 1358*c0909341SAndroid Build Coastguard Worker mla v22.8h, v2.8h, v6.8h 1359*c0909341SAndroid Build Coastguard Worker mla v23.8h, v2.8h, v7.8h 1360*c0909341SAndroid Build Coastguard Worker mla v24.8h, v1.8h, v6.8h 1361*c0909341SAndroid Build Coastguard Worker mla v25.8h, v1.8h, v7.8h 1362*c0909341SAndroid Build Coastguard Worker mla v26.8h, v0.8h, v6.8h 1363*c0909341SAndroid Build Coastguard Worker mla v27.8h, v0.8h, v7.8h 1364*c0909341SAndroid Build Coastguard Worker rshrn v20.8b, v20.8h, #8 1365*c0909341SAndroid Build Coastguard Worker rshrn2 v20.16b, v21.8h, #8 1366*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #8 1367*c0909341SAndroid Build Coastguard Worker rshrn2 v22.16b, v23.8h, #8 1368*c0909341SAndroid Build Coastguard Worker rshrn v24.8b, v24.8h, #8 1369*c0909341SAndroid Build Coastguard Worker rshrn2 v24.16b, v25.8h, #8 1370*c0909341SAndroid Build Coastguard Worker rshrn v26.8b, v26.8h, #8 1371*c0909341SAndroid Build Coastguard Worker rshrn2 v26.16b, v27.8h, #8 1372*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 1373*c0909341SAndroid Build Coastguard Worker st1 {v20.16b}, [x0], #16 1374*c0909341SAndroid 
Build Coastguard Worker st1 {v22.16b}, [x6], #16 1375*c0909341SAndroid Build Coastguard Worker st1 {v24.16b}, [x5], #16 1376*c0909341SAndroid Build Coastguard Worker st1 {v26.16b}, [x10], #16 1377*c0909341SAndroid Build Coastguard Worker b.gt 2b 1378*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 1379*c0909341SAndroid Build Coastguard Worker b.le 9f 1380*c0909341SAndroid Build Coastguard Worker sub x8, x8, w9, uxtw 1381*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 1382*c0909341SAndroid Build Coastguard Worker add x6, x6, x1 1383*c0909341SAndroid Build Coastguard Worker add x5, x5, x1 1384*c0909341SAndroid Build Coastguard Worker add x10, x10, x1 1385*c0909341SAndroid Build Coastguard Worker mov w3, w9 1386*c0909341SAndroid Build Coastguard Worker b 1b 1387*c0909341SAndroid Build Coastguard Worker9: 1388*c0909341SAndroid Build Coastguard Worker ret 1389*c0909341SAndroid Build Coastguard Workerendfunc 1390*c0909341SAndroid Build Coastguard Worker 1391*c0909341SAndroid Build Coastguard Workerjumptable ipred_smooth_h_tbl 1392*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_smooth_h_tbl 1393*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_smooth_h_tbl 1394*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_smooth_h_tbl 1395*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_smooth_h_tbl 1396*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_smooth_h_tbl 1397*c0909341SAndroid Build Coastguard Workerendjumptable 1398*c0909341SAndroid Build Coastguard Worker 1399*c0909341SAndroid Build Coastguard Workerconst padding_mask_buf 1400*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1401*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1402*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 1403*c0909341SAndroid Build Coastguard Worker .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00 1404*c0909341SAndroid Build Coastguard Workerpadding_mask: 1405*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1406*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1407*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1408*c0909341SAndroid Build Coastguard Worker .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 1409*c0909341SAndroid Build Coastguard Workerendconst 1410*c0909341SAndroid Build Coastguard Worker 1411*c0909341SAndroid Build Coastguard Worker// void ipred_z1_upsample_edge_8bpc_neon(pixel *out, const int hsz, 1412*c0909341SAndroid Build Coastguard Worker// const pixel *const in, const int end); 1413*c0909341SAndroid Build Coastguard Workerfunction ipred_z1_upsample_edge_8bpc_neon, export=1 1414*c0909341SAndroid Build Coastguard Worker movrel x4, padding_mask 1415*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x2] // in[] 1416*c0909341SAndroid Build Coastguard Worker add x5, x2, w3, uxtw // in[end] 1417*c0909341SAndroid Build Coastguard Worker sub x4, x4, w3, uxtw 1418*c0909341SAndroid Build Coastguard Worker 1419*c0909341SAndroid Build Coastguard Worker ld1r {v1.16b}, [x5] // padding 1420*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [x4] // padding_mask 1421*c0909341SAndroid Build Coastguard Worker 1422*c0909341SAndroid Build Coastguard Worker movi v31.8h, #9 1423*c0909341SAndroid Build Coastguard Worker 1424*c0909341SAndroid Build Coastguard Worker bit v0.16b, v1.16b, v3.16b // padded in[] 1425*c0909341SAndroid Build Coastguard Worker 1426*c0909341SAndroid Build Coastguard Worker ext v4.16b, v0.16b, v1.16b, #1 1427*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #2 1428*c0909341SAndroid Build Coastguard Worker ext v6.16b, v0.16b, v1.16b, #3 1429*c0909341SAndroid Build Coastguard Worker 1430*c0909341SAndroid Build Coastguard Worker uaddl v16.8h, v4.8b, v5.8b // 
in[i+1] + in[i+2] 1431*c0909341SAndroid Build Coastguard Worker uaddl2 v17.8h, v4.16b, v5.16b 1432*c0909341SAndroid Build Coastguard Worker uaddl v18.8h, v0.8b, v6.8b // in[i+0] + in[i+3] 1433*c0909341SAndroid Build Coastguard Worker uaddl2 v19.8h, v0.16b, v6.16b 1434*c0909341SAndroid Build Coastguard Worker mul v16.8h, v16.8h, v31.8h // 9*(in[i+1] + in[i+2]) 1435*c0909341SAndroid Build Coastguard Worker mul v17.8h, v17.8h, v31.8h 1436*c0909341SAndroid Build Coastguard Worker sub v16.8h, v16.8h, v18.8h 1437*c0909341SAndroid Build Coastguard Worker sub v17.8h, v17.8h, v19.8h 1438*c0909341SAndroid Build Coastguard Worker 1439*c0909341SAndroid Build Coastguard Worker sqrshrun v16.8b, v16.8h, #4 1440*c0909341SAndroid Build Coastguard Worker sqrshrun2 v16.16b, v17.8h, #4 1441*c0909341SAndroid Build Coastguard Worker 1442*c0909341SAndroid Build Coastguard Worker zip1 v0.16b, v4.16b, v16.16b 1443*c0909341SAndroid Build Coastguard Worker zip2 v1.16b, v4.16b, v16.16b 1444*c0909341SAndroid Build Coastguard Worker 1445*c0909341SAndroid Build Coastguard Worker st1 {v0.16b, v1.16b}, [x0] 1446*c0909341SAndroid Build Coastguard Worker 1447*c0909341SAndroid Build Coastguard Worker ret 1448*c0909341SAndroid Build Coastguard Workerendfunc 1449*c0909341SAndroid Build Coastguard Worker 1450*c0909341SAndroid Build Coastguard Worker// void ipred_z2_upsample_edge_8bpc_neon(pixel *out, const int sz, 1451*c0909341SAndroid Build Coastguard Worker// const pixel *const in); 1452*c0909341SAndroid Build Coastguard Workerfunction ipred_z2_upsample_edge_8bpc_neon, export=1 1453*c0909341SAndroid Build Coastguard Worker // Here, sz is 4 or 8, and we produce 2*sz+1 output elements. 
// Body of ipred_z2_upsample_edge_8bpc_neon(out = x0, sz = w1, in = x2). Its
// `function` line ends the previous physical line, and its last source lines
// (16-byte store, ret, endfunc) open the following one.
//  - v0 = in[0..15] with the lanes from index sz upwards replaced by in[sz]
//    (`bit` with 16 bytes loaded from padding_mask - sz);
//    v2 = in[0] broadcast, v1 = in[sz] broadcast.
//  - v4 = in[i-1] (in[0] stands in for in[-1]), v5 = in[i+1], v6 = in[i+2].
//  - Only the low 8 lanes are filtered:
//        (9*(in[i+0] + in[i+1]) - (in[i-1] + in[i+2]) + 8) >> 4
//    narrowed with unsigned saturation (sqrshrun).
//    NOTE: the inline comment on the `mul` below says
//    "9*(in[i+1] + in[i+2])"; that text matches the z1 variant. Here v16 holds
//    in[i+0] + in[i+1], as the comment on the preceding uaddl states.
//  - zip1 interleaves in[i] with the filtered value into 16 output bytes; the
//    single byte in[sz] is stored separately at out + 16.
1454*c0909341SAndroid Build Coastguard Worker movrel x4, padding_mask 1455*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x2] // in[] 1456*c0909341SAndroid Build Coastguard Worker add x5, x2, w1, uxtw // in[sz] 1457*c0909341SAndroid Build Coastguard Worker sub x4, x4, w1, uxtw 1458*c0909341SAndroid Build Coastguard Worker 1459*c0909341SAndroid Build Coastguard Worker ld1r {v2.16b}, [x2] // in[0] for padding 1460*c0909341SAndroid Build Coastguard Worker ld1r {v1.16b}, [x5] // padding 1461*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [x4] // padding_mask 1462*c0909341SAndroid Build Coastguard Worker 1463*c0909341SAndroid Build Coastguard Worker movi v31.8h, #9 1464*c0909341SAndroid Build Coastguard Worker 1465*c0909341SAndroid Build Coastguard Worker bit v0.16b, v1.16b, v3.16b // padded in[] 1466*c0909341SAndroid Build Coastguard Worker 1467*c0909341SAndroid Build Coastguard Worker ext v4.16b, v2.16b, v0.16b, #15 1468*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #1 1469*c0909341SAndroid Build Coastguard Worker ext v6.16b, v0.16b, v1.16b, #2 1470*c0909341SAndroid Build Coastguard Worker 1471*c0909341SAndroid Build Coastguard Worker uaddl v16.8h, v0.8b, v5.8b // in[i+0] + in[i+1] 1472*c0909341SAndroid Build Coastguard Worker uaddl v18.8h, v4.8b, v6.8b // in[i-1] + in[i+2] 1473*c0909341SAndroid Build Coastguard Worker mul v16.8h, v16.8h, v31.8h // 9*(in[i+1] + in[i+2]) 1474*c0909341SAndroid Build Coastguard Worker sub v16.8h, v16.8h, v18.8h 1475*c0909341SAndroid Build Coastguard Worker 1476*c0909341SAndroid Build Coastguard Worker sqrshrun v16.8b, v16.8h, #4 1477*c0909341SAndroid Build Coastguard Worker 1478*c0909341SAndroid Build Coastguard Worker add x5, x0, #16 1479*c0909341SAndroid Build Coastguard Worker 1480*c0909341SAndroid Build Coastguard Worker zip1 v2.16b, v0.16b, v16.16b 1481*c0909341SAndroid Build Coastguard Worker 1482*c0909341SAndroid Build Coastguard Worker st1 {v1.b}[0], [x5] 1483*c0909341SAndroid Build
// The following physical lines contain, in order:
//  * The last source lines of ipred_z2_upsample_edge_8bpc_neon (16-byte store
//    of the interleaved result to out, ret, endfunc).
//  * edge_filter: 4-byte kernel rows {0,4,8,0} and {0,5,6,0}. The row for
//    strength 3 is deliberately left out; that case uses the five-tap path
//    with immediate coefficients instead.
//  * ipred_z1_filter_edge_8bpc_neon(out = x0, sz = w1, in = x2, end = w3,
//    strength = w4):
//      - strength != 3 (three-tap): x5 = edge_filter + strength*4 - 3, so the
//        halfword load fetches bytes 1 and 2 of the selected row
//        (k1 -> v30, k2 -> v31), and
//            out[i] = (k1*in[i] + k2*in[i+1] + k1*in[i+2] + 8) >> 4.
//      - strength == 3, L(fivetap): `in` is rewound by one byte and lane 0 is
//        overwritten with lane 1, so in[0] stands in for in[-1]. With s[]
//        denoting that shifted input,
//            out[i] = (2*s[i] + 4*s[i+1] + 4*s[i+2] + 4*s[i+3] + 2*s[i+4]
//                      + 8) >> 4.
//      - Both paths emit whole 16-byte vectors and loop while w1 (sz) remains
//        > 0 after subtracting 16. v0 holds the current 16 input bytes and v1
//        the next 16; w3 tracks `end` relative to the buffered data.
//      - When the last valid pixel falls inside the buffered vectors (w3 < 17,
//        resp. w3 < 18), label 2 loads a shifted padding_mask and uses `bit`
//        to replace every lane past the last valid pixel with that pixel.
//        After the last pass that still touches real input, the remaining
//        output vectors are plain copies of the padding value (label 5).
//      - The flags tested by the b.gt/b.le after each store come from the
//        earlier `subs w1`; the instructions in between do not set flags.
//  * ipred_pixel_set_8bpc_neon(out = x0, px = w1, n = w2): broadcasts px to
//    16 lanes and stores 16 bytes per iteration while n remains > 0 after
//    subtracting 16. Its endfunc is on the physical line after this group.
Coastguard Worker // In case sz=8, output one single pixel in out[16]. 1484*c0909341SAndroid Build Coastguard Worker st1 {v2.16b}, [x0] 1485*c0909341SAndroid Build Coastguard Worker 1486*c0909341SAndroid Build Coastguard Worker ret 1487*c0909341SAndroid Build Coastguard Workerendfunc 1488*c0909341SAndroid Build Coastguard Worker 1489*c0909341SAndroid Build Coastguard Workerconst edge_filter 1490*c0909341SAndroid Build Coastguard Worker .byte 0, 4, 8, 0 1491*c0909341SAndroid Build Coastguard Worker .byte 0, 5, 6, 0 1492*c0909341SAndroid Build Coastguard Worker// Leaving out the coeffs for strength=3 1493*c0909341SAndroid Build Coastguard Worker// .byte 2, 4, 4, 0 1494*c0909341SAndroid Build Coastguard Workerendconst 1495*c0909341SAndroid Build Coastguard Worker 1496*c0909341SAndroid Build Coastguard Worker// void ipred_z1_filter_edge_8bpc_neon(pixel *out, const int sz, 1497*c0909341SAndroid Build Coastguard Worker// const pixel *const in, const int end, 1498*c0909341SAndroid Build Coastguard Worker// const int strength); 1499*c0909341SAndroid Build Coastguard Workerfunction ipred_z1_filter_edge_8bpc_neon, export=1 1500*c0909341SAndroid Build Coastguard Worker cmp w4, #3 1501*c0909341SAndroid Build Coastguard Worker b.eq L(fivetap) // if (strength == 3) goto fivetap 1502*c0909341SAndroid Build Coastguard Worker 1503*c0909341SAndroid Build Coastguard Worker movrel x5, edge_filter, -3 1504*c0909341SAndroid Build Coastguard Worker add x5, x5, w4, uxtw #2 // edge_filter + (strength - 1)*4 + 1 1505*c0909341SAndroid Build Coastguard Worker 1506*c0909341SAndroid Build Coastguard Worker ld1 {v31.h}[0], [x5] // kernel[1-2] 1507*c0909341SAndroid Build Coastguard Worker 1508*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x2], #16 1509*c0909341SAndroid Build Coastguard Worker 1510*c0909341SAndroid Build Coastguard Worker dup v30.16b, v31.b[0] 1511*c0909341SAndroid Build Coastguard Worker dup v31.16b, v31.b[1] 1512*c0909341SAndroid Build Coastguard Worker1:
1513*c0909341SAndroid Build Coastguard Worker // in[end], is the last valid pixel. We produce 16 pixels out by 1514*c0909341SAndroid Build Coastguard Worker // using 18 pixels in - the last pixel used is [17] of the ones 1515*c0909341SAndroid Build Coastguard Worker // read/buffered. 1516*c0909341SAndroid Build Coastguard Worker cmp w3, #17 1517*c0909341SAndroid Build Coastguard Worker ld1 {v1.16b}, [x2], #16 1518*c0909341SAndroid Build Coastguard Worker b.lt 2f 1519*c0909341SAndroid Build Coastguard Worker ext v2.16b, v0.16b, v1.16b, #1 1520*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1521*c0909341SAndroid Build Coastguard Worker umull v4.8h, v0.8b, v30.8b 1522*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v2.8b, v31.8b 1523*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v3.8b, v30.8b 1524*c0909341SAndroid Build Coastguard Worker umull2 v5.8h, v0.16b, v30.16b 1525*c0909341SAndroid Build Coastguard Worker umlal2 v5.8h, v2.16b, v31.16b 1526*c0909341SAndroid Build Coastguard Worker umlal2 v5.8h, v3.16b, v30.16b 1527*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1528*c0909341SAndroid Build Coastguard Worker mov v0.16b, v1.16b 1529*c0909341SAndroid Build Coastguard Worker rshrn v4.8b, v4.8h, #4 1530*c0909341SAndroid Build Coastguard Worker rshrn2 v4.16b, v5.8h, #4 1531*c0909341SAndroid Build Coastguard Worker sub w3, w3, #16 1532*c0909341SAndroid Build Coastguard Worker st1 {v4.16b}, [x0], #16 1533*c0909341SAndroid Build Coastguard Worker b.gt 1b 1534*c0909341SAndroid Build Coastguard Worker ret 1535*c0909341SAndroid Build Coastguard Worker2: 1536*c0909341SAndroid Build Coastguard Worker // Right padding 1537*c0909341SAndroid Build Coastguard Worker 1538*c0909341SAndroid Build Coastguard Worker // x2[w3-32] is the padding pixel (x2 points 32 bytes ahead) 1539*c0909341SAndroid Build Coastguard Worker movrel x5, padding_mask 1540*c0909341SAndroid Build Coastguard Worker sub w6, w3, #32 1541*c0909341SAndroid Build
Coastguard Worker sub x5, x5, w3, uxtw 1542*c0909341SAndroid Build Coastguard Worker add x6, x2, w6, sxtw 1543*c0909341SAndroid Build Coastguard Worker 1544*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b}, [x5] // padding_mask 1545*c0909341SAndroid Build Coastguard Worker 1546*c0909341SAndroid Build Coastguard Worker ld1r {v1.16b}, [x6] 1547*c0909341SAndroid Build Coastguard Worker bit v0.16b, v1.16b, v2.16b // Pad v0-v1 1548*c0909341SAndroid Build Coastguard Worker 1549*c0909341SAndroid Build Coastguard Worker // Filter one block 1550*c0909341SAndroid Build Coastguard Worker ext v2.16b, v0.16b, v1.16b, #1 1551*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1552*c0909341SAndroid Build Coastguard Worker umull v4.8h, v0.8b, v30.8b 1553*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v2.8b, v31.8b 1554*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v3.8b, v30.8b 1555*c0909341SAndroid Build Coastguard Worker umull2 v5.8h, v0.16b, v30.16b 1556*c0909341SAndroid Build Coastguard Worker umlal2 v5.8h, v2.16b, v31.16b 1557*c0909341SAndroid Build Coastguard Worker umlal2 v5.8h, v3.16b, v30.16b 1558*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1559*c0909341SAndroid Build Coastguard Worker rshrn v4.8b, v4.8h, #4 1560*c0909341SAndroid Build Coastguard Worker rshrn2 v4.16b, v5.8h, #4 1561*c0909341SAndroid Build Coastguard Worker st1 {v4.16b}, [x0], #16 1562*c0909341SAndroid Build Coastguard Worker b.le 9f 1563*c0909341SAndroid Build Coastguard Worker5: 1564*c0909341SAndroid Build Coastguard Worker // After one block, any remaining output would only be filtering 1565*c0909341SAndroid Build Coastguard Worker // padding - thus just store the padding.
1566*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1567*c0909341SAndroid Build Coastguard Worker st1 {v1.16b}, [x0], #16 1568*c0909341SAndroid Build Coastguard Worker b.gt 5b 1569*c0909341SAndroid Build Coastguard Worker9: 1570*c0909341SAndroid Build Coastguard Worker ret 1571*c0909341SAndroid Build Coastguard Worker 1572*c0909341SAndroid Build Coastguard WorkerL(fivetap): 1573*c0909341SAndroid Build Coastguard Worker sub x2, x2, #1 // topleft -= 1 1574*c0909341SAndroid Build Coastguard Worker movi v29.16b, #2 1575*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x2], #16 1576*c0909341SAndroid Build Coastguard Worker movi v30.16b, #4 1577*c0909341SAndroid Build Coastguard Worker movi v31.16b, #4 1578*c0909341SAndroid Build Coastguard Worker ins v0.b[0], v0.b[1] 1579*c0909341SAndroid Build Coastguard Worker1: 1580*c0909341SAndroid Build Coastguard Worker // in[end+1], is the last valid pixel. We produce 16 pixels out by 1581*c0909341SAndroid Build Coastguard Worker // using 20 pixels in - the last pixel used is [19] of the ones 1582*c0909341SAndroid Build Coastguard Worker // read/buffered.
1583*c0909341SAndroid Build Coastguard Worker cmp w3, #18 1584*c0909341SAndroid Build Coastguard Worker ld1 {v1.16b}, [x2], #16 1585*c0909341SAndroid Build Coastguard Worker b.lt 2f // if (end + 1 < 19) 1586*c0909341SAndroid Build Coastguard Worker ext v2.16b, v0.16b, v1.16b, #1 1587*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1588*c0909341SAndroid Build Coastguard Worker ext v4.16b, v0.16b, v1.16b, #3 1589*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #4 1590*c0909341SAndroid Build Coastguard Worker umull v6.8h, v0.8b, v29.8b 1591*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v2.8b, v30.8b 1592*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v3.8b, v31.8b 1593*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v4.8b, v30.8b 1594*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v5.8b, v29.8b 1595*c0909341SAndroid Build Coastguard Worker umull2 v7.8h, v0.16b, v29.16b 1596*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v2.16b, v30.16b 1597*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v3.16b, v31.16b 1598*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v4.16b, v30.16b 1599*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v5.16b, v29.16b 1600*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1601*c0909341SAndroid Build Coastguard Worker mov v0.16b, v1.16b 1602*c0909341SAndroid Build Coastguard Worker rshrn v6.8b, v6.8h, #4 1603*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v7.8h, #4 1604*c0909341SAndroid Build Coastguard Worker sub w3, w3, #16 1605*c0909341SAndroid Build Coastguard Worker st1 {v6.16b}, [x0], #16 1606*c0909341SAndroid Build Coastguard Worker b.gt 1b 1607*c0909341SAndroid Build Coastguard Worker ret 1608*c0909341SAndroid Build Coastguard Worker2: 1609*c0909341SAndroid Build Coastguard Worker // Right padding 1610*c0909341SAndroid Build Coastguard Worker 1611*c0909341SAndroid Build Coastguard Worker // x2[w3+1-32] is the padding pixel
(x2 points 32 bytes ahead) 1612*c0909341SAndroid Build Coastguard Worker movrel x5, padding_mask, -1 1613*c0909341SAndroid Build Coastguard Worker sub w6, w3, #31 1614*c0909341SAndroid Build Coastguard Worker sub x5, x5, w3, uxtw 1615*c0909341SAndroid Build Coastguard Worker add x6, x2, w6, sxtw 1616*c0909341SAndroid Build Coastguard Worker 1617*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b, v3.16b}, [x5] // padding_mask 1618*c0909341SAndroid Build Coastguard Worker 1619*c0909341SAndroid Build Coastguard Worker ld1r {v28.16b}, [x6] 1620*c0909341SAndroid Build Coastguard Worker bit v0.16b, v28.16b, v2.16b // Pad v0-v1 1621*c0909341SAndroid Build Coastguard Worker bit v1.16b, v28.16b, v3.16b 1622*c0909341SAndroid Build Coastguard Worker4: 1623*c0909341SAndroid Build Coastguard Worker // Filter one block 1624*c0909341SAndroid Build Coastguard Worker ext v2.16b, v0.16b, v1.16b, #1 1625*c0909341SAndroid Build Coastguard Worker ext v3.16b, v0.16b, v1.16b, #2 1626*c0909341SAndroid Build Coastguard Worker ext v4.16b, v0.16b, v1.16b, #3 1627*c0909341SAndroid Build Coastguard Worker ext v5.16b, v0.16b, v1.16b, #4 1628*c0909341SAndroid Build Coastguard Worker umull v6.8h, v0.8b, v29.8b 1629*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v2.8b, v30.8b 1630*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v3.8b, v31.8b 1631*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v4.8b, v30.8b 1632*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v5.8b, v29.8b 1633*c0909341SAndroid Build Coastguard Worker umull2 v7.8h, v0.16b, v29.16b 1634*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v2.16b, v30.16b 1635*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v3.16b, v31.16b 1636*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v4.16b, v30.16b 1637*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v5.16b, v29.16b 1638*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1639*c0909341SAndroid Build Coastguard Worker mov v0.16b,
v1.16b 1640*c0909341SAndroid Build Coastguard Worker mov v1.16b, v28.16b 1641*c0909341SAndroid Build Coastguard Worker rshrn v6.8b, v6.8h, #4 1642*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v7.8h, #4 1643*c0909341SAndroid Build Coastguard Worker sub w3, w3, #16 1644*c0909341SAndroid Build Coastguard Worker st1 {v6.16b}, [x0], #16 1645*c0909341SAndroid Build Coastguard Worker b.le 9f 1646*c0909341SAndroid Build Coastguard Worker // v0-v1[w3+1] is the last valid pixel; if (w3 + 1 > 0) we need to 1647*c0909341SAndroid Build Coastguard Worker // filter properly once more - aka (w3 >= 0). 1648*c0909341SAndroid Build Coastguard Worker cmp w3, #0 1649*c0909341SAndroid Build Coastguard Worker b.ge 4b 1650*c0909341SAndroid Build Coastguard Worker5: 1651*c0909341SAndroid Build Coastguard Worker // When w3 <= 0, all remaining pixels in v0-v1 are equal to the 1652*c0909341SAndroid Build Coastguard Worker // last valid pixel - thus just output that without filtering. 1653*c0909341SAndroid Build Coastguard Worker subs w1, w1, #16 1654*c0909341SAndroid Build Coastguard Worker st1 {v1.16b}, [x0], #16 1655*c0909341SAndroid Build Coastguard Worker b.gt 5b 1656*c0909341SAndroid Build Coastguard Worker9: 1657*c0909341SAndroid Build Coastguard Worker ret 1658*c0909341SAndroid Build Coastguard Workerendfunc 1659*c0909341SAndroid Build Coastguard Worker 1660*c0909341SAndroid Build Coastguard Worker// void ipred_pixel_set_8bpc_neon(pixel *out, const pixel px, 1661*c0909341SAndroid Build Coastguard Worker// const int n); 1662*c0909341SAndroid Build Coastguard Workerfunction ipred_pixel_set_8bpc_neon, export=1 1663*c0909341SAndroid Build Coastguard Worker dup v0.16b, w1 1664*c0909341SAndroid Build Coastguard Worker1: 1665*c0909341SAndroid Build Coastguard Worker subs w2, w2, #16 1666*c0909341SAndroid Build Coastguard Worker st1 {v0.16b}, [x0], #16 1667*c0909341SAndroid Build Coastguard Worker b.gt 1b 1668*c0909341SAndroid Build Coastguard Worker ret 1669*c0909341SAndroid Build
// The following physical lines contain, in order:
//  * The endfunc of ipred_pixel_set_8bpc_neon.
//  * ipred_z1_fill1_8bpc_neon(dst = x0, stride = x1, top = x2, width = w3,
//    height = w4, dx = w5, max_base_x = w6):
//      - Dispatches on width through ipred_z1_fill1_tbl with index
//        clz(width) - 25 (64 -> 0, 32 -> 1, 16 -> 2, 8 -> 3, 4 -> 4).
//      - w7 is xpos; it starts at dx and advances by dx per row.
//        base = xpos >> 6, frac = xpos & 0x3e. v31 = top[max_base_x]
//        broadcast, w15 = 64.
//      - Two rows are produced per iteration and height is decremented by 2.
//        NOTE(review): presumably height is always even - confirm at callers.
//      - pixel = (top[base]*(64 - frac) + top[base+1]*frac + 32) >> 6.
//        The 4-wide path evaluates the same value as
//        top[base]*64 + (top[base+1] - top[base])*frac in 16-bit lanes, so
//        the "+ top[base+1]*frac" comment on its `mla` is shorthand for
//        "+ (top[base+1]-top[base])*frac".
//      - Only the base of the first row of each pair is compared against
//        max_base_x (signed b.ge). Once it is reached, the 49/89/169 loops
//        fill all remaining rows with top[max_base_x].
//      - Widths 16/32/64 share one path: x13 points at the second row of the
//        pair and x1 is rewritten to 2*stride - width, so adding it after a
//        completed pair moves both pointers down two rows. w12 keeps the
//        width to reload the column counter w3.
//  * ipred_z1_fill1_tbl: jump table of .word offsets relative to the table,
//    ordered 64, 32, 16, 8, 4.
Coastguard Workerendfunc 1670*c0909341SAndroid Build Coastguard Worker 1671*c0909341SAndroid Build Coastguard Worker// void ipred_z1_fill1_8bpc_neon(pixel *dst, const ptrdiff_t stride, 1672*c0909341SAndroid Build Coastguard Worker// const pixel *const top, 1673*c0909341SAndroid Build Coastguard Worker// const int width, const int height, 1674*c0909341SAndroid Build Coastguard Worker// const int dx, const int max_base_x); 1675*c0909341SAndroid Build Coastguard Workerfunction ipred_z1_fill1_8bpc_neon, export=1 1676*c0909341SAndroid Build Coastguard Worker clz w9, w3 1677*c0909341SAndroid Build Coastguard Worker movrel x8, ipred_z1_fill1_tbl 1678*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 1679*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x8, w9, uxtw #2] 1680*c0909341SAndroid Build Coastguard Worker add x10, x2, w6, uxtw // top[max_base_x] 1681*c0909341SAndroid Build Coastguard Worker add x8, x8, x9 1682*c0909341SAndroid Build Coastguard Worker ld1r {v31.16b}, [x10] // padding 1683*c0909341SAndroid Build Coastguard Worker mov w7, w5 1684*c0909341SAndroid Build Coastguard Worker mov w15, #64 1685*c0909341SAndroid Build Coastguard Worker br x8 1686*c0909341SAndroid Build Coastguard Worker40: 1687*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1688*c0909341SAndroid Build Coastguard Worker4: 1689*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 1690*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1691*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1692*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1693*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1694*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1695*c0909341SAndroid Build Coastguard Worker b.ge 49f 1696*c0909341SAndroid Build Coastguard Worker ldr d0, [x2, w8, uxtw] // top[base] 1697*c0909341SAndroid Build Coastguard Worker ldr d2, [x2, w10, uxtw]
1698*c0909341SAndroid Build Coastguard Worker dup v4.4h, w9 // frac 1699*c0909341SAndroid Build Coastguard Worker dup v5.4h, w11 1700*c0909341SAndroid Build Coastguard Worker ext v1.8b, v0.8b, v0.8b, #1 // top[base+1] 1701*c0909341SAndroid Build Coastguard Worker ext v3.8b, v2.8b, v2.8b, #1 1702*c0909341SAndroid Build Coastguard Worker usubl v6.8h, v1.8b, v0.8b // top[base+1]-top[base] 1703*c0909341SAndroid Build Coastguard Worker usubl v7.8h, v3.8b, v2.8b 1704*c0909341SAndroid Build Coastguard Worker ushll v16.8h, v0.8b, #6 // top[base]*64 1705*c0909341SAndroid Build Coastguard Worker ushll v17.8h, v2.8b, #6 1706*c0909341SAndroid Build Coastguard Worker mla v16.4h, v6.4h, v4.4h // + top[base+1]*frac 1707*c0909341SAndroid Build Coastguard Worker mla v17.4h, v7.4h, v5.4h 1708*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 1709*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v17.8h, #6 1710*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[0], [x0], x1 1711*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1712*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1713*c0909341SAndroid Build Coastguard Worker st1 {v17.s}[0], [x0], x1 1714*c0909341SAndroid Build Coastguard Worker b.gt 4b 1715*c0909341SAndroid Build Coastguard Worker ret 1716*c0909341SAndroid Build Coastguard Worker 1717*c0909341SAndroid Build Coastguard Worker49: 1718*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x0], x1 1719*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1720*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x0], x1 1721*c0909341SAndroid Build Coastguard Worker b.gt 49b 1722*c0909341SAndroid Build Coastguard Worker ret 1723*c0909341SAndroid Build Coastguard Worker 1724*c0909341SAndroid Build Coastguard Worker80: 1725*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1726*c0909341SAndroid Build Coastguard Worker8: 1727*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base
1728*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1729*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1730*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1731*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1732*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1733*c0909341SAndroid Build Coastguard Worker b.ge 89f 1734*c0909341SAndroid Build Coastguard Worker ldr q0, [x2, w8, uxtw] // top[base] 1735*c0909341SAndroid Build Coastguard Worker ldr q2, [x2, w10, uxtw] 1736*c0909341SAndroid Build Coastguard Worker dup v4.8b, w9 // frac 1737*c0909341SAndroid Build Coastguard Worker dup v5.8b, w11 1738*c0909341SAndroid Build Coastguard Worker sub w9, w15, w9 // 64 - frac 1739*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 1740*c0909341SAndroid Build Coastguard Worker dup v6.8b, w9 // 64 - frac 1741*c0909341SAndroid Build Coastguard Worker dup v7.8b, w11 1742*c0909341SAndroid Build Coastguard Worker ext v1.16b, v0.16b, v0.16b, #1 // top[base+1] 1743*c0909341SAndroid Build Coastguard Worker ext v3.16b, v2.16b, v2.16b, #1 1744*c0909341SAndroid Build Coastguard Worker umull v16.8h, v0.8b, v6.8b // top[base]*(64-frac) 1745*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v1.8b, v4.8b // + top[base+1]*frac 1746*c0909341SAndroid Build Coastguard Worker umull v17.8h, v2.8b, v7.8b 1747*c0909341SAndroid Build Coastguard Worker umlal v17.8h, v3.8b, v5.8b 1748*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 1749*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v17.8h, #6 1750*c0909341SAndroid Build Coastguard Worker st1 {v16.8b}, [x0], x1 1751*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1752*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1753*c0909341SAndroid Build Coastguard Worker st1 {v17.8b}, [x0], x1 1754*c0909341SAndroid Build Coastguard Worker b.gt 8b 1755*c0909341SAndroid Build Coastguard
Worker ret 1756*c0909341SAndroid Build Coastguard Worker 1757*c0909341SAndroid Build Coastguard Worker89: 1758*c0909341SAndroid Build Coastguard Worker st1 {v31.8b}, [x0], x1 1759*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1760*c0909341SAndroid Build Coastguard Worker st1 {v31.8b}, [x0], x1 1761*c0909341SAndroid Build Coastguard Worker b.gt 89b 1762*c0909341SAndroid Build Coastguard Worker ret 1763*c0909341SAndroid Build Coastguard Worker 1764*c0909341SAndroid Build Coastguard Worker160: 1765*c0909341SAndroid Build Coastguard Worker320: 1766*c0909341SAndroid Build Coastguard Worker640: 1767*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1768*c0909341SAndroid Build Coastguard Worker 1769*c0909341SAndroid Build Coastguard Worker mov w12, w3 1770*c0909341SAndroid Build Coastguard Worker 1771*c0909341SAndroid Build Coastguard Worker add x13, x0, x1 1772*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 1773*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw 1774*c0909341SAndroid Build Coastguard Worker1: 1775*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 1776*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1777*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1778*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1779*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1780*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1781*c0909341SAndroid Build Coastguard Worker b.ge 169f 1782*c0909341SAndroid Build Coastguard Worker add x8, x2, w8, uxtw 1783*c0909341SAndroid Build Coastguard Worker add x10, x2, w10, uxtw 1784*c0909341SAndroid Build Coastguard Worker dup v4.16b, w9 // frac 1785*c0909341SAndroid Build Coastguard Worker dup v5.16b, w11 1786*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b}, [x8], #32 // top[base] 1787*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b, v3.16b}, [x10], #32
1788*c0909341SAndroid Build Coastguard Worker sub w9, w15, w9 // 64 - frac 1789*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 1790*c0909341SAndroid Build Coastguard Worker dup v6.16b, w9 // 64 - frac 1791*c0909341SAndroid Build Coastguard Worker dup v7.16b, w11 1792*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1793*c0909341SAndroid Build Coastguard Worker2: 1794*c0909341SAndroid Build Coastguard Worker ext v16.16b, v0.16b, v1.16b, #1 // top[base+1] 1795*c0909341SAndroid Build Coastguard Worker ext v17.16b, v2.16b, v3.16b, #1 1796*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 1797*c0909341SAndroid Build Coastguard Worker umull v18.8h, v0.8b, v6.8b // top[base]*(64-frac) 1798*c0909341SAndroid Build Coastguard Worker umlal v18.8h, v16.8b, v4.8b // + top[base+1]*frac 1799*c0909341SAndroid Build Coastguard Worker umull2 v19.8h, v0.16b, v6.16b 1800*c0909341SAndroid Build Coastguard Worker umlal2 v19.8h, v16.16b, v4.16b 1801*c0909341SAndroid Build Coastguard Worker umull v20.8h, v2.8b, v7.8b 1802*c0909341SAndroid Build Coastguard Worker umlal v20.8h, v17.8b, v5.8b 1803*c0909341SAndroid Build Coastguard Worker umull2 v21.8h, v2.16b, v7.16b 1804*c0909341SAndroid Build Coastguard Worker umlal2 v21.8h, v17.16b, v5.16b 1805*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v18.8h, #6 1806*c0909341SAndroid Build Coastguard Worker rshrn2 v16.16b, v19.8h, #6 1807*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v20.8h, #6 1808*c0909341SAndroid Build Coastguard Worker rshrn2 v17.16b, v21.8h, #6 1809*c0909341SAndroid Build Coastguard Worker st1 {v16.16b}, [x0], #16 1810*c0909341SAndroid Build Coastguard Worker st1 {v17.16b}, [x13], #16 1811*c0909341SAndroid Build Coastguard Worker b.le 3f 1812*c0909341SAndroid Build Coastguard Worker mov v0.16b, v1.16b 1813*c0909341SAndroid Build Coastguard Worker ld1 {v1.16b}, [x8], #16 // top[base] 1814*c0909341SAndroid Build Coastguard Worker mov v2.16b, v3.16b 1815*c0909341SAndroid
Build Coastguard Worker ld1 {v3.16b}, [x10], #16 1816*c0909341SAndroid Build Coastguard Worker b 2b 1817*c0909341SAndroid Build Coastguard Worker 1818*c0909341SAndroid Build Coastguard Worker3: 1819*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1820*c0909341SAndroid Build Coastguard Worker b.le 9f 1821*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 1822*c0909341SAndroid Build Coastguard Worker add x13, x13, x1 1823*c0909341SAndroid Build Coastguard Worker mov w3, w12 1824*c0909341SAndroid Build Coastguard Worker b 1b 1825*c0909341SAndroid Build Coastguard Worker9: 1826*c0909341SAndroid Build Coastguard Worker ret 1827*c0909341SAndroid Build Coastguard Worker 1828*c0909341SAndroid Build Coastguard Worker169: 1829*c0909341SAndroid Build Coastguard Worker st1 {v31.16b}, [x0], #16 1830*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 1831*c0909341SAndroid Build Coastguard Worker st1 {v31.16b}, [x13], #16 1832*c0909341SAndroid Build Coastguard Worker b.gt 169b 1833*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1834*c0909341SAndroid Build Coastguard Worker b.le 9b 1835*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 1836*c0909341SAndroid Build Coastguard Worker add x13, x13, x1 1837*c0909341SAndroid Build Coastguard Worker mov w3, w12 1838*c0909341SAndroid Build Coastguard Worker b 169b 1839*c0909341SAndroid Build Coastguard Workerendfunc 1840*c0909341SAndroid Build Coastguard Worker 1841*c0909341SAndroid Build Coastguard Workerjumptable ipred_z1_fill1_tbl 1842*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_z1_fill1_tbl 1843*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_z1_fill1_tbl 1844*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_z1_fill1_tbl 1845*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_z1_fill1_tbl 1846*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_z1_fill1_tbl 1847*c0909341SAndroid Build Coastguard Workerendjumptable 1848*c0909341SAndroid Build
// ipred_z1_fill2_8bpc_neon. There is no prototype comment in the source; the
// register usage below matches ipred_z1_fill1_8bpc_neon:
//   x0 = dst, x1 = stride, x2 = top, w3 = width, w4 = height, w5 = dx,
//   w6 = max_base_x.
//  - Only two widths are distinguished: width == 8 branches to 8:, anything
//    else falls through into the 4-wide path.
//  - w7 is xpos (starts at dx, += dx per row); base = xpos >> 6,
//    frac = xpos & 0x3e. v31 = top[max_base_x] broadcast, w15 = 64.
//  - Twice the output width in source bytes is loaded per row (d for 4 wide,
//    q for 8 wide) and split with uzp1/uzp2: even bytes act as top[base], odd
//    bytes as top[base+1], so every output pixel blends one adjacent
//    (even, odd) pair.
//    NOTE(review): presumably this is the variant for an upsampled top edge -
//    confirm against the C caller.
//  - pixel = (top[base]*(64 - frac) + top[base+1]*frac + 32) >> 6. The 4-wide
//    path evaluates it as top[base]*64 + (top[base+1] - top[base])*frac, so
//    the "+ top[base+1]*frac" comment on its `mla` is shorthand for
//    "+ (top[base+1]-top[base])*frac".
//  - Two rows are produced per iteration. Only the first row's base is
//    compared against max_base_x (signed b.ge); once it is reached, labels
//    49/89 fill the remaining rows with top[max_base_x].
Coastguard Worker 1849*c0909341SAndroid Build Coastguard Workerfunction ipred_z1_fill2_8bpc_neon, export=1 1850*c0909341SAndroid Build Coastguard Worker cmp w3, #8 1851*c0909341SAndroid Build Coastguard Worker add x10, x2, w6, uxtw // top[max_base_x] 1852*c0909341SAndroid Build Coastguard Worker ld1r {v31.16b}, [x10] // padding 1853*c0909341SAndroid Build Coastguard Worker mov w7, w5 1854*c0909341SAndroid Build Coastguard Worker mov w15, #64 1855*c0909341SAndroid Build Coastguard Worker b.eq 8f 1856*c0909341SAndroid Build Coastguard Worker 1857*c0909341SAndroid Build Coastguard Worker4: // w == 4 1858*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 1859*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1860*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1861*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1862*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1863*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1864*c0909341SAndroid Build Coastguard Worker b.ge 49f 1865*c0909341SAndroid Build Coastguard Worker ldr d0, [x2, w8, uxtw] // top[base] 1866*c0909341SAndroid Build Coastguard Worker ldr d2, [x2, w10, uxtw] 1867*c0909341SAndroid Build Coastguard Worker dup v4.4h, w9 // frac 1868*c0909341SAndroid Build Coastguard Worker dup v5.4h, w11 1869*c0909341SAndroid Build Coastguard Worker uzp2 v1.8b, v0.8b, v0.8b // top[base+1] 1870*c0909341SAndroid Build Coastguard Worker uzp1 v0.8b, v0.8b, v0.8b // top[base] 1871*c0909341SAndroid Build Coastguard Worker uzp2 v3.8b, v2.8b, v2.8b 1872*c0909341SAndroid Build Coastguard Worker uzp1 v2.8b, v2.8b, v2.8b 1873*c0909341SAndroid Build Coastguard Worker usubl v6.8h, v1.8b, v0.8b // top[base+1]-top[base] 1874*c0909341SAndroid Build Coastguard Worker usubl v7.8h, v3.8b, v2.8b 1875*c0909341SAndroid Build Coastguard Worker ushll v16.8h, v0.8b, #6 // top[base]*64 1876*c0909341SAndroid Build Coastguard Worker ushll
v17.8h, v2.8b, #6 1877*c0909341SAndroid Build Coastguard Worker mla v16.4h, v6.4h, v4.4h // + top[base+1]*frac 1878*c0909341SAndroid Build Coastguard Worker mla v17.4h, v7.4h, v5.4h 1879*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 1880*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v17.8h, #6 1881*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[0], [x0], x1 1882*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1883*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1884*c0909341SAndroid Build Coastguard Worker st1 {v17.s}[0], [x0], x1 1885*c0909341SAndroid Build Coastguard Worker b.gt 4b 1886*c0909341SAndroid Build Coastguard Worker ret 1887*c0909341SAndroid Build Coastguard Worker 1888*c0909341SAndroid Build Coastguard Worker49: 1889*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x0], x1 1890*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1891*c0909341SAndroid Build Coastguard Worker st1 {v31.s}[0], [x0], x1 1892*c0909341SAndroid Build Coastguard Worker b.gt 49b 1893*c0909341SAndroid Build Coastguard Worker ret 1894*c0909341SAndroid Build Coastguard Worker 1895*c0909341SAndroid Build Coastguard Worker8: // w == 8 1896*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 1897*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 1898*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1899*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_x 1900*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 1901*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 1902*c0909341SAndroid Build Coastguard Worker b.ge 89f 1903*c0909341SAndroid Build Coastguard Worker ldr q0, [x2, w8, uxtw] // top[base] 1904*c0909341SAndroid Build Coastguard Worker ldr q2, [x2, w10, uxtw] 1905*c0909341SAndroid Build Coastguard Worker dup v4.8b, w9 // frac 1906*c0909341SAndroid Build Coastguard Worker dup v5.8b, w11
1907*c0909341SAndroid Build Coastguard Worker sub w9, w15, w9 // 64 - frac 1908*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 1909*c0909341SAndroid Build Coastguard Worker dup v6.8b, w9 // 64 - frac 1910*c0909341SAndroid Build Coastguard Worker dup v7.8b, w11 1911*c0909341SAndroid Build Coastguard Worker uzp2 v1.16b, v0.16b, v0.16b // top[base+1] 1912*c0909341SAndroid Build Coastguard Worker uzp1 v0.16b, v0.16b, v0.16b // top[base] 1913*c0909341SAndroid Build Coastguard Worker uzp2 v3.16b, v2.16b, v2.16b 1914*c0909341SAndroid Build Coastguard Worker uzp1 v2.16b, v2.16b, v2.16b 1915*c0909341SAndroid Build Coastguard Worker umull v16.8h, v1.8b, v4.8b // top[base+1]*frac 1916*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v0.8b, v6.8b // + top[base]*(64-frac) 1917*c0909341SAndroid Build Coastguard Worker umull v17.8h, v3.8b, v5.8b 1918*c0909341SAndroid Build Coastguard Worker umlal v17.8h, v2.8b, v7.8b 1919*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 1920*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v17.8h, #6 1921*c0909341SAndroid Build Coastguard Worker st1 {v16.8b}, [x0], x1 1922*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // xpos += dx 1923*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1924*c0909341SAndroid Build Coastguard Worker st1 {v17.8b}, [x0], x1 1925*c0909341SAndroid Build Coastguard Worker b.gt 8b 1926*c0909341SAndroid Build Coastguard Worker ret 1927*c0909341SAndroid Build Coastguard Worker 1928*c0909341SAndroid Build Coastguard Worker89: 1929*c0909341SAndroid Build Coastguard Worker st1 {v31.8b}, [x0], x1 1930*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1931*c0909341SAndroid Build Coastguard Worker st1 {v31.8b}, [x0], x1 1932*c0909341SAndroid Build Coastguard Worker b.gt 89b 1933*c0909341SAndroid Build Coastguard Worker ret 1934*c0909341SAndroid Build Coastguard Workerendfunc 1935*c0909341SAndroid Build Coastguard Worker 1936*c0909341SAndroid Build Coastguard
// void ipred_reverse_8bpc_neon(pixel *dst, const pixel *const src,
//                              const int n);
//
// Writes the pixels that precede src into dst in reversed order, 16 bytes
// per iteration. src is walked backwards, beginning with the 16 bytes that
// end just before src (src[-16] .. src[-1]); dst is filled forwards from
// dst[0]. The loop repeats while the remaining count (n minus 16 per
// iteration) is still positive, so n is effectively rounded up to a
// multiple of 16 and at least one 16-byte group is always processed.
function ipred_reverse_8bpc_neon, export=1
        sub             x1,  x1,  #16           // step back to the last 16-byte group
        mov             x3,  #-16               // src moves backwards by 16 each round
1:
        ld1             {v0.16b}, [x1], x3      // fetch 16 pixels, then src -= 16
        subs            w2,  w2,  #16           // n -= 16; flags are consumed by b.gt
        rev64           v0.16b,  v0.16b         // reverse the bytes inside each 8-byte half
        ext             v0.16b,  v0.16b,  v0.16b,  #8 // swap the halves: all 16 bytes reversed
        st1             {v0.16b}, [x0], #16     // emit 16 reversed pixels, dst += 16
        b.gt            1b
        ret
endfunc

// Lane indices 0..15 stored as 16-bit values. ipred_z2_fill1_8bpc_neon
// loads them to form per-lane multiples of -dy and, narrowed to bytes,
// per-lane x offsets.
const increments
        .short          0,  1,  2,  3,  4,  5,  6,  7
        .short          8,  9,  10, 11, 12, 13, 14, 15
endconst

// void ipred_z2_fill1_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const top,
//                               const pixel *const left,
//                               const int width, const int height,
//                               const int dx, const int dy);
function ipred_z2_fill1_8bpc_neon, export=1
        clz             w10, w4
        movrel          x9,
ipred_z2_fill1_tbl 1966*c0909341SAndroid Build Coastguard Worker sub w10, w10, #25 1967*c0909341SAndroid Build Coastguard Worker ldrsw x10, [x9, w10, uxtw #2] 1968*c0909341SAndroid Build Coastguard Worker mov w8, #(1 << 6) // xpos = 1 << 6 1969*c0909341SAndroid Build Coastguard Worker add x9, x9, x10 1970*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 1971*c0909341SAndroid Build Coastguard Worker 1972*c0909341SAndroid Build Coastguard Worker movrel x11, increments 1973*c0909341SAndroid Build Coastguard Worker ld1 {v31.8h}, [x11] // increments 1974*c0909341SAndroid Build Coastguard Worker neg w7, w7 // -dy 1975*c0909341SAndroid Build Coastguard Worker 1976*c0909341SAndroid Build Coastguard Worker br x9 1977*c0909341SAndroid Build Coastguard Worker40: 1978*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1979*c0909341SAndroid Build Coastguard Worker 1980*c0909341SAndroid Build Coastguard Worker dup v30.4h, w7 // -dy 1981*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 1982*c0909341SAndroid Build Coastguard Worker 1983*c0909341SAndroid Build Coastguard Worker mul v16.4h, v31.4h, v30.4h // {0,1,2,3}* -dy 1984*c0909341SAndroid Build Coastguard Worker movi v25.16b, #0x3e 1985*c0909341SAndroid Build Coastguard Worker add v30.4h, v16.4h, v30.4h // -= dy 1986*c0909341SAndroid Build Coastguard Worker 1987*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3} 1988*c0909341SAndroid Build Coastguard Worker 1989*c0909341SAndroid Build Coastguard Worker // Worst case height for w=4 is 16, but we need at least h+1 elements 1990*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b}, [x3] // left[] 1991*c0909341SAndroid Build Coastguard Worker 1992*c0909341SAndroid Build Coastguard Worker movi v26.16b, #64 1993*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 1994*c0909341SAndroid Build Coastguard Worker 1995*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v30.8h // (uint8_t)ypos 
1996*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 1997*c0909341SAndroid Build Coastguard Worker and v27.8b, v27.8b, v25.8b // frac_y 1998*c0909341SAndroid Build Coastguard Worker 1999*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 1 2000*c0909341SAndroid Build Coastguard Worker 2001*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v17.8b // base_y + 1 2002*c0909341SAndroid Build Coastguard Worker add v28.8b, v29.8b, v19.8b // base_y + 2 2003*c0909341SAndroid Build Coastguard Worker 2004*c0909341SAndroid Build Coastguard Worker tbl v16.8b, {v0.16b}, v29.8b // left[base_y] 2005*c0909341SAndroid Build Coastguard Worker 2006*c0909341SAndroid Build Coastguard Worker trn1 v30.2s, v30.2s, v28.2s // base_y + 1, base_y + 2 2007*c0909341SAndroid Build Coastguard Worker 2008*c0909341SAndroid Build Coastguard Worker sub v28.8b, v26.8b, v27.8b // 64 - frac_y 2009*c0909341SAndroid Build Coastguard Worker 2010*c0909341SAndroid Build Coastguard Worker trn1 v31.2s, v31.2s, v31.2s // {0,1,2,3,0,1,2,3} 2011*c0909341SAndroid Build Coastguard Worker 2012*c0909341SAndroid Build Coastguard Worker trn1 v27.2s, v27.2s, v27.2s // frac_y 2013*c0909341SAndroid Build Coastguard Worker trn1 v28.2s, v28.2s, v28.2s // 64 - frac_y 2014*c0909341SAndroid Build Coastguard Worker 2015*c0909341SAndroid Build Coastguard Worker movi v29.8b, #2 2016*c0909341SAndroid Build Coastguard Worker4: 2017*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2018*c0909341SAndroid Build Coastguard Worker dup v6.4h, w8 // xpos 2019*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2020*c0909341SAndroid Build Coastguard Worker cmp w9, #-4 // base_x <= -4 2021*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2022*c0909341SAndroid Build Coastguard Worker b.le 49f 2023*c0909341SAndroid Build Coastguard Worker 2024*c0909341SAndroid Build Coastguard Worker dup v7.4h, w8 // xpos 
2025*c0909341SAndroid Build Coastguard Worker 2026*c0909341SAndroid Build Coastguard Worker ldr d2, [x2, w9, sxtw] // top[base_x] 2027*c0909341SAndroid Build Coastguard Worker ldr d4, [x2, w11, sxtw] 2028*c0909341SAndroid Build Coastguard Worker 2029*c0909341SAndroid Build Coastguard Worker trn1 v6.2d, v6.2d, v7.2d // xpos 2030*c0909341SAndroid Build Coastguard Worker 2031*c0909341SAndroid Build Coastguard Worker // Cut corners here; only doing tbl over v0 here; we only 2032*c0909341SAndroid Build Coastguard Worker // seem to need the last pixel, from v1, after skipping to the 2033*c0909341SAndroid Build Coastguard Worker // left-only codepath below. 2034*c0909341SAndroid Build Coastguard Worker tbl v17.8b, {v0.16b}, v30.8b // left[base_y+1], left[base_y+2] 2035*c0909341SAndroid Build Coastguard Worker 2036*c0909341SAndroid Build Coastguard Worker shrn v20.8b, v6.8h, #6 // first base_x for each row 2037*c0909341SAndroid Build Coastguard Worker xtn v6.8b, v6.8h // (uint8_t)xpos 2038*c0909341SAndroid Build Coastguard Worker 2039*c0909341SAndroid Build Coastguard Worker ext v3.8b, v2.8b, v2.8b, #1 // top[base_x+1] 2040*c0909341SAndroid Build Coastguard Worker ext v5.8b, v4.8b, v4.8b, #1 2041*c0909341SAndroid Build Coastguard Worker 2042*c0909341SAndroid Build Coastguard Worker and v6.8b, v6.8b, v25.8b // frac_x 2043*c0909341SAndroid Build Coastguard Worker 2044*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v17.2s // left[base_y], left[base_y+1] 2045*c0909341SAndroid Build Coastguard Worker 2046*c0909341SAndroid Build Coastguard Worker trn1 v2.2s, v2.2s, v4.2s // top[base_x] 2047*c0909341SAndroid Build Coastguard Worker trn1 v3.2s, v3.2s, v5.2s // top[base_x+1] 2048*c0909341SAndroid Build Coastguard Worker 2049*c0909341SAndroid Build Coastguard Worker sub v7.8b, v26.8b, v6.8b // 64 - frac_x 2050*c0909341SAndroid Build Coastguard Worker 2051*c0909341SAndroid Build Coastguard Worker add v20.8b, v20.8b, v31.8b // actual base_x 2052*c0909341SAndroid Build 
Coastguard Worker 2053*c0909341SAndroid Build Coastguard Worker umull v16.8h, v16.8b, v28.8b // left[base_y]*(64-frac_y) 2054*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v17.8b, v27.8b // + left[base_y+1]*frac_y 2055*c0909341SAndroid Build Coastguard Worker 2056*c0909341SAndroid Build Coastguard Worker umull v22.8h, v2.8b, v7.8b // top[base_x]-*(64-frac_x) 2057*c0909341SAndroid Build Coastguard Worker umlal v22.8h, v3.8b, v6.8b // + top[base_x+1]*frac_x 2058*c0909341SAndroid Build Coastguard Worker 2059*c0909341SAndroid Build Coastguard Worker cmge v20.8b, v20.8b, #0 2060*c0909341SAndroid Build Coastguard Worker 2061*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 2062*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #6 2063*c0909341SAndroid Build Coastguard Worker 2064*c0909341SAndroid Build Coastguard Worker bit v16.8b, v22.8b, v20.8b 2065*c0909341SAndroid Build Coastguard Worker 2066*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[0], [x0], x1 2067*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2068*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2069*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[1], [x0], x1 2070*c0909341SAndroid Build Coastguard Worker b.le 9f 2071*c0909341SAndroid Build Coastguard Worker 2072*c0909341SAndroid Build Coastguard Worker ext v16.8b, v17.8b, v17.8b, #4 2073*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v29.8b // base_y += 2 2074*c0909341SAndroid Build Coastguard Worker b 4b 2075*c0909341SAndroid Build Coastguard Worker 2076*c0909341SAndroid Build Coastguard Worker49: 2077*c0909341SAndroid Build Coastguard Worker tbl v17.8b, {v0.16b, v1.16b}, v30.8b // left[base_y+1], left[base_y+2] 2078*c0909341SAndroid Build Coastguard Worker 2079*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v17.2s // left[base_y], left[base_y+1] 2080*c0909341SAndroid Build Coastguard Worker 2081*c0909341SAndroid Build Coastguard Worker umull 
v18.8h, v16.8b, v28.8b // left[base_y]*(64-frac_t) 2082*c0909341SAndroid Build Coastguard Worker umlal v18.8h, v17.8b, v27.8b // + left[base_y+1]*frac_y 2083*c0909341SAndroid Build Coastguard Worker rshrn v18.8b, v18.8h, #6 2084*c0909341SAndroid Build Coastguard Worker 2085*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[0], [x0], x1 2086*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2087*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[1], [x0], x1 2088*c0909341SAndroid Build Coastguard Worker b.le 9f 2089*c0909341SAndroid Build Coastguard Worker 2090*c0909341SAndroid Build Coastguard Worker ext v16.8b, v17.8b, v17.8b, #4 2091*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v29.8b // base_y += 2 2092*c0909341SAndroid Build Coastguard Worker b 49b 2093*c0909341SAndroid Build Coastguard Worker 2094*c0909341SAndroid Build Coastguard Worker9: 2095*c0909341SAndroid Build Coastguard Worker ret 2096*c0909341SAndroid Build Coastguard Worker 2097*c0909341SAndroid Build Coastguard Worker80: 2098*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2099*c0909341SAndroid Build Coastguard Worker 2100*c0909341SAndroid Build Coastguard Worker dup v30.8h, w7 // -dy 2101*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 2102*c0909341SAndroid Build Coastguard Worker 2103*c0909341SAndroid Build Coastguard Worker mul v16.8h, v31.8h, v30.8h // {0,1,2,3,4,5,6,7}* -dy 2104*c0909341SAndroid Build Coastguard Worker movi v25.16b, #0x3e 2105*c0909341SAndroid Build Coastguard Worker add v30.8h, v16.8h, v30.8h // -= dy 2106*c0909341SAndroid Build Coastguard Worker 2107*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3,4,5,6,7} 2108*c0909341SAndroid Build Coastguard Worker 2109*c0909341SAndroid Build Coastguard Worker // Worst case height for w=8 is 32, but we need at least h+1 elements 2110*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b}, [x3] // left[] 2111*c0909341SAndroid Build Coastguard 
Worker 2112*c0909341SAndroid Build Coastguard Worker movi v26.16b, #64 2113*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 2114*c0909341SAndroid Build Coastguard Worker 2115*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v30.8h // (uint8_t)ypos 2116*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 2117*c0909341SAndroid Build Coastguard Worker and v27.8b, v27.8b, v25.8b // frac_y 2118*c0909341SAndroid Build Coastguard Worker 2119*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 1 2120*c0909341SAndroid Build Coastguard Worker 2121*c0909341SAndroid Build Coastguard Worker // Cut corners here; for the first row we don't expect to need to 2122*c0909341SAndroid Build Coastguard Worker // read outside of v0. 2123*c0909341SAndroid Build Coastguard Worker tbl v18.8b, {v0.16b}, v29.8b // left[base_y] 2124*c0909341SAndroid Build Coastguard Worker 2125*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v19.8b // base_y + 2 2126*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y + 1 2127*c0909341SAndroid Build Coastguard Worker 2128*c0909341SAndroid Build Coastguard Worker sub v28.8b, v26.8b, v27.8b // 64 - frac_y 2129*c0909341SAndroid Build Coastguard Worker 2130*c0909341SAndroid Build Coastguard Worker trn1 v31.2d, v31.2d, v31.2d // {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7} 2131*c0909341SAndroid Build Coastguard Worker 2132*c0909341SAndroid Build Coastguard Worker movi v24.8b, #2 // 2 2133*c0909341SAndroid Build Coastguard Worker8: 2134*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2135*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 2136*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2137*c0909341SAndroid Build Coastguard Worker cmp w9, #-8 // base_x <= -8 2138*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2139*c0909341SAndroid Build Coastguard Worker b.le 89f 
2140*c0909341SAndroid Build Coastguard Worker 2141*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 2142*c0909341SAndroid Build Coastguard Worker 2143*c0909341SAndroid Build Coastguard Worker ldr q4, [x2, w9, sxtw] // top[base_x] 2144*c0909341SAndroid Build Coastguard Worker ldr q6, [x2, w11, sxtw] 2145*c0909341SAndroid Build Coastguard Worker 2146*c0909341SAndroid Build Coastguard Worker // Cut corners here; only doing tbl over v0-v1 here; we only 2147*c0909341SAndroid Build Coastguard Worker // seem to need the last pixel, from v2, after skipping to the 2148*c0909341SAndroid Build Coastguard Worker // left-only codepath below. 2149*c0909341SAndroid Build Coastguard Worker tbl v19.8b, {v0.16b, v1.16b}, v29.8b // left[base_y+1] 2150*c0909341SAndroid Build Coastguard Worker 2151*c0909341SAndroid Build Coastguard Worker shrn v21.8b, v16.8h, #6 // first base_x 2152*c0909341SAndroid Build Coastguard Worker shrn2 v21.16b, v17.8h, #6 2153*c0909341SAndroid Build Coastguard Worker xtn v16.8b, v16.8h // (uint8_t)xpos 2154*c0909341SAndroid Build Coastguard Worker xtn2 v16.16b, v17.8h 2155*c0909341SAndroid Build Coastguard Worker 2156*c0909341SAndroid Build Coastguard Worker tbl v20.8b, {v0.16b, v1.16b}, v30.8b // left[base_y+2] 2157*c0909341SAndroid Build Coastguard Worker 2158*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v4.16b, #1 // top[base_x+1] 2159*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v6.16b, #1 2160*c0909341SAndroid Build Coastguard Worker 2161*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 2162*c0909341SAndroid Build Coastguard Worker 2163*c0909341SAndroid Build Coastguard Worker trn1 v4.2d, v4.2d, v6.2d // top[base_x] 2164*c0909341SAndroid Build Coastguard Worker trn1 v5.2d, v5.2d, v7.2d // top[base_x+1] 2165*c0909341SAndroid Build Coastguard Worker 2166*c0909341SAndroid Build Coastguard Worker sub v7.16b, v26.16b, v16.16b // 64 - frac_x 2167*c0909341SAndroid Build Coastguard 
Worker 2168*c0909341SAndroid Build Coastguard Worker add v21.16b, v21.16b, v31.16b // actual base_x 2169*c0909341SAndroid Build Coastguard Worker 2170*c0909341SAndroid Build Coastguard Worker umull v6.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2171*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2172*c0909341SAndroid Build Coastguard Worker umull v17.8h, v19.8b, v28.8b 2173*c0909341SAndroid Build Coastguard Worker umlal v17.8h, v20.8b, v27.8b 2174*c0909341SAndroid Build Coastguard Worker 2175*c0909341SAndroid Build Coastguard Worker umull v22.8h, v4.8b, v7.8b // top[base_x]-*(64-frac_x) 2176*c0909341SAndroid Build Coastguard Worker umlal v22.8h, v5.8b, v16.8b // + top[base_x+1]*frac_x 2177*c0909341SAndroid Build Coastguard Worker umull2 v23.8h, v4.16b, v7.16b 2178*c0909341SAndroid Build Coastguard Worker umlal2 v23.8h, v5.16b, v16.16b 2179*c0909341SAndroid Build Coastguard Worker 2180*c0909341SAndroid Build Coastguard Worker cmge v21.16b, v21.16b, #0 2181*c0909341SAndroid Build Coastguard Worker 2182*c0909341SAndroid Build Coastguard Worker rshrn v6.8b, v6.8h, #6 2183*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v17.8h, #6 2184*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #6 2185*c0909341SAndroid Build Coastguard Worker rshrn2 v22.16b, v23.8h, #6 2186*c0909341SAndroid Build Coastguard Worker 2187*c0909341SAndroid Build Coastguard Worker bit v6.16b, v22.16b, v21.16b 2188*c0909341SAndroid Build Coastguard Worker 2189*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[0], [x0], x1 2190*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2191*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2192*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[1], [x0], x1 2193*c0909341SAndroid Build Coastguard Worker b.le 9f 2194*c0909341SAndroid Build Coastguard Worker 2195*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2196*c0909341SAndroid Build Coastguard 
Worker add v29.8b, v29.8b, v24.8b // base_y += 2 2197*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v24.8b // base_y += 2 2198*c0909341SAndroid Build Coastguard Worker b 8b 2199*c0909341SAndroid Build Coastguard Worker 2200*c0909341SAndroid Build Coastguard Worker89: 2201*c0909341SAndroid Build Coastguard Worker tbl v19.8b, {v0.16b, v1.16b, v2.16b}, v29.8b // left[base_y+1] 2202*c0909341SAndroid Build Coastguard Worker tbl v20.8b, {v0.16b, v1.16b, v2.16b}, v30.8b // left[base_y+2] 2203*c0909341SAndroid Build Coastguard Worker 2204*c0909341SAndroid Build Coastguard Worker umull v6.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2205*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2206*c0909341SAndroid Build Coastguard Worker umull v17.8h, v19.8b, v28.8b 2207*c0909341SAndroid Build Coastguard Worker umlal v17.8h, v20.8b, v27.8b 2208*c0909341SAndroid Build Coastguard Worker 2209*c0909341SAndroid Build Coastguard Worker rshrn v6.8b, v6.8h, #6 2210*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v17.8h, #6 2211*c0909341SAndroid Build Coastguard Worker 2212*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[0], [x0], x1 2213*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2214*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[1], [x0], x1 2215*c0909341SAndroid Build Coastguard Worker b.le 9f 2216*c0909341SAndroid Build Coastguard Worker 2217*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2218*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v24.8b // base_y += 2 2219*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v24.8b // base_y += 2 2220*c0909341SAndroid Build Coastguard Worker b 89b 2221*c0909341SAndroid Build Coastguard Worker 2222*c0909341SAndroid Build Coastguard Worker9: 2223*c0909341SAndroid Build Coastguard Worker ret 2224*c0909341SAndroid Build Coastguard Worker 2225*c0909341SAndroid Build Coastguard Worker160: 2226*c0909341SAndroid Build 
Coastguard Worker AARCH64_VALID_JUMP_TARGET 2227*c0909341SAndroid Build Coastguard Worker 2228*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x40]! 2229*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 2230*c0909341SAndroid Build Coastguard Worker stp d12, d13, [sp, #0x20] 2231*c0909341SAndroid Build Coastguard Worker stp d14, d15, [sp, #0x30] 2232*c0909341SAndroid Build Coastguard Worker 2233*c0909341SAndroid Build Coastguard Worker add x11, x11, #16 // increments 2234*c0909341SAndroid Build Coastguard Worker 2235*c0909341SAndroid Build Coastguard Worker dup v18.8h, w7 // -dy 2236*c0909341SAndroid Build Coastguard Worker movi v17.16b, #1 2237*c0909341SAndroid Build Coastguard Worker add x3, x3, #1 // Skip past left[0] 2238*c0909341SAndroid Build Coastguard Worker 2239*c0909341SAndroid Build Coastguard Worker ld1 {v14.8h}, [x11] // {8,9,10,11,12,13,14,15} 2240*c0909341SAndroid Build Coastguard Worker 2241*c0909341SAndroid Build Coastguard Worker mul v16.8h, v31.8h, v18.8h // {0,1,2,3,4,5,6,7}* -dy 2242*c0909341SAndroid Build Coastguard Worker mul v19.8h, v14.8h, v18.8h // {8,9,10,11,12,13,14,15}* -dy 2243*c0909341SAndroid Build Coastguard Worker movi v25.16b, #0x3e 2244*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v18.8h // -= dy 2245*c0909341SAndroid Build Coastguard Worker add v18.8h, v19.8h, v18.8h 2246*c0909341SAndroid Build Coastguard Worker 2247*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3,4,5,6,7} 2248*c0909341SAndroid Build Coastguard Worker xtn2 v31.16b, v14.8h // {8,9,10,11,12,13,14,15} 2249*c0909341SAndroid Build Coastguard Worker 2250*c0909341SAndroid Build Coastguard Worker // Worst case height is 64. 
2251*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x3] // left[] 2252*c0909341SAndroid Build Coastguard Worker ld1r {v15.16b}, [x2] // left[0] == top[0] 2253*c0909341SAndroid Build Coastguard Worker 2254*c0909341SAndroid Build Coastguard Worker movi v26.16b, #64 2255*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 2256*c0909341SAndroid Build Coastguard Worker 2257*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v16.8h // (uint8_t)ypos 2258*c0909341SAndroid Build Coastguard Worker xtn2 v27.16b, v18.8h 2259*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v16.8h, #6 // ypos >> 6 2260*c0909341SAndroid Build Coastguard Worker shrn2 v29.16b, v18.8h, #6 2261*c0909341SAndroid Build Coastguard Worker mov v18.16b, v15.16b // left[0] 2262*c0909341SAndroid Build Coastguard Worker and v27.16b, v27.16b, v25.16b // frac_y 2263*c0909341SAndroid Build Coastguard Worker 2264*c0909341SAndroid Build Coastguard Worker // Cut corners here; for the first row we don't expect to need to 2265*c0909341SAndroid Build Coastguard Worker // read outside of v0. 
2266*c0909341SAndroid Build Coastguard Worker tbx v18.16b, {v0.16b}, v29.16b // left[base_y] 2267*c0909341SAndroid Build Coastguard Worker 2268*c0909341SAndroid Build Coastguard Worker add v30.16b, v29.16b, v19.16b // base_y + 2 2269*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v17.16b // base_y + 1 2270*c0909341SAndroid Build Coastguard Worker 2271*c0909341SAndroid Build Coastguard Worker sub v28.16b, v26.16b, v27.16b // 64 - frac_y 2272*c0909341SAndroid Build Coastguard Worker 2273*c0909341SAndroid Build Coastguard Worker movi v24.16b, #2 // 2 2274*c0909341SAndroid Build Coastguard Worker16: 2275*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2276*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 2277*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2278*c0909341SAndroid Build Coastguard Worker cmp w9, #-16 // base_x <= -16 2279*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2280*c0909341SAndroid Build Coastguard Worker b.le 169f 2281*c0909341SAndroid Build Coastguard Worker 2282*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 2283*c0909341SAndroid Build Coastguard Worker 2284*c0909341SAndroid Build Coastguard Worker add x9, x2, w9, sxtw 2285*c0909341SAndroid Build Coastguard Worker add x11, x2, w11, sxtw 2286*c0909341SAndroid Build Coastguard Worker 2287*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b, v5.16b}, [x9] // top[base_x] 2288*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b // left[0] 2289*c0909341SAndroid Build Coastguard Worker ld1 {v6.16b, v7.16b}, [x11] 2290*c0909341SAndroid Build Coastguard Worker 2291*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+1] 2292*c0909341SAndroid Build Coastguard Worker 2293*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b // left[0] 2294*c0909341SAndroid Build Coastguard Worker 2295*c0909341SAndroid Build Coastguard 
Worker shrn v21.8b, v16.8h, #6 // first base_x 2296*c0909341SAndroid Build Coastguard Worker shrn v22.8b, v17.8h, #6 2297*c0909341SAndroid Build Coastguard Worker xtn v16.8b, v16.8h // (uint8_t)xpos 2298*c0909341SAndroid Build Coastguard Worker xtn v17.8b, v17.8h 2299*c0909341SAndroid Build Coastguard Worker 2300*c0909341SAndroid Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b // left[base_y+2] 2301*c0909341SAndroid Build Coastguard Worker 2302*c0909341SAndroid Build Coastguard Worker trn1 v21.2d, v21.2d, v21.2d // first base_x 2303*c0909341SAndroid Build Coastguard Worker trn1 v22.2d, v22.2d, v22.2d 2304*c0909341SAndroid Build Coastguard Worker trn1 v16.2d, v16.2d, v16.2d // (uint8_t)xpos 2305*c0909341SAndroid Build Coastguard Worker trn1 v17.2d, v17.2d, v17.2d 2306*c0909341SAndroid Build Coastguard Worker 2307*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v5.16b, #1 // top[base_x+1] 2308*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v7.16b, #1 2309*c0909341SAndroid Build Coastguard Worker 2310*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 2311*c0909341SAndroid Build Coastguard Worker and v17.16b, v17.16b, v25.16b 2312*c0909341SAndroid Build Coastguard Worker 2313*c0909341SAndroid Build Coastguard Worker umull v10.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2314*c0909341SAndroid Build Coastguard Worker umlal v10.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2315*c0909341SAndroid Build Coastguard Worker 2316*c0909341SAndroid Build Coastguard Worker sub v8.16b, v26.16b, v16.16b // 64 - frac_x 2317*c0909341SAndroid Build Coastguard Worker sub v9.16b, v26.16b, v17.16b 2318*c0909341SAndroid Build Coastguard Worker 2319*c0909341SAndroid Build Coastguard Worker umull2 v11.8h, v18.16b, v28.16b 2320*c0909341SAndroid Build Coastguard Worker umlal2 v11.8h, v19.16b, v27.16b 2321*c0909341SAndroid Build Coastguard Worker 2322*c0909341SAndroid Build Coastguard Worker add v21.16b, 
v21.16b, v31.16b // actual base_x 2323*c0909341SAndroid Build Coastguard Worker add v22.16b, v22.16b, v31.16b 2324*c0909341SAndroid Build Coastguard Worker 2325*c0909341SAndroid Build Coastguard Worker umull v12.8h, v19.8b, v28.8b 2326*c0909341SAndroid Build Coastguard Worker umlal v12.8h, v20.8b, v27.8b 2327*c0909341SAndroid Build Coastguard Worker umull2 v13.8h, v19.16b, v28.16b 2328*c0909341SAndroid Build Coastguard Worker umlal2 v13.8h, v20.16b, v27.16b 2329*c0909341SAndroid Build Coastguard Worker 2330*c0909341SAndroid Build Coastguard Worker rshrn v10.8b, v10.8h, #6 2331*c0909341SAndroid Build Coastguard Worker rshrn2 v10.16b, v11.8h, #6 2332*c0909341SAndroid Build Coastguard Worker rshrn v11.8b, v12.8h, #6 2333*c0909341SAndroid Build Coastguard Worker rshrn2 v11.16b, v13.8h, #6 2334*c0909341SAndroid Build Coastguard Worker 2335*c0909341SAndroid Build Coastguard Worker umull v12.8h, v4.8b, v8.8b // top[base_x]-*(64-frac_x) 2336*c0909341SAndroid Build Coastguard Worker umlal v12.8h, v5.8b, v16.8b // + top[base_x+1]*frac_x 2337*c0909341SAndroid Build Coastguard Worker umull2 v13.8h, v4.16b, v8.16b 2338*c0909341SAndroid Build Coastguard Worker umlal2 v13.8h, v5.16b, v16.16b 2339*c0909341SAndroid Build Coastguard Worker umull v14.8h, v6.8b, v9.8b 2340*c0909341SAndroid Build Coastguard Worker umlal v14.8h, v7.8b, v17.8b 2341*c0909341SAndroid Build Coastguard Worker umull2 v18.8h, v6.16b, v9.16b 2342*c0909341SAndroid Build Coastguard Worker umlal2 v18.8h, v7.16b, v17.16b 2343*c0909341SAndroid Build Coastguard Worker 2344*c0909341SAndroid Build Coastguard Worker cmge v21.16b, v21.16b, #0 2345*c0909341SAndroid Build Coastguard Worker cmge v22.16b, v22.16b, #0 2346*c0909341SAndroid Build Coastguard Worker 2347*c0909341SAndroid Build Coastguard Worker rshrn v12.8b, v12.8h, #6 2348*c0909341SAndroid Build Coastguard Worker rshrn2 v12.16b, v13.8h, #6 2349*c0909341SAndroid Build Coastguard Worker rshrn v13.8b, v14.8h, #6 2350*c0909341SAndroid Build Coastguard Worker rshrn2 
v13.16b, v18.8h, #6 2351*c0909341SAndroid Build Coastguard Worker 2352*c0909341SAndroid Build Coastguard Worker bit v10.16b, v12.16b, v21.16b 2353*c0909341SAndroid Build Coastguard Worker bit v11.16b, v13.16b, v22.16b 2354*c0909341SAndroid Build Coastguard Worker 2355*c0909341SAndroid Build Coastguard Worker st1 {v10.16b}, [x0], x1 2356*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2357*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2358*c0909341SAndroid Build Coastguard Worker st1 {v11.16b}, [x0], x1 2359*c0909341SAndroid Build Coastguard Worker b.le 9f 2360*c0909341SAndroid Build Coastguard Worker 2361*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2362*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 2363*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 2 2364*c0909341SAndroid Build Coastguard Worker b 16b 2365*c0909341SAndroid Build Coastguard Worker 2366*c0909341SAndroid Build Coastguard Worker169: 2367*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b 2368*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b 2369*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+1] 2370*c0909341SAndroid Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b // left[base_y+2] 2371*c0909341SAndroid Build Coastguard Worker 2372*c0909341SAndroid Build Coastguard Worker umull v4.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2373*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2374*c0909341SAndroid Build Coastguard Worker umull2 v5.8h, v18.16b, v28.16b 2375*c0909341SAndroid Build Coastguard Worker umlal2 v5.8h, v19.16b, v27.16b 2376*c0909341SAndroid Build Coastguard Worker umull v6.8h, v19.8b, v28.8b 2377*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v20.8b, v27.8b 2378*c0909341SAndroid Build Coastguard 
Worker umull2 v7.8h, v19.16b, v28.16b 2379*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v20.16b, v27.16b 2380*c0909341SAndroid Build Coastguard Worker 2381*c0909341SAndroid Build Coastguard Worker rshrn v4.8b, v4.8h, #6 2382*c0909341SAndroid Build Coastguard Worker rshrn2 v4.16b, v5.8h, #6 2383*c0909341SAndroid Build Coastguard Worker rshrn v5.8b, v6.8h, #6 2384*c0909341SAndroid Build Coastguard Worker rshrn2 v5.16b, v7.8h, #6 2385*c0909341SAndroid Build Coastguard Worker 2386*c0909341SAndroid Build Coastguard Worker st1 {v4.16b}, [x0], x1 2387*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2388*c0909341SAndroid Build Coastguard Worker st1 {v5.16b}, [x0], x1 2389*c0909341SAndroid Build Coastguard Worker b.le 9f 2390*c0909341SAndroid Build Coastguard Worker 2391*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2392*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 2 2393*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 2 2394*c0909341SAndroid Build Coastguard Worker b 169b 2395*c0909341SAndroid Build Coastguard Worker 2396*c0909341SAndroid Build Coastguard Worker9: 2397*c0909341SAndroid Build Coastguard Worker ldp d14, d15, [sp, #0x30] 2398*c0909341SAndroid Build Coastguard Worker ldp d12, d13, [sp, #0x20] 2399*c0909341SAndroid Build Coastguard Worker ldp d10, d11, [sp, #0x10] 2400*c0909341SAndroid Build Coastguard Worker ldp d8, d9, [sp], 0x40 2401*c0909341SAndroid Build Coastguard Worker ret 2402*c0909341SAndroid Build Coastguard Worker 2403*c0909341SAndroid Build Coastguard Worker320: 2404*c0909341SAndroid Build Coastguard Worker640: 2405*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2406*c0909341SAndroid Build Coastguard Worker 2407*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x40]! 
2408*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 2409*c0909341SAndroid Build Coastguard Worker stp d12, d13, [sp, #0x20] 2410*c0909341SAndroid Build Coastguard Worker stp d14, d15, [sp, #0x30] 2411*c0909341SAndroid Build Coastguard Worker 2412*c0909341SAndroid Build Coastguard Worker add x11, x11, #16 // increments 2413*c0909341SAndroid Build Coastguard Worker 2414*c0909341SAndroid Build Coastguard Worker dup v25.8h, w7 // -dy 2415*c0909341SAndroid Build Coastguard Worker add x3, x3, #1 // Skip past left[0] 2416*c0909341SAndroid Build Coastguard Worker 2417*c0909341SAndroid Build Coastguard Worker ld1 {v14.8h}, [x11] // {8,9,10,11,12,13,14,15} 2418*c0909341SAndroid Build Coastguard Worker 2419*c0909341SAndroid Build Coastguard Worker add x13, x0, x1 // alternating row 2420*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 // stride *= 2 2421*c0909341SAndroid Build Coastguard Worker sub x1, x1, w4, uxtw // stride -= width 2422*c0909341SAndroid Build Coastguard Worker 2423*c0909341SAndroid Build Coastguard Worker movi v11.8h, #8 2424*c0909341SAndroid Build Coastguard Worker mul v26.8h, v31.8h, v25.8h // {0,1,2,3,4,5,6,7}* -dy 2425*c0909341SAndroid Build Coastguard Worker add v26.8h, v26.8h, v25.8h // -= dy 2426*c0909341SAndroid Build Coastguard Worker mul v25.8h, v25.8h, v11.8h // -8*dy 2427*c0909341SAndroid Build Coastguard Worker 2428*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3,4,5,6,7} 2429*c0909341SAndroid Build Coastguard Worker xtn2 v31.16b, v14.8h // {8,9,10,11,12,13,14,15} 2430*c0909341SAndroid Build Coastguard Worker 2431*c0909341SAndroid Build Coastguard Worker // Worst case height is 64. 
2432*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x3] // left[] 2433*c0909341SAndroid Build Coastguard Worker ld1r {v15.16b}, [x2] // left[0] == top[0] 2434*c0909341SAndroid Build Coastguard Worker 2435*c0909341SAndroid Build Coastguard Worker mov w12, w4 // orig w 2436*c0909341SAndroid Build Coastguard Worker neg w14, w4 // -w 2437*c0909341SAndroid Build Coastguard Worker 2438*c0909341SAndroid Build Coastguard Worker1: 2439*c0909341SAndroid Build Coastguard Worker mov v23.16b, v26.16b // reset ypos 2440*c0909341SAndroid Build Coastguard Worker 2441*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2442*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 2443*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2444*c0909341SAndroid Build Coastguard Worker cmp w9, w14 // base_x <= -w 2445*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2446*c0909341SAndroid Build Coastguard Worker b.le 329f 2447*c0909341SAndroid Build Coastguard Worker 2448*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 2449*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2450*c0909341SAndroid Build Coastguard Worker 2451*c0909341SAndroid Build Coastguard Worker add x9, x2, w9, sxtw 2452*c0909341SAndroid Build Coastguard Worker add x11, x2, w11, sxtw 2453*c0909341SAndroid Build Coastguard Worker 2454*c0909341SAndroid Build Coastguard Worker sqshrn v21.8b, v16.8h, #6 // first base_x 2455*c0909341SAndroid Build Coastguard Worker sqshrn v22.8b, v17.8h, #6 2456*c0909341SAndroid Build Coastguard Worker xtn v16.8b, v16.8h // (uint8_t)xpos 2457*c0909341SAndroid Build Coastguard Worker xtn v17.8b, v17.8h 2458*c0909341SAndroid Build Coastguard Worker 2459*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b}, [x9], #16 // top[base_x] 2460*c0909341SAndroid Build Coastguard Worker ld1 {v6.16b}, [x11], #16 2461*c0909341SAndroid Build Coastguard Worker 2462*c0909341SAndroid 
Build Coastguard Worker trn1 v21.2d, v21.2d, v21.2d // first base_x 2463*c0909341SAndroid Build Coastguard Worker trn1 v22.2d, v22.2d, v22.2d 2464*c0909341SAndroid Build Coastguard Worker trn1 v16.2d, v16.2d, v16.2d // (uint8_t)xpos 2465*c0909341SAndroid Build Coastguard Worker trn1 v17.2d, v17.2d, v17.2d 2466*c0909341SAndroid Build Coastguard Worker 2467*c0909341SAndroid Build Coastguard Worker movi v10.16b, #0x3e 2468*c0909341SAndroid Build Coastguard Worker movi v11.16b, #64 2469*c0909341SAndroid Build Coastguard Worker 2470*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v10.16b // frac_x 2471*c0909341SAndroid Build Coastguard Worker and v17.16b, v17.16b, v10.16b 2472*c0909341SAndroid Build Coastguard Worker 2473*c0909341SAndroid Build Coastguard Worker sub v8.16b, v11.16b, v16.16b // 64 - frac_x 2474*c0909341SAndroid Build Coastguard Worker sub v9.16b, v11.16b, v17.16b 2475*c0909341SAndroid Build Coastguard Worker 2476*c0909341SAndroid Build Coastguard Worker add v21.16b, v21.16b, v31.16b // actual base_x 2477*c0909341SAndroid Build Coastguard Worker add v22.16b, v22.16b, v31.16b 2478*c0909341SAndroid Build Coastguard Worker 2479*c0909341SAndroid Build Coastguard Worker2: 2480*c0909341SAndroid Build Coastguard Worker add v13.8h, v23.8h, v25.8h // ypos -= 8*dy 2481*c0909341SAndroid Build Coastguard Worker movi v12.16b, #64 2482*c0909341SAndroid Build Coastguard Worker movi v20.16b, #2 2483*c0909341SAndroid Build Coastguard Worker movi v10.16b, #0x3e 2484*c0909341SAndroid Build Coastguard Worker 2485*c0909341SAndroid Build Coastguard Worker smov w10, v22.b[0] 2486*c0909341SAndroid Build Coastguard Worker 2487*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v23.8h // (uint8_t)ypos 2488*c0909341SAndroid Build Coastguard Worker xtn2 v27.16b, v13.8h 2489*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v23.8h, #6 // ypos >> 6 2490*c0909341SAndroid Build Coastguard Worker shrn2 v29.16b, v13.8h, #6 2491*c0909341SAndroid Build Coastguard Worker 
cmp w10, #0 // base_x (bottom left) >= 0 2492*c0909341SAndroid Build Coastguard Worker and v27.16b, v27.16b, v10.16b // frac_y 2493*c0909341SAndroid Build Coastguard Worker 2494*c0909341SAndroid Build Coastguard Worker mov v18.16b, v15.16b // left[0] 2495*c0909341SAndroid Build Coastguard Worker 2496*c0909341SAndroid Build Coastguard Worker b.ge 4f 2497*c0909341SAndroid Build Coastguard Worker 2498*c0909341SAndroid Build Coastguard Worker add v23.8h, v13.8h, v25.8h // ypos -= 8*dy 2499*c0909341SAndroid Build Coastguard Worker movi v13.16b, #1 2500*c0909341SAndroid Build Coastguard Worker 2501*c0909341SAndroid Build Coastguard Worker tbx v18.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y] 2502*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v13.16b // base_y + 1 2503*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b // left[0] 2504*c0909341SAndroid Build Coastguard Worker 2505*c0909341SAndroid Build Coastguard Worker sub v28.16b, v12.16b, v27.16b // 64 - frac_y 2506*c0909341SAndroid Build Coastguard Worker 2507*c0909341SAndroid Build Coastguard Worker ld1 {v5.16b}, [x9], #16 // top[base_x] 2508*c0909341SAndroid Build Coastguard Worker ld1 {v7.16b}, [x11], #16 2509*c0909341SAndroid Build Coastguard Worker 2510*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+1] 2511*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v13.16b // base_y + 2 2512*c0909341SAndroid Build Coastguard Worker 2513*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b // left[0] 2514*c0909341SAndroid Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+2] 2515*c0909341SAndroid Build Coastguard Worker 2516*c0909341SAndroid Build Coastguard Worker umull v10.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2517*c0909341SAndroid Build Coastguard Worker umlal v10.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2518*c0909341SAndroid Build 
Coastguard Worker umull2 v11.8h, v18.16b, v28.16b 2519*c0909341SAndroid Build Coastguard Worker umlal2 v11.8h, v19.16b, v27.16b 2520*c0909341SAndroid Build Coastguard Worker umull v12.8h, v19.8b, v28.8b 2521*c0909341SAndroid Build Coastguard Worker umlal v12.8h, v20.8b, v27.8b 2522*c0909341SAndroid Build Coastguard Worker umull2 v13.8h, v19.16b, v28.16b 2523*c0909341SAndroid Build Coastguard Worker umlal2 v13.8h, v20.16b, v27.16b 2524*c0909341SAndroid Build Coastguard Worker 2525*c0909341SAndroid Build Coastguard Worker ext v18.16b, v4.16b, v5.16b, #1 // top[base_x+1] 2526*c0909341SAndroid Build Coastguard Worker ext v19.16b, v6.16b, v7.16b, #1 2527*c0909341SAndroid Build Coastguard Worker 2528*c0909341SAndroid Build Coastguard Worker rshrn v10.8b, v10.8h, #6 2529*c0909341SAndroid Build Coastguard Worker rshrn2 v10.16b, v11.8h, #6 2530*c0909341SAndroid Build Coastguard Worker rshrn v11.8b, v12.8h, #6 2531*c0909341SAndroid Build Coastguard Worker rshrn2 v11.16b, v13.8h, #6 2532*c0909341SAndroid Build Coastguard Worker 2533*c0909341SAndroid Build Coastguard Worker umull v12.8h, v4.8b, v8.8b // top[base_x]-*(64-frac_x) 2534*c0909341SAndroid Build Coastguard Worker umlal v12.8h, v18.8b, v16.8b // + top[base_x+1]*frac_x 2535*c0909341SAndroid Build Coastguard Worker umull2 v13.8h, v4.16b, v8.16b 2536*c0909341SAndroid Build Coastguard Worker umlal2 v13.8h, v18.16b, v16.16b 2537*c0909341SAndroid Build Coastguard Worker umull v14.8h, v6.8b, v9.8b 2538*c0909341SAndroid Build Coastguard Worker umlal v14.8h, v19.8b, v17.8b 2539*c0909341SAndroid Build Coastguard Worker umull2 v20.8h, v6.16b, v9.16b 2540*c0909341SAndroid Build Coastguard Worker umlal2 v20.8h, v19.16b, v17.16b 2541*c0909341SAndroid Build Coastguard Worker 2542*c0909341SAndroid Build Coastguard Worker cmge v18.16b, v21.16b, #0 2543*c0909341SAndroid Build Coastguard Worker cmge v19.16b, v22.16b, #0 2544*c0909341SAndroid Build Coastguard Worker 2545*c0909341SAndroid Build Coastguard Worker rshrn v12.8b, v12.8h, #6 
2546*c0909341SAndroid Build Coastguard Worker rshrn2 v12.16b, v13.8h, #6 2547*c0909341SAndroid Build Coastguard Worker rshrn v13.8b, v14.8h, #6 2548*c0909341SAndroid Build Coastguard Worker rshrn2 v13.16b, v20.8h, #6 2549*c0909341SAndroid Build Coastguard Worker 2550*c0909341SAndroid Build Coastguard Worker bit v10.16b, v12.16b, v18.16b 2551*c0909341SAndroid Build Coastguard Worker bit v11.16b, v13.16b, v19.16b 2552*c0909341SAndroid Build Coastguard Worker 2553*c0909341SAndroid Build Coastguard Worker st1 {v10.16b}, [x0], #16 2554*c0909341SAndroid Build Coastguard Worker subs w4, w4, #16 2555*c0909341SAndroid Build Coastguard Worker st1 {v11.16b}, [x13], #16 2556*c0909341SAndroid Build Coastguard Worker b.le 3f 2557*c0909341SAndroid Build Coastguard Worker 2558*c0909341SAndroid Build Coastguard Worker movi v10.16b, #16 2559*c0909341SAndroid Build Coastguard Worker mov v4.16b, v5.16b 2560*c0909341SAndroid Build Coastguard Worker mov v6.16b, v7.16b 2561*c0909341SAndroid Build Coastguard Worker add v21.16b, v21.16b, v10.16b // base_x += 16 2562*c0909341SAndroid Build Coastguard Worker add v22.16b, v22.16b, v10.16b 2563*c0909341SAndroid Build Coastguard Worker b 2b 2564*c0909341SAndroid Build Coastguard Worker 2565*c0909341SAndroid Build Coastguard Worker3: 2566*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2567*c0909341SAndroid Build Coastguard Worker b.le 9f 2568*c0909341SAndroid Build Coastguard Worker movi v10.8h, #128 2569*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 2570*c0909341SAndroid Build Coastguard Worker add x13, x13, x1 2571*c0909341SAndroid Build Coastguard Worker mov w4, w12 // reset w 2572*c0909341SAndroid Build Coastguard Worker add v26.8h, v26.8h, v10.8h // ypos += 2*(1<<6) 2573*c0909341SAndroid Build Coastguard Worker b 1b 2574*c0909341SAndroid Build Coastguard Worker 2575*c0909341SAndroid Build Coastguard Worker4: // The rest of the row only predicted from top[] 2576*c0909341SAndroid Build Coastguard Worker ld1 {v5.16b}, [x9], 
#16 // top[base_x] 2577*c0909341SAndroid Build Coastguard Worker ld1 {v7.16b}, [x11], #16 2578*c0909341SAndroid Build Coastguard Worker 2579*c0909341SAndroid Build Coastguard Worker ext v18.16b, v4.16b, v5.16b, #1 // top[base_x+1] 2580*c0909341SAndroid Build Coastguard Worker ext v19.16b, v6.16b, v7.16b, #1 2581*c0909341SAndroid Build Coastguard Worker 2582*c0909341SAndroid Build Coastguard Worker umull v12.8h, v4.8b, v8.8b // top[base_x]-*(64-frac_x) 2583*c0909341SAndroid Build Coastguard Worker umlal v12.8h, v18.8b, v16.8b // + top[base_x+1]*frac_x 2584*c0909341SAndroid Build Coastguard Worker umull2 v13.8h, v4.16b, v8.16b 2585*c0909341SAndroid Build Coastguard Worker umlal2 v13.8h, v18.16b, v16.16b 2586*c0909341SAndroid Build Coastguard Worker umull v14.8h, v6.8b, v9.8b 2587*c0909341SAndroid Build Coastguard Worker umlal v14.8h, v19.8b, v17.8b 2588*c0909341SAndroid Build Coastguard Worker umull2 v20.8h, v6.16b, v9.16b 2589*c0909341SAndroid Build Coastguard Worker umlal2 v20.8h, v19.16b, v17.16b 2590*c0909341SAndroid Build Coastguard Worker 2591*c0909341SAndroid Build Coastguard Worker rshrn v12.8b, v12.8h, #6 2592*c0909341SAndroid Build Coastguard Worker rshrn2 v12.16b, v13.8h, #6 2593*c0909341SAndroid Build Coastguard Worker rshrn v13.8b, v14.8h, #6 2594*c0909341SAndroid Build Coastguard Worker rshrn2 v13.16b, v20.8h, #6 2595*c0909341SAndroid Build Coastguard Worker 2596*c0909341SAndroid Build Coastguard Worker st1 {v12.16b}, [x0], #16 2597*c0909341SAndroid Build Coastguard Worker subs w4, w4, #16 2598*c0909341SAndroid Build Coastguard Worker st1 {v13.16b}, [x13], #16 2599*c0909341SAndroid Build Coastguard Worker b.le 3b 2600*c0909341SAndroid Build Coastguard Worker 2601*c0909341SAndroid Build Coastguard Worker mov v4.16b, v5.16b 2602*c0909341SAndroid Build Coastguard Worker mov v6.16b, v7.16b 2603*c0909341SAndroid Build Coastguard Worker b 4b 2604*c0909341SAndroid Build Coastguard Worker 2605*c0909341SAndroid Build Coastguard Worker329: // The rest of the 
block only predicted from left[] 2606*c0909341SAndroid Build Coastguard Worker add x1, x1, w4, uxtw // restore stride 2607*c0909341SAndroid Build Coastguard Worker mov w12, w5 // orig remaining h 2608*c0909341SAndroid Build Coastguard Worker1: 2609*c0909341SAndroid Build Coastguard Worker add v13.8h, v23.8h, v25.8h // ypos -= 8*dy 2610*c0909341SAndroid Build Coastguard Worker movi v12.16b, #64 2611*c0909341SAndroid Build Coastguard Worker movi v10.16b, #0x3e 2612*c0909341SAndroid Build Coastguard Worker 2613*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v23.8h // (uint8_t)ypos 2614*c0909341SAndroid Build Coastguard Worker xtn2 v27.16b, v13.8h 2615*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v23.8h, #6 // ypos >> 6 2616*c0909341SAndroid Build Coastguard Worker shrn2 v29.16b, v13.8h, #6 2617*c0909341SAndroid Build Coastguard Worker and v27.16b, v27.16b, v10.16b // frac_y 2618*c0909341SAndroid Build Coastguard Worker 2619*c0909341SAndroid Build Coastguard Worker mov v18.16b, v15.16b // left[0] 2620*c0909341SAndroid Build Coastguard Worker add v23.8h, v13.8h, v25.8h // ypos -= 8*dy 2621*c0909341SAndroid Build Coastguard Worker movi v21.16b, #1 2622*c0909341SAndroid Build Coastguard Worker 2623*c0909341SAndroid Build Coastguard Worker tbx v18.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y] 2624*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // base_y + 1 2625*c0909341SAndroid Build Coastguard Worker 2626*c0909341SAndroid Build Coastguard Worker sub v28.16b, v12.16b, v27.16b // 64 - frac_y 2627*c0909341SAndroid Build Coastguard Worker2: 2628*c0909341SAndroid Build Coastguard Worker mov v19.16b, v15.16b // left[0] 2629*c0909341SAndroid Build Coastguard Worker tbx v19.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+1] 2630*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // base_y + 2 2631*c0909341SAndroid Build Coastguard Worker mov v20.16b, v15.16b // left[0] 2632*c0909341SAndroid 
Build Coastguard Worker tbx v20.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v29.16b // left[base_y+2] 2633*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v21.16b // next base_y 2634*c0909341SAndroid Build Coastguard Worker 2635*c0909341SAndroid Build Coastguard Worker umull v10.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2636*c0909341SAndroid Build Coastguard Worker umlal v10.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2637*c0909341SAndroid Build Coastguard Worker umull2 v11.8h, v18.16b, v28.16b 2638*c0909341SAndroid Build Coastguard Worker umlal2 v11.8h, v19.16b, v27.16b 2639*c0909341SAndroid Build Coastguard Worker umull v12.8h, v19.8b, v28.8b 2640*c0909341SAndroid Build Coastguard Worker umlal v12.8h, v20.8b, v27.8b 2641*c0909341SAndroid Build Coastguard Worker umull2 v13.8h, v19.16b, v28.16b 2642*c0909341SAndroid Build Coastguard Worker umlal2 v13.8h, v20.16b, v27.16b 2643*c0909341SAndroid Build Coastguard Worker 2644*c0909341SAndroid Build Coastguard Worker rshrn v10.8b, v10.8h, #6 2645*c0909341SAndroid Build Coastguard Worker rshrn2 v10.16b, v11.8h, #6 2646*c0909341SAndroid Build Coastguard Worker rshrn v11.8b, v12.8h, #6 2647*c0909341SAndroid Build Coastguard Worker rshrn2 v11.16b, v13.8h, #6 2648*c0909341SAndroid Build Coastguard Worker 2649*c0909341SAndroid Build Coastguard Worker st1 {v10.16b}, [x0], x1 2650*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2651*c0909341SAndroid Build Coastguard Worker st1 {v11.16b}, [x13], x1 2652*c0909341SAndroid Build Coastguard Worker b.le 3f 2653*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2654*c0909341SAndroid Build Coastguard Worker b 2b 2655*c0909341SAndroid Build Coastguard Worker 2656*c0909341SAndroid Build Coastguard Worker3: 2657*c0909341SAndroid Build Coastguard Worker subs w4, w4, #16 2658*c0909341SAndroid Build Coastguard Worker b.le 9f 2659*c0909341SAndroid Build Coastguard Worker 2660*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 2661*c0909341SAndroid 
Build Coastguard Worker msub x0, x1, x12, x0 // ptr -= h * stride 2662*c0909341SAndroid Build Coastguard Worker msub x13, x1, x12, x13 2663*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 2664*c0909341SAndroid Build Coastguard Worker add x0, x0, #16 2665*c0909341SAndroid Build Coastguard Worker add x13, x13, #16 2666*c0909341SAndroid Build Coastguard Worker mov w5, w12 // reset h 2667*c0909341SAndroid Build Coastguard Worker b 1b 2668*c0909341SAndroid Build Coastguard Worker 2669*c0909341SAndroid Build Coastguard Worker9: 2670*c0909341SAndroid Build Coastguard Worker ldp d14, d15, [sp, #0x30] 2671*c0909341SAndroid Build Coastguard Worker ldp d12, d13, [sp, #0x20] 2672*c0909341SAndroid Build Coastguard Worker ldp d10, d11, [sp, #0x10] 2673*c0909341SAndroid Build Coastguard Worker ldp d8, d9, [sp], 0x40 2674*c0909341SAndroid Build Coastguard Worker ret 2675*c0909341SAndroid Build Coastguard Workerendfunc 2676*c0909341SAndroid Build Coastguard Worker 2677*c0909341SAndroid Build Coastguard Workerjumptable ipred_z2_fill1_tbl 2678*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_z2_fill1_tbl 2679*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_z2_fill1_tbl 2680*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_z2_fill1_tbl 2681*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_z2_fill1_tbl 2682*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_z2_fill1_tbl 2683*c0909341SAndroid Build Coastguard Workerendjumptable 2684*c0909341SAndroid Build Coastguard Worker 2685*c0909341SAndroid Build Coastguard Workerfunction ipred_z2_fill2_8bpc_neon, export=1 2686*c0909341SAndroid Build Coastguard Worker cmp w4, #8 2687*c0909341SAndroid Build Coastguard Worker mov w8, #(2 << 6) // xpos = 2 << 6 2688*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2689*c0909341SAndroid Build Coastguard Worker 2690*c0909341SAndroid Build Coastguard Worker movrel x11, increments 2691*c0909341SAndroid Build Coastguard Worker 
ld1 {v31.8h}, [x11] // increments 2692*c0909341SAndroid Build Coastguard Worker neg w7, w7 // -dy 2693*c0909341SAndroid Build Coastguard Worker b.eq 80f 2694*c0909341SAndroid Build Coastguard Worker 2695*c0909341SAndroid Build Coastguard Worker40: 2696*c0909341SAndroid Build Coastguard Worker dup v30.4h, w7 // -dy 2697*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 2698*c0909341SAndroid Build Coastguard Worker 2699*c0909341SAndroid Build Coastguard Worker mul v16.4h, v31.4h, v30.4h // {0,1,2,3}* -dy 2700*c0909341SAndroid Build Coastguard Worker movi v25.16b, #0x3e 2701*c0909341SAndroid Build Coastguard Worker add v30.4h, v16.4h, v30.4h // -= dy 2702*c0909341SAndroid Build Coastguard Worker 2703*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3} 2704*c0909341SAndroid Build Coastguard Worker 2705*c0909341SAndroid Build Coastguard Worker // For upsample_top, w <= 8 and h <= 8; we may need up to h+1 elements 2706*c0909341SAndroid Build Coastguard Worker // from left. 
2707*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x3] // left[] 2708*c0909341SAndroid Build Coastguard Worker 2709*c0909341SAndroid Build Coastguard Worker movi v26.16b, #64 2710*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 2711*c0909341SAndroid Build Coastguard Worker 2712*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v30.8h // (uint8_t)ypos 2713*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 2714*c0909341SAndroid Build Coastguard Worker and v27.8b, v27.8b, v25.8b // frac_y 2715*c0909341SAndroid Build Coastguard Worker 2716*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 1 2717*c0909341SAndroid Build Coastguard Worker 2718*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v17.8b // base_y + 1 2719*c0909341SAndroid Build Coastguard Worker add v28.8b, v29.8b, v19.8b // base_y + 2 2720*c0909341SAndroid Build Coastguard Worker 2721*c0909341SAndroid Build Coastguard Worker tbl v16.8b, {v0.16b}, v29.8b // left[base_y] 2722*c0909341SAndroid Build Coastguard Worker 2723*c0909341SAndroid Build Coastguard Worker trn1 v30.2s, v30.2s, v28.2s // base_y + 1, base_y + 2 2724*c0909341SAndroid Build Coastguard Worker 2725*c0909341SAndroid Build Coastguard Worker sub v28.8b, v26.8b, v27.8b // 64 - frac_y 2726*c0909341SAndroid Build Coastguard Worker 2727*c0909341SAndroid Build Coastguard Worker trn1 v31.2s, v31.2s, v31.2s // {0,1,2,3,0,1,2,3} 2728*c0909341SAndroid Build Coastguard Worker 2729*c0909341SAndroid Build Coastguard Worker trn1 v27.2s, v27.2s, v27.2s // frac_y 2730*c0909341SAndroid Build Coastguard Worker trn1 v28.2s, v28.2s, v28.2s // 64 - frac_y 2731*c0909341SAndroid Build Coastguard Worker 2732*c0909341SAndroid Build Coastguard Worker movi v29.8b, #2 2733*c0909341SAndroid Build Coastguard Worker add v31.8b, v31.8b, v31.8b // {0,2,4,6,0,2,4,6} 2734*c0909341SAndroid Build Coastguard Worker4: 2735*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // 
base_x 2736*c0909341SAndroid Build Coastguard Worker dup v6.4h, w8 // xpos 2737*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2738*c0909341SAndroid Build Coastguard Worker cmp w9, #-8 // base_x <= -8 2739*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 2740*c0909341SAndroid Build Coastguard Worker b.le 49f 2741*c0909341SAndroid Build Coastguard Worker 2742*c0909341SAndroid Build Coastguard Worker dup v7.4h, w8 // xpos 2743*c0909341SAndroid Build Coastguard Worker 2744*c0909341SAndroid Build Coastguard Worker ldr d2, [x2, w9, sxtw] // top[base_x] 2745*c0909341SAndroid Build Coastguard Worker ldr d4, [x2, w11, sxtw] 2746*c0909341SAndroid Build Coastguard Worker 2747*c0909341SAndroid Build Coastguard Worker trn1 v6.2d, v6.2d, v7.2d // xpos 2748*c0909341SAndroid Build Coastguard Worker 2749*c0909341SAndroid Build Coastguard Worker tbl v17.8b, {v0.16b}, v30.8b // left[base_y+1], left[base_y+2] 2750*c0909341SAndroid Build Coastguard Worker 2751*c0909341SAndroid Build Coastguard Worker shrn v20.8b, v6.8h, #6 // first base_x for each row 2752*c0909341SAndroid Build Coastguard Worker xtn v6.8b, v6.8h // (uint8_t)xpos 2753*c0909341SAndroid Build Coastguard Worker 2754*c0909341SAndroid Build Coastguard Worker uzp2 v3.8b, v2.8b, v4.8b // top[base_x+1] 2755*c0909341SAndroid Build Coastguard Worker uzp1 v2.8b, v2.8b, v4.8b // top[base_x] 2756*c0909341SAndroid Build Coastguard Worker 2757*c0909341SAndroid Build Coastguard Worker and v6.8b, v6.8b, v25.8b // frac_x 2758*c0909341SAndroid Build Coastguard Worker 2759*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v17.2s // left[base_y], left[base_y+1] 2760*c0909341SAndroid Build Coastguard Worker 2761*c0909341SAndroid Build Coastguard Worker sub v7.8b, v26.8b, v6.8b // 64 - frac_x 2762*c0909341SAndroid Build Coastguard Worker 2763*c0909341SAndroid Build Coastguard Worker add v20.8b, v20.8b, v31.8b // actual base_x 2764*c0909341SAndroid Build Coastguard Worker 
2765*c0909341SAndroid Build Coastguard Worker umull v16.8h, v16.8b, v28.8b // left[base_y]*(64-frac_y) 2766*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v17.8b, v27.8b // + left[base_y+1]*frac_y 2767*c0909341SAndroid Build Coastguard Worker 2768*c0909341SAndroid Build Coastguard Worker umull v22.8h, v2.8b, v7.8b // top[base_x]-*(64-frac_x) 2769*c0909341SAndroid Build Coastguard Worker umlal v22.8h, v3.8b, v6.8b // + top[base_x+1]*frac_x 2770*c0909341SAndroid Build Coastguard Worker 2771*c0909341SAndroid Build Coastguard Worker cmge v20.8b, v20.8b, #0 2772*c0909341SAndroid Build Coastguard Worker 2773*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 2774*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #6 2775*c0909341SAndroid Build Coastguard Worker 2776*c0909341SAndroid Build Coastguard Worker bit v16.8b, v22.8b, v20.8b 2777*c0909341SAndroid Build Coastguard Worker 2778*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[0], [x0], x1 2779*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2780*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2781*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[1], [x0], x1 2782*c0909341SAndroid Build Coastguard Worker b.le 9f 2783*c0909341SAndroid Build Coastguard Worker 2784*c0909341SAndroid Build Coastguard Worker ext v16.8b, v17.8b, v17.8b, #4 2785*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v29.8b // base_y += 2 2786*c0909341SAndroid Build Coastguard Worker b 4b 2787*c0909341SAndroid Build Coastguard Worker 2788*c0909341SAndroid Build Coastguard Worker49: 2789*c0909341SAndroid Build Coastguard Worker tbl v17.8b, {v0.16b}, v30.8b // left[base_y+1], left[base_y+2] 2790*c0909341SAndroid Build Coastguard Worker 2791*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v17.2s // left[base_y], left[base_y+1] 2792*c0909341SAndroid Build Coastguard Worker 2793*c0909341SAndroid Build Coastguard Worker umull v18.8h, v16.8b, v28.8b // 
left[base_y]*(64-frac_t) 2794*c0909341SAndroid Build Coastguard Worker umlal v18.8h, v17.8b, v27.8b // + left[base_y+1]*frac_y 2795*c0909341SAndroid Build Coastguard Worker rshrn v18.8b, v18.8h, #6 2796*c0909341SAndroid Build Coastguard Worker 2797*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[0], [x0], x1 2798*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2799*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[1], [x0], x1 2800*c0909341SAndroid Build Coastguard Worker b.le 9f 2801*c0909341SAndroid Build Coastguard Worker 2802*c0909341SAndroid Build Coastguard Worker ext v16.8b, v17.8b, v17.8b, #4 2803*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v29.8b // base_y += 2 2804*c0909341SAndroid Build Coastguard Worker b 49b 2805*c0909341SAndroid Build Coastguard Worker 2806*c0909341SAndroid Build Coastguard Worker9: 2807*c0909341SAndroid Build Coastguard Worker ret 2808*c0909341SAndroid Build Coastguard Worker 2809*c0909341SAndroid Build Coastguard Worker80: 2810*c0909341SAndroid Build Coastguard Worker dup v30.8h, w7 // -dy 2811*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 2812*c0909341SAndroid Build Coastguard Worker 2813*c0909341SAndroid Build Coastguard Worker mul v16.8h, v31.8h, v30.8h // {0,1,2,3,4,5,6,7}* -dy 2814*c0909341SAndroid Build Coastguard Worker movi v25.16b, #0x3e 2815*c0909341SAndroid Build Coastguard Worker add v30.8h, v16.8h, v30.8h // -= dy 2816*c0909341SAndroid Build Coastguard Worker 2817*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3,4,5,6,7} 2818*c0909341SAndroid Build Coastguard Worker 2819*c0909341SAndroid Build Coastguard Worker // For upsample_top, w <= 8 and h <= 8; we may need up to h+1 elements 2820*c0909341SAndroid Build Coastguard Worker // from left. 
2821*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x3] // left[] 2822*c0909341SAndroid Build Coastguard Worker 2823*c0909341SAndroid Build Coastguard Worker movi v26.16b, #64 2824*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 2825*c0909341SAndroid Build Coastguard Worker 2826*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v30.8h // (uint8_t)ypos 2827*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 2828*c0909341SAndroid Build Coastguard Worker and v27.8b, v27.8b, v25.8b // frac_y 2829*c0909341SAndroid Build Coastguard Worker 2830*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y = (ypos >> 6) + 1 2831*c0909341SAndroid Build Coastguard Worker 2832*c0909341SAndroid Build Coastguard Worker tbl v18.8b, {v0.16b}, v29.8b // left[base_y] 2833*c0909341SAndroid Build Coastguard Worker 2834*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v19.8b // base_y + 2 2835*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v17.8b // base_y + 1 2836*c0909341SAndroid Build Coastguard Worker 2837*c0909341SAndroid Build Coastguard Worker sub v28.8b, v26.8b, v27.8b // 64 - frac_y 2838*c0909341SAndroid Build Coastguard Worker 2839*c0909341SAndroid Build Coastguard Worker trn1 v31.2d, v31.2d, v31.2d // {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7} 2840*c0909341SAndroid Build Coastguard Worker 2841*c0909341SAndroid Build Coastguard Worker movi v24.8b, #2 // 2 2842*c0909341SAndroid Build Coastguard Worker add v31.16b, v31.16b, v31.16b // {0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14} 2843*c0909341SAndroid Build Coastguard Worker8: 2844*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2845*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 2846*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2847*c0909341SAndroid Build Coastguard Worker cmp w9, #-16 // base_x <= -16 2848*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x 
2849*c0909341SAndroid Build Coastguard Worker b.le 89f 2850*c0909341SAndroid Build Coastguard Worker 2851*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 2852*c0909341SAndroid Build Coastguard Worker 2853*c0909341SAndroid Build Coastguard Worker ldr q4, [x2, w9, sxtw] // top[base_x] 2854*c0909341SAndroid Build Coastguard Worker ldr q6, [x2, w11, sxtw] 2855*c0909341SAndroid Build Coastguard Worker 2856*c0909341SAndroid Build Coastguard Worker tbl v19.8b, {v0.16b}, v29.8b // left[base_y+1] 2857*c0909341SAndroid Build Coastguard Worker 2858*c0909341SAndroid Build Coastguard Worker shrn v21.8b, v16.8h, #6 // first base_x 2859*c0909341SAndroid Build Coastguard Worker shrn2 v21.16b, v17.8h, #6 2860*c0909341SAndroid Build Coastguard Worker xtn v16.8b, v16.8h // (uint8_t)xpos 2861*c0909341SAndroid Build Coastguard Worker xtn2 v16.16b, v17.8h 2862*c0909341SAndroid Build Coastguard Worker 2863*c0909341SAndroid Build Coastguard Worker tbl v20.8b, {v0.16b}, v30.8b // left[base_y+2] 2864*c0909341SAndroid Build Coastguard Worker 2865*c0909341SAndroid Build Coastguard Worker uzp2 v5.16b, v4.16b, v6.16b // top[base_x+1] 2866*c0909341SAndroid Build Coastguard Worker uzp1 v4.16b, v4.16b, v6.16b // top[base_x] 2867*c0909341SAndroid Build Coastguard Worker 2868*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 2869*c0909341SAndroid Build Coastguard Worker 2870*c0909341SAndroid Build Coastguard Worker sub v7.16b, v26.16b, v16.16b // 64 - frac_x 2871*c0909341SAndroid Build Coastguard Worker 2872*c0909341SAndroid Build Coastguard Worker add v21.16b, v21.16b, v31.16b // actual base_x 2873*c0909341SAndroid Build Coastguard Worker 2874*c0909341SAndroid Build Coastguard Worker umull v6.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2875*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2876*c0909341SAndroid Build Coastguard Worker umull v17.8h, v19.8b, v28.8b 2877*c0909341SAndroid Build Coastguard 
Worker umlal v17.8h, v20.8b, v27.8b 2878*c0909341SAndroid Build Coastguard Worker 2879*c0909341SAndroid Build Coastguard Worker umull v22.8h, v4.8b, v7.8b // top[base_x]-*(64-frac_x) 2880*c0909341SAndroid Build Coastguard Worker umlal v22.8h, v5.8b, v16.8b // + top[base_x+1]*frac_x 2881*c0909341SAndroid Build Coastguard Worker umull2 v23.8h, v4.16b, v7.16b 2882*c0909341SAndroid Build Coastguard Worker umlal2 v23.8h, v5.16b, v16.16b 2883*c0909341SAndroid Build Coastguard Worker 2884*c0909341SAndroid Build Coastguard Worker cmge v21.16b, v21.16b, #0 2885*c0909341SAndroid Build Coastguard Worker 2886*c0909341SAndroid Build Coastguard Worker rshrn v6.8b, v6.8h, #6 2887*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v17.8h, #6 2888*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #6 2889*c0909341SAndroid Build Coastguard Worker rshrn2 v22.16b, v23.8h, #6 2890*c0909341SAndroid Build Coastguard Worker 2891*c0909341SAndroid Build Coastguard Worker bit v6.16b, v22.16b, v21.16b 2892*c0909341SAndroid Build Coastguard Worker 2893*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[0], [x0], x1 2894*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2895*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2896*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[1], [x0], x1 2897*c0909341SAndroid Build Coastguard Worker b.le 9f 2898*c0909341SAndroid Build Coastguard Worker 2899*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2900*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v24.8b // base_y += 2 2901*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v24.8b // base_y += 2 2902*c0909341SAndroid Build Coastguard Worker b 8b 2903*c0909341SAndroid Build Coastguard Worker 2904*c0909341SAndroid Build Coastguard Worker89: 2905*c0909341SAndroid Build Coastguard Worker tbl v19.8b, {v0.16b}, v29.8b // left[base_y+1] 2906*c0909341SAndroid Build Coastguard Worker tbl v20.8b, {v0.16b}, v30.8b // 
left[base_y+2] 2907*c0909341SAndroid Build Coastguard Worker 2908*c0909341SAndroid Build Coastguard Worker umull v6.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 2909*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 2910*c0909341SAndroid Build Coastguard Worker umull v17.8h, v19.8b, v28.8b 2911*c0909341SAndroid Build Coastguard Worker umlal v17.8h, v20.8b, v27.8b 2912*c0909341SAndroid Build Coastguard Worker 2913*c0909341SAndroid Build Coastguard Worker rshrn v6.8b, v6.8h, #6 2914*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v17.8h, #6 2915*c0909341SAndroid Build Coastguard Worker 2916*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[0], [x0], x1 2917*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 2918*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[1], [x0], x1 2919*c0909341SAndroid Build Coastguard Worker b.le 9f 2920*c0909341SAndroid Build Coastguard Worker 2921*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2922*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v24.8b // base_y += 2 2923*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v24.8b // base_y += 2 2924*c0909341SAndroid Build Coastguard Worker b 89b 2925*c0909341SAndroid Build Coastguard Worker 2926*c0909341SAndroid Build Coastguard Worker9: 2927*c0909341SAndroid Build Coastguard Worker ret 2928*c0909341SAndroid Build Coastguard Workerendfunc 2929*c0909341SAndroid Build Coastguard Worker 2930*c0909341SAndroid Build Coastguard Workerfunction ipred_z2_fill3_8bpc_neon, export=1 // z2 fill, upsample_left variant: left[] advances 2 entries per output row. Register use below: x0 = dst, x1 = row stride, x2 = top, x3 = left, w5 = rows remaining, w6 = dx, w7 = dy; w4 == 8 selects the 8-wide path at 80, otherwise the 4-wide path at 40 (w4 is presumably the width - confirm against the caller). 2931*c0909341SAndroid Build Coastguard Worker cmp w4, #8 2932*c0909341SAndroid Build Coastguard Worker mov w8, #(1 << 6) // xpos = 1 << 6 2933*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2934*c0909341SAndroid Build Coastguard Worker 2935*c0909341SAndroid Build Coastguard Worker movrel x11, increments 2936*c0909341SAndroid Build Coastguard Worker ld1 {v31.8h}, [x11] // increments
2937*c0909341SAndroid Build Coastguard Worker neg w7, w7 // -dy 2938*c0909341SAndroid Build Coastguard Worker b.eq 80f 2939*c0909341SAndroid Build Coastguard Worker 2940*c0909341SAndroid Build Coastguard Worker40: 2941*c0909341SAndroid Build Coastguard Worker dup v30.4h, w7 // -dy 2942*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 2943*c0909341SAndroid Build Coastguard Worker 2944*c0909341SAndroid Build Coastguard Worker mul v16.4h, v31.4h, v30.4h // {0,1,2,3}* -dy 2945*c0909341SAndroid Build Coastguard Worker movi v25.16b, #0x3e // mask applied to xpos/ypos to get frac_x/frac_y 2946*c0909341SAndroid Build Coastguard Worker add v30.4h, v16.4h, v30.4h // ypos = {1,2,3,4}* -dy 2947*c0909341SAndroid Build Coastguard Worker 2948*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3} 2949*c0909341SAndroid Build Coastguard Worker 2950*c0909341SAndroid Build Coastguard Worker // For upsample_left, w <= 8 and h <= 8; we may need up to 2*h+1 elements. 2951*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b}, [x3] // left[] 2952*c0909341SAndroid Build Coastguard Worker 2953*c0909341SAndroid Build Coastguard Worker movi v26.16b, #64 2954*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 2955*c0909341SAndroid Build Coastguard Worker 2956*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v30.8h // (uint8_t)ypos 2957*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 2958*c0909341SAndroid Build Coastguard Worker and v27.8b, v27.8b, v25.8b // frac_y 2959*c0909341SAndroid Build Coastguard Worker 2960*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v19.8b // base_y = (ypos >> 6) + 2 2961*c0909341SAndroid Build Coastguard Worker 2962*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v17.8b // base_y + 1 2963*c0909341SAndroid Build Coastguard Worker add v28.8b, v29.8b, v19.8b // base_y + 2 2964*c0909341SAndroid Build Coastguard Worker 2965*c0909341SAndroid Build Coastguard Worker trn1 v31.2s, v31.2s, v31.2s // {0,1,2,3,0,1,2,3} 2966*c0909341SAndroid
Build Coastguard Worker 2967*c0909341SAndroid Build Coastguard Worker add v24.8b, v30.8b, v19.8b // base_y + 3 2968*c0909341SAndroid Build Coastguard Worker 2969*c0909341SAndroid Build Coastguard Worker trn1 v29.2s, v29.2s, v28.2s // base_y + 0, base_y + 2 2970*c0909341SAndroid Build Coastguard Worker trn1 v30.2s, v30.2s, v24.2s // base_y + 1, base_y + 3 2971*c0909341SAndroid Build Coastguard Worker 2972*c0909341SAndroid Build Coastguard Worker sub v28.8b, v26.8b, v27.8b // 64 - frac_y 2973*c0909341SAndroid Build Coastguard Worker 2974*c0909341SAndroid Build Coastguard Worker trn1 v27.2s, v27.2s, v27.2s // frac_y 2975*c0909341SAndroid Build Coastguard Worker trn1 v28.2s, v28.2s, v28.2s // 64 - frac_y 2976*c0909341SAndroid Build Coastguard Worker 2977*c0909341SAndroid Build Coastguard Worker movi v24.8b, #4 // base_y step per pair of output rows 2978*c0909341SAndroid Build Coastguard Worker4: 2979*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 2980*c0909341SAndroid Build Coastguard Worker dup v6.4h, w8 // xpos 2981*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 2982*c0909341SAndroid Build Coastguard Worker cmp w9, #-4 // base_x <= -4 2983*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x for the second row 2984*c0909341SAndroid Build Coastguard Worker b.le 49f // remaining rows are computed from left[] only 2985*c0909341SAndroid Build Coastguard Worker 2986*c0909341SAndroid Build Coastguard Worker dup v7.4h, w8 // xpos 2987*c0909341SAndroid Build Coastguard Worker 2988*c0909341SAndroid Build Coastguard Worker ldr d2, [x2, w9, sxtw] // top[base_x] 2989*c0909341SAndroid Build Coastguard Worker ldr d4, [x2, w11, sxtw] 2990*c0909341SAndroid Build Coastguard Worker 2991*c0909341SAndroid Build Coastguard Worker trn1 v6.2d, v6.2d, v7.2d // xpos 2992*c0909341SAndroid Build Coastguard Worker 2993*c0909341SAndroid Build Coastguard Worker tbl v16.8b, {v0.16b, v1.16b}, v29.8b // left[base_y+0], left[base_y+2] 2994*c0909341SAndroid Build Coastguard Worker tbl v17.8b, {v0.16b, v1.16b}, v30.8b // left[base_y+1], left[base_y+3]
2995*c0909341SAndroid Build Coastguard Worker 2996*c0909341SAndroid Build Coastguard Worker shrn v20.8b, v6.8h, #6 // first base_x for each row 2997*c0909341SAndroid Build Coastguard Worker xtn v6.8b, v6.8h // (uint8_t)xpos 2998*c0909341SAndroid Build Coastguard Worker 2999*c0909341SAndroid Build Coastguard Worker ext v3.8b, v2.8b, v2.8b, #1 // top[base_x+1] 3000*c0909341SAndroid Build Coastguard Worker ext v5.8b, v4.8b, v4.8b, #1 3001*c0909341SAndroid Build Coastguard Worker 3002*c0909341SAndroid Build Coastguard Worker and v6.8b, v6.8b, v25.8b // frac_x 3003*c0909341SAndroid Build Coastguard Worker 3004*c0909341SAndroid Build Coastguard Worker trn1 v2.2s, v2.2s, v4.2s // top[base_x] 3005*c0909341SAndroid Build Coastguard Worker trn1 v3.2s, v3.2s, v5.2s // top[base_x+1] 3006*c0909341SAndroid Build Coastguard Worker 3007*c0909341SAndroid Build Coastguard Worker sub v7.8b, v26.8b, v6.8b // 64 - frac_x 3008*c0909341SAndroid Build Coastguard Worker 3009*c0909341SAndroid Build Coastguard Worker add v20.8b, v20.8b, v31.8b // actual base_x 3010*c0909341SAndroid Build Coastguard Worker 3011*c0909341SAndroid Build Coastguard Worker umull v16.8h, v16.8b, v28.8b // left[base_y]*(64-frac_y) 3012*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v17.8b, v27.8b // + left[base_y+1]*frac_y 3013*c0909341SAndroid Build Coastguard Worker 3014*c0909341SAndroid Build Coastguard Worker umull v22.8h, v2.8b, v7.8b // top[base_x]*(64-frac_x) 3015*c0909341SAndroid Build Coastguard Worker umlal v22.8h, v3.8b, v6.8b // + top[base_x+1]*frac_x 3016*c0909341SAndroid Build Coastguard Worker 3017*c0909341SAndroid Build Coastguard Worker cmge v20.8b, v20.8b, #0 // mask: base_x >= 0 (take the top result there, else keep left) 3018*c0909341SAndroid Build Coastguard Worker 3019*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 3020*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #6 3021*c0909341SAndroid Build Coastguard Worker 3022*c0909341SAndroid Build Coastguard Worker bit v16.8b, v22.8b, v20.8b 3023*c0909341SAndroid Build
Coastguard Worker 3024*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[0], [x0], x1 3025*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3026*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3027*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[1], [x0], x1 3028*c0909341SAndroid Build Coastguard Worker b.le 9f 3029*c0909341SAndroid Build Coastguard Worker 3030*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v24.8b // base_y += 4 3031*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v24.8b // base_y += 4 3032*c0909341SAndroid Build Coastguard Worker b 4b 3033*c0909341SAndroid Build Coastguard Worker 3034*c0909341SAndroid Build Coastguard Worker49: 3035*c0909341SAndroid Build Coastguard Worker tbl v16.8b, {v0.16b, v1.16b}, v29.8b // left[base_y+0], left[base_y+2] 3036*c0909341SAndroid Build Coastguard Worker tbl v17.8b, {v0.16b, v1.16b}, v30.8b // left[base_y+1], left[base_y+3] 3037*c0909341SAndroid Build Coastguard Worker 3038*c0909341SAndroid Build Coastguard Worker umull v18.8h, v16.8b, v28.8b // left[base_y]*(64-frac_y) 3039*c0909341SAndroid Build Coastguard Worker umlal v18.8h, v17.8b, v27.8b // + left[base_y+1]*frac_y 3040*c0909341SAndroid Build Coastguard Worker rshrn v18.8b, v18.8h, #6 3041*c0909341SAndroid Build Coastguard Worker 3042*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[0], [x0], x1 3043*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3044*c0909341SAndroid Build Coastguard Worker st1 {v18.s}[1], [x0], x1 3045*c0909341SAndroid Build Coastguard Worker b.le 9f 3046*c0909341SAndroid Build Coastguard Worker 3047*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v24.8b // base_y += 4 3048*c0909341SAndroid Build Coastguard Worker add v30.8b, v30.8b, v24.8b // base_y += 4 3049*c0909341SAndroid Build Coastguard Worker b 49b 3050*c0909341SAndroid Build Coastguard Worker 3051*c0909341SAndroid Build Coastguard Worker9: 3052*c0909341SAndroid Build Coastguard Worker ret
3053*c0909341SAndroid Build Coastguard Worker 3054*c0909341SAndroid Build Coastguard Worker80: 3055*c0909341SAndroid Build Coastguard Worker dup v30.8h, w7 // -dy 3056*c0909341SAndroid Build Coastguard Worker movi v17.8b, #1 3057*c0909341SAndroid Build Coastguard Worker 3058*c0909341SAndroid Build Coastguard Worker mul v16.8h, v31.8h, v30.8h // {0,1,2,3,4,5,6,7}* -dy 3059*c0909341SAndroid Build Coastguard Worker movi v25.16b, #0x3e // mask applied to xpos/ypos to get frac_x/frac_y 3060*c0909341SAndroid Build Coastguard Worker add v30.8h, v16.8h, v30.8h // ypos = {1,2,3,4,5,6,7,8}* -dy 3061*c0909341SAndroid Build Coastguard Worker 3062*c0909341SAndroid Build Coastguard Worker xtn v31.8b, v31.8h // {0,1,2,3,4,5,6,7} 3063*c0909341SAndroid Build Coastguard Worker 3064*c0909341SAndroid Build Coastguard Worker // For upsample_left, w <= 8 and h <= 8; we may need up to 2*h+1 elements. 3065*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b}, [x3] // left[] 3066*c0909341SAndroid Build Coastguard Worker 3067*c0909341SAndroid Build Coastguard Worker movi v26.16b, #64 3068*c0909341SAndroid Build Coastguard Worker movi v19.16b, #2 3069*c0909341SAndroid Build Coastguard Worker 3070*c0909341SAndroid Build Coastguard Worker xtn v27.8b, v30.8h // (uint8_t)ypos 3071*c0909341SAndroid Build Coastguard Worker shrn v29.8b, v30.8h, #6 // ypos >> 6 3072*c0909341SAndroid Build Coastguard Worker and v27.8b, v27.8b, v25.8b // frac_y 3073*c0909341SAndroid Build Coastguard Worker 3074*c0909341SAndroid Build Coastguard Worker add v29.8b, v29.8b, v19.8b // base_y = (ypos >> 6) + 2 3075*c0909341SAndroid Build Coastguard Worker 3076*c0909341SAndroid Build Coastguard Worker add v28.8b, v29.8b, v17.8b // base_y + 1 3077*c0909341SAndroid Build Coastguard Worker add v30.8b, v29.8b, v19.8b // base_y + 2 3078*c0909341SAndroid Build Coastguard Worker 3079*c0909341SAndroid Build Coastguard Worker trn1 v31.2d, v31.2d, v31.2d // {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7} 3080*c0909341SAndroid Build Coastguard Worker add v24.8b, v28.8b, v19.8b // base_y + 3
3081*c0909341SAndroid Build Coastguard Worker 3082*c0909341SAndroid Build Coastguard Worker trn1 v29.2d, v29.2d, v30.2d // base_y + 0, base_y + 2 3083*c0909341SAndroid Build Coastguard Worker trn1 v30.2d, v28.2d, v24.2d // base_y + 1, base_y + 3 3084*c0909341SAndroid Build Coastguard Worker 3085*c0909341SAndroid Build Coastguard Worker sub v28.8b, v26.8b, v27.8b // 64 - frac_y 3086*c0909341SAndroid Build Coastguard Worker 3087*c0909341SAndroid Build Coastguard Worker movi v24.16b, #4 // base_y step per pair of output rows 3088*c0909341SAndroid Build Coastguard Worker 3089*c0909341SAndroid Build Coastguard Worker trn1 v27.2d, v27.2d, v27.2d // frac_y 3090*c0909341SAndroid Build Coastguard Worker trn1 v28.2d, v28.2d, v28.2d // 64 - frac_y 3091*c0909341SAndroid Build Coastguard Worker8: 3092*c0909341SAndroid Build Coastguard Worker asr w9, w8, #6 // base_x 3093*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // xpos 3094*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3095*c0909341SAndroid Build Coastguard Worker cmp w9, #-8 // base_x <= -8 3096*c0909341SAndroid Build Coastguard Worker asr w11, w8, #6 // base_x for the second row 3097*c0909341SAndroid Build Coastguard Worker b.le 89f // remaining rows are computed from left[] only 3098*c0909341SAndroid Build Coastguard Worker 3099*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // xpos 3100*c0909341SAndroid Build Coastguard Worker 3101*c0909341SAndroid Build Coastguard Worker ldr q4, [x2, w9, sxtw] // top[base_x] 3102*c0909341SAndroid Build Coastguard Worker ldr q6, [x2, w11, sxtw] 3103*c0909341SAndroid Build Coastguard Worker 3104*c0909341SAndroid Build Coastguard Worker tbl v18.16b, {v0.16b, v1.16b, v2.16b}, v29.16b // left[base_y+0], left[base_y+2] 3105*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+1], left[base_y+3] 3106*c0909341SAndroid Build Coastguard Worker 3107*c0909341SAndroid Build Coastguard Worker shrn v21.8b, v16.8h, #6 // first base_x 3108*c0909341SAndroid Build Coastguard Worker shrn2 v21.16b, v17.8h, #6
3109*c0909341SAndroid Build Coastguard Worker xtn v16.8b, v16.8h // (uint8_t)xpos 3110*c0909341SAndroid Build Coastguard Worker xtn2 v16.16b, v17.8h 3111*c0909341SAndroid Build Coastguard Worker 3112*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v4.16b, #1 // top[base_x+1] 3113*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v6.16b, #1 3114*c0909341SAndroid Build Coastguard Worker 3115*c0909341SAndroid Build Coastguard Worker and v16.16b, v16.16b, v25.16b // frac_x 3116*c0909341SAndroid Build Coastguard Worker 3117*c0909341SAndroid Build Coastguard Worker trn1 v4.2d, v4.2d, v6.2d // top[base_x] 3118*c0909341SAndroid Build Coastguard Worker trn1 v5.2d, v5.2d, v7.2d // top[base_x+1] 3119*c0909341SAndroid Build Coastguard Worker 3120*c0909341SAndroid Build Coastguard Worker sub v7.16b, v26.16b, v16.16b // 64 - frac_x 3121*c0909341SAndroid Build Coastguard Worker 3122*c0909341SAndroid Build Coastguard Worker add v21.16b, v21.16b, v31.16b // actual base_x 3123*c0909341SAndroid Build Coastguard Worker 3124*c0909341SAndroid Build Coastguard Worker umull v6.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 3125*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 3126*c0909341SAndroid Build Coastguard Worker umull2 v17.8h, v18.16b, v28.16b 3127*c0909341SAndroid Build Coastguard Worker umlal2 v17.8h, v19.16b, v27.16b 3128*c0909341SAndroid Build Coastguard Worker 3129*c0909341SAndroid Build Coastguard Worker umull v22.8h, v4.8b, v7.8b // top[base_x]*(64-frac_x) 3130*c0909341SAndroid Build Coastguard Worker umlal v22.8h, v5.8b, v16.8b // + top[base_x+1]*frac_x 3131*c0909341SAndroid Build Coastguard Worker umull2 v23.8h, v4.16b, v7.16b 3132*c0909341SAndroid Build Coastguard Worker umlal2 v23.8h, v5.16b, v16.16b 3133*c0909341SAndroid Build Coastguard Worker 3134*c0909341SAndroid Build Coastguard Worker cmge v21.16b, v21.16b, #0 // mask: base_x >= 0 (take the top result there, else keep left) 3135*c0909341SAndroid Build Coastguard Worker 3136*c0909341SAndroid Build
Coastguard Worker rshrn v6.8b, v6.8h, #6 3137*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v17.8h, #6 3138*c0909341SAndroid Build Coastguard Worker rshrn v22.8b, v22.8h, #6 3139*c0909341SAndroid Build Coastguard Worker rshrn2 v22.16b, v23.8h, #6 3140*c0909341SAndroid Build Coastguard Worker 3141*c0909341SAndroid Build Coastguard Worker bit v6.16b, v22.16b, v21.16b 3142*c0909341SAndroid Build Coastguard Worker 3143*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[0], [x0], x1 3144*c0909341SAndroid Build Coastguard Worker sub w8, w8, w6 // xpos -= dx 3145*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3146*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[1], [x0], x1 3147*c0909341SAndroid Build Coastguard Worker b.le 9f 3148*c0909341SAndroid Build Coastguard Worker 3149*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 4 3150*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 4 3151*c0909341SAndroid Build Coastguard Worker b 8b 3152*c0909341SAndroid Build Coastguard Worker 3153*c0909341SAndroid Build Coastguard Worker89: 3154*c0909341SAndroid Build Coastguard Worker tbl v18.16b, {v0.16b, v1.16b, v2.16b}, v29.16b // left[base_y+0], left[base_y+2] 3155*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v0.16b, v1.16b, v2.16b}, v30.16b // left[base_y+1], left[base_y+3] 3156*c0909341SAndroid Build Coastguard Worker 3157*c0909341SAndroid Build Coastguard Worker umull v6.8h, v18.8b, v28.8b // left[base_y]*(64-frac_y) 3158*c0909341SAndroid Build Coastguard Worker umlal v6.8h, v19.8b, v27.8b // + left[base_y+1]*frac_y 3159*c0909341SAndroid Build Coastguard Worker umull2 v17.8h, v18.16b, v28.16b 3160*c0909341SAndroid Build Coastguard Worker umlal2 v17.8h, v19.16b, v27.16b 3161*c0909341SAndroid Build Coastguard Worker 3162*c0909341SAndroid Build Coastguard Worker rshrn v6.8b, v6.8h, #6 3163*c0909341SAndroid Build Coastguard Worker rshrn2 v6.16b, v17.8h, #6 3164*c0909341SAndroid
Build Coastguard Worker 3165*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[0], [x0], x1 3166*c0909341SAndroid Build Coastguard Worker subs w5, w5, #2 3167*c0909341SAndroid Build Coastguard Worker st1 {v6.d}[1], [x0], x1 3168*c0909341SAndroid Build Coastguard Worker b.le 9f 3169*c0909341SAndroid Build Coastguard Worker 3170*c0909341SAndroid Build Coastguard Worker add v29.16b, v29.16b, v24.16b // base_y += 4 3171*c0909341SAndroid Build Coastguard Worker add v30.16b, v30.16b, v24.16b // base_y += 4 3172*c0909341SAndroid Build Coastguard Worker b 89b 3173*c0909341SAndroid Build Coastguard Worker 3174*c0909341SAndroid Build Coastguard Worker9: 3175*c0909341SAndroid Build Coastguard Worker ret 3176*c0909341SAndroid Build Coastguard Workerendfunc 3177*c0909341SAndroid Build Coastguard Worker 3178*c0909341SAndroid Build Coastguard Worker 3179*c0909341SAndroid Build Coastguard Worker// void ipred_z3_fill1_8bpc_neon(pixel *dst, const ptrdiff_t stride, 3180*c0909341SAndroid Build Coastguard Worker// const pixel *const left, 3181*c0909341SAndroid Build Coastguard Worker// const int width, const int height, 3182*c0909341SAndroid Build Coastguard Worker// const int dy, const int max_base_y); 3183*c0909341SAndroid Build Coastguard Workerfunction ipred_z3_fill1_8bpc_neon, export=1 3184*c0909341SAndroid Build Coastguard Worker cmp w6, #64 3185*c0909341SAndroid Build Coastguard Worker clz w9, w3 3186*c0909341SAndroid Build Coastguard Worker movrel x8, ipred_z3_fill1_tbl 3187*c0909341SAndroid Build Coastguard Worker sub w9, w9, #25 3188*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x8, w9, uxtw #2] 3189*c0909341SAndroid Build Coastguard Worker add x10, x2, w6, uxtw // left[max_base_y] 3190*c0909341SAndroid Build Coastguard Worker add x8, x8, x9 3191*c0909341SAndroid Build Coastguard Worker movrel x11, increments 3192*c0909341SAndroid Build Coastguard Worker ld1r {v31.16b}, [x10] // padding 3193*c0909341SAndroid Build Coastguard Worker ld1 {v30.8h}, [x11] // increments
3194*c0909341SAndroid Build Coastguard Worker mov w7, w5 3195*c0909341SAndroid Build Coastguard Worker b.gt L(ipred_z3_fill1_large_h16) 3196*c0909341SAndroid Build Coastguard Worker br x8 3197*c0909341SAndroid Build Coastguard Worker 3198*c0909341SAndroid Build Coastguard Worker40: 3199*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3200*c0909341SAndroid Build Coastguard Worker dup v29.4h, w5 // dy 3201*c0909341SAndroid Build Coastguard Worker 3202*c0909341SAndroid Build Coastguard Worker mul v30.4h, v30.4h, v29.4h // {0,1,2,3,4,5,6,7}*dy 3203*c0909341SAndroid Build Coastguard Worker movi v23.16b, #0x3e 3204*c0909341SAndroid Build Coastguard Worker 3205*c0909341SAndroid Build Coastguard Worker // Worst case max_base_y is width+height-1, for w=4, h=16, <= 32 3206*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b}, [x2] // left[] 3207*c0909341SAndroid Build Coastguard Worker add v30.4h, v29.4h, v30.4h // ypos 3208*c0909341SAndroid Build Coastguard Worker 3209*c0909341SAndroid Build Coastguard Worker movi v22.16b, #64 3210*c0909341SAndroid Build Coastguard Worker movi v20.16b, #1 3211*c0909341SAndroid Build Coastguard Worker movi v21.16b, #2 3212*c0909341SAndroid Build Coastguard Worker 3213*c0909341SAndroid Build Coastguard Worker xtn v24.8b, v30.8h // (uint8_t)ypos 3214*c0909341SAndroid Build Coastguard Worker uqshrn v26.8b, v30.8h, #6 // base 3215*c0909341SAndroid Build Coastguard Worker and v24.8b, v24.8b, v23.8b // frac 3216*c0909341SAndroid Build Coastguard Worker 3217*c0909341SAndroid Build Coastguard Worker mov v4.8b, v31.8b 3218*c0909341SAndroid Build Coastguard Worker uqadd v27.8b, v26.8b, v20.8b // base + 1 3219*c0909341SAndroid Build Coastguard Worker uqadd v28.8b, v26.8b, v21.8b // base + 2 3220*c0909341SAndroid Build Coastguard Worker sub v25.8b, v22.8b, v24.8b // 64 - frac 3221*c0909341SAndroid Build Coastguard Worker 3222*c0909341SAndroid Build Coastguard Worker tbx v4.8b, {v0.16b, v1.16b}, v26.8b // left[base] 
3223*c0909341SAndroid Build Coastguard Worker 3224*c0909341SAndroid Build Coastguard Worker trn1 v27.2s, v27.2s, v28.2s // base + 1, base + 2 3225*c0909341SAndroid Build Coastguard Worker trn1 v24.2s, v24.2s, v24.2s // frac 3226*c0909341SAndroid Build Coastguard Worker trn1 v25.2s, v25.2s, v25.2s // 64 - frac 3227*c0909341SAndroid Build Coastguard Worker1: 3228*c0909341SAndroid Build Coastguard Worker mov v5.8b, v31.8b 3229*c0909341SAndroid Build Coastguard Worker tbx v5.8b, {v0.16b, v1.16b}, v27.8b // left[base+1], left[base+2] 3230*c0909341SAndroid Build Coastguard Worker 3231*c0909341SAndroid Build Coastguard Worker trn1 v4.2s, v4.2s, v5.2s // left[base], left[base+1] 3232*c0909341SAndroid Build Coastguard Worker 3233*c0909341SAndroid Build Coastguard Worker umull v16.8h, v4.8b, v25.8b // left[base]*(64-frac) 3234*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v5.8b, v24.8b // + left[base+1]*frac 3235*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 3236*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[0], [x0], x1 3237*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 3238*c0909341SAndroid Build Coastguard Worker st1 {v16.s}[1], [x0], x1 3239*c0909341SAndroid Build Coastguard Worker b.le 9f 3240*c0909341SAndroid Build Coastguard Worker 3241*c0909341SAndroid Build Coastguard Worker ext v4.8b, v5.8b, v5.8b, #4 3242*c0909341SAndroid Build Coastguard Worker uqadd v27.8b, v27.8b, v21.8b // base += 2 3243*c0909341SAndroid Build Coastguard Worker b 1b 3244*c0909341SAndroid Build Coastguard Worker 3245*c0909341SAndroid Build Coastguard Worker9: 3246*c0909341SAndroid Build Coastguard Worker ret 3247*c0909341SAndroid Build Coastguard Worker 3248*c0909341SAndroid Build Coastguard Worker80: 3249*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3250*c0909341SAndroid Build Coastguard Worker dup v29.8h, w5 // dy 3251*c0909341SAndroid Build Coastguard Worker 3252*c0909341SAndroid Build Coastguard Worker mul v30.8h, v30.8h, 
v29.8h // {0,1,2,3,4,5,6,7}*dy 3253*c0909341SAndroid Build Coastguard Worker movi v23.16b, #0x3e 3254*c0909341SAndroid Build Coastguard Worker 3255*c0909341SAndroid Build Coastguard Worker // Worst case max_base_y is width+height-1, for w=8, h=32, <= 48 3256*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b}, [x2] // left[] 3257*c0909341SAndroid Build Coastguard Worker add v30.8h, v29.8h, v30.8h // ypos 3258*c0909341SAndroid Build Coastguard Worker 3259*c0909341SAndroid Build Coastguard Worker movi v22.16b, #64 3260*c0909341SAndroid Build Coastguard Worker movi v20.16b, #1 3261*c0909341SAndroid Build Coastguard Worker movi v21.16b, #2 3262*c0909341SAndroid Build Coastguard Worker 3263*c0909341SAndroid Build Coastguard Worker xtn v24.8b, v30.8h // (uint8_t)ypos 3264*c0909341SAndroid Build Coastguard Worker uqshrn v26.8b, v30.8h, #6 // base 3265*c0909341SAndroid Build Coastguard Worker and v24.8b, v24.8b, v23.8b // frac 3266*c0909341SAndroid Build Coastguard Worker 3267*c0909341SAndroid Build Coastguard Worker mov v4.8b, v31.8b 3268*c0909341SAndroid Build Coastguard Worker uqadd v27.8b, v26.8b, v20.8b // base + 1 3269*c0909341SAndroid Build Coastguard Worker uqadd v28.8b, v26.8b, v21.8b // base + 2 3270*c0909341SAndroid Build Coastguard Worker sub v25.8b, v22.8b, v24.8b // 64 - frac 3271*c0909341SAndroid Build Coastguard Worker 3272*c0909341SAndroid Build Coastguard Worker tbx v4.8b, {v0.16b, v1.16b, v2.16b}, v26.8b // left[base] 3273*c0909341SAndroid Build Coastguard Worker1: 3274*c0909341SAndroid Build Coastguard Worker mov v5.8b, v31.8b 3275*c0909341SAndroid Build Coastguard Worker mov v6.8b, v31.8b 3276*c0909341SAndroid Build Coastguard Worker tbx v5.8b, {v0.16b, v1.16b, v2.16b}, v27.8b // left[base+1] 3277*c0909341SAndroid Build Coastguard Worker tbx v6.8b, {v0.16b, v1.16b, v2.16b}, v28.8b // left[base+2] 3278*c0909341SAndroid Build Coastguard Worker 3279*c0909341SAndroid Build Coastguard Worker umull v16.8h, v4.8b, v25.8b // 
left[base]*(64-frac) 3280*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v5.8b, v24.8b // + left[base+1]*frac 3281*c0909341SAndroid Build Coastguard Worker umull v17.8h, v5.8b, v25.8b 3282*c0909341SAndroid Build Coastguard Worker umlal v17.8h, v6.8b, v24.8b 3283*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 3284*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v17.8h, #6 3285*c0909341SAndroid Build Coastguard Worker st1 {v16.8b}, [x0], x1 3286*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 3287*c0909341SAndroid Build Coastguard Worker st1 {v17.8b}, [x0], x1 3288*c0909341SAndroid Build Coastguard Worker b.le 9f 3289*c0909341SAndroid Build Coastguard Worker 3290*c0909341SAndroid Build Coastguard Worker mov v4.8b, v6.8b 3291*c0909341SAndroid Build Coastguard Worker uqadd v27.8b, v27.8b, v21.8b // base += 2 3292*c0909341SAndroid Build Coastguard Worker uqadd v28.8b, v28.8b, v21.8b // base += 2 3293*c0909341SAndroid Build Coastguard Worker b 1b 3294*c0909341SAndroid Build Coastguard Worker 3295*c0909341SAndroid Build Coastguard Worker9: 3296*c0909341SAndroid Build Coastguard Worker ret 3297*c0909341SAndroid Build Coastguard Worker 3298*c0909341SAndroid Build Coastguard Worker160: 3299*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3300*c0909341SAndroid Build Coastguard Worker dup v28.8h, w5 // dy 3301*c0909341SAndroid Build Coastguard Worker 3302*c0909341SAndroid Build Coastguard Worker shl v29.8h, v28.8h, #3 // 8*dy 3303*c0909341SAndroid Build Coastguard Worker mul v30.8h, v30.8h, v28.8h // {0,1,2,3,4,5,6,7}*dy 3304*c0909341SAndroid Build Coastguard Worker movi v23.16b, #0x3e 3305*c0909341SAndroid Build Coastguard Worker 3306*c0909341SAndroid Build Coastguard Worker // This is only executed if we've checked that max_base_y <= 64. 
3307*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2] // left[] 3308*c0909341SAndroid Build Coastguard Worker add v28.8h, v28.8h, v30.8h // ypos 3309*c0909341SAndroid Build Coastguard Worker 3310*c0909341SAndroid Build Coastguard Worker movi v22.16b, #64 3311*c0909341SAndroid Build Coastguard Worker movi v20.16b, #1 3312*c0909341SAndroid Build Coastguard Worker movi v21.16b, #2 3313*c0909341SAndroid Build Coastguard Worker 3314*c0909341SAndroid Build Coastguard Worker add v29.8h, v28.8h, v29.8h // ypos + 8*dy 3315*c0909341SAndroid Build Coastguard Worker 3316*c0909341SAndroid Build Coastguard Worker xtn v24.8b, v28.8h // (uint8_t)ypos 3317*c0909341SAndroid Build Coastguard Worker xtn2 v24.16b, v29.8h 3318*c0909341SAndroid Build Coastguard Worker uqshrn v26.8b, v28.8h, #6 // base 3319*c0909341SAndroid Build Coastguard Worker uqshrn2 v26.16b, v29.8h, #6 3320*c0909341SAndroid Build Coastguard Worker and v24.16b, v24.16b, v23.16b // frac 3321*c0909341SAndroid Build Coastguard Worker 3322*c0909341SAndroid Build Coastguard Worker mov v4.16b, v31.16b 3323*c0909341SAndroid Build Coastguard Worker uqadd v27.16b, v26.16b, v20.16b // base + 1 3324*c0909341SAndroid Build Coastguard Worker uqadd v28.16b, v26.16b, v21.16b // base + 2 3325*c0909341SAndroid Build Coastguard Worker sub v25.16b, v22.16b, v24.16b // 64 - frac 3326*c0909341SAndroid Build Coastguard Worker 3327*c0909341SAndroid Build Coastguard Worker tbx v4.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v26.16b // left[base] 3328*c0909341SAndroid Build Coastguard Worker1: 3329*c0909341SAndroid Build Coastguard Worker mov v5.16b, v31.16b 3330*c0909341SAndroid Build Coastguard Worker mov v6.16b, v31.16b 3331*c0909341SAndroid Build Coastguard Worker tbx v5.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v27.16b // left[base+1] 3332*c0909341SAndroid Build Coastguard Worker tbx v6.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v28.16b // left[base+2] 3333*c0909341SAndroid Build Coastguard Worker 
3334*c0909341SAndroid Build Coastguard Worker umull v16.8h, v4.8b, v25.8b // left[base]*(64-frac) 3335*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v5.8b, v24.8b // + left[base+1]*frac 3336*c0909341SAndroid Build Coastguard Worker umull2 v17.8h, v4.16b, v25.16b 3337*c0909341SAndroid Build Coastguard Worker umlal2 v17.8h, v5.16b, v24.16b 3338*c0909341SAndroid Build Coastguard Worker umull v18.8h, v5.8b, v25.8b 3339*c0909341SAndroid Build Coastguard Worker umlal v18.8h, v6.8b, v24.8b 3340*c0909341SAndroid Build Coastguard Worker umull2 v19.8h, v5.16b, v25.16b 3341*c0909341SAndroid Build Coastguard Worker umlal2 v19.8h, v6.16b, v24.16b 3342*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 3343*c0909341SAndroid Build Coastguard Worker rshrn2 v16.16b, v17.8h, #6 3344*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v18.8h, #6 3345*c0909341SAndroid Build Coastguard Worker rshrn2 v17.16b, v19.8h, #6 3346*c0909341SAndroid Build Coastguard Worker st1 {v16.16b}, [x0], x1 3347*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 3348*c0909341SAndroid Build Coastguard Worker st1 {v17.16b}, [x0], x1 3349*c0909341SAndroid Build Coastguard Worker b.le 9f 3350*c0909341SAndroid Build Coastguard Worker 3351*c0909341SAndroid Build Coastguard Worker mov v4.16b, v6.16b 3352*c0909341SAndroid Build Coastguard Worker uqadd v27.16b, v27.16b, v21.16b // base += 2 3353*c0909341SAndroid Build Coastguard Worker uqadd v28.16b, v28.16b, v21.16b // base += 2 3354*c0909341SAndroid Build Coastguard Worker b 1b 3355*c0909341SAndroid Build Coastguard Worker 3356*c0909341SAndroid Build Coastguard Worker9: 3357*c0909341SAndroid Build Coastguard Worker ret 3358*c0909341SAndroid Build Coastguard Worker320: 3359*c0909341SAndroid Build Coastguard Worker640: 3360*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3361*c0909341SAndroid Build Coastguard Worker dup v28.8h, w5 // dy 3362*c0909341SAndroid Build Coastguard Worker mov w12, w3 
3363*c0909341SAndroid Build Coastguard Worker 3364*c0909341SAndroid Build Coastguard Worker add x13, x0, x1 3365*c0909341SAndroid Build Coastguard Worker 3366*c0909341SAndroid Build Coastguard Worker shl v29.8h, v28.8h, #3 // 8*dy 3367*c0909341SAndroid Build Coastguard Worker mul v30.8h, v30.8h, v28.8h // {0,1,2,3,4,5,6,7}*dy 3368*c0909341SAndroid Build Coastguard Worker movi v23.16b, #0x3e 3369*c0909341SAndroid Build Coastguard Worker 3370*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3371*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw 3372*c0909341SAndroid Build Coastguard Worker add v30.8h, v28.8h, v30.8h // ypos 3373*c0909341SAndroid Build Coastguard Worker 3374*c0909341SAndroid Build Coastguard Worker // This is only executed if we've checked that max_base_y <= 64. 3375*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2] // left[] 3376*c0909341SAndroid Build Coastguard Worker 3377*c0909341SAndroid Build Coastguard Worker movi v22.16b, #64 3378*c0909341SAndroid Build Coastguard Worker movi v20.16b, #1 3379*c0909341SAndroid Build Coastguard Worker movi v21.16b, #2 3380*c0909341SAndroid Build Coastguard Worker 3381*c0909341SAndroid Build Coastguard Worker1: 3382*c0909341SAndroid Build Coastguard Worker mov v26.16b, v30.16b // reset ypos 3383*c0909341SAndroid Build Coastguard Worker 3384*c0909341SAndroid Build Coastguard Worker2: 3385*c0909341SAndroid Build Coastguard Worker add v27.8h, v26.8h, v29.8h // ypos + 8*dy 3386*c0909341SAndroid Build Coastguard Worker uqshrn v16.8b, v26.8h, #6 // base 3387*c0909341SAndroid Build Coastguard Worker uqshrn2 v16.16b, v27.8h, #6 3388*c0909341SAndroid Build Coastguard Worker xtn v24.8b, v26.8h // (uint8_t)ypos 3389*c0909341SAndroid Build Coastguard Worker xtn2 v24.16b, v27.8h 3390*c0909341SAndroid Build Coastguard Worker umov w14, v16.b[0] 3391*c0909341SAndroid Build Coastguard Worker and v24.16b, v24.16b, v23.16b // frac 3392*c0909341SAndroid Build Coastguard Worker 
3393*c0909341SAndroid Build Coastguard Worker uqadd v17.16b, v16.16b, v20.16b // base + 1 3394*c0909341SAndroid Build Coastguard Worker cmp w14, w6 // base >= max_base_y 3395*c0909341SAndroid Build Coastguard Worker uqadd v18.16b, v16.16b, v21.16b // base + 2 3396*c0909341SAndroid Build Coastguard Worker sub v25.16b, v22.16b, v24.16b // 64 - frac 3397*c0909341SAndroid Build Coastguard Worker 3398*c0909341SAndroid Build Coastguard Worker b.ge 4f 3399*c0909341SAndroid Build Coastguard Worker 3400*c0909341SAndroid Build Coastguard Worker mov v4.16b, v31.16b 3401*c0909341SAndroid Build Coastguard Worker mov v5.16b, v31.16b 3402*c0909341SAndroid Build Coastguard Worker mov v6.16b, v31.16b 3403*c0909341SAndroid Build Coastguard Worker tbx v4.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v16.16b // left[base] 3404*c0909341SAndroid Build Coastguard Worker tbx v5.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v17.16b // left[base+1] 3405*c0909341SAndroid Build Coastguard Worker tbx v6.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v18.16b // left[base+2] 3406*c0909341SAndroid Build Coastguard Worker 3407*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 3408*c0909341SAndroid Build Coastguard Worker umull v16.8h, v4.8b, v25.8b // left[base]*(64-frac) 3409*c0909341SAndroid Build Coastguard Worker umlal v16.8h, v5.8b, v24.8b // + left[base+1]*frac 3410*c0909341SAndroid Build Coastguard Worker umull2 v17.8h, v4.16b, v25.16b 3411*c0909341SAndroid Build Coastguard Worker umlal2 v17.8h, v5.16b, v24.16b 3412*c0909341SAndroid Build Coastguard Worker umull v18.8h, v5.8b, v25.8b 3413*c0909341SAndroid Build Coastguard Worker umlal v18.8h, v6.8b, v24.8b 3414*c0909341SAndroid Build Coastguard Worker umull2 v19.8h, v5.16b, v25.16b 3415*c0909341SAndroid Build Coastguard Worker umlal2 v19.8h, v6.16b, v24.16b 3416*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v16.8h, #6 3417*c0909341SAndroid Build Coastguard Worker rshrn2 v16.16b, v17.8h, #6 3418*c0909341SAndroid Build Coastguard Worker rshrn 
v17.8b, v18.8h, #6 3419*c0909341SAndroid Build Coastguard Worker rshrn2 v17.16b, v19.8h, #6 3420*c0909341SAndroid Build Coastguard Worker st1 {v16.16b}, [x0], #16 3421*c0909341SAndroid Build Coastguard Worker st1 {v17.16b}, [x13], #16 3422*c0909341SAndroid Build Coastguard Worker b.le 3f 3423*c0909341SAndroid Build Coastguard Worker add v26.8h, v27.8h, v29.8h // ypos += 16*dy 3424*c0909341SAndroid Build Coastguard Worker b 2b 3425*c0909341SAndroid Build Coastguard Worker 3426*c0909341SAndroid Build Coastguard Worker3: 3427*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 3428*c0909341SAndroid Build Coastguard Worker b.le 9f 3429*c0909341SAndroid Build Coastguard Worker movi v16.8h, #128 3430*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 3431*c0909341SAndroid Build Coastguard Worker add x13, x13, x1 3432*c0909341SAndroid Build Coastguard Worker add v30.8h, v30.8h, v16.8h // ypos = dy + y*(1<<6)*2 3433*c0909341SAndroid Build Coastguard Worker mov w3, w12 3434*c0909341SAndroid Build Coastguard Worker b 1b 3435*c0909341SAndroid Build Coastguard Worker 3436*c0909341SAndroid Build Coastguard Worker4: 3437*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 3438*c0909341SAndroid Build Coastguard Worker st1 {v31.16b}, [x0], #16 3439*c0909341SAndroid Build Coastguard Worker st1 {v31.16b}, [x13], #16 3440*c0909341SAndroid Build Coastguard Worker b.gt 4b 3441*c0909341SAndroid Build Coastguard Worker b 3b 3442*c0909341SAndroid Build Coastguard Worker 3443*c0909341SAndroid Build Coastguard Worker9: 3444*c0909341SAndroid Build Coastguard Worker ret 3445*c0909341SAndroid Build Coastguard Worker 3446*c0909341SAndroid Build Coastguard WorkerL(ipred_z3_fill1_large_h16): 3447*c0909341SAndroid Build Coastguard Worker // Fallback case for max_base_y > 64; similar to the z1 3448*c0909341SAndroid Build Coastguard Worker // implementation. This does the filtering vertically, filling out 3449*c0909341SAndroid Build Coastguard Worker // a 2x pixel column at a time. 
3450*c0909341SAndroid Build Coastguard Worker mov w15, #64 3451*c0909341SAndroid Build Coastguard Worker add x13, x0, x1 3452*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3453*c0909341SAndroid Build Coastguard Worker 3454*c0909341SAndroid Build Coastguard Worker mov w12, w4 3455*c0909341SAndroid Build Coastguard Worker1: 3456*c0909341SAndroid Build Coastguard Worker lsr w8, w7, #6 // base 3457*c0909341SAndroid Build Coastguard Worker and w9, w7, #0x3e // frac 3458*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // ypos += dy 3459*c0909341SAndroid Build Coastguard Worker cmp w8, w6 // base >= max_base_y 3460*c0909341SAndroid Build Coastguard Worker lsr w10, w7, #6 // base 3461*c0909341SAndroid Build Coastguard Worker and w11, w7, #0x3e // frac 3462*c0909341SAndroid Build Coastguard Worker b.ge ipred_z3_fill_padding_neon 3463*c0909341SAndroid Build Coastguard Worker add x8, x2, w8, uxtw 3464*c0909341SAndroid Build Coastguard Worker add x10, x2, w10, uxtw 3465*c0909341SAndroid Build Coastguard Worker dup v4.16b, w9 // frac 3466*c0909341SAndroid Build Coastguard Worker dup v5.16b, w11 3467*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b}, [x8], #32 // left[base] 3468*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b, v3.16b}, [x10], #32 3469*c0909341SAndroid Build Coastguard Worker sub w9, w15, w9 // 64 - frac 3470*c0909341SAndroid Build Coastguard Worker sub w11, w15, w11 3471*c0909341SAndroid Build Coastguard Worker dup v6.16b, w9 // 64 - frac 3472*c0909341SAndroid Build Coastguard Worker dup v7.16b, w11 3473*c0909341SAndroid Build Coastguard Worker add w7, w7, w5 // ypos += dy 3474*c0909341SAndroid Build Coastguard Worker2: 3475*c0909341SAndroid Build Coastguard Worker ext v16.16b, v0.16b, v1.16b, #1 // left[base+1] 3476*c0909341SAndroid Build Coastguard Worker ext v17.16b, v2.16b, v3.16b, #1 3477*c0909341SAndroid Build Coastguard Worker subs w4, w4, #16 3478*c0909341SAndroid Build Coastguard Worker umull v18.8h, v16.8b, v4.8b // 
left[base+1]*frac 3479*c0909341SAndroid Build Coastguard Worker umlal v18.8h, v0.8b, v6.8b // + left[base]*(64-frac) 3480*c0909341SAndroid Build Coastguard Worker umull2 v19.8h, v16.16b, v4.16b 3481*c0909341SAndroid Build Coastguard Worker umlal2 v19.8h, v0.16b, v6.16b 3482*c0909341SAndroid Build Coastguard Worker umull v20.8h, v17.8b, v5.8b 3483*c0909341SAndroid Build Coastguard Worker umlal v20.8h, v2.8b, v7.8b 3484*c0909341SAndroid Build Coastguard Worker umull2 v21.8h, v17.16b, v5.16b 3485*c0909341SAndroid Build Coastguard Worker umlal2 v21.8h, v2.16b, v7.16b 3486*c0909341SAndroid Build Coastguard Worker rshrn v16.8b, v18.8h, #6 3487*c0909341SAndroid Build Coastguard Worker rshrn2 v16.16b, v19.8h, #6 3488*c0909341SAndroid Build Coastguard Worker rshrn v17.8b, v20.8h, #6 3489*c0909341SAndroid Build Coastguard Worker rshrn2 v17.16b, v21.8h, #6 3490*c0909341SAndroid Build Coastguard Worker zip1 v18.16b, v16.16b, v17.16b 3491*c0909341SAndroid Build Coastguard Worker zip2 v19.16b, v16.16b, v17.16b 3492*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[0], [x0], x1 3493*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[1], [x13], x1 3494*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[2], [x0], x1 3495*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[3], [x13], x1 3496*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[4], [x0], x1 3497*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[5], [x13], x1 3498*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[6], [x0], x1 3499*c0909341SAndroid Build Coastguard Worker st1 {v18.h}[7], [x13], x1 3500*c0909341SAndroid Build Coastguard Worker st1 {v19.h}[0], [x0], x1 3501*c0909341SAndroid Build Coastguard Worker st1 {v19.h}[1], [x13], x1 3502*c0909341SAndroid Build Coastguard Worker st1 {v19.h}[2], [x0], x1 3503*c0909341SAndroid Build Coastguard Worker st1 {v19.h}[3], [x13], x1 3504*c0909341SAndroid Build Coastguard Worker st1 {v19.h}[4], [x0], x1 3505*c0909341SAndroid Build Coastguard Worker st1 
{v19.h}[5], [x13], x1 3506*c0909341SAndroid Build Coastguard Worker st1 {v19.h}[6], [x0], x1 3507*c0909341SAndroid Build Coastguard Worker st1 {v19.h}[7], [x13], x1 3508*c0909341SAndroid Build Coastguard Worker b.le 3f 3509*c0909341SAndroid Build Coastguard Worker mov v0.16b, v1.16b 3510*c0909341SAndroid Build Coastguard Worker ld1 {v1.16b}, [x8], #16 // left[base] 3511*c0909341SAndroid Build Coastguard Worker mov v2.16b, v3.16b 3512*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [x10], #16 3513*c0909341SAndroid Build Coastguard Worker b 2b 3514*c0909341SAndroid Build Coastguard Worker 3515*c0909341SAndroid Build Coastguard Worker3: 3516*c0909341SAndroid Build Coastguard Worker subs w3, w3, #2 3517*c0909341SAndroid Build Coastguard Worker b.le 9f 3518*c0909341SAndroid Build Coastguard Worker lsr x1, x1, #1 3519*c0909341SAndroid Build Coastguard Worker msub x0, x1, x12, x0 // ptr -= h * stride 3520*c0909341SAndroid Build Coastguard Worker msub x13, x1, x12, x13 3521*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3522*c0909341SAndroid Build Coastguard Worker add x0, x0, #2 3523*c0909341SAndroid Build Coastguard Worker add x13, x13, #2 3524*c0909341SAndroid Build Coastguard Worker mov w4, w12 3525*c0909341SAndroid Build Coastguard Worker b 1b 3526*c0909341SAndroid Build Coastguard Worker9: 3527*c0909341SAndroid Build Coastguard Worker ret 3528*c0909341SAndroid Build Coastguard Workerendfunc 3529*c0909341SAndroid Build Coastguard Worker 3530*c0909341SAndroid Build Coastguard Workerjumptable ipred_z3_fill1_tbl 3531*c0909341SAndroid Build Coastguard Worker .word 640b - ipred_z3_fill1_tbl 3532*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_z3_fill1_tbl 3533*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_z3_fill1_tbl 3534*c0909341SAndroid Build Coastguard Worker .word 80b - ipred_z3_fill1_tbl 3535*c0909341SAndroid Build Coastguard Worker .word 40b - ipred_z3_fill1_tbl 3536*c0909341SAndroid Build Coastguard Workerendjumptable 

// Fill the remaining part of a block with the padding pixel held in v31.
// Reached by a branch rather than a call; register state on entry, as used
// by the code below:
//   x0, x13   two destination pointers that are stored to alternately
//   x1        post-increment applied after every store (halved again with
//             lsr before it is multiplied by the height, i.e. it holds
//             twice the row stride on entry)
//   w3        remaining width in pixels
//   w4        height in rows
//   v31       padding byte replicated across the vector
// Widths above 16 are handed off to ipred_z3_fill_padding_wide.
function ipred_z3_fill_padding_neon, export=0
        cmp             w3,  #16
        movrel          x8,  ipred_z3_fill_padding_tbl
        b.gt            ipred_z3_fill_padding_wide
        // w3 = remaining width, w4 = constant height
        mov             w12, w4

1:
        // Fill a WxH rectangle with padding. W can be any number;
        // this fills the exact width by filling in the largest
        // power of two in the remaining width, and repeating.
        // Table index = clz(w3) - 25: 0 for w3 in 64..127, up to 5 for
        // w3 in 2..3.
        // NOTE(review): w3 == 1 would give index 6, one past the six table
        // entries - callers appear to keep the width even; confirm.
        clz             w9,  w3
        sub             w9,  w9,  #25
        ldrsw           x9,  [x8, w9, uxtw #2]
        add             x9,  x8,  x9
        br              x9

20:
        AARCH64_VALID_JUMP_TARGET
2:
        // 2 pixels wide, four rows per iteration (two through each pointer).
        st1             {v31.h}[0], [x0],  x1
        subs            w4,  w4,  #4
        st1             {v31.h}[0], [x13], x1
        st1             {v31.h}[0], [x0],  x1
        st1             {v31.h}[0], [x13], x1
        b.gt            2b
        // The flags set by this subs are consumed by the b.le below;
        // lsr and msub leave them untouched.
        subs            w3,  w3,  #2
        lsr             x1,  x1,  #1
        msub            x0,  x1,  x12, x0         // ptr -= h * stride
        msub            x13, x1,  x12, x13
        b.le            9f
        lsl             x1,  x1,  #1
        add             x0,  x0,  #2
        add             x13, x13, #2
        mov             w4,  w12
        b               1b

40:
        AARCH64_VALID_JUMP_TARGET
4:
        // 4 pixels wide, four rows per iteration.
        st1             {v31.s}[0], [x0],  x1
        subs            w4,  w4,  #4
        st1             {v31.s}[0], [x13], x1
        st1             {v31.s}[0], [x0],  x1
        st1             {v31.s}[0], [x13], x1
        b.gt            4b
        subs            w3,  w3,  #4
        lsr             x1,  x1,  #1
        msub            x0,  x1,  x12, x0         // ptr -= h * stride
        msub            x13, x1,  x12, x13
        b.le            9f
        lsl             x1,  x1,  #1
        add             x0,  x0,  #4
        add             x13, x13, #4
        mov             w4,  w12
        b               1b

80:
        AARCH64_VALID_JUMP_TARGET
8:
        // 8 pixels wide, four rows per iteration.
        st1             {v31.8b}, [x0],  x1
        subs            w4,  w4,  #4
        st1             {v31.8b}, [x13], x1
        st1             {v31.8b}, [x0],  x1
        st1             {v31.8b}, [x13], x1
        b.gt            8b
        subs            w3,  w3,  #8
        lsr             x1,  x1,  #1
        msub            x0,  x1,  x12, x0         // ptr -= h * stride
        msub            x13, x1,  x12, x13
        b.le            9f
        lsl             x1,  x1,  #1
        add             x0,  x0,  #8
        add             x13, x13, #8
        mov             w4,  w12
        b               1b

160:
320:
640:
        AARCH64_VALID_JUMP_TARGET
16:
        // 16 pixels wide, four rows per iteration. The 320/640 labels share
        // this code so that every table slot has a valid target.
        st1             {v31.16b}, [x0],  x1
        subs            w4,  w4,  #4
        st1             {v31.16b}, [x13], x1
        st1             {v31.16b}, [x0],  x1
        st1             {v31.16b}, [x13], x1
        b.gt            16b
        subs            w3,  w3,  #16
        lsr             x1,  x1,  #1
        msub            x0,  x1,  x12, x0         // ptr -= h * stride
        msub            x13, x1,  x12, x13
        b.le            9f
        lsl             x1,  x1,  #1
        add             x0,  x0,  #16
        add             x13, x13, #16
        mov             w4,  w12
        b               1b

9:
        ret
endfunc

// Offsets of the per-width fill loops above, relative to the table itself;
// ordered from the widest entry (index 0) to the narrowest (index 5).
jumptable ipred_z3_fill_padding_tbl
        .word 640b - ipred_z3_fill_padding_tbl
        .word 320b - ipred_z3_fill_padding_tbl
        .word 160b - ipred_z3_fill_padding_tbl
        .word 80b  - ipred_z3_fill_padding_tbl
        .word 40b  - ipred_z3_fill_padding_tbl
        .word 20b  - ipred_z3_fill_padding_tbl
endjumptable

// Row-by-row variant of the padding fill, branched to from
// ipred_z3_fill_padding_neon with the same register state.
// Only x0 is written through here; x13 is not used.
function ipred_z3_fill_padding_wide
        // Fill a WxH rectangle with padding, with W > 16.
        lsr             x1,  x1,  #1              // back to a single row stride
        mov             w12, w3                   // keep the width for each new row
        sub             x1,  x1,  w3,  uxtw       // stride - width: end of row -> next row
1:
        ands            w5,  w3,  #15
        b.eq            2f
        // If the width isn't aligned to 16, first do one 16 byte write
        // and align the start pointer.
        sub             w3,  w3,  w5
        st1             {v31.16b}, [x0]
        add             x0,  x0,  w5,  uxtw
2:
        // Fill the rest of the line with aligned 16 byte writes.
        subs            w3,  w3,  #16
        st1             {v31.16b}, [x0], #16
        b.gt            2b
        subs            w4,  w4,  #1
        add             x0,  x0,  x1
        b.le            9f
        mov             w3,  w12
        b               1b
9:
        ret
endfunc

// Z3 directional prediction fill for widths 4 and 8, producing two output
// rows per loop iteration with the per-row base advancing by 2.
// Register usage, as established by the code and its comments:
//   x0   destination pointer (post-incremented by x1 after each row)
//   x1   row stride
//   x2   left[] edge pixels
//   w3   width; 8 selects the 80: path, anything else falls into 40:
//   w4   height, consumed two rows at a time
//   w5   dy
//   w6   max_base_y; left[max_base_y] is broadcast into v31 as padding
// Only 32 bytes of left[] are loaded (v0-v1); every lookup index of 32 or
// more must therefore resolve to the padding value in v31.
function ipred_z3_fill2_8bpc_neon, export=1
        cmp             w3,  #8
        add             x10, x2,  w6,  uxtw       // left[max_base_y]
        movrel          x11, increments
        ld1r            {v31.16b}, [x10]          // padding
        ld1             {v30.8h},  [x11]          // increments
        b.eq            80f

40:     // w == 4
        dup             v29.4h,  w5               // dy

        mul             v30.4h,  v30.4h,  v29.4h  // {0,1,2,3,4,5,6,7}*dy
        movi            v23.16b, #0x3e

        // Worst case max_base_y is 2*(width+height)-2, but width+height <= 16,
        // so max_base_y <= 32.
        ld1             {v0.16b, v1.16b}, [x2]    // left[]
        add             v30.4h,  v29.4h,  v30.4h  // ypos

        movi            v22.16b, #64
        movi            v20.16b, #1
        movi            v21.16b, #2

        xtn             v24.8b,  v30.8h           // (uint8_t)ypos
        uqshrn          v26.8b,  v30.8h,  #6      // base
        and             v24.8b,  v24.8b,  v23.8b  // frac

        uqadd           v27.8b,  v26.8b,  v20.8b  // base + 1
        uqadd           v28.8b,  v26.8b,  v21.8b  // base + 2
        sub             v25.8b,  v22.8b,  v24.8b  // 64 - frac
        uqadd           v29.8b,  v27.8b,  v21.8b  // base + 3

        // Pack two rows into one 8 byte vector: low 4 bytes = first row,
        // high 4 bytes = second row.
        trn1            v24.2s,  v24.2s,  v24.2s  // frac
        trn1            v26.2s,  v26.2s,  v28.2s  // base + 0, base + 2
        trn1            v27.2s,  v27.2s,  v29.2s  // base + 1, base + 3
        trn1            v25.2s,  v25.2s,  v25.2s  // 64 - frac

        movi            v21.16b, #4
1:
        // Preload the padding; tbx leaves it in place for any index >= 32.
        mov             v4.8b,   v31.8b
        mov             v5.8b,   v31.8b
        tbx             v4.8b, {v0.16b, v1.16b}, v26.8b // left[base], left[base+2]
        tbx             v5.8b, {v0.16b, v1.16b}, v27.8b // left[base+1], left[base+3]

        umull           v16.8h,  v4.8b,   v25.8b  // left[base]*(64-frac)
        umlal           v16.8h,  v5.8b,   v24.8b  // + left[base+1]*frac
        rshrn           v16.8b,  v16.8h,  #6
        st1             {v16.s}[0], [x0], x1
        subs            w4,  w4,  #2
        st1             {v16.s}[1], [x0], x1
        b.le            9f

        uqadd           v26.8b,  v26.8b,  v21.8b  // base += 4
        uqadd           v27.8b,  v27.8b,  v21.8b  // base += 4
        b               1b

9:
        ret

80:     // w == 8
        dup             v29.8h,  w5               // dy

        mul             v30.8h,  v30.8h,  v29.8h  // {0,1,2,3,4,5,6,7}*dy
        movi            v23.16b, #0x3e

        // Worst case max_base_y is 2*(width+height)-2, but width+height <= 16,
        // so max_base_y <= 32.
        ld1             {v0.16b, v1.16b}, [x2]    // left[]
        add             v30.8h,  v29.8h,  v30.8h  // ypos

        movi            v22.16b, #64
        movi            v20.16b, #1
        movi            v21.16b, #2

        xtn             v24.8b,  v30.8h           // (uint8_t)ypos
        uqshrn          v26.8b,  v30.8h,  #6      // base
        and             v24.8b,  v24.8b,  v23.8b  // frac

        uqadd           v27.8b,  v26.8b,  v20.8b  // base + 1
        uqadd           v28.8b,  v26.8b,  v21.8b  // base + 2
        sub             v25.8b,  v22.8b,  v24.8b  // 64 - frac
        uqadd           v29.8b,  v27.8b,  v21.8b  // base + 3

        // Pack two rows into one 16 byte vector: low 8 bytes = first row,
        // high 8 bytes = second row.
        trn1            v24.2d,  v24.2d,  v24.2d  // frac
        trn1            v26.2d,  v26.2d,  v28.2d  // base + 0, base + 2
        trn1            v27.2d,  v27.2d,  v29.2d  // base + 1, base + 3
        trn1            v25.2d,  v25.2d,  v25.2d  // 64 - frac

        movi            v21.16b, #4
1:
        mov             v4.16b,  v31.16b
        mov             v5.16b,  v31.16b
        // The table is exactly the two registers loaded above. With a
        // four-register table, indices 32..63 would fetch bytes from v2/v3,
        // which this function never initialises, instead of keeping the
        // padding preloaded from v31.
        tbx             v4.16b, {v0.16b, v1.16b}, v26.16b // left[base], left[base+2]
        tbx             v5.16b, {v0.16b, v1.16b}, v27.16b // left[base+1], left[base+3]

        umull           v16.8h,  v4.8b,   v25.8b  // left[base]*(64-frac)
        umlal           v16.8h,  v5.8b,   v24.8b  // + left[base+1]*frac
        umull2          v17.8h,  v4.16b,  v25.16b
        umlal2          v17.8h,  v5.16b,  v24.16b
        rshrn           v16.8b,  v16.8h,  #6
        rshrn           v17.8b,  v17.8h,  #6
        st1             {v16.8b}, [x0], x1
        subs            w4,  w4,  #2
        st1             {v17.8b}, [x0], x1
        b.le            9f

        uqadd           v26.16b, v26.16b, v21.16b // base += 4
        uqadd           v27.16b, v27.16b, v21.16b // base += 4
        b               1b

9:
        ret
endfunc


// void ipred_filter_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                             const pixel *const topleft,
//                             const int width, const int height, const int filt_idx,
//                             const int max_width, const int max_height);
function ipred_filter_8bpc_neon, export=1
        and             w5,  w5,  #511
        movrel          x6,
X(filter_intra_taps) 3799*c0909341SAndroid Build Coastguard Worker lsl w5, w5, #6 3800*c0909341SAndroid Build Coastguard Worker add x6, x6, w5, uxtw 3801*c0909341SAndroid Build Coastguard Worker ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [x6], #32 3802*c0909341SAndroid Build Coastguard Worker clz w9, w3 3803*c0909341SAndroid Build Coastguard Worker movrel x5, ipred_filter_tbl 3804*c0909341SAndroid Build Coastguard Worker ld1 {v20.8b, v21.8b, v22.8b}, [x6] 3805*c0909341SAndroid Build Coastguard Worker sub w9, w9, #26 3806*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x5, w9, uxtw #2] 3807*c0909341SAndroid Build Coastguard Worker sxtl v16.8h, v16.8b 3808*c0909341SAndroid Build Coastguard Worker sxtl v17.8h, v17.8b 3809*c0909341SAndroid Build Coastguard Worker add x5, x5, x9 3810*c0909341SAndroid Build Coastguard Worker sxtl v18.8h, v18.8b 3811*c0909341SAndroid Build Coastguard Worker sxtl v19.8h, v19.8b 3812*c0909341SAndroid Build Coastguard Worker add x6, x0, x1 3813*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 3814*c0909341SAndroid Build Coastguard Worker sxtl v20.8h, v20.8b 3815*c0909341SAndroid Build Coastguard Worker sxtl v21.8h, v21.8b 3816*c0909341SAndroid Build Coastguard Worker sxtl v22.8h, v22.8b 3817*c0909341SAndroid Build Coastguard Worker br x5 3818*c0909341SAndroid Build Coastguard Worker40: 3819*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3820*c0909341SAndroid Build Coastguard Worker ldur s0, [x2, #1] // top (0-3) 3821*c0909341SAndroid Build Coastguard Worker sub x2, x2, #2 3822*c0909341SAndroid Build Coastguard Worker mov x7, #-2 3823*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v0.8b // top (0-3) 3824*c0909341SAndroid Build Coastguard Worker4: 3825*c0909341SAndroid Build Coastguard Worker ld1 {v1.s}[0], [x2], x7 // left (0-1) + topleft (2) 3826*c0909341SAndroid Build Coastguard Worker mul v2.8h, v17.8h, v0.h[0] // p1(top[0]) * filter(1) 3827*c0909341SAndroid Build Coastguard Worker mla v2.8h, v18.8h, v0.h[1] 
// p2(top[1]) * filter(2) 3828*c0909341SAndroid Build Coastguard Worker mla v2.8h, v19.8h, v0.h[2] // p3(top[2]) * filter(3) 3829*c0909341SAndroid Build Coastguard Worker uxtl v1.8h, v1.8b // left (0-1) + topleft (2) 3830*c0909341SAndroid Build Coastguard Worker mla v2.8h, v20.8h, v0.h[3] // p4(top[3]) * filter(4) 3831*c0909341SAndroid Build Coastguard Worker mla v2.8h, v16.8h, v1.h[2] // p0(topleft) * filter(0) 3832*c0909341SAndroid Build Coastguard Worker mla v2.8h, v21.8h, v1.h[1] // p5(left[0]) * filter(5) 3833*c0909341SAndroid Build Coastguard Worker mla v2.8h, v22.8h, v1.h[0] // p6(left[1]) * filter(6) 3834*c0909341SAndroid Build Coastguard Worker sqrshrun v2.8b, v2.8h, #4 3835*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 3836*c0909341SAndroid Build Coastguard Worker st1 {v2.s}[0], [x0], x1 3837*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v2.8b 3838*c0909341SAndroid Build Coastguard Worker st1 {v2.s}[1], [x6], x1 3839*c0909341SAndroid Build Coastguard Worker ext v0.16b, v0.16b, v0.16b, #8 // move top from [4-7] to [0-3] 3840*c0909341SAndroid Build Coastguard Worker b.gt 4b 3841*c0909341SAndroid Build Coastguard Worker ret 3842*c0909341SAndroid Build Coastguard Worker80: 3843*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3844*c0909341SAndroid Build Coastguard Worker ldur d0, [x2, #1] // top (0-7) 3845*c0909341SAndroid Build Coastguard Worker sub x2, x2, #2 3846*c0909341SAndroid Build Coastguard Worker mov x7, #-2 3847*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v0.8b // top (0-7) 3848*c0909341SAndroid Build Coastguard Worker8: 3849*c0909341SAndroid Build Coastguard Worker ld1 {v1.s}[0], [x2], x7 // left (0-1) + topleft (2) 3850*c0909341SAndroid Build Coastguard Worker mul v2.8h, v17.8h, v0.h[0] // p1(top[0]) * filter(1) 3851*c0909341SAndroid Build Coastguard Worker mla v2.8h, v18.8h, v0.h[1] // p2(top[1]) * filter(2) 3852*c0909341SAndroid Build Coastguard Worker mla v2.8h, v19.8h, v0.h[2] // p3(top[2]) * 
filter(3) 3853*c0909341SAndroid Build Coastguard Worker uxtl v1.8h, v1.8b // left (0-1) + topleft (2) 3854*c0909341SAndroid Build Coastguard Worker mla v2.8h, v20.8h, v0.h[3] // p4(top[3]) * filter(4) 3855*c0909341SAndroid Build Coastguard Worker mla v2.8h, v16.8h, v1.h[2] // p0(topleft) * filter(0) 3856*c0909341SAndroid Build Coastguard Worker mla v2.8h, v21.8h, v1.h[1] // p5(left[0]) * filter(5) 3857*c0909341SAndroid Build Coastguard Worker mla v2.8h, v22.8h, v1.h[0] // p6(left[1]) * filter(6) 3858*c0909341SAndroid Build Coastguard Worker mul v3.8h, v17.8h, v0.h[4] // p1(top[0]) * filter(1) 3859*c0909341SAndroid Build Coastguard Worker mla v3.8h, v18.8h, v0.h[5] // p2(top[1]) * filter(2) 3860*c0909341SAndroid Build Coastguard Worker mla v3.8h, v19.8h, v0.h[6] // p3(top[2]) * filter(3) 3861*c0909341SAndroid Build Coastguard Worker sqrshrun v2.8b, v2.8h, #4 3862*c0909341SAndroid Build Coastguard Worker uxtl v1.8h, v2.8b // first block, in 16 bit 3863*c0909341SAndroid Build Coastguard Worker mla v3.8h, v20.8h, v0.h[7] // p4(top[3]) * filter(4) 3864*c0909341SAndroid Build Coastguard Worker mla v3.8h, v16.8h, v0.h[3] // p0(topleft) * filter(0) 3865*c0909341SAndroid Build Coastguard Worker mla v3.8h, v21.8h, v1.h[3] // p5(left[0]) * filter(5) 3866*c0909341SAndroid Build Coastguard Worker mla v3.8h, v22.8h, v1.h[7] // p6(left[1]) * filter(6) 3867*c0909341SAndroid Build Coastguard Worker sqrshrun v3.8b, v3.8h, #4 3868*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 3869*c0909341SAndroid Build Coastguard Worker st2 {v2.s, v3.s}[0], [x0], x1 3870*c0909341SAndroid Build Coastguard Worker zip2 v0.2s, v2.2s, v3.2s 3871*c0909341SAndroid Build Coastguard Worker st2 {v2.s, v3.s}[1], [x6], x1 3872*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v0.8b 3873*c0909341SAndroid Build Coastguard Worker b.gt 8b 3874*c0909341SAndroid Build Coastguard Worker ret 3875*c0909341SAndroid Build Coastguard Worker160: 3876*c0909341SAndroid Build Coastguard Worker320: 
3877*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 3878*c0909341SAndroid Build Coastguard Worker add x8, x2, #1 3879*c0909341SAndroid Build Coastguard Worker sub x2, x2, #2 3880*c0909341SAndroid Build Coastguard Worker mov x7, #-2 3881*c0909341SAndroid Build Coastguard Worker sub x1, x1, w3, uxtw 3882*c0909341SAndroid Build Coastguard Worker mov w9, w3 3883*c0909341SAndroid Build Coastguard Worker 3884*c0909341SAndroid Build Coastguard Worker1: 3885*c0909341SAndroid Build Coastguard Worker ld1 {v0.s}[0], [x2], x7 // left (0-1) + topleft (2) 3886*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v0.8b // left (0-1) + topleft (2) 3887*c0909341SAndroid Build Coastguard Worker2: 3888*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b}, [x8], #16 // top(0-15) 3889*c0909341SAndroid Build Coastguard Worker mul v3.8h, v16.8h, v0.h[2] // p0(topleft) * filter(0) 3890*c0909341SAndroid Build Coastguard Worker mla v3.8h, v21.8h, v0.h[1] // p5(left[0]) * filter(5) 3891*c0909341SAndroid Build Coastguard Worker uxtl v1.8h, v2.8b // top(0-7) 3892*c0909341SAndroid Build Coastguard Worker uxtl2 v2.8h, v2.16b // top(8-15) 3893*c0909341SAndroid Build Coastguard Worker mla v3.8h, v22.8h, v0.h[0] // p6(left[1]) * filter(6) 3894*c0909341SAndroid Build Coastguard Worker mla v3.8h, v17.8h, v1.h[0] // p1(top[0]) * filter(1) 3895*c0909341SAndroid Build Coastguard Worker mla v3.8h, v18.8h, v1.h[1] // p2(top[1]) * filter(2) 3896*c0909341SAndroid Build Coastguard Worker mla v3.8h, v19.8h, v1.h[2] // p3(top[2]) * filter(3) 3897*c0909341SAndroid Build Coastguard Worker mla v3.8h, v20.8h, v1.h[3] // p4(top[3]) * filter(4) 3898*c0909341SAndroid Build Coastguard Worker 3899*c0909341SAndroid Build Coastguard Worker mul v4.8h, v17.8h, v1.h[4] // p1(top[0]) * filter(1) 3900*c0909341SAndroid Build Coastguard Worker mla v4.8h, v18.8h, v1.h[5] // p2(top[1]) * filter(2) 3901*c0909341SAndroid Build Coastguard Worker mla v4.8h, v19.8h, v1.h[6] // p3(top[2]) * filter(3) 
3902*c0909341SAndroid Build Coastguard Worker sqrshrun v3.8b, v3.8h, #4 3903*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v3.8b // first block, in 16 bit 3904*c0909341SAndroid Build Coastguard Worker mla v4.8h, v20.8h, v1.h[7] // p4(top[3]) * filter(4) 3905*c0909341SAndroid Build Coastguard Worker mla v4.8h, v16.8h, v1.h[3] // p0(topleft) * filter(0) 3906*c0909341SAndroid Build Coastguard Worker mla v4.8h, v21.8h, v0.h[3] // p5(left[0]) * filter(5) 3907*c0909341SAndroid Build Coastguard Worker mla v4.8h, v22.8h, v0.h[7] // p6(left[1]) * filter(6) 3908*c0909341SAndroid Build Coastguard Worker 3909*c0909341SAndroid Build Coastguard Worker mul v5.8h, v17.8h, v2.h[0] // p1(top[0]) * filter(1) 3910*c0909341SAndroid Build Coastguard Worker mla v5.8h, v18.8h, v2.h[1] // p2(top[1]) * filter(2) 3911*c0909341SAndroid Build Coastguard Worker mla v5.8h, v19.8h, v2.h[2] // p3(top[2]) * filter(3) 3912*c0909341SAndroid Build Coastguard Worker sqrshrun v4.8b, v4.8h, #4 3913*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v4.8b // second block, in 16 bit 3914*c0909341SAndroid Build Coastguard Worker mla v5.8h, v20.8h, v2.h[3] // p4(top[3]) * filter(4) 3915*c0909341SAndroid Build Coastguard Worker mla v5.8h, v16.8h, v1.h[7] // p0(topleft) * filter(0) 3916*c0909341SAndroid Build Coastguard Worker mla v5.8h, v21.8h, v0.h[3] // p5(left[0]) * filter(5) 3917*c0909341SAndroid Build Coastguard Worker mla v5.8h, v22.8h, v0.h[7] // p6(left[1]) * filter(6) 3918*c0909341SAndroid Build Coastguard Worker 3919*c0909341SAndroid Build Coastguard Worker mul v6.8h, v17.8h, v2.h[4] // p1(top[0]) * filter(1) 3920*c0909341SAndroid Build Coastguard Worker mla v6.8h, v18.8h, v2.h[5] // p2(top[1]) * filter(2) 3921*c0909341SAndroid Build Coastguard Worker mla v6.8h, v19.8h, v2.h[6] // p3(top[2]) * filter(3) 3922*c0909341SAndroid Build Coastguard Worker sqrshrun v5.8b, v5.8h, #4 3923*c0909341SAndroid Build Coastguard Worker uxtl v0.8h, v5.8b // third block, in 16 bit 3924*c0909341SAndroid Build 
Coastguard Worker mla v6.8h, v20.8h, v2.h[7] // p4(top[3]) * filter(4) 3925*c0909341SAndroid Build Coastguard Worker mla v6.8h, v16.8h, v2.h[3] // p0(topleft) * filter(0) 3926*c0909341SAndroid Build Coastguard Worker mla v6.8h, v21.8h, v0.h[3] // p5(left[0]) * filter(5) 3927*c0909341SAndroid Build Coastguard Worker mla v6.8h, v22.8h, v0.h[7] // p6(left[1]) * filter(6) 3928*c0909341SAndroid Build Coastguard Worker 3929*c0909341SAndroid Build Coastguard Worker subs w3, w3, #16 3930*c0909341SAndroid Build Coastguard Worker sqrshrun v6.8b, v6.8h, #4 3931*c0909341SAndroid Build Coastguard Worker 3932*c0909341SAndroid Build Coastguard Worker st4 {v3.s, v4.s, v5.s, v6.s}[0], [x0], #16 3933*c0909341SAndroid Build Coastguard Worker st4 {v3.s, v4.s, v5.s, v6.s}[1], [x6], #16 3934*c0909341SAndroid Build Coastguard Worker b.le 8f 3935*c0909341SAndroid Build Coastguard Worker ins v0.h[2], v2.h[7] 3936*c0909341SAndroid Build Coastguard Worker ins v0.b[0], v6.b[7] 3937*c0909341SAndroid Build Coastguard Worker ins v0.b[2], v6.b[3] 3938*c0909341SAndroid Build Coastguard Worker b 2b 3939*c0909341SAndroid Build Coastguard Worker8: 3940*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 3941*c0909341SAndroid Build Coastguard Worker b.le 9f 3942*c0909341SAndroid Build Coastguard Worker sub x8, x6, w9, uxtw 3943*c0909341SAndroid Build Coastguard Worker add x0, x0, x1 3944*c0909341SAndroid Build Coastguard Worker add x6, x6, x1 3945*c0909341SAndroid Build Coastguard Worker mov w3, w9 3946*c0909341SAndroid Build Coastguard Worker b 1b 3947*c0909341SAndroid Build Coastguard Worker9: 3948*c0909341SAndroid Build Coastguard Worker ret 3949*c0909341SAndroid Build Coastguard Workerendfunc 3950*c0909341SAndroid Build Coastguard Worker 3951*c0909341SAndroid Build Coastguard Workerjumptable ipred_filter_tbl 3952*c0909341SAndroid Build Coastguard Worker .word 320b - ipred_filter_tbl 3953*c0909341SAndroid Build Coastguard Worker .word 160b - ipred_filter_tbl 3954*c0909341SAndroid Build 
// Tail of jumptable ipred_filter_tbl: the width-8 and width-4 entries.
        .word 80b  - ipred_filter_tbl
        .word 40b  - ipred_filter_tbl
endjumptable

// void pal_pred_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const pal, const uint8_t *idx,
//                         const int w, const int h);
//
// Palette prediction: expands packed palette indices into pixels.
//
// Register use (as read by the code below):
//   x0  = dst, even rows           x1  = stride, doubled before dispatch
//   x2  = pal on entry; reused as dst + stride (odd rows) after the load
//   x3  = idx, post-incremented    w4  = w (only used for dispatch)
//   w5  = h, counted down          v0  = 8 palette bytes (upper half zero)
//   v31 = 0x07 in every byte, mask for the low-nibble index
//
// Each idx byte carries two indices: the low nibble (masked with 7) comes
// first, the high nibble (idx >> 4) second. zip1/zip2 interleave the two so
// that tbl can translate them to palette entries in pixel order.
// NOTE(review): the high nibble is not masked; this presumably relies on the
// caller never storing an index above 7 - confirm against the C caller.
function pal_pred_8bpc_neon, export=1
        ld1             {v0.8b}, [x2]           // 8 palette entries
        clz             w9,  w4
        movrel          x6,  pal_pred_tbl
        sub             w9,  w9,  #25           // w = 64 -> 0, ..., w = 4 -> 4
        movi            v31.16b, #7
        ldrsw           x9,  [x6, w9, uxtw #2]  // signed offset from the table
        add             x6,  x6,  x9
        add             x2,  x0,  x1            // x2 = second row pointer
        lsl             x1,  x1,  #1            // both pointers step two rows
        br              x6
40:
        AARCH64_VALID_JUMP_TARGET
4:
        // 8 idx bytes -> 16 pixels -> 4 rows of 4 per iteration
        ld1             {v1.8b}, [x3], #8
        subs            w5,  w5,  #4
        ushr            v3.8b,  v1.8b,  #4      // high nibbles
        and             v2.8b,  v1.8b,  v31.8b  // low nibbles
        zip1            v1.16b, v2.16b, v3.16b  // low, high, low, high, ...
        tbl             v1.16b, {v0.16b}, v1.16b
        st1             {v1.s}[0], [x0], x1
        st1             {v1.s}[1], [x2], x1
        st1             {v1.s}[2], [x0], x1
        st1             {v1.s}[3], [x2], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        // 16 idx bytes -> 32 pixels -> 4 rows of 8 per iteration
        ld1             {v1.16b}, [x3], #16
        subs            w5,  w5,  #4
        ushr            v4.16b, v1.16b, #4
        and             v3.16b, v1.16b, v31.16b
        zip1            v1.16b, v3.16b, v4.16b
        zip2            v2.16b, v3.16b, v4.16b
        tbl             v1.16b, {v0.16b}, v1.16b
        st1             {v1.d}[0], [x0], x1
        tbl             v2.16b, {v0.16b}, v2.16b
        st1             {v1.d}[1], [x2], x1
        st1             {v2.d}[0], [x0], x1
        st1             {v2.d}[1], [x2], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
16:
        // 32 idx bytes -> 64 pixels -> 4 rows of 16 per iteration
        ld1             {v1.16b, v2.16b}, [x3], #32
        subs            w5,  w5,  #4
        ushr            v5.16b, v1.16b, #4
        and             v4.16b, v1.16b, v31.16b
        ushr            v7.16b, v2.16b, #4
        and             v6.16b, v2.16b, v31.16b
        zip1            v1.16b, v4.16b, v5.16b
        zip2            v2.16b, v4.16b, v5.16b
        zip1            v3.16b, v6.16b, v7.16b
        tbl             v1.16b, {v0.16b}, v1.16b
        zip2            v4.16b, v6.16b, v7.16b
        tbl             v2.16b, {v0.16b}, v2.16b
        st1             {v1.16b}, [x0], x1
        tbl             v3.16b, {v0.16b}, v3.16b
        st1             {v2.16b}, [x2], x1
        tbl             v4.16b, {v0.16b}, v4.16b
        st1             {v3.16b}, [x0], x1
        st1             {v4.16b}, [x2], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
32:
        // 64 idx bytes -> 128 pixels -> 4 rows of 32 per iteration
        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x3], #64
        subs            w5,  w5,  #4
        ushr            v21.16b, v16.16b, #4
        and             v20.16b, v16.16b, v31.16b
        ushr            v23.16b, v17.16b, #4
        and             v22.16b, v17.16b, v31.16b
        ushr            v25.16b, v18.16b, #4
        and             v24.16b, v18.16b, v31.16b
        ushr            v27.16b, v19.16b, #4
        and             v26.16b, v19.16b, v31.16b
        zip1            v16.16b, v20.16b, v21.16b
        zip2            v17.16b, v20.16b, v21.16b
        zip1            v18.16b, v22.16b, v23.16b
        zip2            v19.16b, v22.16b, v23.16b
        zip1            v20.16b, v24.16b, v25.16b
        zip2            v21.16b, v24.16b, v25.16b
        tbl             v16.16b, {v0.16b}, v16.16b
        zip1            v22.16b, v26.16b, v27.16b
        tbl             v17.16b, {v0.16b}, v17.16b
        zip2            v23.16b, v26.16b, v27.16b
        tbl             v18.16b, {v0.16b}, v18.16b
        tbl             v19.16b, {v0.16b}, v19.16b
        tbl             v20.16b, {v0.16b}, v20.16b
        st1             {v16.16b, v17.16b}, [x0], x1
        tbl             v21.16b, {v0.16b}, v21.16b
        st1             {v18.16b, v19.16b}, [x2], x1
        tbl             v22.16b, {v0.16b}, v22.16b
        st1             {v20.16b, v21.16b}, [x0], x1
        tbl             v23.16b, {v0.16b}, v23.16b
        st1             {v22.16b, v23.16b}, [x2], x1
        b.gt            32b
        ret
640:
        AARCH64_VALID_JUMP_TARGET
64:
        // 64 idx bytes -> 128 pixels -> 2 rows of 64 per iteration
        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x3], #64
        subs            w5,  w5,  #2
        ushr            v21.16b, v16.16b, #4
        and             v20.16b, v16.16b, v31.16b
        ushr            v23.16b, v17.16b, #4
        and             v22.16b, v17.16b, v31.16b
        ushr            v25.16b, v18.16b, #4
        and             v24.16b, v18.16b, v31.16b
        ushr            v27.16b, v19.16b, #4
        and             v26.16b, v19.16b, v31.16b
        zip1            v16.16b, v20.16b, v21.16b
        zip2            v17.16b, v20.16b, v21.16b
        zip1            v18.16b, v22.16b, v23.16b
        zip2            v19.16b, v22.16b, v23.16b
        zip1            v20.16b, v24.16b, v25.16b
        zip2            v21.16b, v24.16b, v25.16b
        tbl             v16.16b, {v0.16b}, v16.16b
        zip1            v22.16b, v26.16b, v27.16b
        tbl             v17.16b, {v0.16b}, v17.16b
        zip2            v23.16b, v26.16b, v27.16b
        tbl             v18.16b, {v0.16b}, v18.16b
        tbl             v19.16b, {v0.16b}, v19.16b
        st1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x0], x1
        tbl             v20.16b, {v0.16b}, v20.16b
        tbl             v21.16b, {v0.16b}, v21.16b
        tbl             v22.16b, {v0.16b}, v22.16b
        tbl             v23.16b, {v0.16b}, v23.16b
        st1             {v20.16b, v21.16b, v22.16b, v23.16b}, [x2], x1
        b.gt            64b
        ret
endfunc

// Dispatch table for pal_pred_8bpc_neon, indexed by clz(w) - 25.
jumptable pal_pred_tbl
        .word 640b - pal_pred_tbl
        .word 320b - pal_pred_tbl
        .word 160b - pal_pred_tbl
        .word 80b  - pal_pred_tbl
        .word 40b  - pal_pred_tbl
endjumptable

// void ipred_cfl_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height,
//                              const int16_t *ac, const int alpha);
//
// CfL prediction with a fixed dc of 128. The L(ipred_cfl_splat_w*) loops are
// also entered from ipred_cfl_top_8bpc_neon and ipred_cfl_left_8bpc_neon,
// which compute their own dc first. On entry to any splat loop:
//   v0.8h = dc            v1.8h = alpha
//   x0    = dst (even rows), x6 = dst + stride (odd rows), x1 = 2 * stride
//   w3    = width, w4 = height, x5 = ac
// Each output pixel is iclip_pixel(dc + apply_sign((ac * alpha), 6-bit
// rounding)), computed in 16-bit lanes.
function ipred_cfl_128_8bpc_neon, export=1
        clz             w9,  w3
        movrel          x7,  ipred_cfl_128_tbl
        sub             w9,  w9,  #26           // width 32 -> 0, ..., 4 -> 3
        ldrsw           x9,  [x7, w9, uxtw #2]
        movi            v0.8h,  #128            // dc
        dup             v1.8h,  w6              // alpha (read before x6 is reused)
        add             x7,  x7,  x9
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        br              x7
L(ipred_cfl_splat_w4):
        AARCH64_VALID_JUMP_TARGET
1:
        // 16 ac coefficients = 4 rows of 4 pixels per iteration
        ld1             {v2.8h, v3.8h}, [x5], #32
        mul             v2.8h,  v2.8h,  v1.8h   // diff = ac * alpha
        mul             v3.8h,  v3.8h,  v1.8h
        cmlt            v4.8h,  v2.8h,  #0      // sign
        cmlt            v5.8h,  v3.8h,  #0
        add             v2.8h,  v2.8h,  v4.8h   // diff + sign
        add             v3.8h,  v3.8h,  v5.8h
        srshr           v2.8h,  v2.8h,  #6      // (diff + sign + 32) >> 6 = apply_sign()
        srshr           v3.8h,  v3.8h,  #6
        add             v2.8h,  v2.8h,  v0.8h   // dc + apply_sign()
        add             v3.8h,  v3.8h,  v0.8h
        sqxtun          v2.8b,  v2.8h           // iclip_pixel(dc + apply_sign())
        sqxtun          v3.8b,  v3.8h
        st1             {v2.s}[0],  [x0], x1
        st1             {v2.s}[1],  [x6], x1
        subs            w4,  w4,  #4
        st1             {v3.s}[0],  [x0], x1
        st1             {v3.s}[1],  [x6], x1
        b.gt            1b
        ret
L(ipred_cfl_splat_w8):
        AARCH64_VALID_JUMP_TARGET
1:
        // 32 ac coefficients = 4 rows of 8 pixels per iteration
        ld1             {v2.8h, v3.8h, v4.8h, v5.8h}, [x5], #64
        mul             v2.8h,  v2.8h,  v1.8h   // diff = ac * alpha
        mul             v3.8h,  v3.8h,  v1.8h
        mul             v4.8h,  v4.8h,  v1.8h
        mul             v5.8h,  v5.8h,  v1.8h
        cmlt            v16.8h, v2.8h,  #0      // sign
        cmlt            v17.8h, v3.8h,  #0
        cmlt            v18.8h, v4.8h,  #0
        cmlt            v19.8h, v5.8h,  #0
        add             v2.8h,  v2.8h,  v16.8h  // diff + sign
        add             v3.8h,  v3.8h,  v17.8h
        add             v4.8h,  v4.8h,  v18.8h
        add             v5.8h,  v5.8h,  v19.8h
        srshr           v2.8h,  v2.8h,  #6      // (diff + sign + 32) >> 6 = apply_sign()
        srshr           v3.8h,  v3.8h,  #6
        srshr           v4.8h,  v4.8h,  #6
        srshr           v5.8h,  v5.8h,  #6
        add             v2.8h,  v2.8h,  v0.8h   // dc + apply_sign()
        add             v3.8h,  v3.8h,  v0.8h
        add             v4.8h,  v4.8h,  v0.8h
        add             v5.8h,  v5.8h,  v0.8h
        sqxtun          v2.8b,  v2.8h           // iclip_pixel(dc + apply_sign())
        sqxtun          v3.8b,  v3.8h
        sqxtun          v4.8b,  v4.8h
        sqxtun          v5.8b,  v5.8h
        st1             {v2.8b},  [x0], x1
        st1             {v3.8b},  [x6], x1
        subs            w4,  w4,  #4
        st1             {v4.8b},  [x0], x1
        st1             {v5.8b},  [x6], x1
        b.gt            1b
        ret
L(ipred_cfl_splat_w16):
        AARCH64_VALID_JUMP_TARGET
        // Used for width 16 and 32: two rows are processed side by side,
        // 16 pixels at a time, until the row pair is complete.
        add             x7,  x5,  w3, uxtw #1   // x7 = ac for the second row
        sub             x1,  x1,  w3, uxtw      // row step minus bytes written per row
        mov             w9,  w3                 // keep width for the next row pair
1:
        ld1             {v2.8h, v3.8h}, [x5], #32
        ld1             {v4.8h, v5.8h}, [x7], #32
        mul             v2.8h,  v2.8h,  v1.8h   // diff = ac * alpha
        mul             v3.8h,  v3.8h,  v1.8h
        mul             v4.8h,  v4.8h,  v1.8h
        mul             v5.8h,  v5.8h,  v1.8h
        cmlt            v16.8h, v2.8h,  #0      // sign
        cmlt            v17.8h, v3.8h,  #0
        cmlt            v18.8h, v4.8h,  #0
        cmlt            v19.8h, v5.8h,  #0
        add             v2.8h,  v2.8h,  v16.8h  // diff + sign
        add             v3.8h,  v3.8h,  v17.8h
        add             v4.8h,  v4.8h,  v18.8h
        add             v5.8h,  v5.8h,  v19.8h
        srshr           v2.8h,  v2.8h,  #6      // (diff + sign + 32) >> 6 = apply_sign()
        srshr           v3.8h,  v3.8h,  #6
        srshr           v4.8h,  v4.8h,  #6
        srshr           v5.8h,  v5.8h,  #6
        add             v2.8h,  v2.8h,  v0.8h   // dc + apply_sign()
        add             v3.8h,  v3.8h,  v0.8h
        add             v4.8h,  v4.8h,  v0.8h
        add             v5.8h,  v5.8h,  v0.8h
        sqxtun          v2.8b,  v2.8h           // iclip_pixel(dc + apply_sign())
        sqxtun          v3.8b,  v3.8h
        sqxtun          v4.8b,  v4.8h
        sqxtun          v5.8b,  v5.8h
        subs            w3,  w3,  #16
        st1             {v2.8b, v3.8b},  [x0], #16
        st1             {v4.8b, v5.8b},  [x6], #16
        b.gt            1b
        subs            w4,  w4,  #2            // flags consumed by the b.gt below
        add             x5,  x5,  w9, uxtw #1   // skip the row the other pointer covered
        add             x7,  x7,  w9, uxtw #1
        add             x0,  x0,  x1
        add             x6,  x6,  x1
        mov             w3,  w9
        b.gt            1b
        ret
endfunc

// Dispatch table indexed by clz(width) - 26. ipred_cfl_splat_tbl labels the
// same address, so that ipred_cfl_left_8bpc_neon can use it as the base for
// these offsets.
jumptable ipred_cfl_128_tbl
ipred_cfl_splat_tbl:
        .word L(ipred_cfl_splat_w16) - ipred_cfl_128_tbl
        .word L(ipred_cfl_splat_w16) - ipred_cfl_128_tbl
        .word L(ipred_cfl_splat_w8)  - ipred_cfl_128_tbl
        .word L(ipred_cfl_splat_w4)  - ipred_cfl_128_tbl
endjumptable

// void ipred_cfl_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height,
//                              const int16_t *ac, const int alpha);
//
// CfL prediction where dc is the rounded average of the `width` pixels that
// start at topleft + 1. After computing dc into v0.8h the code branches to
// the matching L(ipred_cfl_splat_w*) loop (defined in
// ipred_cfl_128_8bpc_neon), which expects v1.8h = alpha, x6 = dst + stride
// and x1 = 2 * stride.
function ipred_cfl_top_8bpc_neon, export=1
        clz             w9,  w3
        movrel          x7,  ipred_cfl_top_tbl
        sub             w9,  w9,  #26           // width 32 -> 0, ..., 4 -> 3
        ldrsw           x9,  [x7, w9, uxtw #2]
        dup             v1.8h,  w6              // alpha (read before x6 is reused)
        add             x2,  x2,  #1            // first pixel after topleft
        add             x7,  x7,  x9
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        br              x7
4:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v0.2s},  [x2]          // the 4 pixels, replicated twice
        uaddlv          h0,      v0.8b          // = 2 * sum of the 4 pixels
        urshr           v0.4h,   v0.4h,   #3    // rounded (2 * sum) / 8
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w4)
8:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8b},  [x2]
        uaddlv          h0,      v0.8b
        urshr           v0.4h,   v0.4h,   #3    // rounded sum / 8
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w8)
16:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b}, [x2]
        uaddlv          h0,      v0.16b
        urshr           v0.4h,   v0.4h,   #4    // rounded sum / 16
        dup             v0.8h,   v0.h[0]
        b               L(ipred_cfl_splat_w16)
32:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.16b, v3.16b}, [x2]
        uaddlv          h2,      v2.16b
        uaddlv          h3,      v3.16b
        add             v2.4h,   v2.4h,   v3.4h
        urshr           v2.4h,   v2.4h,   #5    // rounded sum / 32
        dup             v0.8h,   v2.h[0]
        b               L(ipred_cfl_splat_w16)  // the w16 loop also handles width 32
endfunc

// Dispatch table for ipred_cfl_top_8bpc_neon, indexed by clz(width) - 26.
jumptable ipred_cfl_top_tbl
        .word 32b - ipred_cfl_top_tbl
        .word 16b - ipred_cfl_top_tbl
        .word 8b  - ipred_cfl_top_tbl
        .word 4b  - ipred_cfl_top_tbl
endjumptable

// void ipred_cfl_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                               const pixel *const topleft,
//                               const int width, const int height,
//                               const int16_t *ac, const int alpha);
//
// CfL prediction where dc is the rounded average of the `height` pixels that
// end just before topleft. Two table lookups are done up front:
//   x7 = averaging routine selected by height (ipred_cfl_left_tbl)
//   x9 = splat loop selected by width (ipred_cfl_splat_tbl)
// The averaging routine leaves dc in v0.8h and jumps to x9.
function ipred_cfl_left_8bpc_neon, export=1
        sub             x2,  x2,  w4, uxtw      // topleft - height
        clz             w9,  w3
        clz             w8,  w4
        movrel          x10, ipred_cfl_splat_tbl
        movrel          x7,  ipred_cfl_left_tbl
        sub             w9,  w9,  #26           // width  32 -> 0, ..., 4 -> 3
        sub             w8,  w8,  #26           // height 32 -> 0, ..., 4 -> 3
        ldrsw           x9,  [x10, w9, uxtw #2]
        ldrsw           x8,  [x7,  w8, uxtw #2]
        dup             v1.8h,  w6              // alpha (read before x6 is reused)
        add             x9,  x10, x9            // splat loop for this width
        add             x7,  x7,  x8            // averaging routine for this height
        add             x6,  x0,  x1
        lsl             x1,  x1,  #1
        br              x7

L(ipred_cfl_left_h4):
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v0.2s},  [x2]          // the 4 pixels, replicated twice
        uaddlv          h0,      v0.8b          // = 2 * sum of the 4 pixels
        urshr           v0.4h,   v0.4h,   #3    // rounded (2 * sum) / 8
        dup             v0.8h,   v0.h[0]
        br              x9

L(ipred_cfl_left_h8):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.8b},  [x2]
        uaddlv          h0,      v0.8b
        urshr           v0.4h,   v0.4h,   #3    // rounded sum / 8
        dup             v0.8h,   v0.h[0]
        br              x9

L(ipred_cfl_left_h16):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b}, [x2]
        uaddlv          h0,      v0.16b
        urshr           v0.4h,   v0.4h,   #4    // rounded sum / 16
        dup             v0.8h,   v0.h[0]
        br              x9

L(ipred_cfl_left_h32):
        AARCH64_VALID_JUMP_TARGET
        ld1             {v2.16b, v3.16b}, [x2]
        uaddlv          h2,      v2.16b
        uaddlv          h3,      v3.16b
        add             v2.4h,   v2.4h,   v3.4h
        urshr           v2.4h,   v2.4h,   #5    // rounded sum / 32
        dup             v0.8h,   v2.h[0]
        br              x9
endfunc

// Dispatch table for ipred_cfl_left_8bpc_neon, indexed by clz(height) - 26.
jumptable ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h32) - ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h16) - ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h8)  - ipred_cfl_left_tbl
        .word L(ipred_cfl_left_h4)  - ipred_cfl_left_tbl
endjumptable

// void ipred_cfl_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                          const pixel *const topleft,
//                          const int width, const int height,
//                          const int16_t *ac, const int alpha);
function ipred_cfl_8bpc_neon, export=1
4354*c0909341SAndroid Build Coastguard Worker sub x2, x2, w4, uxtw 4355*c0909341SAndroid Build Coastguard Worker add w8, w3, w4 // width + height 4356*c0909341SAndroid Build Coastguard Worker dup v1.8h, w6 // alpha 4357*c0909341SAndroid Build Coastguard Worker clz w9, w3 4358*c0909341SAndroid Build Coastguard Worker clz w6, w4 4359*c0909341SAndroid Build Coastguard Worker dup v16.8h, w8 // width + height 4360*c0909341SAndroid Build Coastguard Worker movrel x7, ipred_cfl_tbl 4361*c0909341SAndroid Build Coastguard Worker rbit w8, w8 // rbit(width + height) 4362*c0909341SAndroid Build Coastguard Worker sub w9, w9, #22 // 26 leading bits, minus table offset 4 4363*c0909341SAndroid Build Coastguard Worker sub w6, w6, #26 4364*c0909341SAndroid Build Coastguard Worker clz w8, w8 // ctz(width + height) 4365*c0909341SAndroid Build Coastguard Worker ldrsw x9, [x7, w9, uxtw #2] 4366*c0909341SAndroid Build Coastguard Worker ldrsw x6, [x7, w6, uxtw #2] 4367*c0909341SAndroid Build Coastguard Worker neg w8, w8 // -ctz(width + height) 4368*c0909341SAndroid Build Coastguard Worker add x9, x7, x9 4369*c0909341SAndroid Build Coastguard Worker add x7, x7, x6 4370*c0909341SAndroid Build Coastguard Worker ushr v16.8h, v16.8h, #1 // (width + height) >> 1 4371*c0909341SAndroid Build Coastguard Worker dup v17.8h, w8 // -ctz(width + height) 4372*c0909341SAndroid Build Coastguard Worker add x6, x0, x1 4373*c0909341SAndroid Build Coastguard Worker lsl x1, x1, #1 4374*c0909341SAndroid Build Coastguard Worker br x7 4375*c0909341SAndroid Build Coastguard Worker 4376*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h4): 4377*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4378*c0909341SAndroid Build Coastguard Worker ld1 {v0.s}[0], [x2], #4 4379*c0909341SAndroid Build Coastguard Worker ins v0.s[1], wzr 4380*c0909341SAndroid Build Coastguard Worker add x2, x2, #1 4381*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.8b 4382*c0909341SAndroid Build Coastguard Worker br x9 
4383*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w4): 4384*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4385*c0909341SAndroid Build Coastguard Worker ld1 {v2.s}[0], [x2] 4386*c0909341SAndroid Build Coastguard Worker ins v2.s[1], wzr 4387*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v16.4h 4388*c0909341SAndroid Build Coastguard Worker uaddlv h2, v2.8b 4389*c0909341SAndroid Build Coastguard Worker cmp w4, #4 4390*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v2.4h 4391*c0909341SAndroid Build Coastguard Worker ushl v0.4h, v0.4h, v17.4h 4392*c0909341SAndroid Build Coastguard Worker b.eq 1f 4393*c0909341SAndroid Build Coastguard Worker // h = 8/16 4394*c0909341SAndroid Build Coastguard Worker mov w16, #(0x3334/2) 4395*c0909341SAndroid Build Coastguard Worker movk w16, #(0x5556/2), lsl #16 4396*c0909341SAndroid Build Coastguard Worker add w17, w4, w4 // w17 = 2*h = 16 or 32 4397*c0909341SAndroid Build Coastguard Worker lsr w16, w16, w17 4398*c0909341SAndroid Build Coastguard Worker dup v16.4h, w16 4399*c0909341SAndroid Build Coastguard Worker sqdmulh v0.4h, v0.4h, v16.4h 4400*c0909341SAndroid Build Coastguard Worker1: 4401*c0909341SAndroid Build Coastguard Worker dup v0.8h, v0.h[0] 4402*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w4) 4403*c0909341SAndroid Build Coastguard Worker 4404*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h8): 4405*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4406*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [x2], #8 4407*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.8b 4408*c0909341SAndroid Build Coastguard Worker add x2, x2, #1 4409*c0909341SAndroid Build Coastguard Worker br x9 4410*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w8): 4411*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4412*c0909341SAndroid Build Coastguard Worker ld1 {v2.8b}, [x2] 4413*c0909341SAndroid Build Coastguard Worker add 
v0.4h, v0.4h, v16.4h 4414*c0909341SAndroid Build Coastguard Worker uaddlv h2, v2.8b 4415*c0909341SAndroid Build Coastguard Worker cmp w4, #8 4416*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v2.4h 4417*c0909341SAndroid Build Coastguard Worker ushl v0.4h, v0.4h, v17.4h 4418*c0909341SAndroid Build Coastguard Worker b.eq 1f 4419*c0909341SAndroid Build Coastguard Worker // h = 4/16/32 4420*c0909341SAndroid Build Coastguard Worker cmp w4, #32 4421*c0909341SAndroid Build Coastguard Worker mov w16, #(0x3334/2) 4422*c0909341SAndroid Build Coastguard Worker mov w17, #(0x5556/2) 4423*c0909341SAndroid Build Coastguard Worker csel w16, w16, w17, eq 4424*c0909341SAndroid Build Coastguard Worker dup v16.4h, w16 4425*c0909341SAndroid Build Coastguard Worker sqdmulh v0.4h, v0.4h, v16.4h 4426*c0909341SAndroid Build Coastguard Worker1: 4427*c0909341SAndroid Build Coastguard Worker dup v0.8h, v0.h[0] 4428*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w8) 4429*c0909341SAndroid Build Coastguard Worker 4430*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h16): 4431*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4432*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x2], #16 4433*c0909341SAndroid Build Coastguard Worker uaddlv h0, v0.16b 4434*c0909341SAndroid Build Coastguard Worker add x2, x2, #1 4435*c0909341SAndroid Build Coastguard Worker br x9 4436*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w16): 4437*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4438*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b}, [x2] 4439*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v16.4h 4440*c0909341SAndroid Build Coastguard Worker uaddlv h2, v2.16b 4441*c0909341SAndroid Build Coastguard Worker cmp w4, #16 4442*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v2.4h 4443*c0909341SAndroid Build Coastguard Worker ushl v0.4h, v0.4h, v17.4h 4444*c0909341SAndroid Build Coastguard Worker b.eq 1f 
4445*c0909341SAndroid Build Coastguard Worker // h = 4/8/32 4446*c0909341SAndroid Build Coastguard Worker cmp w4, #4 4447*c0909341SAndroid Build Coastguard Worker mov w16, #(0x3334/2) 4448*c0909341SAndroid Build Coastguard Worker mov w17, #(0x5556/2) 4449*c0909341SAndroid Build Coastguard Worker csel w16, w16, w17, eq 4450*c0909341SAndroid Build Coastguard Worker dup v16.4h, w16 4451*c0909341SAndroid Build Coastguard Worker sqdmulh v0.4h, v0.4h, v16.4h 4452*c0909341SAndroid Build Coastguard Worker1: 4453*c0909341SAndroid Build Coastguard Worker dup v0.8h, v0.h[0] 4454*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w16) 4455*c0909341SAndroid Build Coastguard Worker 4456*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h32): 4457*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4458*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b, v3.16b}, [x2], #32 4459*c0909341SAndroid Build Coastguard Worker uaddlv h2, v2.16b 4460*c0909341SAndroid Build Coastguard Worker uaddlv h3, v3.16b 4461*c0909341SAndroid Build Coastguard Worker add x2, x2, #1 4462*c0909341SAndroid Build Coastguard Worker add v0.4h, v2.4h, v3.4h 4463*c0909341SAndroid Build Coastguard Worker br x9 4464*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w32): 4465*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4466*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b, v3.16b}, [x2] 4467*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v16.4h 4468*c0909341SAndroid Build Coastguard Worker uaddlv h2, v2.16b 4469*c0909341SAndroid Build Coastguard Worker uaddlv h3, v3.16b 4470*c0909341SAndroid Build Coastguard Worker cmp w4, #32 4471*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v2.4h 4472*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v3.4h 4473*c0909341SAndroid Build Coastguard Worker ushl v0.4h, v0.4h, v17.4h 4474*c0909341SAndroid Build Coastguard Worker b.eq 1f 4475*c0909341SAndroid Build Coastguard Worker // h = 8/16 
4476*c0909341SAndroid Build Coastguard Worker mov w16, #(0x5556/2) 4477*c0909341SAndroid Build Coastguard Worker movk w16, #(0x3334/2), lsl #16 4478*c0909341SAndroid Build Coastguard Worker add w17, w4, w4 // w17 = 2*h = 16 or 32 4479*c0909341SAndroid Build Coastguard Worker lsr w16, w16, w17 4480*c0909341SAndroid Build Coastguard Worker dup v16.4h, w16 4481*c0909341SAndroid Build Coastguard Worker sqdmulh v0.4h, v0.4h, v16.4h 4482*c0909341SAndroid Build Coastguard Worker1: 4483*c0909341SAndroid Build Coastguard Worker dup v0.8h, v0.h[0] 4484*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_splat_w16) 4485*c0909341SAndroid Build Coastguard Workerendfunc 4486*c0909341SAndroid Build Coastguard Worker 4487*c0909341SAndroid Build Coastguard Workerjumptable ipred_cfl_tbl 4488*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h32) - ipred_cfl_tbl 4489*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h16) - ipred_cfl_tbl 4490*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h8) - ipred_cfl_tbl 4491*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_h4) - ipred_cfl_tbl 4492*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w32) - ipred_cfl_tbl 4493*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w16) - ipred_cfl_tbl 4494*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w8) - ipred_cfl_tbl 4495*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_w4) - ipred_cfl_tbl 4496*c0909341SAndroid Build Coastguard Workerendjumptable 4497*c0909341SAndroid Build Coastguard Worker 4498*c0909341SAndroid Build Coastguard Worker// void cfl_ac_420_8bpc_neon(int16_t *const ac, const pixel *const ypx, 4499*c0909341SAndroid Build Coastguard Worker// const ptrdiff_t stride, const int w_pad, 4500*c0909341SAndroid Build Coastguard Worker// const int h_pad, const int cw, const int ch); 4501*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_420_8bpc_neon, export=1 4502*c0909341SAndroid Build Coastguard Worker 
clz w8, w5 4503*c0909341SAndroid Build Coastguard Worker lsl w4, w4, #2 4504*c0909341SAndroid Build Coastguard Worker movrel x7, ipred_cfl_ac_420_tbl 4505*c0909341SAndroid Build Coastguard Worker sub w8, w8, #27 4506*c0909341SAndroid Build Coastguard Worker ldrsw x8, [x7, w8, uxtw #2] 4507*c0909341SAndroid Build Coastguard Worker movi v16.8h, #0 4508*c0909341SAndroid Build Coastguard Worker movi v17.8h, #0 4509*c0909341SAndroid Build Coastguard Worker movi v18.8h, #0 4510*c0909341SAndroid Build Coastguard Worker movi v19.8h, #0 4511*c0909341SAndroid Build Coastguard Worker add x7, x7, x8 4512*c0909341SAndroid Build Coastguard Worker sub w8, w6, w4 // height - h_pad 4513*c0909341SAndroid Build Coastguard Worker rbit w9, w5 // rbit(width) 4514*c0909341SAndroid Build Coastguard Worker rbit w10, w6 // rbit(height) 4515*c0909341SAndroid Build Coastguard Worker clz w9, w9 // ctz(width) 4516*c0909341SAndroid Build Coastguard Worker clz w10, w10 // ctz(height) 4517*c0909341SAndroid Build Coastguard Worker add w9, w9, w10 // log2sz 4518*c0909341SAndroid Build Coastguard Worker add x10, x1, x2 4519*c0909341SAndroid Build Coastguard Worker dup v31.4s, w9 4520*c0909341SAndroid Build Coastguard Worker lsl x2, x2, #1 4521*c0909341SAndroid Build Coastguard Worker neg v31.4s, v31.4s // -log2sz 4522*c0909341SAndroid Build Coastguard Worker br x7 4523*c0909341SAndroid Build Coastguard Worker 4524*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4): 4525*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4526*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input 4527*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [x1], x2 4528*c0909341SAndroid Build Coastguard Worker ld1 {v1.8b}, [x10], x2 4529*c0909341SAndroid Build Coastguard Worker ld1 {v0.d}[1], [x1], x2 4530*c0909341SAndroid Build Coastguard Worker ld1 {v1.d}[1], [x10], x2 4531*c0909341SAndroid Build Coastguard Worker uaddlp v0.8h, v0.16b 4532*c0909341SAndroid Build Coastguard 
Worker uaddlp v1.8h, v1.16b 4533*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v1.8h 4534*c0909341SAndroid Build Coastguard Worker shl v0.8h, v0.8h, #1 4535*c0909341SAndroid Build Coastguard Worker subs w8, w8, #2 4536*c0909341SAndroid Build Coastguard Worker st1 {v0.8h}, [x0], #16 4537*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4538*c0909341SAndroid Build Coastguard Worker b.gt 1b 4539*c0909341SAndroid Build Coastguard Worker trn2 v1.2d, v0.2d, v0.2d 4540*c0909341SAndroid Build Coastguard Worker trn2 v0.2d, v0.2d, v0.2d 4541*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_hpad): 4542*c0909341SAndroid Build Coastguard Worker cbz w4, 3f 4543*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 4544*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 4545*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h}, [x0], #32 4546*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4547*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4548*c0909341SAndroid Build Coastguard Worker b.gt 2b 4549*c0909341SAndroid Build Coastguard Worker3: 4550*c0909341SAndroid Build Coastguard Worker // Aggregate the sums 4551*c0909341SAndroid Build Coastguard Worker add v0.8h, v16.8h, v17.8h 4552*c0909341SAndroid Build Coastguard Worker uaddlv s0, v0.8h // sum 4553*c0909341SAndroid Build Coastguard Worker sub x0, x0, w6, uxtw #3 4554*c0909341SAndroid Build Coastguard Worker urshl v4.2s, v0.2s, v31.2s // (sum + (1 << (log2sz - 1))) >>= log2sz 4555*c0909341SAndroid Build Coastguard Worker dup v4.8h, v4.h[0] 4556*c0909341SAndroid Build Coastguard Worker6: // Subtract dc from ac 4557*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h}, [x0] 4558*c0909341SAndroid Build Coastguard Worker subs w6, w6, #4 4559*c0909341SAndroid Build Coastguard Worker sub v0.8h, v0.8h, v4.8h 4560*c0909341SAndroid Build Coastguard Worker sub v1.8h, v1.8h, v4.8h 4561*c0909341SAndroid Build 
Coastguard Worker st1 {v0.8h, v1.8h}, [x0], #32 4562*c0909341SAndroid Build Coastguard Worker b.gt 6b 4563*c0909341SAndroid Build Coastguard Worker ret 4564*c0909341SAndroid Build Coastguard Worker 4565*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8): 4566*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4567*c0909341SAndroid Build Coastguard Worker cbnz w3, L(ipred_cfl_ac_420_w8_wpad) 4568*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, without padding 4569*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x1], x2 4570*c0909341SAndroid Build Coastguard Worker ld1 {v1.16b}, [x10], x2 4571*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b}, [x1], x2 4572*c0909341SAndroid Build Coastguard Worker uaddlp v0.8h, v0.16b 4573*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [x10], x2 4574*c0909341SAndroid Build Coastguard Worker uaddlp v1.8h, v1.16b 4575*c0909341SAndroid Build Coastguard Worker uaddlp v2.8h, v2.16b 4576*c0909341SAndroid Build Coastguard Worker uaddlp v3.8h, v3.16b 4577*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v1.8h 4578*c0909341SAndroid Build Coastguard Worker add v2.8h, v2.8h, v3.8h 4579*c0909341SAndroid Build Coastguard Worker shl v0.8h, v0.8h, #1 4580*c0909341SAndroid Build Coastguard Worker shl v1.8h, v2.8h, #1 4581*c0909341SAndroid Build Coastguard Worker subs w8, w8, #2 4582*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h}, [x0], #32 4583*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4584*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4585*c0909341SAndroid Build Coastguard Worker b.gt 1b 4586*c0909341SAndroid Build Coastguard Worker mov v0.16b, v1.16b 4587*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w8_hpad) 4588*c0909341SAndroid Build Coastguard Worker 4589*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_wpad): 4590*c0909341SAndroid Build Coastguard Worker1: // Copy and 
subsample input, padding 4 4591*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [x1], x2 4592*c0909341SAndroid Build Coastguard Worker ld1 {v1.8b}, [x10], x2 4593*c0909341SAndroid Build Coastguard Worker ld1 {v0.d}[1], [x1], x2 4594*c0909341SAndroid Build Coastguard Worker ld1 {v1.d}[1], [x10], x2 4595*c0909341SAndroid Build Coastguard Worker uaddlp v0.8h, v0.16b 4596*c0909341SAndroid Build Coastguard Worker uaddlp v1.8h, v1.16b 4597*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v1.8h 4598*c0909341SAndroid Build Coastguard Worker shl v0.8h, v0.8h, #1 4599*c0909341SAndroid Build Coastguard Worker dup v1.4h, v0.h[3] 4600*c0909341SAndroid Build Coastguard Worker dup v3.4h, v0.h[7] 4601*c0909341SAndroid Build Coastguard Worker trn2 v2.2d, v0.2d, v0.2d 4602*c0909341SAndroid Build Coastguard Worker subs w8, w8, #2 4603*c0909341SAndroid Build Coastguard Worker st1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32 4604*c0909341SAndroid Build Coastguard Worker add v16.4h, v16.4h, v0.4h 4605*c0909341SAndroid Build Coastguard Worker add v17.4h, v17.4h, v1.4h 4606*c0909341SAndroid Build Coastguard Worker add v18.4h, v18.4h, v2.4h 4607*c0909341SAndroid Build Coastguard Worker add v19.4h, v19.4h, v3.4h 4608*c0909341SAndroid Build Coastguard Worker b.gt 1b 4609*c0909341SAndroid Build Coastguard Worker trn1 v0.2d, v2.2d, v3.2d 4610*c0909341SAndroid Build Coastguard Worker trn1 v1.2d, v2.2d, v3.2d 4611*c0909341SAndroid Build Coastguard Worker 4612*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_hpad): 4613*c0909341SAndroid Build Coastguard Worker cbz w4, 3f 4614*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 4615*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 4616*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h}, [x0], #32 4617*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4618*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4619*c0909341SAndroid Build Coastguard Worker 
st1 {v0.8h, v1.8h}, [x0], #32 4620*c0909341SAndroid Build Coastguard Worker add v18.8h, v18.8h, v0.8h 4621*c0909341SAndroid Build Coastguard Worker add v19.8h, v19.8h, v1.8h 4622*c0909341SAndroid Build Coastguard Worker b.gt 2b 4623*c0909341SAndroid Build Coastguard Worker3: 4624*c0909341SAndroid Build Coastguard Worker 4625*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_calc_subtract_dc): 4626*c0909341SAndroid Build Coastguard Worker // Aggregate the sums 4627*c0909341SAndroid Build Coastguard Worker add v0.8h, v16.8h, v17.8h 4628*c0909341SAndroid Build Coastguard Worker add v2.8h, v18.8h, v19.8h 4629*c0909341SAndroid Build Coastguard Worker uaddlp v0.4s, v0.8h 4630*c0909341SAndroid Build Coastguard Worker uaddlp v2.4s, v2.8h 4631*c0909341SAndroid Build Coastguard Worker add v0.4s, v0.4s, v2.4s 4632*c0909341SAndroid Build Coastguard Worker addv s0, v0.4s // sum 4633*c0909341SAndroid Build Coastguard Worker sub x0, x0, w6, uxtw #4 4634*c0909341SAndroid Build Coastguard Worker urshl v4.2s, v0.2s, v31.2s // (sum + (1 << (log2sz - 1))) >>= log2sz 4635*c0909341SAndroid Build Coastguard Worker dup v4.8h, v4.h[0] 4636*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_subtract_dc): 4637*c0909341SAndroid Build Coastguard Worker6: // Subtract dc from ac 4638*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0] 4639*c0909341SAndroid Build Coastguard Worker subs w6, w6, #4 4640*c0909341SAndroid Build Coastguard Worker sub v0.8h, v0.8h, v4.8h 4641*c0909341SAndroid Build Coastguard Worker sub v1.8h, v1.8h, v4.8h 4642*c0909341SAndroid Build Coastguard Worker sub v2.8h, v2.8h, v4.8h 4643*c0909341SAndroid Build Coastguard Worker sub v3.8h, v3.8h, v4.8h 4644*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 4645*c0909341SAndroid Build Coastguard Worker b.gt 6b 4646*c0909341SAndroid Build Coastguard Worker ret 4647*c0909341SAndroid Build Coastguard Worker 4648*c0909341SAndroid Build Coastguard 
WorkerL(ipred_cfl_ac_420_w16): 4649*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4650*c0909341SAndroid Build Coastguard Worker movrel x7, ipred_cfl_ac_420_w16_tbl 4651*c0909341SAndroid Build Coastguard Worker ldrsw x3, [x7, w3, uxtw #2] 4652*c0909341SAndroid Build Coastguard Worker add x7, x7, x3 4653*c0909341SAndroid Build Coastguard Worker br x7 4654*c0909341SAndroid Build Coastguard Worker 4655*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad0): 4656*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4657*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, without padding 4658*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b, v1.16b}, [x1], x2 4659*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b, v3.16b}, [x10], x2 4660*c0909341SAndroid Build Coastguard Worker uaddlp v0.8h, v0.16b 4661*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b, v5.16b}, [x1], x2 4662*c0909341SAndroid Build Coastguard Worker uaddlp v1.8h, v1.16b 4663*c0909341SAndroid Build Coastguard Worker ld1 {v6.16b, v7.16b}, [x10], x2 4664*c0909341SAndroid Build Coastguard Worker uaddlp v2.8h, v2.16b 4665*c0909341SAndroid Build Coastguard Worker uaddlp v3.8h, v3.16b 4666*c0909341SAndroid Build Coastguard Worker uaddlp v4.8h, v4.16b 4667*c0909341SAndroid Build Coastguard Worker uaddlp v5.8h, v5.16b 4668*c0909341SAndroid Build Coastguard Worker uaddlp v6.8h, v6.16b 4669*c0909341SAndroid Build Coastguard Worker uaddlp v7.8h, v7.16b 4670*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v2.8h 4671*c0909341SAndroid Build Coastguard Worker add v1.8h, v1.8h, v3.8h 4672*c0909341SAndroid Build Coastguard Worker add v4.8h, v4.8h, v6.8h 4673*c0909341SAndroid Build Coastguard Worker add v5.8h, v5.8h, v7.8h 4674*c0909341SAndroid Build Coastguard Worker shl v0.8h, v0.8h, #1 4675*c0909341SAndroid Build Coastguard Worker shl v1.8h, v1.8h, #1 4676*c0909341SAndroid Build Coastguard Worker shl v2.8h, v4.8h, #1 
4677*c0909341SAndroid Build Coastguard Worker shl v3.8h, v5.8h, #1 4678*c0909341SAndroid Build Coastguard Worker subs w8, w8, #2 4679*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 4680*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4681*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4682*c0909341SAndroid Build Coastguard Worker add v18.8h, v18.8h, v2.8h 4683*c0909341SAndroid Build Coastguard Worker add v19.8h, v19.8h, v3.8h 4684*c0909341SAndroid Build Coastguard Worker b.gt 1b 4685*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 4686*c0909341SAndroid Build Coastguard Worker mov v1.16b, v3.16b 4687*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 4688*c0909341SAndroid Build Coastguard Worker 4689*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad1): 4690*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4691*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 4 4692*c0909341SAndroid Build Coastguard Worker ldr d1, [x1, #16] 4693*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, [x1], x2 4694*c0909341SAndroid Build Coastguard Worker ldr d3, [x10, #16] 4695*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b}, [x10], x2 4696*c0909341SAndroid Build Coastguard Worker uaddlp v1.4h, v1.8b 4697*c0909341SAndroid Build Coastguard Worker ldr d5, [x1, #16] 4698*c0909341SAndroid Build Coastguard Worker uaddlp v0.8h, v0.16b 4699*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b}, [x1], x2 4700*c0909341SAndroid Build Coastguard Worker uaddlp v3.4h, v3.8b 4701*c0909341SAndroid Build Coastguard Worker ldr d7, [x10, #16] 4702*c0909341SAndroid Build Coastguard Worker uaddlp v2.8h, v2.16b 4703*c0909341SAndroid Build Coastguard Worker ld1 {v6.16b}, [x10], x2 4704*c0909341SAndroid Build Coastguard Worker uaddlp v5.4h, v5.8b 4705*c0909341SAndroid Build Coastguard Worker uaddlp v4.8h, v4.16b 
4706*c0909341SAndroid Build Coastguard Worker uaddlp v7.4h, v7.8b 4707*c0909341SAndroid Build Coastguard Worker uaddlp v6.8h, v6.16b 4708*c0909341SAndroid Build Coastguard Worker add v1.4h, v1.4h, v3.4h 4709*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v2.8h 4710*c0909341SAndroid Build Coastguard Worker add v5.4h, v5.4h, v7.4h 4711*c0909341SAndroid Build Coastguard Worker add v4.8h, v4.8h, v6.8h 4712*c0909341SAndroid Build Coastguard Worker shl v1.4h, v1.4h, #1 4713*c0909341SAndroid Build Coastguard Worker shl v0.8h, v0.8h, #1 4714*c0909341SAndroid Build Coastguard Worker shl v3.4h, v5.4h, #1 4715*c0909341SAndroid Build Coastguard Worker shl v2.8h, v4.8h, #1 4716*c0909341SAndroid Build Coastguard Worker dup v4.4h, v1.h[3] 4717*c0909341SAndroid Build Coastguard Worker dup v5.4h, v3.h[3] 4718*c0909341SAndroid Build Coastguard Worker trn1 v1.2d, v1.2d, v4.2d 4719*c0909341SAndroid Build Coastguard Worker trn1 v3.2d, v3.2d, v5.2d 4720*c0909341SAndroid Build Coastguard Worker subs w8, w8, #2 4721*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 4722*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4723*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4724*c0909341SAndroid Build Coastguard Worker add v18.8h, v18.8h, v2.8h 4725*c0909341SAndroid Build Coastguard Worker add v19.8h, v19.8h, v3.8h 4726*c0909341SAndroid Build Coastguard Worker b.gt 1b 4727*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 4728*c0909341SAndroid Build Coastguard Worker mov v1.16b, v3.16b 4729*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 4730*c0909341SAndroid Build Coastguard Worker 4731*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad2): 4732*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4733*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 8 4734*c0909341SAndroid Build Coastguard Worker ld1 {v0.16b}, 
[x1], x2 4735*c0909341SAndroid Build Coastguard Worker ld1 {v2.16b}, [x10], x2 4736*c0909341SAndroid Build Coastguard Worker ld1 {v4.16b}, [x1], x2 4737*c0909341SAndroid Build Coastguard Worker uaddlp v0.8h, v0.16b 4738*c0909341SAndroid Build Coastguard Worker ld1 {v6.16b}, [x10], x2 4739*c0909341SAndroid Build Coastguard Worker uaddlp v2.8h, v2.16b 4740*c0909341SAndroid Build Coastguard Worker uaddlp v4.8h, v4.16b 4741*c0909341SAndroid Build Coastguard Worker uaddlp v6.8h, v6.16b 4742*c0909341SAndroid Build Coastguard Worker add v0.8h, v0.8h, v2.8h 4743*c0909341SAndroid Build Coastguard Worker add v4.8h, v4.8h, v6.8h 4744*c0909341SAndroid Build Coastguard Worker shl v0.8h, v0.8h, #1 4745*c0909341SAndroid Build Coastguard Worker shl v2.8h, v4.8h, #1 4746*c0909341SAndroid Build Coastguard Worker dup v1.8h, v0.h[7] 4747*c0909341SAndroid Build Coastguard Worker dup v3.8h, v2.h[7] 4748*c0909341SAndroid Build Coastguard Worker subs w8, w8, #2 4749*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 4750*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4751*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4752*c0909341SAndroid Build Coastguard Worker add v18.8h, v18.8h, v2.8h 4753*c0909341SAndroid Build Coastguard Worker add v19.8h, v19.8h, v3.8h 4754*c0909341SAndroid Build Coastguard Worker b.gt 1b 4755*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 4756*c0909341SAndroid Build Coastguard Worker mov v1.16b, v3.16b 4757*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w16_hpad) 4758*c0909341SAndroid Build Coastguard Worker 4759*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad3): 4760*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 4761*c0909341SAndroid Build Coastguard Worker1: // Copy and subsample input, padding 12 4762*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [x1], x2 4763*c0909341SAndroid Build Coastguard Worker ld1 {v2.8b}, 
[x10], x2 4764*c0909341SAndroid Build Coastguard Worker ld1 {v4.8b}, [x1], x2 4765*c0909341SAndroid Build Coastguard Worker uaddlp v0.4h, v0.8b 4766*c0909341SAndroid Build Coastguard Worker ld1 {v6.8b}, [x10], x2 4767*c0909341SAndroid Build Coastguard Worker uaddlp v2.4h, v2.8b 4768*c0909341SAndroid Build Coastguard Worker uaddlp v4.4h, v4.8b 4769*c0909341SAndroid Build Coastguard Worker uaddlp v6.4h, v6.8b 4770*c0909341SAndroid Build Coastguard Worker add v0.4h, v0.4h, v2.4h 4771*c0909341SAndroid Build Coastguard Worker add v4.4h, v4.4h, v6.4h 4772*c0909341SAndroid Build Coastguard Worker shl v0.4h, v0.4h, #1 4773*c0909341SAndroid Build Coastguard Worker shl v2.4h, v4.4h, #1 4774*c0909341SAndroid Build Coastguard Worker dup v1.8h, v0.h[3] 4775*c0909341SAndroid Build Coastguard Worker dup v3.8h, v2.h[3] 4776*c0909341SAndroid Build Coastguard Worker trn1 v0.2d, v0.2d, v1.2d 4777*c0909341SAndroid Build Coastguard Worker trn1 v2.2d, v2.2d, v3.2d 4778*c0909341SAndroid Build Coastguard Worker subs w8, w8, #2 4779*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 4780*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4781*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4782*c0909341SAndroid Build Coastguard Worker add v18.8h, v18.8h, v2.8h 4783*c0909341SAndroid Build Coastguard Worker add v19.8h, v19.8h, v3.8h 4784*c0909341SAndroid Build Coastguard Worker b.gt 1b 4785*c0909341SAndroid Build Coastguard Worker mov v0.16b, v2.16b 4786*c0909341SAndroid Build Coastguard Worker mov v1.16b, v3.16b 4787*c0909341SAndroid Build Coastguard Worker 4788*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_hpad): 4789*c0909341SAndroid Build Coastguard Worker cbz w4, 3f 4790*c0909341SAndroid Build Coastguard Worker2: // Vertical padding (h_pad > 0) 4791*c0909341SAndroid Build Coastguard Worker subs w4, w4, #4 4792*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 
4793*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4794*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4795*c0909341SAndroid Build Coastguard Worker add v18.8h, v18.8h, v2.8h 4796*c0909341SAndroid Build Coastguard Worker add v19.8h, v19.8h, v3.8h 4797*c0909341SAndroid Build Coastguard Worker st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64 4798*c0909341SAndroid Build Coastguard Worker add v16.8h, v16.8h, v0.8h 4799*c0909341SAndroid Build Coastguard Worker add v17.8h, v17.8h, v1.8h 4800*c0909341SAndroid Build Coastguard Worker add v18.8h, v18.8h, v2.8h 4801*c0909341SAndroid Build Coastguard Worker add v19.8h, v19.8h, v3.8h 4802*c0909341SAndroid Build Coastguard Worker b.gt 2b 4803*c0909341SAndroid Build Coastguard Worker3: 4804*c0909341SAndroid Build Coastguard Worker 4805*c0909341SAndroid Build Coastguard Worker // Double the height and reuse the w8 summing/subtracting 4806*c0909341SAndroid Build Coastguard Worker lsl w6, w6, #1 4807*c0909341SAndroid Build Coastguard Worker b L(ipred_cfl_ac_420_w8_calc_subtract_dc) 4808*c0909341SAndroid Build Coastguard Workerendfunc 4809*c0909341SAndroid Build Coastguard Worker 4810*c0909341SAndroid Build Coastguard Workerjumptable ipred_cfl_ac_420_tbl 4811*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16) - ipred_cfl_ac_420_tbl 4812*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w8) - ipred_cfl_ac_420_tbl 4813*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w4) - ipred_cfl_ac_420_tbl 4814*c0909341SAndroid Build Coastguard Workerendjumptable 4815*c0909341SAndroid Build Coastguard Worker 4816*c0909341SAndroid Build Coastguard Workerjumptable ipred_cfl_ac_420_w16_tbl 4817*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad0) - ipred_cfl_ac_420_w16_tbl 4818*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_420_w16_wpad1) - ipred_cfl_ac_420_w16_tbl 4819*c0909341SAndroid Build Coastguard Worker .word 
L(ipred_cfl_ac_420_w16_wpad2) - ipred_cfl_ac_420_w16_tbl
        .word L(ipred_cfl_ac_420_w16_wpad3) - ipred_cfl_ac_420_w16_tbl
endjumptable

// Store v0-v3 (64 bytes) to the ac buffer at x0, post-incrementing x0, and
// add the same four vectors lane-wise into the running sums in v16-v19.
// None of these instructions touch the condition flags, so a preceding
// "subs" can still drive the loop branch that follows.
.macro cfl_ac_422_store_sum
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        add             v16.8h,  v16.8h,  v0.8h
        add             v17.8h,  v17.8h,  v1.8h
        add             v18.8h,  v18.8h,  v2.8h
        add             v19.8h,  v19.8h,  v3.8h
.endm

// Common exit of the 16 wide loops: copy the second output row of the last
// iteration (v2/v3) over the first one (v0/v1), so that v0-v3 hold the last
// row twice, then continue in the 16 wide vertical padding code of the
// 4:2:0 function.
.macro cfl_ac_422_w16_finish
        mov             v0.16b,  v2.16b
        mov             v1.16b,  v3.16b
        b               L(ipred_cfl_ac_420_w16_hpad)
.endm

// void cfl_ac_422_8bpc_neon(int16_t *const ac, const pixel *const ypx,
//                           const ptrdiff_t stride, const int w_pad,
//                           const int h_pad, const int cw, const int ch);
//
// Arguments: x0 = ac, x1 = ypx, x2 = stride, w3 = w_pad, w4 = h_pad,
// w5 = cw (width), w6 = ch (height).
//
// Every input row is reduced 2:1 horizontally by adding neighbouring pixel
// pairs, and the pair sum is shifted left by 2. Rows are not combined
// vertically. The results are written to ac and summed up in v16-v19; the
// vertical padding and the DC handling are done by code belonging to the
// 4:2:0 function, which every path below branches to.
function ipred_cfl_ac_422_8bpc_neon, export=1
        // Table index = clz(cw) - 27: 0 for cw 16, 1 for cw 8, 2 for cw 4.
        clz             w8,  w5
        lsl             w4,  w4,  #2           // h_pad *= 4
        movrel          x7,  ipred_cfl_ac_422_tbl
        sub             w8,  w8,  #27
        ldrsw           x8,  [x7, w8, uxtw #2]
        // Clear the four sum accumulators.
.irp acc, v16, v17, v18, v19
        movi            \acc\().8h,  #0
.endr
        add             x7,  x7,  x8           // address of the width handler
        sub             w8,  w6,  w4           // height - h_pad
        rbit            w9,  w5                // rbit(width)
        rbit            w10, w6                // rbit(height)
        clz             w9,  w9                // ctz(width)
        clz             w10, w10               // ctz(height)
        add             w9,  w9,  w10          // log2sz
        add             x10, x1,  x2           // x10 = second input row
        dup             v31.4s,  w9
        lsl             x2,  x2,  #1           // x1 and x10 each step two rows
        // v31 is not read in this function; presumably it is consumed by
        // the shared DC code this function ends up in.
        neg             v31.4s,  v31.4s        // -log2sz
        br              x7

L(ipred_cfl_ac_422_w4):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input; four rows per iteration, two rows
        // packed into each of v0 and v1.
        ld1             {v0.8b},   [x1],  x2
        ld1             {v0.d}[1], [x10], x2
        ld1             {v1.8b},   [x1],  x2
        ld1             {v1.d}[1], [x10], x2
        uaddlp          v0.8h,   v0.16b
        uaddlp          v1.8h,   v1.16b
        shl             v0.8h,   v0.8h,   #2
        shl             v1.8h,   v1.8h,   #2
        subs            w8,  w8,  #4
        add             v16.8h,  v16.8h,  v0.8h
        add             v17.8h,  v17.8h,  v1.8h
        st1             {v0.8h, v1.8h}, [x0], #32
        b.gt            1b
        // Replicate the last row (upper half of v1) into both halves of
        // v0 and v1 before entering the shared 4 wide vertical padding.
        trn2            v0.2d,   v1.2d,   v1.2d
        trn2            v1.2d,   v1.2d,   v1.2d
        b               L(ipred_cfl_ac_420_w4_hpad)

L(ipred_cfl_ac_422_w8):
        AARCH64_VALID_JUMP_TARGET
        cbnz            w3,  L(ipred_cfl_ac_422_w8_wpad)
1:      // Copy and subsample input, without padding; one row per register.
        ld1             {v0.16b}, [x1],  x2
        ld1             {v1.16b}, [x10], x2
        ld1             {v2.16b}, [x1],  x2
        uaddlp          v0.8h,   v0.16b
        ld1             {v3.16b}, [x10], x2
.irp row, v1, v2, v3
        uaddlp          \row\().8h,  \row\().16b
.endr
.irp row, v0, v1, v2, v3
        shl             \row\().8h,  \row\().8h,  #2
.endr
        subs            w8,  w8,  #4
        cfl_ac_422_store_sum
        b.gt            1b
        // Hand the last row (v3) over in v0 and v1; presumably what the
        // shared 8 wide vertical padding code expects.
        mov             v0.16b,  v3.16b
        mov             v1.16b,  v3.16b
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_422_w8_wpad):
1:      // Copy and subsample input, padding 4
        // Only 8 input pixels per row are read; two rows share a register.
        ld1             {v0.8b},   [x1],  x2
        ld1             {v0.d}[1], [x10], x2
        ld1             {v2.8b},   [x1],  x2
        ld1             {v2.d}[1], [x10], x2
        uaddlp          v0.8h,   v0.16b
        uaddlp          v2.8h,   v2.16b
        shl             v0.8h,   v0.8h,   #2
        shl             v2.8h,   v2.8h,   #2
        // Broadcast the last valid value of each row ...
        dup             v4.4h,   v0.h[3]
        dup             v5.8h,   v0.h[7]
        dup             v6.4h,   v2.h[3]
        dup             v7.8h,   v2.h[7]
        // ... and unpack to one row per register: 4 values + 4 x padding.
        trn2            v1.2d,   v0.2d,   v5.2d
        trn1            v0.2d,   v0.2d,   v4.2d
        trn2            v3.2d,   v2.2d,   v7.2d
        trn1            v2.2d,   v2.2d,   v6.2d
        subs            w8,  w8,  #4
        cfl_ac_422_store_sum
        b.gt            1b
        mov             v0.16b,  v3.16b
        mov             v1.16b,  v3.16b
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_422_w16):
        AARCH64_VALID_JUMP_TARGET
        // Second level dispatch, indexed directly by w_pad (0-3).
        movrel          x7,  ipred_cfl_ac_422_w16_tbl
        ldrsw           x3,  [x7, w3, uxtw #2]
        add             x7,  x7,  x3
        br              x7

L(ipred_cfl_ac_422_w16_wpad0):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, without padding; two rows per
        // iteration, each row in a register pair (v0/v1 and v2/v3).
        ld1             {v0.16b, v1.16b}, [x1],  x2
        ld1             {v2.16b, v3.16b}, [x10], x2
.irp half, v0, v1, v2, v3
        uaddlp          \half\().8h,  \half\().16b
.endr
.irp half, v0, v1, v2, v3
        shl             \half\().8h,  \half\().8h,  #2
.endr
        subs            w8,  w8,  #2
        cfl_ac_422_store_sum
        b.gt            1b
        cfl_ac_422_w16_finish

L(ipred_cfl_ac_422_w16_wpad1):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, padding 4
        // 24 input pixels per row: 16 via ld1 plus 8 more from offset 16.
        ldr             d1,  [x1,  #16]
        ld1             {v0.16b}, [x1],  x2
        ldr             d3,  [x10, #16]
        ld1             {v2.16b}, [x10], x2
        uaddlp          v1.4h,   v1.8b
        uaddlp          v0.8h,   v0.16b
        uaddlp          v3.4h,   v3.8b
        uaddlp          v2.8h,   v2.16b
        shl             v1.4h,   v1.4h,   #2
        shl             v0.8h,   v0.8h,   #2
        shl             v3.4h,   v3.4h,   #2
        shl             v2.8h,   v2.8h,   #2
        // Fill the upper half of v1/v3 with the last valid value.
        dup             v4.4h,   v1.h[3]
        dup             v5.4h,   v3.h[3]
        trn1            v1.2d,   v1.2d,   v4.2d
        trn1            v3.2d,   v3.2d,   v5.2d
        subs            w8,  w8,  #2
        cfl_ac_422_store_sum
        b.gt            1b
        cfl_ac_422_w16_finish

L(ipred_cfl_ac_422_w16_wpad2):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, padding 8
        ld1             {v0.16b}, [x1],  x2
        ld1             {v2.16b}, [x10], x2
        uaddlp          v0.8h,   v0.16b
        uaddlp          v2.8h,   v2.16b
        shl             v0.8h,   v0.8h,   #2
        shl             v2.8h,   v2.8h,   #2
        // The whole right half of each row is the last valid value.
        dup             v1.8h,   v0.h[7]
        dup             v3.8h,   v2.h[7]
        subs            w8,  w8,  #2
        cfl_ac_422_store_sum
        b.gt            1b
        cfl_ac_422_w16_finish

L(ipred_cfl_ac_422_w16_wpad3):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and subsample input, padding 12
        ld1             {v0.8b}, [x1],  x2
        ld1             {v2.8b}, [x10], x2
        uaddlp          v0.4h,   v0.8b
        uaddlp          v2.4h,   v2.8b
        shl             v0.4h,   v0.4h,   #2
        shl             v2.4h,   v2.4h,   #2
        // Only 4 valid values per row; the remaining 12 repeat the last.
        dup             v1.8h,   v0.h[3]
        dup             v3.8h,   v2.h[3]
        trn1            v0.2d,   v0.2d,   v1.2d
        trn1            v2.2d,   v2.2d,   v3.2d
        subs            w8,  w8,  #2
        cfl_ac_422_store_sum
        b.gt            1b
        cfl_ac_422_w16_finish
endfunc

// First level dispatch, indexed by clz(cw) - 27.
jumptable ipred_cfl_ac_422_tbl
        .word L(ipred_cfl_ac_422_w16) - ipred_cfl_ac_422_tbl
        .word L(ipred_cfl_ac_422_w8)  - ipred_cfl_ac_422_tbl
        .word L(ipred_cfl_ac_422_w4)  - ipred_cfl_ac_422_tbl
endjumptable

// Second level dispatch for cw == 16, indexed by w_pad.
jumptable ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad0) - ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad1) - ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad2) - ipred_cfl_ac_422_w16_tbl
        .word L(ipred_cfl_ac_422_w16_wpad3) - ipred_cfl_ac_422_w16_tbl
endjumptable

// void cfl_ac_444_8bpc_neon(int16_t *const ac, const pixel
//                           *const ypx,
//                           const ptrdiff_t stride, const int w_pad,
//                           const int h_pad, const int cw, const int ch);
//
// Arguments: x0 = ac, x1 = ypx, x2 = stride, w3 = w_pad, w4 = h_pad,
// w5 = cw (width), w6 = ch (height).
//
// No subsampling takes place here: every input pixel is widened to 16 bit
// and shifted left by 3. The results are written to ac and summed up in
// v16-v19. Widths 4, 8 and 16 branch to the vertical padding code of the
// 4:2:0 function; width 32 has its own vertical padding and DC calculation
// at the end of this function.

// Store the four given vectors (64 bytes) to the ac buffer at x0,
// post-incrementing x0, and add them lane-wise into the running sums in
// v16-v19. The condition flags are left untouched.
.macro cfl_ac_444_store_sum r0, r1, r2, r3
        st1             {\r0\().8h, \r1\().8h, \r2\().8h, \r3\().8h}, [x0], #64
        add             v16.8h,  v16.8h,  \r0\().8h
        add             v17.8h,  v17.8h,  \r1\().8h
        add             v18.8h,  v18.8h,  \r2\().8h
        add             v19.8h,  v19.8h,  \r3\().8h
.endm

// Common exit of the 16 wide loops: place the last row (v6/v7) in both
// v0/v1 and v2/v3, then continue in the 16 wide vertical padding code of
// the 4:2:0 function.
.macro cfl_ac_444_w16_finish
        mov             v0.16b,  v6.16b
        mov             v1.16b,  v7.16b
        mov             v2.16b,  v6.16b
        mov             v3.16b,  v7.16b
        b               L(ipred_cfl_ac_420_w16_hpad)
.endm

function ipred_cfl_ac_444_8bpc_neon, export=1
        // Table index = clz(cw) - 26, i.e. 0 for cw 32 up to 3 for cw 4.
        clz             w8,  w5
        lsl             w4,  w4,  #2           // h_pad *= 4
        movrel          x7,  ipred_cfl_ac_444_tbl
        sub             w8,  w8,  #26
        ldrsw           x8,  [x7, w8, uxtw #2]
        // Clear the four sum accumulators.
.irp acc, v16, v17, v18, v19
        movi            \acc\().8h,  #0
.endr
        add             x7,  x7,  x8           // address of the width handler
        sub             w8,  w6,  w4           // height - h_pad
        rbit            w9,  w5                // rbit(width)
        rbit            w10, w6                // rbit(height)
        clz             w9,  w9                // ctz(width)
        clz             w10, w10               // ctz(height)
        add             w9,  w9,  w10          // log2sz
        add             x10, x1,  x2           // x10 = second input row
        dup             v31.4s,  w9
        lsl             x2,  x2,  #1           // x1 and x10 each step two rows
        neg             v31.4s,  v31.4s        // -log2sz
        br              x7

L(ipred_cfl_ac_444_w4):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and expand input; four rows per iteration, two rows packed
        // into each of v0 and v1.
        ld1             {v0.s}[0], [x1],  x2
        ld1             {v0.s}[1], [x10], x2
        ld1             {v1.s}[0], [x1],  x2
        ld1             {v1.s}[1], [x10], x2
        ushll           v0.8h,   v0.8b,   #3
        ushll           v1.8h,   v1.8b,   #3
        subs            w8,  w8,  #4
        add             v16.8h,  v16.8h,  v0.8h
        add             v17.8h,  v17.8h,  v1.8h
        st1             {v0.8h, v1.8h}, [x0], #32
        b.gt            1b
        // Replicate the last row (upper half of v1) into both halves of
        // v0 and v1 before entering the shared 4 wide vertical padding.
        trn2            v0.2d,   v1.2d,   v1.2d
        trn2            v1.2d,   v1.2d,   v1.2d
        b               L(ipred_cfl_ac_420_w4_hpad)

L(ipred_cfl_ac_444_w8):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and expand input; one row per register.
        ld1             {v0.8b}, [x1],  x2
        ld1             {v1.8b}, [x10], x2
        ld1             {v2.8b}, [x1],  x2
        ushll           v0.8h,   v0.8b,   #3
        ld1             {v3.8b}, [x10], x2
.irp row, v1, v2, v3
        ushll           \row\().8h,  \row\().8b,  #3
.endr
        subs            w8,  w8,  #4
        cfl_ac_444_store_sum v0, v1, v2, v3
        b.gt            1b
        // Hand the last row (v3) over in v0 and v1; presumably what the
        // shared 8 wide vertical padding code expects.
        mov             v0.16b,  v3.16b
        mov             v1.16b,  v3.16b
        b               L(ipred_cfl_ac_420_w8_hpad)

L(ipred_cfl_ac_444_w16):
        AARCH64_VALID_JUMP_TARGET
        cbnz            w3,  L(ipred_cfl_ac_444_w16_wpad)
1:      // Copy and expand input, without padding; four rows per
        // iteration, each row ending up in a register pair.
        ld1             {v0.16b}, [x1],  x2
        ld1             {v2.16b}, [x10], x2
        ld1             {v4.16b}, [x1],  x2
        // The upper half is widened first, as the lower half widening
        // overwrites the source register.
        ushll2          v1.8h,   v0.16b,  #3
        ushll           v0.8h,   v0.8b,   #3
        ld1             {v6.16b}, [x10], x2
        ushll2          v3.8h,   v2.16b,  #3
        ushll           v2.8h,   v2.8b,   #3
        ushll2          v5.8h,   v4.16b,  #3
        ushll           v4.8h,   v4.8b,   #3
        ushll2          v7.8h,   v6.16b,  #3
        ushll           v6.8h,   v6.8b,   #3
        subs            w8,  w8,  #4
        cfl_ac_444_store_sum v0, v1, v2, v3
        cfl_ac_444_store_sum v4, v5, v6, v7
        b.gt            1b
        cfl_ac_444_w16_finish

L(ipred_cfl_ac_444_w16_wpad):
1:      // Copy and expand input, padding 8
        ld1             {v0.8b}, [x1],  x2
        ld1             {v2.8b}, [x10], x2
        ld1             {v4.8b}, [x1],  x2
        ld1             {v6.8b}, [x10], x2
.irp row, v0, v2, v4, v6
        ushll           \row\().8h,  \row\().8b,  #3
.endr
        // The right half of each row repeats the last valid value.
        dup             v1.8h,   v0.h[7]
        dup             v3.8h,   v2.h[7]
        dup             v5.8h,   v4.h[7]
        dup             v7.8h,   v6.h[7]
        subs            w8,  w8,  #4
        cfl_ac_444_store_sum v0, v1, v2, v3
        cfl_ac_444_store_sum v4, v5, v6, v7
        b.gt            1b
        cfl_ac_444_w16_finish

L(ipred_cfl_ac_444_w32):
        AARCH64_VALID_JUMP_TARGET
        // Second level dispatch, indexed by w_pad / 2. The handlers below
        // are named wpad0/2/4/6; the table itself is defined elsewhere.
        movrel          x7,  ipred_cfl_ac_444_w32_tbl
        lsr             w3,  w3,  #1
        ldrsw           x3,  [x7, w3, uxtw #2]
        add             x7,  x7,  x3
        br              x7

L(ipred_cfl_ac_444_w32_wpad0):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and expand input, without padding; two rows per iteration,
        // first row in v0-v3, second row in v4-v7.
        ld1             {v2.16b, v3.16b}, [x1],  x2
        ld1             {v6.16b, v7.16b}, [x10], x2
        ushll           v0.8h,   v2.8b,   #3
        ushll2          v1.8h,   v2.16b,  #3
        ushll           v2.8h,   v3.8b,   #3
        ushll2          v3.8h,   v3.16b,  #3
        ushll           v4.8h,   v6.8b,   #3
        ushll2          v5.8h,   v6.16b,  #3
        ushll           v6.8h,   v7.8b,   #3
        ushll2          v7.8h,   v7.16b,  #3
        subs            w8,  w8,  #2
        cfl_ac_444_store_sum v0, v1, v2, v3
        cfl_ac_444_store_sum v4, v5, v6, v7
        b.gt            1b
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad2):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and expand input, padding 8
        // 24 input pixels per row: 16 via ld1 plus 8 more from offset 16.
        ldr             d2,  [x1,  #16]
        ld1             {v1.16b}, [x1],  x2
        ldr             d6,  [x10, #16]
        ld1             {v5.16b}, [x10], x2
        ushll           v2.8h,   v2.8b,   #3
        ushll           v0.8h,   v1.8b,   #3
        ushll2          v1.8h,   v1.16b,  #3
        ushll           v6.8h,   v6.8b,   #3
        ushll           v4.8h,   v5.8b,   #3
        ushll2          v5.8h,   v5.16b,  #3
        // The last quarter of each row repeats the last valid value.
        dup             v3.8h,   v2.h[7]
        dup             v7.8h,   v6.h[7]
        subs            w8,  w8,  #2
        cfl_ac_444_store_sum v0, v1, v2, v3
        cfl_ac_444_store_sum v4, v5, v6, v7
        b.gt            1b
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad4):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and expand input, padding 16
        ld1             {v1.16b}, [x1],  x2
        ld1             {v5.16b}, [x10], x2
        ushll           v0.8h,   v1.8b,   #3
        ushll2          v1.8h,   v1.16b,  #3
        ushll           v4.8h,   v5.8b,   #3
        ushll2          v5.8h,   v5.16b,  #3
        // The right half of each row repeats the last valid value.
        dup             v2.8h,   v1.h[7]
        dup             v3.8h,   v1.h[7]
        dup             v6.8h,   v5.h[7]
        dup             v7.8h,   v5.h[7]
        subs            w8,  w8,  #2
        cfl_ac_444_store_sum v0, v1, v2, v3
        cfl_ac_444_store_sum v4, v5, v6, v7
        b.gt            1b
        b               L(ipred_cfl_ac_444_w32_hpad)

L(ipred_cfl_ac_444_w32_wpad6):
        AARCH64_VALID_JUMP_TARGET
1:      // Copy and expand input, padding 24
        ld1             {v0.8b}, [x1],  x2
        ld1             {v4.8b}, [x10], x2
        ushll           v0.8h,   v0.8b,   #3
        ushll           v4.8h,   v4.8b,   #3
        // Only 8 valid values per row; the remaining 24 repeat the last.
        dup             v1.8h,   v0.h[7]
        dup             v2.8h,   v0.h[7]
        dup             v3.8h,   v0.h[7]
        dup             v5.8h,   v4.h[7]
        dup             v6.8h,   v4.h[7]
        dup             v7.8h,   v4.h[7]
        subs            w8,  w8,  #2
        cfl_ac_444_store_sum v0, v1, v2, v3
        cfl_ac_444_store_sum v4, v5, v6, v7
        b.gt            1b
        // Falls through into the vertical padding below.

L(ipred_cfl_ac_444_w32_hpad):
        cbz             w4,  3f
2:      // Vertical padding (h_pad > 0)
        // v4-v7 still hold the last row written by the loops above; two
        // copies of it are emitted per iteration.
        subs            w4,  w4,  #2
        cfl_ac_444_store_sum v4, v5, v6, v7
        cfl_ac_444_store_sum v4, v5, v6, v7
        b.gt            2b
3:

        // Quadruple the height and reuse the w8 subtracting
        lsl             w6,  w6,  #2
        // Aggregate the sums, with wider intermediates earlier than in
        // ipred_cfl_ac_420_w8_calc_subtract_dc.
        uaddlp          v0.4s,   v16.8h
        uaddlp          v1.4s,   v17.8h
        uaddlp          v2.4s,   v18.8h
        uaddlp          v3.4s,   v19.8h
        add             v0.4s,   v0.4s,   v1.4s
        add             v2.4s,   v2.4s,   v3.4s
        add             v0.4s,   v0.4s,   v2.4s
        addv            s0,  v0.4s             // sum
        // Rewind x0 by (4 * ch) * 16 bytes, i.e. over all 32 wide rows
        // written above.
        sub             x0,  x0,  w6, uxtw #4
        urshl           v4.2s,   v0.2s,   v31.2s  // (sum + (1 << (log2sz - 1))) >>= log2sz
        dup             v4.8h,   v4.h[0]
        b               L(ipred_cfl_ac_420_w8_subtract_dc)
endfunc

jumptable ipred_cfl_ac_444_tbl
// Jump-table data for the 4:4:4 CfL AC code.
//
// The line below carries two tables:
//   * the four entries of ipred_cfl_ac_444_tbl (its `jumptable` header is
//     the last directive on the preceding line), listed in the order
//     w32, w16, w8, w4;
//   * the whole of ipred_cfl_ac_444_w32_tbl, listed in the order
//     wpad0, wpad2, wpad4, wpad6.
//
// Every `.word` holds `L(label) - <table symbol>`, i.e. the offset of a
// local label from the start of its own table rather than an absolute
// address.
//
// NOTE(review): the code that indexes these tables is outside this excerpt.
// Presumably it mirrors the ipred_dc_128 dispatch at the top of the file
// (ldrsw of a 4-byte entry, then add to the table base) -- confirm. If so,
// the entry order must stay in sync with the index computation.
//
// NOTE(review): the `NNNN*c0909341SAndroid Build Coastguard Worker` runs
// look like line-number/annotation residue from a code viewer, not
// assembly -- confirm against the pristine file before assembling.
Coastguard Worker .word L(ipred_cfl_ac_444_w32) - ipred_cfl_ac_444_tbl 5332*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w16) - ipred_cfl_ac_444_tbl 5333*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w8) - ipred_cfl_ac_444_tbl 5334*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w4) - ipred_cfl_ac_444_tbl 5335*c0909341SAndroid Build Coastguard Workerendjumptable 5336*c0909341SAndroid Build Coastguard Worker 5337*c0909341SAndroid Build Coastguard Workerjumptable ipred_cfl_ac_444_w32_tbl 5338*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad0) - ipred_cfl_ac_444_w32_tbl 5339*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad2) - ipred_cfl_ac_444_w32_tbl 5340*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad4) - ipred_cfl_ac_444_w32_tbl 5341*c0909341SAndroid Build Coastguard Worker .word L(ipred_cfl_ac_444_w32_wpad6) - ipred_cfl_ac_444_w32_tbl 5342*c0909341SAndroid Build Coastguard Workerendjumptable 5343