/******************************************************************************
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

#include "src/arm/asm.S"
#include "util.S"

// The exported functions in this file have got the following signature:
// void itxfm_add(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob,
//                int bitdepth_max);

// Most of the functions use the following register layout:
// x0-x3   external parameters
// x4      function pointer to first transform
// x5      function pointer to second transform
// x6      output parameter for helper function
// x7      input parameter for helper function
// x8      input stride for helper function
// x9-x12  scratch variables for helper functions
// x13     pointer to list of eob thresholds
// x14     return pointer for helper function
// x15     return pointer for main function

// The SIMD registers most often use the following layout:
// v0-v1   multiplication coefficients
// v2-v7   scratch registers
// v8-v15  unused
// v16-v31 inputs/outputs of transforms

// Multiplier table for the inverse DCTs, grouped by transform size.
// Plain entries are used with mul/mla/mls followed by a rounding shift by 12.
// Entries written as N*8*(1<<16) are meant for a 32-bit sqrdmulh, where
// multiplying by N*8*(1<<16) is a rounded multiply by N/4096.
const idct_coeffs, align=4
        // idct4
        .int            2896, 2896*8*(1<<16), 1567, 3784
        // idct8
        .int            799, 4017, 3406, 2276
        // idct16
        .int            401, 4076, 3166, 2598
        .int            1931, 3612, 3920, 1189
        // idct32
        .int            201, 4091, 3035, 2751
        .int            1751, 3703, 3857, 1380
        .int            995, 3973, 3513, 2106
        .int            2440, 3290, 4052, 601
endconst

// Multiplier table for the 64-point inverse DCT: four groups of twelve
// entries, each group being eight sqrdmulh-scaled values (N*8*(1<<16))
// followed by four plain values.
const idct64_coeffs, align=4
        .int            101*8*(1<<16), 4095*8*(1<<16), 2967*8*(1<<16), -2824*8*(1<<16)
        .int            1660*8*(1<<16), 3745*8*(1<<16), 3822*8*(1<<16), -1474*8*(1<<16)
        .int            4076, 401, 4017, 799

        .int            4036*8*(1<<16), -700*8*(1<<16), 2359*8*(1<<16), 3349*8*(1<<16)
        .int            3461*8*(1<<16), -2191*8*(1<<16), 897*8*(1<<16), 3996*8*(1<<16)
        .int            -3166, -2598, -799, -4017

        .int            501*8*(1<<16), 4065*8*(1<<16), 3229*8*(1<<16), -2520*8*(1<<16)
        .int            2019*8*(1<<16), 3564*8*(1<<16), 3948*8*(1<<16), -1092*8*(1<<16)
        .int            3612, 1931, 2276, 3406

        .int            4085*8*(1<<16), -301*8*(1<<16), 2675*8*(1<<16), 3102*8*(1<<16)
        .int            3659*8*(1<<16), -1842*8*(1<<16), 1285*8*(1<<16), 3889*8*(1<<16)
        .int            -3920, -1189, -3406, -2276
endconst

// Multipliers for the 4-point inverse ADST (loaded by iadst_4x4).
const iadst4_coeffs, align=4
        .int            1321, 3803, 2482, 3344
endconst

// Multipliers for the 8-point inverse ADST, followed by a copy of the first
// idct_coeffs row with its sqrdmulh-scaled entry replaced by 0.
const iadst8_coeffs, align=4
        .int            4076, 401, 3612, 1931
        .int            2598, 3166, 1189, 3920
        // idct_coeffs
        .int            2896, 0, 1567, 3784
endconst

// Multipliers for the 16-point inverse ADST.
const iadst16_coeffs, align=4
        .int            4091, 201, 3973, 995
        .int            3703, 1751, 3290, 2440
        .int            2751, 3035, 2106, 3513
        .int            1380, 3857, 601, 4052
endconst

// d = s0 * c0 + s1 * c1, per 32-bit lane.
//   d, s0, s1  vector register names without arrangement suffix (e.g. v2)
//   c0, c1     complete multiplier operands (e.g. v0.s[0])
.macro mul_mla d, s0, s1, c0, c1
        mul             \d\().4s, \s0\().4s, \c0
        mla             \d\().4s, \s1\().4s, \c1
.endm

// d = s0 * c0 - s1 * c1, per 32-bit lane. Operands as for mul_mla.
.macro mul_mls d, s0, s1, c0, c1
        mul             \d\().4s, \s0\().4s, \c0
        mls             \d\().4s, \s1\().4s, \c1
.endm

// Scale registers in place with sqrdmulh by \c.
//   sz      arrangement suffix appended to every register name (e.g. .4s)
//   c       multiplier operand
//   r0-r3   registers that are always scaled
//   r4-r7   optional second group; scaled only when r4 is given
.macro scale_input sz, c, r0, r1, r2, r3, r4, r5, r6, r7
        sqrdmulh        \r0\sz, \r0\sz, \c
        sqrdmulh        \r1\sz, \r1\sz, \c
        sqrdmulh        \r2\sz, \r2\sz, \c
        sqrdmulh        \r3\sz, \r3\sz, \c
.ifnb \r4
        sqrdmulh        \r4\sz, \r4\sz, \c
        sqrdmulh        \r5\sz, \r5\sz, \c
        sqrdmulh        \r6\sz, \r6\sz, \c
        sqrdmulh        \r7\sz, \r7\sz, \c
.endif
.endm
// r0 = min(r1, r2), signed, per 32-bit lane. Register names are passed
// without an arrangement suffix.
.macro smin_4s r0, r1, r2
        smin            \r0\().4s, \r1\().4s, \r2\().4s
.endm

// r0 = max(r1, r2), signed, per 32-bit lane. Register names are passed
// without an arrangement suffix.
.macro smax_4s r0, r1, r2
        smax            \r0\().4s, \r1\().4s, \r2\().4s
.endm

// One step of the "add residual to destination" pipeline for 8 pixel wide
// rows. Every stage is optional and is skipped when its argument is blank:
//   load            register that receives a row loaded from [\src]
//   shift           coefficient register rounded down by \shiftbits
//   addsrc/adddst   \adddst (pixels) += \addsrc (residual), via usqadd
//   min             register clamped against the upper bound held in v7.8h
//   store           register written back to [\dst]
// Both \src and \dst are post-incremented by x1 after each access.
.macro load_add_store load, shift, addsrc, adddst, min, store, dst, src, shiftbits=4
.ifnb \load
        ld1             {\load}, [\src], x1
.endif
.ifnb \shift
        srshr           \shift, \shift, #\shiftbits
.endif
.ifnb \addsrc
        usqadd          \adddst, \addsrc
.endif
.ifnb \min
        smin            \min, \min, v7.8h
.endif
.ifnb \store
        st1             {\store}, [\dst], x1
.endif
.endm

// Add the 16 coefficient rows in v16-v31 (8h each) to an 8x16 pixel area
// starting at \dst. \src is initialised from \dst and used as the read
// pointer. The load_add_store invocations are staggered so that each line
// loads row N, rounds row N, adds row N-2, clamps row N-3 and stores row N-4.
// Clobbers v2-v5 and v7, and reuses v16-v27 for loaded pixels.
.macro load_add_store_8x16 dst, src
        mov             \src, \dst
        mvni            v7.8h, #0xfc, lsl #8 // 0x3ff
        load_add_store  v2.8h, v16.8h, , , , , \dst, \src
        load_add_store  v3.8h, v17.8h, , , , , \dst, \src
        load_add_store  v4.8h, v18.8h, v16.8h, v2.8h, , , \dst, \src
        load_add_store  v5.8h, v19.8h, v17.8h, v3.8h, v2.8h, , \dst, \src
        load_add_store  v16.8h, v20.8h, v18.8h, v4.8h, v3.8h, v2.8h, \dst, \src
        load_add_store  v17.8h, v21.8h, v19.8h, v5.8h, v4.8h, v3.8h, \dst, \src
        load_add_store  v18.8h, v22.8h, v20.8h, v16.8h, v5.8h, v4.8h, \dst, \src
        load_add_store  v19.8h, v23.8h, v21.8h, v17.8h, v16.8h, v5.8h, \dst, \src
        load_add_store  v20.8h, v24.8h, v22.8h, v18.8h, v17.8h, v16.8h, \dst, \src
        load_add_store  v21.8h, v25.8h, v23.8h, v19.8h, v18.8h, v17.8h, \dst, \src
        load_add_store  v22.8h, v26.8h, v24.8h, v20.8h, v19.8h, v18.8h, \dst, \src
        load_add_store  v23.8h, v27.8h, v25.8h, v21.8h, v20.8h, v19.8h, \dst, \src
        load_add_store  v24.8h, v28.8h, v26.8h, v22.8h, v21.8h, v20.8h, \dst, \src
        load_add_store  v25.8h, v29.8h, v27.8h, v23.8h, v22.8h, v21.8h, \dst, \src
        load_add_store  v26.8h, v30.8h, v28.8h, v24.8h, v23.8h, v22.8h, \dst, \src
        load_add_store  v27.8h, v31.8h, v29.8h, v25.8h, v24.8h, v23.8h, \dst, \src
        load_add_store  , , v30.8h, v26.8h, v25.8h, v24.8h, \dst, \src
        load_add_store  , , v31.8h, v27.8h, v26.8h, v25.8h, \dst, \src
        load_add_store  , , , , v27.8h, v26.8h, \dst, \src
        load_add_store  , , , , , v27.8h, \dst, \src
.endm

// Same pipeline as load_add_store_8x16, for the 8 coefficient rows in
// v16-v23, with a selectable rounding shift (\shiftbits, default 4).
.macro load_add_store_8x8 dst, src, shiftbits=4
        mov             \src, \dst
        mvni            v7.8h, #0xfc, lsl #8 // 0x3ff
        load_add_store  v2.8h, v16.8h, , , , , \dst, \src, \shiftbits
        load_add_store  v3.8h, v17.8h, , , , , \dst, \src, \shiftbits
        load_add_store  v4.8h, v18.8h, v16.8h, v2.8h, , , \dst, \src, \shiftbits
        load_add_store  v5.8h, v19.8h, v17.8h, v3.8h, v2.8h, , \dst, \src, \shiftbits
        load_add_store  v16.8h, v20.8h, v18.8h, v4.8h, v3.8h, v2.8h, \dst, \src, \shiftbits
        load_add_store  v17.8h, v21.8h, v19.8h, v5.8h, v4.8h, v3.8h, \dst, \src, \shiftbits
        load_add_store  v18.8h, v22.8h, v20.8h, v16.8h, v5.8h, v4.8h, \dst, \src, \shiftbits
        load_add_store  v19.8h, v23.8h, v21.8h, v17.8h, v16.8h, v5.8h, \dst, \src, \shiftbits
        load_add_store  , , v22.8h, v18.8h, v17.8h, v16.8h, \dst, \src, \shiftbits
        load_add_store  , , v23.8h, v19.8h, v18.8h, v17.8h, \dst, \src, \shiftbits
        load_add_store  , , , , v19.8h, v18.8h, \dst, \src, \shiftbits
        load_add_store  , , , , , v19.8h, \dst, \src, \shiftbits
.endm

// Same pipeline again for the 4 coefficient rows in v16-v19.
.macro load_add_store_8x4 dst, src, shiftbits=4
        mov             \src, \dst
        mvni            v7.8h, #0xfc, lsl #8 // 0x3ff
        load_add_store  v2.8h, v16.8h, , , , , \dst, \src, \shiftbits
        load_add_store  v3.8h, v17.8h, , , , , \dst, \src, \shiftbits
        load_add_store  v4.8h, v18.8h, v16.8h, v2.8h, , , \dst, \src, \shiftbits
        load_add_store  v5.8h, v19.8h, v17.8h, v3.8h, v2.8h, , \dst, \src, \shiftbits
        load_add_store  , , v18.8h, v4.8h, v3.8h, v2.8h, \dst, \src, \shiftbits
        load_add_store  , , v19.8h, v5.8h, v4.8h, v3.8h, \dst, \src, \shiftbits
        load_add_store  , , , , v5.8h, v4.8h, \dst, \src, \shiftbits
        load_add_store  , , , , , v5.8h, \dst, \src, \shiftbits
.endm

// Pipeline step for 4 pixel wide rows; each invocation handles two rows,
// which share one 128-bit register (one row per 64-bit half).
//   load            register (given as vN.d) whose d[0] and d[1] are loaded
//                   from two consecutive rows at [\src]
//   inssrc/insdst   \insdst.d[1] = \inssrc.d[0], pairing two coefficient rows
//   shift           coefficient register rounded down by 4 bits
//   addsrc/adddst   \adddst (pixels) += \addsrc (residual), via usqadd
//   min             register clamped against the upper bound held in v7.8h
//   store           register (given as vN.d) whose d[0] and d[1] are stored
//                   to two consecutive rows at [\dst]
// Blank arguments disable the corresponding stage.
.macro load_add_store4 load, inssrc, insdst, shift, addsrc, adddst, min, store, dst, src
.ifnb \load
        ld1             {\load}[0], [\src], x1
.endif
.ifnb \inssrc
        ins             \insdst\().d[1], \inssrc\().d[0]
.endif
.ifnb \shift
        srshr           \shift, \shift, #4
.endif
.ifnb \load
        ld1             {\load}[1], [\src], x1
.endif
.ifnb \addsrc
        usqadd          \adddst, \addsrc
.endif
.ifnb \store
        st1             {\store}[0], [\dst], x1
.endif
.ifnb \min
        smin            \min, \min, v7.8h
.endif
.ifnb \store
        st1             {\store}[1], [\dst], x1
.endif
.endm

// Add 16 rows of 4 coefficients (low halves of v16-v31) to a 4x16 pixel area
// at \dst. Odd-numbered coefficient registers are first merged into the upper
// half of the preceding even-numbered register. \src is initialised from \dst.
.macro load_add_store_4x16 dst, src
        mov             \src, \dst
        mvni            v7.8h, #0xfc, lsl #8 // 0x3ff
        load_add_store4 v0.d, v17, v16, , , , , , \dst, \src
        load_add_store4 v1.d, v19, v18, , , , , , \dst, \src
        load_add_store4 v2.d, v21, v20, v16.8h, , , , , \dst, \src
        load_add_store4 v3.d, v23, v22, v18.8h, v16.8h, v0.8h, , , \dst, \src
        load_add_store4 v17.d, v25, v24, v20.8h, v18.8h, v1.8h, v0.8h, , \dst, \src
        load_add_store4 v19.d, v27, v26, v22.8h, v20.8h, v2.8h, v1.8h, v0.d, \dst, \src
        load_add_store4 v21.d, v29, v28, v24.8h, v22.8h, v3.8h, v2.8h, v1.d, \dst, \src
        load_add_store4 v23.d, v31, v30, v26.8h, v24.8h, v17.8h, v3.8h, v2.d, \dst, \src
        load_add_store4 , , , v28.8h, v26.8h, v19.8h, v17.8h, v3.d, \dst, \src
        load_add_store4 , , , v30.8h, v28.8h, v21.8h, v19.8h, v17.d, \dst, \src
        load_add_store4 , , , , v30.8h, v23.8h, v21.8h, v19.d, \dst, \src
        load_add_store4 , , , , , , v23.8h, v21.d, \dst, \src
        load_add_store4 , , , , , , , v23.d, \dst, \src
.endm

// Same as load_add_store_4x16, for 8 rows (low halves of v16-v23).
.macro load_add_store_4x8 dst, src
        mov             \src, \dst
        mvni            v7.8h, #0xfc, lsl #8 // 0x3ff
        load_add_store4 v0.d, v17, v16, , , , , , \dst, \src
        load_add_store4 v1.d, v19, v18, , , , , , \dst, \src
        load_add_store4 v2.d, v21, v20, v16.8h, , , , , \dst, \src
        load_add_store4 v3.d, v23, v22, v18.8h, v16.8h, v0.8h, , , \dst, \src
        load_add_store4 , , , v20.8h, v18.8h, v1.8h, v0.8h, , \dst, \src
        load_add_store4 , , , v22.8h, v20.8h, v2.8h, v1.8h, v0.d, \dst, \src
        load_add_store4 , , , , v22.8h, v3.8h, v2.8h, v1.d, \dst, \src
        load_add_store4 , , , , , , v3.8h, v2.d, \dst, \src
        load_add_store4 , , , , , , , v3.d, \dst, \src
.endm

// DC-only shortcut, placed at the start of a transform function.
// When w3 (eob) is zero, the single coefficient at [x2] is read and cleared,
// scaled by 2896/4096 (twice when one block dimension is double the other),
// narrowed to 16 bits with an optional rounding shift by \shift, scaled by
// 2896/4096 once more and rounded down by 4 bits. The result is left
// broadcast in v16.8h, w4 is set to \h, and control branches to
// idct_dc_w\w\()_neon without coming back. When w3 is non-zero, execution
// continues at the local label 1 that ends the macro.
.macro idct_dc w, h, shift
        cbnz            w3, 1f
        movz            w16, #2896*8, lsl #16
        ld1r            {v16.4s}, [x2]
        dup             v0.2s, w16
        sqrdmulh        v20.4s, v16.4s, v0.s[0]
        str             wzr, [x2]
.if (\w == 2*\h) || (2*\w == \h)
        sqrdmulh        v20.4s, v20.4s, v0.s[0]
.endif
.if \shift > 0
        sqrshrn         v16.4h, v20.4s, #\shift
        sqrshrn2        v16.8h, v20.4s, #\shift
.else
        sqxtn           v16.4h, v20.4s
        sqxtn2          v16.8h, v20.4s
.endif
        // v0.h[1] is the upper half of the 32-bit constant, i.e. 2896*8.
        sqrdmulh        v16.8h, v16.8h, v0.h[1]
        srshr           v16.8h, v16.8h, #4
        mov             w4, #\h
        b               idct_dc_w\w\()_neon
1:
.endm

// Add the DC value in v16.8h to a 4 pixel wide column of w4 rows.
//   x0 = dst, x1 = stride, w4 = row count (consumed four rows per iteration)
// Results are clamped from above to 0x3ff. Clobbers v0, v1, v31.
function idct_dc_w4_neon
        mvni            v31.8h, #0xfc, lsl #8 // 0x3ff
1:
        ld1             {v0.d}[0], [x0], x1
        ld1             {v0.d}[1], [x0], x1
        ld1             {v1.d}[0], [x0], x1
        subs            w4, w4, #4
        ld1             {v1.d}[1], [x0], x1
        usqadd          v0.8h, v16.8h
        sub             x0, x0, x1, lsl #2   // rewind to the first loaded row
        usqadd          v1.8h, v16.8h
        smin            v0.8h, v0.8h, v31.8h
        st1             {v0.d}[0], [x0], x1
        smin            v1.8h, v1.8h, v31.8h
        st1             {v0.d}[1], [x0], x1
        st1             {v1.d}[0], [x0], x1
        st1             {v1.d}[1], [x0], x1
        b.gt            1b
        ret
endfunc

// Add the DC value in v16.8h to an 8 pixel wide column of w4 rows.
//   x0 = dst, x1 = stride, w4 = row count (consumed four rows per iteration)
// Results are clamped from above to 0x3ff. Clobbers v0-v3, v31.
function idct_dc_w8_neon
        mvni            v31.8h, #0xfc, lsl #8 // 0x3ff
1:
        ld1             {v0.8h}, [x0], x1
        subs            w4, w4, #4
        ld1             {v1.8h}, [x0], x1
        usqadd          v0.8h, v16.8h
        ld1             {v2.8h}, [x0], x1
        usqadd          v1.8h, v16.8h
        ld1             {v3.8h}, [x0], x1
        usqadd          v2.8h, v16.8h
        usqadd          v3.8h, v16.8h
        sub             x0, x0, x1, lsl #2   // rewind to the first loaded row
        smin            v0.8h, v0.8h, v31.8h
        smin            v1.8h, v1.8h, v31.8h
        st1             {v0.8h}, [x0], x1
        smin            v2.8h, v2.8h, v31.8h
        st1             {v1.8h}, [x0], x1
        smin            v3.8h, v3.8h, v31.8h
        st1             {v2.8h}, [x0], x1
        st1             {v3.8h}, [x0], x1
        b.gt            1b
        ret
endfunc
// Add the DC value in v16.8h to a 16 pixel wide column of w4 rows.
//   x0 = dst, x1 = stride, w4 = row count (consumed two rows per iteration)
// Results are clamped from above to 0x3ff. Clobbers v0-v3, v31.
function idct_dc_w16_neon
        mvni            v31.8h, #0xfc, lsl #8 // 0x3ff
1:
        ld1             {v0.8h, v1.8h}, [x0], x1
        subs            w4, w4, #2
        ld1             {v2.8h, v3.8h}, [x0], x1
        usqadd          v0.8h, v16.8h
        usqadd          v1.8h, v16.8h
        sub             x0, x0, x1, lsl #1   // rewind to the first loaded row
        usqadd          v2.8h, v16.8h
        usqadd          v3.8h, v16.8h
        smin            v0.8h, v0.8h, v31.8h
        smin            v1.8h, v1.8h, v31.8h
        smin            v2.8h, v2.8h, v31.8h
        st1             {v0.8h, v1.8h}, [x0], x1
        smin            v3.8h, v3.8h, v31.8h
        st1             {v2.8h, v3.8h}, [x0], x1
        b.gt            1b
        ret
endfunc

// Add the DC value in v16.8h to a 32 pixel wide column of w4 rows, one row
// per iteration. The load does not advance x0; only the store does.
//   x0 = dst, x1 = stride, w4 = row count
// Results are clamped from above to 0x3ff. Clobbers v0-v3, v31.
function idct_dc_w32_neon
        mvni            v31.8h, #0xfc, lsl #8 // 0x3ff
1:
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0]
        subs            w4, w4, #1
        usqadd          v0.8h, v16.8h
        usqadd          v1.8h, v16.8h
        usqadd          v2.8h, v16.8h
        usqadd          v3.8h, v16.8h
        smin            v0.8h, v0.8h, v31.8h
        smin            v1.8h, v1.8h, v31.8h
        smin            v2.8h, v2.8h, v31.8h
        smin            v3.8h, v3.8h, v31.8h
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
        b.gt            1b
        ret
endfunc

// Add the DC value in v16.8h to a 64 pixel wide column of w4 rows, one row
// per iteration, handled as two 64-byte halves.
//   x0 = dst, x1 = stride, w4 = row count
// x1 is reduced by 64 up front because the first store of each row already
// advances x0 by 64 bytes. Results are clamped from above to 0x3ff.
// Clobbers v0-v7, v31 and x1.
function idct_dc_w64_neon
        mvni            v31.8h, #0xfc, lsl #8 // 0x3ff
        sub             x1, x1, #64
1:
        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        subs            w4, w4, #1
        usqadd          v0.8h, v16.8h
        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x0]
        usqadd          v1.8h, v16.8h
        sub             x0, x0, #64          // back to the start of the row
        usqadd          v2.8h, v16.8h
        usqadd          v3.8h, v16.8h
        usqadd          v4.8h, v16.8h
        usqadd          v5.8h, v16.8h
        usqadd          v6.8h, v16.8h
        usqadd          v7.8h, v16.8h
        smin            v0.8h, v0.8h, v31.8h
        smin            v1.8h, v1.8h, v31.8h
        smin            v2.8h, v2.8h, v31.8h
        smin            v3.8h, v3.8h, v31.8h
        smin            v4.8h, v4.8h, v31.8h
        smin            v5.8h, v5.8h, v31.8h
        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        smin            v6.8h, v6.8h, v31.8h
        smin            v7.8h, v7.8h, v31.8h
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], x1
        b.gt            1b
        ret
endfunc

// 4-point inverse Walsh-Hadamard transform, in place on v16-v19 (4s lanes).
// Uses v20 and v21 as temporaries.
.macro iwht4
        add             v16.4s, v16.4s, v17.4s
        sub             v21.4s, v18.4s, v19.4s
        sub             v20.4s, v16.4s, v21.4s
        sshr            v20.4s, v20.4s, #1
        sub             v18.4s, v20.4s, v17.4s
        sub             v17.4s, v20.4s, v19.4s
        add             v19.4s, v21.4s, v18.4s
        sub             v16.4s, v16.4s, v17.4s
.endm

// 4-point inverse DCT, in place on \r0-\r3 (register names without suffix,
// processed as 4s lanes). Expects the idct4 row of idct_coeffs in v0.
// Uses v2-v4, v6 and v7 as temporaries.
.macro idct_4 r0, r1, r2, r3
        mul_mla         v6, \r1, \r3, v0.s[3], v0.s[2]
        mul_mla         v2, \r0, \r2, v0.s[0], v0.s[0]
        mul_mls         v4, \r1, \r3, v0.s[2], v0.s[3]
        mul_mls         v3, \r0, \r2, v0.s[0], v0.s[0]
        srshr           v6.4s, v6.4s, #12
        srshr           v2.4s, v2.4s, #12
        srshr           v7.4s, v4.4s, #12
        srshr           v3.4s, v3.4s, #12
        sqadd           \r0\().4s, v2.4s, v6.4s
        sqsub           \r3\().4s, v2.4s, v6.4s
        sqadd           \r1\().4s, v3.4s, v7.4s
        sqsub           \r2\().4s, v3.4s, v7.4s
.endm

// 4-point inverse DCT on v16-v19; loads its multipliers from idct_coeffs.
// Clobbers x16, v0 and the idct_4 temporaries.
function inv_dct_4s_x4_neon
        AARCH64_VALID_CALL_TARGET
        movrel          x16, idct_coeffs
        ld1             {v0.4s}, [x16]
        idct_4          v16, v17, v18, v19
        ret
endfunc

// 4-point inverse ADST of v16-v19 (4s lanes), written to \o0-\o3 (register
// names without suffix). Loads iadst4_coeffs into v0 via x16 and uses v3-v5
// and v7 as temporaries.
.macro iadst_4x4 o0, o1, o2, o3
        movrel          x16, iadst4_coeffs
        ld1             {v0.4s}, [x16]

        sub             v3.4s, v16.4s, v18.4s
        mul             v4.4s, v16.4s, v0.s[0]
        mla             v4.4s, v18.4s, v0.s[1]
        mla             v4.4s, v19.4s, v0.s[2]
        mul             v7.4s, v17.4s, v0.s[3]
        add             v3.4s, v3.4s, v19.4s
        mul             v5.4s, v16.4s, v0.s[2]
        mls             v5.4s, v18.4s, v0.s[0]
        mls             v5.4s, v19.4s, v0.s[1]

        add             \o3\().4s, v4.4s, v5.4s
        mul             \o2\().4s, v3.4s, v0.s[3]
        add             \o0\().4s, v4.4s, v7.4s
        add             \o1\().4s, v5.4s, v7.4s
        sub             \o3\().4s, \o3\().4s, v7.4s

        srshr           \o0\().4s, \o0\().4s, #12
        srshr           \o2\().4s, \o2\().4s, #12
        srshr           \o1\().4s, \o1\().4s, #12
        srshr           \o3\().4s, \o3\().4s, #12
.endm

// 4-point inverse ADST, in place on v16-v19.
function inv_adst_4s_x4_neon
        AARCH64_VALID_CALL_TARGET
        iadst_4x4       v16, v17, v18, v19
        ret
endfunc

// 4-point inverse ADST with the four outputs written in reverse register
// order (v19 down to v16).
function inv_flipadst_4s_x4_neon
        AARCH64_VALID_CALL_TARGET
        iadst_4x4       v19, v18, v17, v16
        ret
endfunc

// 4-point identity transform, in place on v16-v19: each value x becomes
// x + x*(5793-4096)/4096 with rounding and saturation, i.e. x*5793/4096.
// Clobbers w16, v0 and v4-v7.
function inv_identity_4s_x4_neon
        AARCH64_VALID_CALL_TARGET
        movz            w16, #(5793-4096)*8, lsl #16
        dup             v0.2s, w16
        sqrdmulh        v4.4s, v16.4s, v0.s[0]
        sqrdmulh        v5.4s, v17.4s, v0.s[0]
        sqrdmulh        v6.4s, v18.4s, v0.s[0]
        sqrdmulh        v7.4s, v19.4s, v0.s[0]
        sqadd           v16.4s, v16.4s, v4.4s
        sqadd           v17.4s, v17.4s, v5.4s
        sqadd           v18.4s, v18.4s, v6.4s
        sqadd           v19.4s, v19.4s, v7.4s
        ret
endfunc

function inv_txfm_add_wht_wht_4x4_16bpc_neon, export=1
        mov             x15, x30
        movi            v30.4s, #0
        movi            v31.4s, #0
        ld1             {v16.4s,v17.4s,v18.4s,v19.4s}, [x2]
        st1             {v30.4s, v31.4s}, [x2], #32

        sshr            v16.4s, v16.4s, #2
        sshr            v17.4s, v17.4s, #2
        sshr            v18.4s, v18.4s, #2
        sshr            v19.4s, v19.4s, #2

        iwht4

        st1             {v30.4s, v31.4s}, [x2], #32
        transpose_4x4s  v16, v17, v18, v19, v20, v21, v22, v23

        iwht4

        ld1             {v0.d}[0], [x0], x1
        sqxtn           v16.4h, v16.4s
        ld1             {v0.d}[1], [x0], x1
        sqxtn2          v16.8h, v17.4s
Worker ld1 {v1.d}[0], [x0], x1 510*c0909341SAndroid Build Coastguard Worker sqxtn v18.4h, v18.4s 511*c0909341SAndroid Build Coastguard Worker ld1 {v1.d}[1], [x0], x1 512*c0909341SAndroid Build Coastguard Worker sqxtn2 v18.8h, v19.4s 513*c0909341SAndroid Build Coastguard Worker 514*c0909341SAndroid Build Coastguard Worker b L(itx_4x4_end) 515*c0909341SAndroid Build Coastguard Workerendfunc 516*c0909341SAndroid Build Coastguard Worker 517*c0909341SAndroid Build Coastguard Worker// HBD inv_txfm_add_4x4_neon deviates from the common pattern with registers 518*c0909341SAndroid Build Coastguard Worker// x0-x4 external parameters 519*c0909341SAndroid Build Coastguard Worker// x5 function pointer to first transform 520*c0909341SAndroid Build Coastguard Worker// x6 function pointer to second transform 521*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x4_neon 522*c0909341SAndroid Build Coastguard Worker movi v30.4s, #0 523*c0909341SAndroid Build Coastguard Worker movi v31.4s, #0 524*c0909341SAndroid Build Coastguard Worker ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x2] 525*c0909341SAndroid Build Coastguard Worker st1 {v30.4s, v31.4s}, [x2], #32 526*c0909341SAndroid Build Coastguard Worker 527*c0909341SAndroid Build Coastguard Worker blr x5 528*c0909341SAndroid Build Coastguard Worker 529*c0909341SAndroid Build Coastguard Worker st1 {v30.4s, v31.4s}, [x2], #32 530*c0909341SAndroid Build Coastguard Worker sqxtn v16.4h, v16.4s 531*c0909341SAndroid Build Coastguard Worker sqxtn v17.4h, v17.4s 532*c0909341SAndroid Build Coastguard Worker sqxtn v18.4h, v18.4s 533*c0909341SAndroid Build Coastguard Worker sqxtn v19.4h, v19.4s 534*c0909341SAndroid Build Coastguard Worker transpose_4x4h v16, v17, v18, v19, v20, v21, v22, v23 535*c0909341SAndroid Build Coastguard Worker 536*c0909341SAndroid Build Coastguard Worker blr x6 537*c0909341SAndroid Build Coastguard Worker 538*c0909341SAndroid Build Coastguard Worker ld1 {v0.d}[0], [x0], x1 539*c0909341SAndroid Build Coastguard 
Worker ld1 {v0.d}[1], [x0], x1 540*c0909341SAndroid Build Coastguard Worker ins v16.d[1], v17.d[0] 541*c0909341SAndroid Build Coastguard Worker ins v18.d[1], v19.d[0] 542*c0909341SAndroid Build Coastguard Worker ld1 {v1.d}[0], [x0], x1 543*c0909341SAndroid Build Coastguard Worker ld1 {v1.d}[1], [x0], x1 544*c0909341SAndroid Build Coastguard Worker srshr v16.8h, v16.8h, #4 545*c0909341SAndroid Build Coastguard Worker srshr v18.8h, v18.8h, #4 546*c0909341SAndroid Build Coastguard Worker 547*c0909341SAndroid Build Coastguard WorkerL(itx_4x4_end): 548*c0909341SAndroid Build Coastguard Worker dup v31.8h, w4 549*c0909341SAndroid Build Coastguard Worker sub x0, x0, x1, lsl #2 550*c0909341SAndroid Build Coastguard Worker usqadd v0.8h, v16.8h 551*c0909341SAndroid Build Coastguard Worker usqadd v1.8h, v18.8h 552*c0909341SAndroid Build Coastguard Worker smin v0.8h, v0.8h, v31.8h 553*c0909341SAndroid Build Coastguard Worker st1 {v0.d}[0], [x0], x1 554*c0909341SAndroid Build Coastguard Worker smin v1.8h, v1.8h, v31.8h 555*c0909341SAndroid Build Coastguard Worker st1 {v0.d}[1], [x0], x1 556*c0909341SAndroid Build Coastguard Worker st1 {v1.d}[0], [x0], x1 557*c0909341SAndroid Build Coastguard Worker st1 {v1.d}[1], [x0], x1 558*c0909341SAndroid Build Coastguard Worker 559*c0909341SAndroid Build Coastguard Worker ret x15 560*c0909341SAndroid Build Coastguard Workerendfunc 561*c0909341SAndroid Build Coastguard Worker 562*c0909341SAndroid Build Coastguard Worker.macro def_fn_4x4 txfm1, txfm2 563*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_4x4_16bpc_neon, export=1 564*c0909341SAndroid Build Coastguard Worker mov x15, x30 565*c0909341SAndroid Build Coastguard Worker 566*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct 567*c0909341SAndroid Build Coastguard Worker cbnz w3, 1f 568*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 569*c0909341SAndroid Build Coastguard Worker ld1r {v16.4s}, [x2] 
570*c0909341SAndroid Build Coastguard Worker dup v4.2s, w16 571*c0909341SAndroid Build Coastguard Worker str wzr, [x2] 572*c0909341SAndroid Build Coastguard Worker sqrdmulh v16.4s, v16.4s, v4.s[0] 573*c0909341SAndroid Build Coastguard Worker ld1 {v0.d}[0], [x0], x1 574*c0909341SAndroid Build Coastguard Worker sqxtn v20.4h, v16.4s 575*c0909341SAndroid Build Coastguard Worker sqxtn2 v20.8h, v16.4s 576*c0909341SAndroid Build Coastguard Worker ld1 {v0.d}[1], [x0], x1 577*c0909341SAndroid Build Coastguard Worker sqrdmulh v20.8h, v20.8h, v4.h[1] 578*c0909341SAndroid Build Coastguard Worker ld1 {v1.d}[0], [x0], x1 579*c0909341SAndroid Build Coastguard Worker srshr v16.8h, v20.8h, #4 580*c0909341SAndroid Build Coastguard Worker ld1 {v1.d}[1], [x0], x1 581*c0909341SAndroid Build Coastguard Worker srshr v18.8h, v20.8h, #4 582*c0909341SAndroid Build Coastguard Worker movi v30.8h, #0 583*c0909341SAndroid Build Coastguard Worker b L(itx_4x4_end) 584*c0909341SAndroid Build Coastguard Worker1: 585*c0909341SAndroid Build Coastguard Worker.endif 586*c0909341SAndroid Build Coastguard Worker adr x5, inv_\txfm1\()_4s_x4_neon 587*c0909341SAndroid Build Coastguard Worker movrel x6, X(inv_\txfm2\()_4h_x4_neon) 588*c0909341SAndroid Build Coastguard Worker b inv_txfm_add_4x4_neon 589*c0909341SAndroid Build Coastguard Workerendfunc 590*c0909341SAndroid Build Coastguard Worker.endm 591*c0909341SAndroid Build Coastguard Worker 592*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, dct 593*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, identity 594*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, adst 595*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, flipadst 596*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, identity 597*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, dct 598*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, adst 599*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, flipadst 600*c0909341SAndroid Build 
Coastguard Workerdef_fn_4x4 flipadst, dct 601*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, adst 602*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, flipadst 603*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, dct 604*c0909341SAndroid Build Coastguard Worker 605*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, identity 606*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, identity 607*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, adst 608*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, flipadst 609*c0909341SAndroid Build Coastguard Worker 610*c0909341SAndroid Build Coastguard Worker.macro idct_8 r0, r1, r2, r3, r4, r5, r6, r7 611*c0909341SAndroid Build Coastguard Worker idct_4 \r0, \r2, \r4, \r6 612*c0909341SAndroid Build Coastguard Worker 613*c0909341SAndroid Build Coastguard Worker movi v5.4s, #1, msl #16 // row_clip_max = ~(~bdmax << 7), 0x1ffff 614*c0909341SAndroid Build Coastguard Worker mvni v4.4s, #1, msl #16 // row_clip_min = (~bdmax << 7), 0xfffe0000 615*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6 616*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v5 617*c0909341SAndroid Build Coastguard Worker.endr 618*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6 619*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v4 620*c0909341SAndroid Build Coastguard Worker.endr 621*c0909341SAndroid Build Coastguard Worker 622*c0909341SAndroid Build Coastguard Worker mul_mls v2, \r1, \r7, v1.s[0], v1.s[1] // -> t4a 623*c0909341SAndroid Build Coastguard Worker mul_mla v3, \r1, \r7, v1.s[1], v1.s[0] // -> t7a 624*c0909341SAndroid Build Coastguard Worker mul_mls v6, \r5, \r3, v1.s[2], v1.s[3] // -> t5a 625*c0909341SAndroid Build Coastguard Worker mul_mla v7, \r5, \r3, v1.s[3], v1.s[2] // -> t6a 626*c0909341SAndroid Build Coastguard Worker srshr \r1\().4s, v2.4s, #12 // t4a 627*c0909341SAndroid Build Coastguard Worker srshr \r7\().4s, 
v3.4s, #12 // t7a 628*c0909341SAndroid Build Coastguard Worker srshr \r3\().4s, v6.4s, #12 // t5a 629*c0909341SAndroid Build Coastguard Worker srshr \r5\().4s, v7.4s, #12 // t6a 630*c0909341SAndroid Build Coastguard Worker 631*c0909341SAndroid Build Coastguard Worker sqadd v2.4s, \r1\().4s, \r3\().4s // t4 632*c0909341SAndroid Build Coastguard Worker sqsub \r1\().4s, \r1\().4s, \r3\().4s // t5a 633*c0909341SAndroid Build Coastguard Worker sqadd v3.4s, \r7\().4s, \r5\().4s // t7 634*c0909341SAndroid Build Coastguard Worker sqsub \r3\().4s, \r7\().4s, \r5\().4s // t6a 635*c0909341SAndroid Build Coastguard Worker 636*c0909341SAndroid Build Coastguard Worker.irp r, v2, \r1, v3, \r3 637*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v5 638*c0909341SAndroid Build Coastguard Worker.endr 639*c0909341SAndroid Build Coastguard Worker.irp r, v2, \r1, v3, \r3 640*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v4 641*c0909341SAndroid Build Coastguard Worker.endr 642*c0909341SAndroid Build Coastguard Worker 643*c0909341SAndroid Build Coastguard Worker mul_mls v7, \r3, \r1, v0.s[0], v0.s[0] // -> t5 644*c0909341SAndroid Build Coastguard Worker mul_mla v6, \r3, \r1, v0.s[0], v0.s[0] // -> t6 645*c0909341SAndroid Build Coastguard Worker srshr v7.4s, v7.4s, #12 // t5 646*c0909341SAndroid Build Coastguard Worker srshr v6.4s, v6.4s, #12 // t6 647*c0909341SAndroid Build Coastguard Worker 648*c0909341SAndroid Build Coastguard Worker sqsub \r7\().4s, \r0\().4s, v3.4s // out7 649*c0909341SAndroid Build Coastguard Worker sqadd \r0\().4s, \r0\().4s, v3.4s // out0 650*c0909341SAndroid Build Coastguard Worker sqadd \r1\().4s, \r2\().4s, v6.4s // out1 651*c0909341SAndroid Build Coastguard Worker sqsub v6.4s, \r2\().4s, v6.4s // out6 652*c0909341SAndroid Build Coastguard Worker sqadd \r2\().4s, \r4\().4s, v7.4s // out2 653*c0909341SAndroid Build Coastguard Worker sqsub \r5\().4s, \r4\().4s, v7.4s // out5 654*c0909341SAndroid Build Coastguard Worker sqadd \r3\().4s, 
\r6\().4s, v2.4s // out3 655*c0909341SAndroid Build Coastguard Worker sqsub \r4\().4s, \r6\().4s, v2.4s // out4 656*c0909341SAndroid Build Coastguard Worker mov \r6\().16b, v6.16b // out6 657*c0909341SAndroid Build Coastguard Worker.endm 658*c0909341SAndroid Build Coastguard Worker 659*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4s_x8_neon 660*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_CALL_TARGET 661*c0909341SAndroid Build Coastguard Worker movrel x16, idct_coeffs 662*c0909341SAndroid Build Coastguard Worker ld1 {v0.4s, v1.4s}, [x16] 663*c0909341SAndroid Build Coastguard Worker idct_8 v16, v17, v18, v19, v20, v21, v22, v23 664*c0909341SAndroid Build Coastguard Worker ret 665*c0909341SAndroid Build Coastguard Workerendfunc 666*c0909341SAndroid Build Coastguard Worker 667*c0909341SAndroid Build Coastguard Worker.macro iadst_8 o0, o1, o2, o3, o4, o5, o6, o7 668*c0909341SAndroid Build Coastguard Worker movrel x16, iadst8_coeffs 669*c0909341SAndroid Build Coastguard Worker ld1 {v0.4s, v1.4s}, [x16], #32 670*c0909341SAndroid Build Coastguard Worker 671*c0909341SAndroid Build Coastguard Worker mul_mla v2, v23, v16, v0.s[0], v0.s[1] 672*c0909341SAndroid Build Coastguard Worker mul_mls v4, v23, v16, v0.s[1], v0.s[0] 673*c0909341SAndroid Build Coastguard Worker mul_mla v6, v21, v18, v0.s[2], v0.s[3] 674*c0909341SAndroid Build Coastguard Worker srshr v16.4s, v2.4s, #12 // t0a 675*c0909341SAndroid Build Coastguard Worker srshr v23.4s, v4.4s, #12 // t1a 676*c0909341SAndroid Build Coastguard Worker mul_mls v2, v21, v18, v0.s[3], v0.s[2] 677*c0909341SAndroid Build Coastguard Worker mul_mla v4, v19, v20, v1.s[0], v1.s[1] 678*c0909341SAndroid Build Coastguard Worker srshr v18.4s, v6.4s, #12 // t2a 679*c0909341SAndroid Build Coastguard Worker srshr v21.4s, v2.4s, #12 // t3a 680*c0909341SAndroid Build Coastguard Worker mul_mls v6, v19, v20, v1.s[1], v1.s[0] 681*c0909341SAndroid Build Coastguard Worker mul_mla v2, v17, v22, v1.s[2], v1.s[3] 
682*c0909341SAndroid Build Coastguard Worker srshr v20.4s, v4.4s, #12 // t4a 683*c0909341SAndroid Build Coastguard Worker srshr v19.4s, v6.4s, #12 // t5a 684*c0909341SAndroid Build Coastguard Worker mul_mls v4, v17, v22, v1.s[3], v1.s[2] 685*c0909341SAndroid Build Coastguard Worker srshr v22.4s, v2.4s, #12 // t6a 686*c0909341SAndroid Build Coastguard Worker srshr v17.4s, v4.4s, #12 // t7a 687*c0909341SAndroid Build Coastguard Worker 688*c0909341SAndroid Build Coastguard Worker ld1 {v0.4s}, [x16] 689*c0909341SAndroid Build Coastguard Worker 690*c0909341SAndroid Build Coastguard Worker movi v1.4s, #1, msl #16 // row_clip_max = ~(~bdmax << 7), 0x1ffff 691*c0909341SAndroid Build Coastguard Worker 692*c0909341SAndroid Build Coastguard Worker sqadd v2.4s, v16.4s, v20.4s // t0 693*c0909341SAndroid Build Coastguard Worker sqsub v3.4s, v16.4s, v20.4s // t4 694*c0909341SAndroid Build Coastguard Worker mvni v20.4s, #1, msl #16 // row_clip_min = (~bdmax << 7), 0xfffe0000 695*c0909341SAndroid Build Coastguard Worker sqadd v4.4s, v23.4s, v19.4s // t1 696*c0909341SAndroid Build Coastguard Worker sqsub v5.4s, v23.4s, v19.4s // t5 697*c0909341SAndroid Build Coastguard Worker sqadd v6.4s, v18.4s, v22.4s // t2 698*c0909341SAndroid Build Coastguard Worker sqsub v7.4s, v18.4s, v22.4s // t6 699*c0909341SAndroid Build Coastguard Worker sqadd v18.4s, v21.4s, v17.4s // t3 700*c0909341SAndroid Build Coastguard Worker sqsub v19.4s, v21.4s, v17.4s // t7 701*c0909341SAndroid Build Coastguard Worker 702*c0909341SAndroid Build Coastguard Worker.irp r, v2, v3, v4, v5, v6, v7, v18, v19 703*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v1 704*c0909341SAndroid Build Coastguard Worker.endr 705*c0909341SAndroid Build Coastguard Worker.irp r, v2, v3, v4, v5, v6, v7, v18, v19 706*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v20 707*c0909341SAndroid Build Coastguard Worker.endr 708*c0909341SAndroid Build Coastguard Worker 709*c0909341SAndroid Build Coastguard Worker mul_mla v16, 
v3, v5, v0.s[3], v0.s[2] 710*c0909341SAndroid Build Coastguard Worker mul_mls v20, v3, v5, v0.s[2], v0.s[3] 711*c0909341SAndroid Build Coastguard Worker mul_mls v22, v19, v7, v0.s[3], v0.s[2] 712*c0909341SAndroid Build Coastguard Worker 713*c0909341SAndroid Build Coastguard Worker srshr v3.4s, v16.4s, #12 // t4a 714*c0909341SAndroid Build Coastguard Worker srshr v5.4s, v20.4s, #12 // t5a 715*c0909341SAndroid Build Coastguard Worker 716*c0909341SAndroid Build Coastguard Worker mul_mla v16, v19, v7, v0.s[2], v0.s[3] 717*c0909341SAndroid Build Coastguard Worker 718*c0909341SAndroid Build Coastguard Worker srshr v7.4s, v22.4s, #12 // t6a 719*c0909341SAndroid Build Coastguard Worker srshr v19.4s, v16.4s, #12 // t7a 720*c0909341SAndroid Build Coastguard Worker 721*c0909341SAndroid Build Coastguard Worker sqadd \o0\().4s, v2.4s, v6.4s // out0 722*c0909341SAndroid Build Coastguard Worker sqsub v2.4s, v2.4s, v6.4s // t2 723*c0909341SAndroid Build Coastguard Worker sqadd \o7\().4s, v4.4s, v18.4s // out7 724*c0909341SAndroid Build Coastguard Worker sqsub v4.4s, v4.4s, v18.4s // t3 725*c0909341SAndroid Build Coastguard Worker 726*c0909341SAndroid Build Coastguard Worker mvni v18.4s, #1, msl #16 // row_clip_min = (~bdmax << 7), 0xfffe0000 727*c0909341SAndroid Build Coastguard Worker 728*c0909341SAndroid Build Coastguard Worker sqadd \o1\().4s, v3.4s, v7.4s // out1 729*c0909341SAndroid Build Coastguard Worker sqsub v3.4s, v3.4s, v7.4s // t6 730*c0909341SAndroid Build Coastguard Worker sqadd \o6\().4s, v5.4s, v19.4s // out6 731*c0909341SAndroid Build Coastguard Worker sqsub v5.4s, v5.4s, v19.4s // t7 732*c0909341SAndroid Build Coastguard Worker 733*c0909341SAndroid Build Coastguard Worker // Not clipping the output registers, as they will be downshifted and 734*c0909341SAndroid Build Coastguard Worker // narrowed afterwards anyway. 
735*c0909341SAndroid Build Coastguard Worker.irp r, v2, v4, v3, v5 736*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v1 737*c0909341SAndroid Build Coastguard Worker.endr 738*c0909341SAndroid Build Coastguard Worker.irp r, v2, v4, v3, v5 739*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v18 740*c0909341SAndroid Build Coastguard Worker.endr 741*c0909341SAndroid Build Coastguard Worker 742*c0909341SAndroid Build Coastguard Worker sqneg \o7\().4s, \o7\().4s // out7 743*c0909341SAndroid Build Coastguard Worker sqneg \o1\().4s, \o1\().4s // out1 744*c0909341SAndroid Build Coastguard Worker 745*c0909341SAndroid Build Coastguard Worker mul_mla v18, v2, v4, v0.s[0], v0.s[0] // -> out3 (v19 or v20) 746*c0909341SAndroid Build Coastguard Worker mul_mls v6, v2, v4, v0.s[0], v0.s[0] // -> out4 (v20 or v19) 747*c0909341SAndroid Build Coastguard Worker mul_mls v20, v3, v5, v0.s[0], v0.s[0] // -> out5 (v21 or v18) 748*c0909341SAndroid Build Coastguard Worker srshr v2.4s, v18.4s, #12 // out3 749*c0909341SAndroid Build Coastguard Worker mul_mla v18, v3, v5, v0.s[0], v0.s[0] // -> out2 (v18 or v21) 750*c0909341SAndroid Build Coastguard Worker srshr v3.4s, v20.4s, #12 // out5 751*c0909341SAndroid Build Coastguard Worker srshr \o2\().4s, v18.4s, #12 // out2 (v18 or v21) 752*c0909341SAndroid Build Coastguard Worker srshr \o4\().4s, v6.4s, #12 // out4 (v20 or v19) 753*c0909341SAndroid Build Coastguard Worker 754*c0909341SAndroid Build Coastguard Worker sqneg \o3\().4s, v2.4s // out3 755*c0909341SAndroid Build Coastguard Worker sqneg \o5\().4s, v3.4s // out5 756*c0909341SAndroid Build Coastguard Worker.endm 757*c0909341SAndroid Build Coastguard Worker 758*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4s_x8_neon 759*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_CALL_TARGET 760*c0909341SAndroid Build Coastguard Worker iadst_8 v16, v17, v18, v19, v20, v21, v22, v23 761*c0909341SAndroid Build Coastguard Worker ret 762*c0909341SAndroid Build Coastguard 
Workerendfunc 763*c0909341SAndroid Build Coastguard Worker 764*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4s_x8_neon 765*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_CALL_TARGET 766*c0909341SAndroid Build Coastguard Worker iadst_8 v23, v22, v21, v20, v19, v18, v17, v16 767*c0909341SAndroid Build Coastguard Worker ret 768*c0909341SAndroid Build Coastguard Workerendfunc 769*c0909341SAndroid Build Coastguard Worker 770*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4s_x8_neon 771*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_CALL_TARGET 772*c0909341SAndroid Build Coastguard Worker sqshl v16.4s, v16.4s, #1 773*c0909341SAndroid Build Coastguard Worker sqshl v17.4s, v17.4s, #1 774*c0909341SAndroid Build Coastguard Worker sqshl v18.4s, v18.4s, #1 775*c0909341SAndroid Build Coastguard Worker sqshl v19.4s, v19.4s, #1 776*c0909341SAndroid Build Coastguard Worker sqshl v20.4s, v20.4s, #1 777*c0909341SAndroid Build Coastguard Worker sqshl v21.4s, v21.4s, #1 778*c0909341SAndroid Build Coastguard Worker sqshl v22.4s, v22.4s, #1 779*c0909341SAndroid Build Coastguard Worker sqshl v23.4s, v23.4s, #1 780*c0909341SAndroid Build Coastguard Worker ret 781*c0909341SAndroid Build Coastguard Workerendfunc 782*c0909341SAndroid Build Coastguard Worker 783*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x8_neon 784*c0909341SAndroid Build Coastguard Worker movi v31.4s, #0 785*c0909341SAndroid Build Coastguard Worker 786*c0909341SAndroid Build Coastguard Worker cmp w3, w13 787*c0909341SAndroid Build Coastguard Worker mov x11, #32 788*c0909341SAndroid Build Coastguard Worker b.lt 1f 789*c0909341SAndroid Build Coastguard Worker 790*c0909341SAndroid Build Coastguard Worker add x6, x2, #16 791*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s 792*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x6] 793*c0909341SAndroid Build Coastguard Worker st1 {v31.4s}, [x6], x11 
794*c0909341SAndroid Build Coastguard Worker.endr 795*c0909341SAndroid Build Coastguard Worker 796*c0909341SAndroid Build Coastguard Worker blr x4 797*c0909341SAndroid Build Coastguard Worker 798*c0909341SAndroid Build Coastguard Worker sqrshrn v24.4h, v16.4s, #1 799*c0909341SAndroid Build Coastguard Worker sqrshrn v25.4h, v17.4s, #1 800*c0909341SAndroid Build Coastguard Worker sqrshrn v26.4h, v18.4s, #1 801*c0909341SAndroid Build Coastguard Worker sqrshrn v27.4h, v19.4s, #1 802*c0909341SAndroid Build Coastguard Worker sqrshrn2 v24.8h, v20.4s, #1 803*c0909341SAndroid Build Coastguard Worker sqrshrn2 v25.8h, v21.4s, #1 804*c0909341SAndroid Build Coastguard Worker sqrshrn2 v26.8h, v22.4s, #1 805*c0909341SAndroid Build Coastguard Worker sqrshrn2 v27.8h, v23.4s, #1 806*c0909341SAndroid Build Coastguard Worker 807*c0909341SAndroid Build Coastguard Worker transpose_4x8h v24, v25, v26, v27, v2, v3, v4, v5 808*c0909341SAndroid Build Coastguard Worker 809*c0909341SAndroid Build Coastguard Worker b 2f 810*c0909341SAndroid Build Coastguard Worker 811*c0909341SAndroid Build Coastguard Worker1: 812*c0909341SAndroid Build Coastguard Worker.irp i, v24.8h, v25.8h, v26.8h, v27.8h 813*c0909341SAndroid Build Coastguard Worker movi \i, #0 814*c0909341SAndroid Build Coastguard Worker.endr 815*c0909341SAndroid Build Coastguard Worker 816*c0909341SAndroid Build Coastguard Worker2: 817*c0909341SAndroid Build Coastguard Worker 818*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s 819*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x2] 820*c0909341SAndroid Build Coastguard Worker st1 {v31.4s}, [x2], x11 821*c0909341SAndroid Build Coastguard Worker.endr 822*c0909341SAndroid Build Coastguard Worker 823*c0909341SAndroid Build Coastguard Worker blr x4 824*c0909341SAndroid Build Coastguard Worker 825*c0909341SAndroid Build Coastguard Worker sqrshrn v16.4h, v16.4s, #1 826*c0909341SAndroid Build Coastguard Worker sqrshrn v17.4h, 
v17.4s, #1 827*c0909341SAndroid Build Coastguard Worker sqrshrn v18.4h, v18.4s, #1 828*c0909341SAndroid Build Coastguard Worker sqrshrn v19.4h, v19.4s, #1 829*c0909341SAndroid Build Coastguard Worker sqrshrn2 v16.8h, v20.4s, #1 830*c0909341SAndroid Build Coastguard Worker sqrshrn2 v17.8h, v21.4s, #1 831*c0909341SAndroid Build Coastguard Worker sqrshrn2 v18.8h, v22.4s, #1 832*c0909341SAndroid Build Coastguard Worker sqrshrn2 v19.8h, v23.4s, #1 833*c0909341SAndroid Build Coastguard Worker 834*c0909341SAndroid Build Coastguard Worker transpose_4x8h v16, v17, v18, v19, v20, v21, v22, v23 835*c0909341SAndroid Build Coastguard Worker 836*c0909341SAndroid Build Coastguard Worker mov v20.16b, v24.16b 837*c0909341SAndroid Build Coastguard Worker mov v21.16b, v25.16b 838*c0909341SAndroid Build Coastguard Worker mov v22.16b, v26.16b 839*c0909341SAndroid Build Coastguard Worker mov v23.16b, v27.16b 840*c0909341SAndroid Build Coastguard Worker 841*c0909341SAndroid Build Coastguard Worker blr x5 842*c0909341SAndroid Build Coastguard Worker 843*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x0, x7 844*c0909341SAndroid Build Coastguard Worker ret x15 845*c0909341SAndroid Build Coastguard Workerendfunc 846*c0909341SAndroid Build Coastguard Worker 847*c0909341SAndroid Build Coastguard Worker.macro def_fn_8x8 txfm1, txfm2, eob_half 848*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_8x8_16bpc_neon, export=1 849*c0909341SAndroid Build Coastguard Worker mov x15, x30 850*c0909341SAndroid Build Coastguard Worker 851*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct 852*c0909341SAndroid Build Coastguard Worker idct_dc 8, 8, 1 853*c0909341SAndroid Build Coastguard Worker.endif 854*c0909341SAndroid Build Coastguard Worker movrel x5, X(inv_\txfm2\()_8h_x8_neon) 855*c0909341SAndroid Build Coastguard Worker mov w13, #\eob_half 856*c0909341SAndroid Build Coastguard Worker adr x4, inv_\txfm1\()_4s_x8_neon 857*c0909341SAndroid 
Build Coastguard Worker b inv_txfm_add_8x8_neon 858*c0909341SAndroid Build Coastguard Workerendfunc 859*c0909341SAndroid Build Coastguard Worker.endm 860*c0909341SAndroid Build Coastguard Worker 861*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, dct, 10 862*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, identity, 10 863*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, adst, 10 864*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, flipadst, 10 865*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, identity, 4 866*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, dct, 10 867*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, adst, 10 868*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, flipadst, 10 869*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, dct, 10 870*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, adst, 10 871*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, flipadst, 10 872*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, dct, 4 873*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, identity, 4 874*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, identity, 4 875*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, adst, 4 876*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, flipadst, 4 877*c0909341SAndroid Build Coastguard Worker 878*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x4_neon 879*c0909341SAndroid Build Coastguard Worker movi v28.4s, #0 880*c0909341SAndroid Build Coastguard Worker movi v29.4s, #0 881*c0909341SAndroid Build Coastguard Worker movi v30.4s, #0 882*c0909341SAndroid Build Coastguard Worker movi v31.4s, #0 883*c0909341SAndroid Build Coastguard Worker ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x2] 884*c0909341SAndroid Build Coastguard Worker st1 {v28.4s,v29.4s,v30.4s,v31.4s}, [x2], #64 885*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 886*c0909341SAndroid 
// NOTE(review): the instructions up to the first endfunc finish a function
// whose header sits above this point (presumably the 8x4 counterpart of
// inv_txfm_add_4x8_neon, given the load_add_store_8x4 call). They are
// reproduced unchanged.
        dup             v0.2s,   w16
        ld1             {v20.4s,v21.4s,v22.4s,v23.4s}, [x2]
        st1             {v28.4s,v29.4s,v30.4s,v31.4s}, [x2]

        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23

        blr             x4

        sqxtn           v16.4h,  v16.4s
        sqxtn           v17.4h,  v17.4s
        sqxtn           v18.4h,  v18.4s
        sqxtn           v19.4h,  v19.4s
        sqxtn           v20.4h,  v20.4s
        sqxtn           v21.4h,  v21.4s
        sqxtn           v22.4h,  v22.4s
        sqxtn           v23.4h,  v23.4s

        transpose_4x4h  v16, v17, v18, v19, v4,  v5,  v6,  v7
        transpose_4x4h  v20, v21, v22, v23, v4,  v5,  v6,  v7
        ins             v16.d[1], v20.d[0]
        ins             v17.d[1], v21.d[0]
        ins             v18.d[1], v22.d[0]
        ins             v19.d[1], v23.d[0]

        blr             x5

        load_add_store_8x4 x0, x7
        ret             x15
endfunc

// Common body of the 4x8 add functions generated by def_fn_48 below.
//
// In:  x0  = dst, x2 = coeff, w3 = eob (external parameters)
//      x4  = first-pass transform, x5 = second-pass transform
//      w13 = eob threshold loaded by the def_fn_48 stub
//      x15 = return address saved by the def_fn_48 stub
//
// The coefficient buffer is read in two groups of four 4s vectors with a
// 32 byte stride: first the group at byte offset 16, then the group at
// offset 0. Every vector is overwritten with zeros in memory right after it
// has been loaded. When eob is below the threshold the offset-16 group is
// not read (and not cleared); zeros are used for its first-pass result.
// Clobbers x6, x11, w16, v4-v7 and v16-v23, v30, v31, plus whatever the
// called transforms and load_add_store_4x8 use.
function inv_txfm_add_4x8_neon
        // 2896/4096 is roughly 1/sqrt(2); presumably the rectangular-block
        // scale applied by scale_input - confirm against that macro.
        movz            w16, #2896*8, lsl #16
        movi            v31.4s,  #0             // zero vector used to clear coeff
        dup             v30.2s,  w16

        cmp             w3,  w13
        mov             x11, #32                // byte stride between loads
        b.lt            1f                      // eob < threshold: skip this group

        add             x6,  x2,  #16
.irp i, v16.4s, v17.4s, v18.4s, v19.4s
        ld1             {\i},     [x6]
        st1             {v31.4s}, [x6], x11
.endr
        scale_input     .4s, v30.s[0], v16, v17, v18, v19
        blr             x4
        // Saturating narrow to 16 bit; this group is kept in v20-v23.
        sqxtn           v20.4h,  v16.4s
        sqxtn           v21.4h,  v17.4s
        sqxtn           v22.4h,  v18.4s
        sqxtn           v23.4h,  v19.4s
        transpose_4x4h  v20, v21, v22, v23, v4,  v5,  v6,  v7

        b               2f

1:
        // Skipped group: feed zeros to the second pass instead.
.irp i, v20, v21, v22, v23
        movi            \i\().4h, #0
.endr

2:

        // The group at offset 0 is always processed; x2 advances as it goes.
.irp i, v16.4s, v17.4s, v18.4s, v19.4s
        ld1             {\i},     [x2]
        st1             {v31.4s}, [x2], x11
.endr
        scale_input     .4s, v30.s[0], v16, v17, v18, v19
        blr             x4
        sqxtn           v16.4h,  v16.4s
        sqxtn           v17.4h,  v17.4s
        sqxtn           v18.4h,  v18.4s
        sqxtn           v19.4h,  v19.4s
        transpose_4x4h  v16, v17, v18, v19, v4,  v5,  v6,  v7

        // Second pass on v16-v23 (4h each), then add the result to dst.
        blr             x5

        load_add_store_4x8 x0, x7
        ret             x15                     // x30 was saved in x15 by the stub
endfunc

// def_fn_48 w, h, txfm1, txfm2, eob_half
//
// Emits the exported entry point inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon.
// The stub saves the return address in x15, points x4 at the first-pass
// transform (inv_<txfm1>_4s_x<w>_neon) and x5 at the second-pass transform
// (inv_<txfm2>_<w>h_x<h>_neon, referenced through X(), so presumably defined
// in another object file), and then branches to the shared
// inv_txfm_add_<w>x<h>_neon body. For w == 4 the eob threshold eob_half is
// passed on in w13. The dct_dct variant first runs idct_dc (defined
// elsewhere; presumably the DC-only shortcut).
.macro def_fn_48 w, h, txfm1, txfm2, eob_half
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
        mov             x15, x30

.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         \w,  \h,  0
.endif
        adr             x4,  inv_\txfm1\()_4s_x\w\()_neon
.if \w == 4
        mov             w13, #\eob_half
.endif
        movrel          x5,  X(inv_\txfm2\()_\w\()h_x\h\()_neon)
        b               inv_txfm_add_\w\()x\h\()_neon
endfunc
.endm

// def_fns_48 w, h
//
// Instantiates def_fn_48 for all sixteen first/second transform pairs of a
// w x h block. The last argument is the eob threshold that def_fn_48 loads
// into w13 (only when w == 4).
.macro def_fns_48 w, h
def_fn_48 \w, \h, dct, dct, 13
def_fn_48 \w, \h, identity, identity, 13
def_fn_48 \w, \h, dct, adst, 13
def_fn_48 \w, \h, dct, flipadst, 13
def_fn_48 \w, \h, dct, identity, 4
def_fn_48 \w, \h, adst, dct, 13
def_fn_48 \w, \h, adst, adst, 13
def_fn_48 \w, \h, adst, flipadst, 13
def_fn_48 \w, \h, flipadst, dct, 13
def_fn_48 \w, \h, flipadst, adst, 13
def_fn_48 \w, \h, flipadst, flipadst, 13
def_fn_48 \w, \h, identity, dct, 16
def_fn_48 \w, \h, adst, identity, 4
def_fn_48 \w, \h, flipadst, identity, 4
def_fn_48 \w, \h, identity, adst, 16
def_fn_48 \w, \h, identity, flipadst, 16
.endm

def_fns_48 4, 8
def_fns_48 8, 4


// 16-point inverse DCT on four 32 bit lanes.
//
// In/out: v16-v31. The even-indexed inputs (v16, v18, ..., v30) go through
// idct_8; the odd-indexed inputs (v17, v19, ..., v31) are processed inline.
// On return out0..out15 are in v16..v31 in order.
// Clobbers x16 and v0-v7 (v4/v5 hold the clip limits left by idct_8).
// Intermediate sums are clamped to [v4, v5]; the final outputs are not.
function inv_dct_4s_x16_neon
        AARCH64_VALID_CALL_TARGET
        movrel          x16, idct_coeffs
        ld1             {v0.4s, v1.4s}, [x16], #32

        idct_8          v16, v18, v20, v22, v24, v26, v28, v30

        // idct_8 leaves the row_clip_max/min constants in v5 and v4
.irp r, v16, v18, v20, v22, v24, v26, v28, v30
        smin            \r\().4s,  \r\().4s,  v5.4s
.endr
.irp r, v16, v18, v20, v22, v24, v26, v28, v30
        smax            \r\().4s,  \r\().4s,  v4.4s
.endr

        // Load the next eight coefficients, then rewind x16 to the start of
        // idct_coeffs so that the first four can be reloaded further down.
        ld1             {v0.4s, v1.4s}, [x16]
        sub             x16, x16, #32

        mul_mls         v2,  v17, v31, v0.s[0], v0.s[1] // -> t8a
        mul_mla         v3,  v17, v31, v0.s[1], v0.s[0] // -> t15a
        mul_mls         v6,  v25, v23, v0.s[2], v0.s[3] // -> t9a
        srshr           v17.4s,  v2.4s,   #12           // t8a
        srshr           v31.4s,  v3.4s,   #12           // t15a
        mul_mla         v2,  v25, v23, v0.s[3], v0.s[2] // -> t14a
        mul_mls         v3,  v21, v27, v1.s[0], v1.s[1] // -> t10a
        srshr           v23.4s,  v6.4s,   #12           // t9a
        srshr           v25.4s,  v2.4s,   #12           // t14a
        mul_mla         v6,  v21, v27, v1.s[1], v1.s[0] // -> t13a
        mul_mls         v2,  v29, v19, v1.s[2], v1.s[3] // -> t11a
        srshr           v21.4s,  v3.4s,   #12           // t10a
        srshr           v27.4s,  v6.4s,   #12           // t13a
        mul_mla         v3,  v29, v19, v1.s[3], v1.s[2] // -> t12a
        srshr           v19.4s,  v2.4s,   #12           // t11a
        srshr           v29.4s,  v3.4s,   #12           // t12a

        ld1             {v0.4s}, [x16]                  // first four idct_coeffs again

        sqsub           v2.4s,   v17.4s,  v23.4s  // t9
        sqadd           v17.4s,  v17.4s,  v23.4s  // t8
        sqsub           v3.4s,   v31.4s,  v25.4s  // t14
        sqadd           v31.4s,  v31.4s,  v25.4s  // t15
        sqsub           v23.4s,  v19.4s,  v21.4s  // t10
        sqadd           v19.4s,  v19.4s,  v21.4s  // t11
        sqadd           v25.4s,  v29.4s,  v27.4s  // t12
        sqsub           v29.4s,  v29.4s,  v27.4s  // t13

.irp r, v2, v17, v3, v31, v23, v19, v25, v29
        smin            \r\().4s,  \r\().4s,  v5.4s
.endr
.irp r, v2, v17, v3, v31, v23, v19, v25, v29
        smax            \r\().4s,  \r\().4s,  v4.4s
.endr

        mul_mls         v7,  v3,  v2,  v0.s[2], v0.s[3] // -> t9a
        mul_mla         v6,  v3,  v2,  v0.s[3], v0.s[2] // -> t14a
        srshr           v21.4s,  v7.4s,   #12           // t9a
        srshr           v27.4s,  v6.4s,   #12           // t14a

        mul_mls         v7,  v29, v23, v0.s[2], v0.s[3] // -> t13a
        mul_mla         v6,  v29, v23, v0.s[3], v0.s[2] // -> t10a
        srshr           v29.4s,  v7.4s,   #12           // t13a
        neg             v6.4s,   v6.4s                  // t10a is the negated sum
        srshr           v23.4s,  v6.4s,   #12           // t10a

        sqsub           v2.4s,   v17.4s,  v19.4s  // t11a
        sqadd           v17.4s,  v17.4s,  v19.4s  // t8a
        sqsub           v3.4s,   v31.4s,  v25.4s  // t12a
        sqadd           v31.4s,  v31.4s,  v25.4s  // t15a
        sqadd           v19.4s,  v21.4s,  v23.4s  // t9
        sqsub           v21.4s,  v21.4s,  v23.4s  // t10
        sqsub           v25.4s,  v27.4s,  v29.4s  // t13
        sqadd           v27.4s,  v27.4s,  v29.4s  // t14

.irp r, v2, v17, v3, v31, v19, v21, v25, v27
        smin            \r\().4s,  \r\().4s,  v5.4s
.endr
.irp r, v2, v17, v3, v31, v19, v21, v25, v27
        smax            \r\().4s,  \r\().4s,  v4.4s
.endr

        mul_mls         v7,  v3,  v2,  v0.s[0], v0.s[0] // -> t11
        mul_mla         v6,  v3,  v2,  v0.s[0], v0.s[0] // -> t12
        mul_mls         v2,  v25, v21, v0.s[0], v0.s[0] // -> t10a

        srshr           v7.4s,   v7.4s,   #12     // t11
        srshr           v6.4s,   v6.4s,   #12     // t12
        mul_mla         v3,  v25, v21, v0.s[0], v0.s[0] // -> t13a
        srshr           v2.4s,   v2.4s,   #12     // t10a
        srshr           v3.4s,   v3.4s,   #12     // t13a

        // Final butterflies. v1 and v3 serve as temporaries for results
        // whose destination register is still needed as an input.
        sqadd           v1.4s,   v16.4s,  v31.4s  // out0
        sqsub           v31.4s,  v16.4s,  v31.4s  // out15
        mov             v16.16b, v1.16b
        sqadd           v23.4s,  v30.4s,  v17.4s  // out7
        sqsub           v1.4s,   v30.4s,  v17.4s  // out8
        sqadd           v17.4s,  v18.4s,  v27.4s  // out1
        sqsub           v30.4s,  v18.4s,  v27.4s  // out14
        sqadd           v18.4s,  v20.4s,  v3.4s   // out2
        sqsub           v29.4s,  v20.4s,  v3.4s   // out13
        sqadd           v3.4s,   v28.4s,  v19.4s  // out6
        sqsub           v25.4s,  v28.4s,  v19.4s  // out9
        sqadd           v19.4s,  v22.4s,  v6.4s   // out3
        sqsub           v28.4s,  v22.4s,  v6.4s   // out12
        sqadd           v20.4s,  v24.4s,  v7.4s   // out4
        sqsub           v27.4s,  v24.4s,  v7.4s   // out11
        sqadd           v21.4s,  v26.4s,  v2.4s   // out5
        sqsub           v26.4s,  v26.4s,  v2.4s   // out10
        mov             v24.16b, v1.16b           // out8
        mov             v22.16b, v3.16b           // out6

        ret
endfunc

// iadst_16 o0, ..., o15
//
// 16-point inverse ADST on four 32 bit lanes. Inputs are v16-v31; o0..o15
// name the registers that receive out0..out15. The two users in this file
// pass either v16..v31 (plain order) or v31..v16 (flipped order); the .ifc
// blocks below only distinguish those two cases.
// Clobbers x16 and v0-v7. v5/v7 hold the clip limits until the last clamp
// and are reused as scratch registers afterwards.
.macro iadst_16 o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11, o12, o13, o14, o15
        movrel          x16, iadst16_coeffs
        ld1             {v0.4s, v1.4s}, [x16], #32

        mul_mla         v2,  v31, v16, v0.s[0], v0.s[1] // -> t0
        mul_mls         v4,  v31, v16, v0.s[1], v0.s[0] // -> t1
        mul_mla         v6,  v29, v18, v0.s[2], v0.s[3] // -> t2
        srshr           v16.4s,  v2.4s,   #12           // t0
        srshr           v31.4s,  v4.4s,   #12           // t1
        mul_mls         v2,  v29, v18, v0.s[3], v0.s[2] // -> t3
        mul_mla         v4,  v27, v20, v1.s[0], v1.s[1] // -> t4
        srshr           v18.4s,  v6.4s,   #12           // t2
        srshr           v29.4s,  v2.4s,   #12           // t3
        mul_mls         v6,  v27, v20, v1.s[1], v1.s[0] // -> t5
        mul_mla         v2,  v25, v22, v1.s[2], v1.s[3] // -> t6
        srshr           v20.4s,  v4.4s,   #12           // t4
        srshr           v27.4s,  v6.4s,   #12           // t5
        mul_mls         v4,  v25, v22, v1.s[3], v1.s[2] // -> t7
        ld1             {v0.4s, v1.4s}, [x16]           // second half of iadst16_coeffs
        movrel          x16, idct_coeffs
        mul_mla         v6,  v23, v24, v0.s[0], v0.s[1] // -> t8
        srshr           v22.4s,  v2.4s,   #12           // t6
        srshr           v25.4s,  v4.4s,   #12           // t7
        mul_mls         v2,  v23, v24, v0.s[1], v0.s[0] // -> t9
        mul_mla         v4,  v21, v26, v0.s[2], v0.s[3] // -> t10
        srshr           v23.4s,  v6.4s,   #12           // t8
        srshr           v24.4s,  v2.4s,   #12           // t9
        mul_mls         v6,  v21, v26, v0.s[3], v0.s[2] // -> t11
        mul_mla         v2,  v19, v28, v1.s[0], v1.s[1] // -> t12
        srshr           v21.4s,  v4.4s,   #12           // t10
        srshr           v26.4s,  v6.4s,   #12           // t11
        mul_mls         v4,  v19, v28, v1.s[1], v1.s[0] // -> t13
        mul_mla         v6,  v17, v30, v1.s[2], v1.s[3] // -> t14
        srshr           v19.4s,  v2.4s,   #12           // t12
        srshr           v28.4s,  v4.4s,   #12           // t13
        mul_mls         v2,  v17, v30, v1.s[3], v1.s[2] // -> t15
        srshr           v17.4s,  v6.4s,   #12           // t14
        srshr           v30.4s,  v2.4s,   #12           // t15

        ld1             {v0.4s, v1.4s}, [x16]           // first eight idct_coeffs

        movi            v5.4s,   #1, msl #16     // row_clip_max = ~(~bdmax << 7), 0x1ffff
        mvni            v7.4s,   #1, msl #16     // row_clip_min = (~bdmax << 7), 0xfffe0000

        sqsub           v2.4s,   v16.4s,  v23.4s // t8a
        sqadd           v16.4s,  v16.4s,  v23.4s // t0a
        sqsub           v3.4s,   v31.4s,  v24.4s // t9a
        sqadd           v31.4s,  v31.4s,  v24.4s // t1a
        sqadd           v23.4s,  v18.4s,  v21.4s // t2a
        sqsub           v18.4s,  v18.4s,  v21.4s // t10a
        sqadd           v24.4s,  v29.4s,  v26.4s // t3a
        sqsub           v29.4s,  v29.4s,  v26.4s // t11a
        sqadd           v21.4s,  v20.4s,  v19.4s // t4a
        sqsub           v20.4s,  v20.4s,  v19.4s // t12a
        sqadd           v26.4s,  v27.4s,  v28.4s // t5a
        sqsub           v27.4s,  v27.4s,  v28.4s // t13a
        sqadd           v19.4s,  v22.4s,  v17.4s // t6a
        sqsub           v22.4s,  v22.4s,  v17.4s // t14a
        sqadd           v28.4s,  v25.4s,  v30.4s // t7a
        sqsub           v25.4s,  v25.4s,  v30.4s // t15a

.irp r, v2, v16, v3, v31, v23, v18, v24, v29, v21, v20, v26, v27, v19, v22, v28, v25
        smin_4s         \r, \r, v5
.endr
.irp r, v2, v16, v3, v31, v23, v18, v24, v29, v21, v20, v26, v27, v19, v22, v28, v25
        smax_4s         \r, \r, v7
.endr

        mul_mla         v4,  v2,  v3,  v1.s[1], v1.s[0] // -> t8
        mul_mls         v6,  v2,  v3,  v1.s[0], v1.s[1] // -> t9
        mul_mla         v2,  v18, v29, v1.s[3], v1.s[2] // -> t10
        srshr           v17.4s,  v4.4s,   #12           // t8
        srshr           v30.4s,  v6.4s,   #12           // t9
        mul_mls         v4,  v18, v29, v1.s[2], v1.s[3] // -> t11
        mul_mls         v6,  v27, v20, v1.s[1], v1.s[0] // -> t12
        srshr           v18.4s,  v2.4s,   #12           // t10
        srshr           v29.4s,  v4.4s,   #12           // t11
        mul_mla         v2,  v27, v20, v1.s[0], v1.s[1] // -> t13
        mul_mls         v4,  v25, v22, v1.s[3], v1.s[2] // -> t14
        srshr           v27.4s,  v6.4s,   #12           // t12
        srshr           v20.4s,  v2.4s,   #12           // t13
        mul_mla         v6,  v25, v22, v1.s[2], v1.s[3] // -> t15
        srshr           v25.4s,  v4.4s,   #12           // t14
        srshr           v22.4s,  v6.4s,   #12           // t15

        sqsub           v2.4s,   v16.4s,  v21.4s // t4
        sqadd           v16.4s,  v16.4s,  v21.4s // t0
        sqsub           v3.4s,   v31.4s,  v26.4s // t5
        sqadd           v31.4s,  v31.4s,  v26.4s // t1
        sqadd           v21.4s,  v23.4s,  v19.4s // t2
        sqsub           v23.4s,  v23.4s,  v19.4s // t6
        sqadd           v26.4s,  v24.4s,  v28.4s // t3
        sqsub           v24.4s,  v24.4s,  v28.4s // t7
        sqadd           v19.4s,  v17.4s,  v27.4s // t8a
        sqsub           v17.4s,  v17.4s,  v27.4s // t12a
        sqadd           v28.4s,  v30.4s,  v20.4s // t9a
        sqsub           v30.4s,  v30.4s,  v20.4s // t13a
        sqadd           v27.4s,  v18.4s,  v25.4s // t10a
        sqsub           v18.4s,  v18.4s,  v25.4s // t14a
        sqadd           v20.4s,  v29.4s,  v22.4s // t11a
        sqsub           v29.4s,  v29.4s,  v22.4s // t15a

.irp r, v2, v16, v3, v31, v21, v23, v26, v24, v19, v17, v28, v30, v27, v18, v20, v29
        smin_4s         \r, \r, v5
.endr
.irp r, v2, v16, v3, v31, v21, v23, v26, v24, v19, v17, v28, v30, v27, v18, v20, v29
        smax_4s         \r, \r, v7
.endr

        mul_mla         v4,  v2,  v3,  v0.s[3], v0.s[2] // -> t4a
        mul_mls         v6,  v2,  v3,  v0.s[2], v0.s[3] // -> t5a
        mul_mls         v2,  v24, v23, v0.s[3], v0.s[2] // -> t6a
        srshr           v22.4s,  v4.4s,   #12           // t4a
        srshr           v25.4s,  v6.4s,   #12           // t5a
        mul_mla         v4,  v24, v23, v0.s[2], v0.s[3] // -> t7a
        mul_mla         v6,  v17, v30, v0.s[3], v0.s[2] // -> t12
        srshr           v24.4s,  v2.4s,   #12           // t6a
        srshr           v23.4s,  v4.4s,   #12           // t7a
        mul_mls         v2,  v17, v30, v0.s[2], v0.s[3] // -> t13
        mul_mls         v4,  v29, v18, v0.s[3], v0.s[2] // -> t14
        srshr           v17.4s,  v6.4s,   #12           // t12
        mul_mla         v6,  v29, v18, v0.s[2], v0.s[3] // -> t15
        srshr           v29.4s,  v2.4s,   #12           // t13
        srshr           v30.4s,  v4.4s,   #12           // t14
        srshr           v18.4s,  v6.4s,   #12           // t15

        sqsub           v2.4s,   v16.4s,  v21.4s // t2a
        // Plain order: o0 is v16 and o15 is v31, so out0 can be written in
        // place. Flipped order: o15 is v16, which is still an input of the
        // out0 sum, so out0 goes through v4 and is moved into place last.
.ifc \o0, v16
        sqadd           \o0\().4s,  v16.4s,  v21.4s // out0
        sqsub           v21.4s,  v31.4s,  v26.4s // t3a
        sqadd           \o15\().4s, v31.4s,  v26.4s // out15
.else
        sqadd           v4.4s,   v16.4s,  v21.4s // out0
        sqsub           v21.4s,  v31.4s,  v26.4s // t3a
        sqadd           \o15\().4s, v31.4s,  v26.4s // out15
        mov             \o0\().16b, v4.16b
.endif

        sqsub           v3.4s,   v29.4s,  v18.4s // t15a
        sqadd           \o13\().4s, v29.4s,  v18.4s // out13
        sqadd           \o2\().4s,  v17.4s,  v30.4s // out2
        sqsub           v26.4s,  v17.4s,  v30.4s // t14a

        sqadd           \o1\().4s,  v19.4s,  v27.4s // out1
        sqsub           v27.4s,  v19.4s,  v27.4s // t10
        sqadd           \o14\().4s, v28.4s,  v20.4s // out14
        sqsub           v20.4s,  v28.4s,  v20.4s // t11

        sqadd           \o3\().4s,  v22.4s,  v24.4s // out3
        sqsub           v22.4s,  v22.4s,  v24.4s // t6
        sqadd           \o12\().4s, v25.4s,  v23.4s // out12
        sqsub           v23.4s,  v25.4s,  v23.4s // t7

        // Not clipping the output registers, as they will be downshifted and
        // narrowed afterwards anyway.
.irp r, v2, v21, v3, v26, v27, v20, v22, v23
        smin_4s         \r, \r, v5
.endr
.irp r, v2, v21, v3, v26, v27, v20, v22, v23
        smax_4s         \r, \r, v7
.endr

        sqneg           \o15\().4s, \o15\().4s // out15
        sqneg           \o13\().4s, \o13\().4s // out13
        sqneg           \o1\().4s,  \o1\().4s  // out1
        sqneg           \o3\().4s,  \o3\().4s  // out3

        mul_mls         v24, v2,  v21, v0.s[0], v0.s[0] // -> out8 (v24 or v23)
        mul_mla         v4,  v2,  v21, v0.s[0], v0.s[0] // -> out7 (v23 or v24)
        mul_mla         v6,  v26, v3,  v0.s[0], v0.s[0] // -> out5 (v21 or v26)

        srshr           v24.4s,  v24.4s,  #12 // out8
        srshr           v4.4s,   v4.4s,   #12 // out7
        // The last clamp is done, so v5 (and v7 below) are free for reuse.
        srshr           v5.4s,   v6.4s,   #12 // out5
        mul_mls         v6,  v26, v3,  v0.s[0], v0.s[0] // -> out10 (v26 or v21)
        mul_mla         v2,  v22, v23, v0.s[0], v0.s[0] // -> out4 (v20 or v27)
        srshr           v26.4s,  v6.4s,   #12 // out10

        mul_mls         v6,  v22, v23, v0.s[0], v0.s[0] // -> out11 (v27 or v20)
        mul_mla         v22, v27, v20, v0.s[0], v0.s[0] // -> out6 (v22 or v25)
        mul_mls         v21, v27, v20, v0.s[0], v0.s[0] // -> out9 (v25 or v22)

        srshr           \o4\().4s,   v2.4s,  #12 // out4
        srshr           v6.4s,       v6.4s,  #12 // out11
        srshr           v7.4s,       v21.4s, #12 // out9
        srshr           \o6\().4s,   v22.4s, #12 // out6

        // out8/out10 were computed in v24/v26, which are already o8/o10 in
        // plain order; in flipped order they must be moved to their slots.
.ifc \o8, v23
        mov             \o8\().16b,  v24.16b
        mov             \o10\().16b, v26.16b
.endif

        sqneg           \o7\().4s,   v4.4s // out7
        sqneg           \o5\().4s,   v5.4s // out5
        sqneg           \o11\().4s,  v6.4s // out11
        sqneg           \o9\().4s,   v7.4s // out9
.endm

// 16-point inverse ADST on v16-v31 (4s lanes), outputs in plain order:
// out0..out15 land in v16..v31.
function inv_adst_4s_x16_neon
        AARCH64_VALID_CALL_TARGET
        iadst_16        v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
        ret
endfunc

// Same transform with the output order reversed: out0..out15 land in
// v31..v16.
function inv_flipadst_4s_x16_neon
        AARCH64_VALID_CALL_TARGET
        iadst_16        v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16
        ret
endfunc

// 16-point identity transform on v16-v31 (4s lanes), in place.
// Each element becomes 2*x + sqrdmulh(x, c) with c = 2*(5793-4096)*8 << 16,
// which works out to x * 2*5793/4096 (all adds saturating).
// Clobbers w16, v0 and v2.
function inv_identity_4s_x16_neon
        AARCH64_VALID_CALL_TARGET
        movz            w16, #2*(5793-4096)*8, lsl #16
        dup             v0.2s,   w16
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
        sqrdmulh        v2.4s,      v\i\().4s,  v0.s[0]
        sqadd           v\i\().4s,  v\i\().4s,  v\i\().4s
        sqadd           v\i\().4s,  v\i\().4s,  v2.4s
.endr
        ret
endfunc

// identity_4x16_shift1 c
//
// For each of v16-v31 (4s): x += srshr(sqrdmulh(x, c), 1).
// c is the multiplier operand (a vector element); v3 is clobbered.
.macro identity_4x16_shift1 c
.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
        sqrdmulh        v3.4s,   \i,      \c
        srshr           v3.4s,   v3.4s,   #1
        sqadd           \i,      \i,      v3.4s
.endr
.endm

// identity_4x16 c
//
// For each of v16-v31 (4s): x = 2*x + sqrdmulh(x, c), with saturating adds.
// c is the multiplier operand (a vector element); v3 is clobbered.
.macro identity_4x16 c
.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
        sqrdmulh        v3.4s,   \i,      \c
        sqadd           \i,      \i,      \i
        sqadd           \i,      \i,      v3.4s
.endr
.endm

// def_horz_16 scale=0, shift=2, suffix
//
// Emits inv_txfm_horz<suffix>_16x4_neon, the first (horizontal) pass over a
// 16x4 group of coefficients.
//
// In:  x4 = transform to call, x6 = output buffer, x7 = coefficient
//      pointer, x8 = byte stride between the sixteen coefficient vectors.
//
// Sixteen 4s vectors are loaded from x7 and cleared in memory as they are
// read. With scale != 0 they are first passed through scale_input with the
// 2896*8 << 16 factor. After the call through x4 the results are narrowed
// to 16 bit with a rounding right shift by "shift", transposed, and stored
// as eight 8h vectors at x6 (x6 advances by 128 bytes).
// The return address is kept in x14. Clobbers v0 (scale only) and v4-v7.
//
// The transpose/store epilogue is emitted only once, by the scale == 0
// instantiation; the scale != 0 instantiation branches to it, so both
// variants must be instantiated.
.macro def_horz_16 scale=0, shift=2, suffix
function inv_txfm_horz\suffix\()_16x4_neon
        mov             x14, x30
        movi            v7.4s,  #0              // zero vector used to clear coeff
.if \scale
        movz            w16, #2896*8, lsl #16
        dup             v0.2s,  w16
.endif
.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
        ld1             {\i}, [x7]
        st1             {v7.4s}, [x7], x8
.endr
.if \scale
        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
        scale_input     .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31
.endif
        blr             x4
        // Pack the sixteen 4s results into eight 8h registers:
        // v16-v19 = {v16-v19 | v20-v23}, v20-v23 = {v24-v27 | v28-v31}.
        sqrshrn         v16.4h,  v16.4s,  #\shift
        sqrshrn         v17.4h,  v17.4s,  #\shift
        sqrshrn         v18.4h,  v18.4s,  #\shift
        sqrshrn         v19.4h,  v19.4s,  #\shift
        sqrshrn2        v16.8h,  v20.4s,  #\shift
        sqrshrn2        v17.8h,  v21.4s,  #\shift
        sqrshrn2        v18.8h,  v22.4s,  #\shift
        sqrshrn2        v19.8h,  v23.4s,  #\shift
        sqrshrn         v20.4h,  v24.4s,  #\shift
        sqrshrn         v21.4h,  v25.4s,  #\shift
        sqrshrn         v22.4h,  v26.4s,  #\shift
        sqrshrn         v23.4h,  v27.4s,  #\shift
        sqrshrn2        v20.8h,  v28.4s,  #\shift
        sqrshrn2        v21.8h,  v29.4s,  #\shift
        sqrshrn2        v22.8h,  v30.4s,  #\shift
        sqrshrn2        v23.8h,  v31.4s,  #\shift
.if \scale
        b               L(horz_16x4_epilog)
.else
L(horz_16x4_epilog):
        transpose_4x8h  v16, v17, v18, v19, v4,  v5,  v6,  v7
        transpose_4x8h  v20, v21, v22, v23, v4,  v5,  v6,  v7

        // Interleave the two transposed halves on output.
.irp i, v16.8h, v20.8h, v17.8h, v21.8h, v18.8h, v22.8h, v19.8h, v23.8h
        st1             {\i}, [x6], #16
.endr

        ret             x14
.endif
endfunc
.endm

def_horz_16 scale=1, shift=1, suffix=_scale
def_horz_16 scale=0, shift=2

// Second (vertical) pass over an 8 wide, 16 tall strip.
//
// In:  x5 = transform to call, x6 = destination passed on to
//      load_add_store_8x16, x7 = intermediate buffer, x8 = byte stride
//      between its rows.
// Loads sixteen 8h rows into v16-v31, runs the transform and hands the
// result to load_add_store_8x16. The return address is kept in x14.
function inv_txfm_add_vert_8x16_neon
        mov             x14, x30
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
        ld1             {v\i\().8h}, [x7], x8
.endr
        blr             x5
        load_add_store_8x16 x6, x7
        ret             x14
endfunc

function inv_txfm_add_16x16_neon
        mov             x15, x30
        sub             sp,  sp,  #512
        ldrh            w12, [x13], #2
.irp i, 0, 4, 8, 12
        add             x6,  sp,  #(\i*16*2)
.if \i > 0
        mov             w8,  #(16 - \i)
        cmp             w3,  w12
        b.lt            1f
.if \i < 12
1428*c0909341SAndroid Build Coastguard Worker ldrh w12, [x13], #2 1429*c0909341SAndroid Build Coastguard Worker.endif 1430*c0909341SAndroid Build Coastguard Worker.endif 1431*c0909341SAndroid Build Coastguard Worker add x7, x2, #(\i*4) 1432*c0909341SAndroid Build Coastguard Worker mov x8, #16*4 1433*c0909341SAndroid Build Coastguard Worker bl inv_txfm_horz_16x4_neon 1434*c0909341SAndroid Build Coastguard Worker.endr 1435*c0909341SAndroid Build Coastguard Worker b 3f 1436*c0909341SAndroid Build Coastguard Worker1: 1437*c0909341SAndroid Build Coastguard Worker movi v4.8h, #0 1438*c0909341SAndroid Build Coastguard Worker movi v5.8h, #0 1439*c0909341SAndroid Build Coastguard Worker movi v6.8h, #0 1440*c0909341SAndroid Build Coastguard Worker movi v7.8h, #0 1441*c0909341SAndroid Build Coastguard Worker2: 1442*c0909341SAndroid Build Coastguard Worker subs w8, w8, #4 1443*c0909341SAndroid Build Coastguard Worker.rept 2 1444*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64 1445*c0909341SAndroid Build Coastguard Worker.endr 1446*c0909341SAndroid Build Coastguard Worker b.gt 2b 1447*c0909341SAndroid Build Coastguard Worker3: 1448*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8 1449*c0909341SAndroid Build Coastguard Worker add x6, x0, #(\i*2) 1450*c0909341SAndroid Build Coastguard Worker add x7, sp, #(\i*2) 1451*c0909341SAndroid Build Coastguard Worker mov x8, #32 1452*c0909341SAndroid Build Coastguard Worker bl inv_txfm_add_vert_8x16_neon 1453*c0909341SAndroid Build Coastguard Worker.endr 1454*c0909341SAndroid Build Coastguard Worker 1455*c0909341SAndroid Build Coastguard Worker add sp, sp, #512 1456*c0909341SAndroid Build Coastguard Worker ret x15 1457*c0909341SAndroid Build Coastguard Workerendfunc 1458*c0909341SAndroid Build Coastguard Worker 1459*c0909341SAndroid Build Coastguard Workerconst eob_16x16 1460*c0909341SAndroid Build Coastguard Worker .short 10, 36, 78, 256 1461*c0909341SAndroid Build Coastguard Workerendconst 

// eob thresholds selected when exactly one of the two transforms is identity.
const eob_16x16_identity
        .short 4, 8, 12, 256
endconst

// def_fn_16x16: emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_16x16_16bpc_neon.
// The entry point sets up
//   x4   = inv_<txfm1>_4s_x16_neon       (first transform)
//   x5   = inv_<txfm2>_8h_x16_neon       (second transform)
//   x13  = eob threshold table: eob_16x16_identity when exactly one of the
//          transforms is identity, eob_16x16 otherwise
// and tail-branches to inv_txfm_add_16x16_neon. For dct_dct the idct_dc
// macro is expanded first.
.macro def_fn_16x16 txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_16x16_16bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         16,  16,  2
.endif
        adr             x4,  inv_\txfm1\()_4s_x16_neon
        movrel          x5,  X(inv_\txfm2\()_8h_x16_neon)
.ifc \txfm2, identity
.ifc \txfm1, identity
        movrel          x13, eob_16x16
.else
        movrel          x13, eob_16x16_identity
.endif
.else
.ifc \txfm1, identity
        movrel          x13, eob_16x16_identity
.else
        movrel          x13, eob_16x16
.endif
.endif
        b               inv_txfm_add_16x16_neon
endfunc
.endm

def_fn_16x16 dct, dct
def_fn_16x16 identity, identity
def_fn_16x16 dct, adst
def_fn_16x16 dct, flipadst
def_fn_16x16 dct, identity
def_fn_16x16 adst, dct
def_fn_16x16 adst, adst
def_fn_16x16 adst, flipadst
def_fn_16x16 flipadst, dct
def_fn_16x16 flipadst, adst
def_fn_16x16 flipadst, flipadst
def_fn_16x16 identity, dct

// Shared 16x4 body.
// In:  x0 = dst, x2 = coeff, x4/x5 = first/second transform.
// Loads 16 contiguous .4s vectors from x2 (zeroing each one after the load),
// runs the first transform once, then processes the result as two halves of
// 8 columns: narrow, transpose, second transform, load_add_store_8x4.
// x15 holds the return address.
function inv_txfm_add_16x4_neon
        mov             x15, x30
        movi            v4.4s,   #0

.irp reg, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
        ld1             {\reg},  [x2]
        st1             {v4.4s}, [x2], #16
.endr

        blr             x4

        // First half: v16-v23 (.4s) -> v16-v19 (.8h), written at dst.
        sqrshrn         v16.4h,  v16.4s,  #1
        sqrshrn         v17.4h,  v17.4s,  #1
        sqrshrn         v18.4h,  v18.4s,  #1
        sqrshrn         v19.4h,  v19.4s,  #1
        sqrshrn2        v16.8h,  v20.4s,  #1
        sqrshrn2        v17.8h,  v21.4s,  #1
        sqrshrn2        v18.8h,  v22.4s,  #1
        sqrshrn2        v19.8h,  v23.4s,  #1
        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5
        blr             x5
        mov             x6,  x0
        load_add_store_8x4 x6, x7

        // Second half: v24-v31 (.4s) -> v16-v19 (.8h), written at dst + 16
        // bytes. This relies on v24-v31 still holding the first-pass output.
        sqrshrn         v16.4h,  v24.4s,  #1
        sqrshrn         v17.4h,  v25.4s,  #1
        sqrshrn         v18.4h,  v26.4s,  #1
        sqrshrn         v19.4h,  v27.4s,  #1
        sqrshrn2        v16.8h,  v28.4s,  #1
        sqrshrn2        v17.8h,  v29.4s,  #1
        sqrshrn2        v18.8h,  v30.4s,  #1
        sqrshrn2        v19.8h,  v31.4s,  #1
        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5
        blr             x5
        add             x6,  x0,  #16
        load_add_store_8x4 x6, x7

        ret             x15
endfunc

// Shared 4x16 body.
// In:  x0 = dst, x2 = coeff, w3 = eob, x4/x5 = first/second transform,
//      x13 = pointer to a table of halfword eob thresholds.
// The coefficients are handled as four groups at byte offsets 48, 32, 16
// and 0 from x2, each read as four .4s vectors with a 64 byte stride and
// zeroed after the load. Groups at offsets 48, 32 and 16 are skipped (their
// result registers are zeroed) when eob is below table entries 2, 1 and 0
// respectively; the group at offset 0 is always processed.
// Results: offset 48 -> v28-v31, 32 -> v24-v27, 16 -> v20-v23, 0 -> v16-v19.
// Each threshold load is issued between the cmp and the b.lt of the previous
// test; ldrh does not alter the flags.
function inv_txfm_add_4x16_neon
        ldrh            w12, [x13, #4]
        mov             x15, x30

        mov             x11, #64

        cmp             w3,  w12
        ldrh            w12, [x13, #2]
        b.lt            1f

        add             x6,  x2,  #48
        movi            v2.4s,   #0
.irp reg, v16.4s, v17.4s, v18.4s, v19.4s
        ld1             {\reg},  [x6]
        st1             {v2.4s}, [x6], x11
.endr
        blr             x4
        sqrshrn         v28.4h,  v16.4s,  #1
        sqrshrn         v29.4h,  v17.4s,  #1
        sqrshrn         v30.4h,  v18.4s,  #1
        sqrshrn         v31.4h,  v19.4s,  #1
        transpose_4x4h  v28, v29, v30, v31, v4,  v5,  v6,  v7

        b               2f
1:
        movi            v28.4h,  #0
        movi            v29.4h,  #0
        movi            v30.4h,  #0
        movi            v31.4h,  #0
2:
        cmp             w3,  w12
        ldrh            w12, [x13, #0]
        b.lt            1f

        add             x6,  x2,  #32
        movi            v2.4s,   #0
.irp reg, v16.4s, v17.4s, v18.4s, v19.4s
        ld1             {\reg},  [x6]
        st1             {v2.4s}, [x6], x11
.endr
        blr             x4
        sqrshrn         v24.4h,  v16.4s,  #1
        sqrshrn         v25.4h,  v17.4s,  #1
        sqrshrn         v26.4h,  v18.4s,  #1
        sqrshrn         v27.4h,  v19.4s,  #1
        transpose_4x4h  v24, v25, v26, v27, v4,  v5,  v6,  v7

        b               2f
1:
        movi            v24.4h,  #0
        movi            v25.4h,  #0
        movi            v26.4h,  #0
        movi            v27.4h,  #0
2:
        cmp             w3,  w12
        b.lt            1f

        add             x6,  x2,  #16
        movi            v2.4s,   #0
.irp reg, v16.4s, v17.4s, v18.4s, v19.4s
        ld1             {\reg},  [x6]
        st1             {v2.4s}, [x6], x11
.endr
        blr             x4
        sqrshrn         v20.4h,  v16.4s,  #1
        sqrshrn         v21.4h,  v17.4s,  #1
        sqrshrn         v22.4h,  v18.4s,  #1
        sqrshrn         v23.4h,  v19.4s,  #1
        transpose_4x4h  v20, v21, v22, v23, v4,  v5,  v6,  v7

        b               2f
1:
        movi            v20.4h,  #0
        movi            v21.4h,  #0
        movi            v22.4h,  #0
        movi            v23.4h,  #0
2:

        // Group at offset 0: always present.
        movi            v2.4s,   #0
.irp reg, v16.4s, v17.4s, v18.4s, v19.4s
        ld1             {\reg},  [x2]
        st1             {v2.4s}, [x2], x11
.endr
        blr             x4
        sqrshrn         v16.4h,  v16.4s,  #1
        sqrshrn         v17.4h,  v17.4s,  #1
        sqrshrn         v18.4h,  v18.4s,  #1
        sqrshrn         v19.4h,  v19.4s,  #1
        transpose_4x8h  v16, v17, v18, v19, v4,  v5,  v6,  v7

        blr             x5

        load_add_store_4x16 x0, x6

        ret             x15
endfunc

// eob threshold tables for inv_txfm_add_4x16_neon (selected in def_fn_416).
const eob_4x16
        .short 13, 29, 45, 64
endconst

const eob_4x16_identity1
        .short 16, 32, 48, 64
endconst

const eob_4x16_identity2
        .short 4, 8, 12, 64
endconst

// def_fn_416: emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon for the 4x16 / 16x4 sizes.
// The entry point sets up
//   x4   = inv_<txfm1>_4s_x<w>_neon                  (first transform)
//   x5   = inv_<txfm2>_4h_x<h>_neon when w == 4,
//          inv_<txfm2>_8h_x<h>_neon otherwise         (second transform)
//   x13  = eob threshold table, only when w == 4:
//            txfm1 identity only -> eob_4x16_identity1
//            txfm2 identity only -> eob_4x16_identity2
//            both or neither     -> eob_4x16
// and tail-branches to inv_txfm_add_<w>x<h>_neon. For dct_dct the idct_dc
// macro is expanded first.
.macro def_fn_416 w, h, txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         \w,  \h,  1
.endif
        adr             x4,  inv_\txfm1\()_4s_x\w\()_neon
.if \w == 4
        movrel          x5,  X(inv_\txfm2\()_4h_x\h\()_neon)
.ifc \txfm2, identity
.ifc \txfm1, identity
        movrel          x13, eob_4x16
.else
        movrel          x13, eob_4x16_identity2
.endif
.else
.ifc \txfm1, identity
        movrel          x13, eob_4x16_identity1
.else
        movrel          x13, eob_4x16
.endif
.endif
.else
        movrel          x5,  X(inv_\txfm2\()_8h_x\h\()_neon)
.endif
        b               inv_txfm_add_\w\()x\h\()_neon
endfunc
.endm

Coastguard Worker.macro def_fns_416 w, h 1680*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, dct 1681*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, identity 1682*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, adst 1683*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, flipadst 1684*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, identity 1685*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, dct 1686*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, adst 1687*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, flipadst 1688*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, dct 1689*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, adst 1690*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, flipadst 1691*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, dct 1692*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, identity 1693*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, identity 1694*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, adst 1695*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, flipadst 1696*c0909341SAndroid Build Coastguard Worker.endm 1697*c0909341SAndroid Build Coastguard Worker 1698*c0909341SAndroid Build Coastguard Workerdef_fns_416 4, 16 1699*c0909341SAndroid Build Coastguard Workerdef_fns_416 16, 4 1700*c0909341SAndroid Build Coastguard Worker 1701*c0909341SAndroid Build Coastguard Worker 1702*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_16x8_neon 1703*c0909341SAndroid Build Coastguard Worker mov x15, x30 1704*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x40]! 
1705*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 1706*c0909341SAndroid Build Coastguard Worker stp d12, d13, [sp, #0x20] 1707*c0909341SAndroid Build Coastguard Worker stp d14, d15, [sp, #0x30] 1708*c0909341SAndroid Build Coastguard Worker 1709*c0909341SAndroid Build Coastguard Worker cmp w3, w13 1710*c0909341SAndroid Build Coastguard Worker mov x11, #32 1711*c0909341SAndroid Build Coastguard Worker b.lt 1f 1712*c0909341SAndroid Build Coastguard Worker 1713*c0909341SAndroid Build Coastguard Worker movi v4.4s, #0 1714*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 1715*c0909341SAndroid Build Coastguard Worker dup v0.2s, w16 1716*c0909341SAndroid Build Coastguard Worker 1717*c0909341SAndroid Build Coastguard Worker add x6, x2, #16 1718*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s 1719*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x6] 1720*c0909341SAndroid Build Coastguard Worker st1 {v4.4s}, [x6], x11 1721*c0909341SAndroid Build Coastguard Worker.endr 1722*c0909341SAndroid Build Coastguard Worker 1723*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 1724*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31 1725*c0909341SAndroid Build Coastguard Worker blr x4 1726*c0909341SAndroid Build Coastguard Worker 1727*c0909341SAndroid Build Coastguard Worker sqrshrn v8.4h, v16.4s, #1 1728*c0909341SAndroid Build Coastguard Worker sqrshrn v9.4h, v17.4s, #1 1729*c0909341SAndroid Build Coastguard Worker sqrshrn v10.4h, v18.4s, #1 1730*c0909341SAndroid Build Coastguard Worker sqrshrn v11.4h, v19.4s, #1 1731*c0909341SAndroid Build Coastguard Worker sqrshrn2 v8.8h, v20.4s, #1 1732*c0909341SAndroid Build Coastguard Worker sqrshrn2 v9.8h, v21.4s, #1 1733*c0909341SAndroid Build Coastguard Worker 
sqrshrn2 v10.8h, v22.4s, #1 1734*c0909341SAndroid Build Coastguard Worker sqrshrn2 v11.8h, v23.4s, #1 1735*c0909341SAndroid Build Coastguard Worker sqrshrn v12.4h, v24.4s, #1 1736*c0909341SAndroid Build Coastguard Worker sqrshrn v13.4h, v25.4s, #1 1737*c0909341SAndroid Build Coastguard Worker sqrshrn v14.4h, v26.4s, #1 1738*c0909341SAndroid Build Coastguard Worker sqrshrn v15.4h, v27.4s, #1 1739*c0909341SAndroid Build Coastguard Worker sqrshrn2 v12.8h, v28.4s, #1 1740*c0909341SAndroid Build Coastguard Worker sqrshrn2 v13.8h, v29.4s, #1 1741*c0909341SAndroid Build Coastguard Worker sqrshrn2 v14.8h, v30.4s, #1 1742*c0909341SAndroid Build Coastguard Worker sqrshrn2 v15.8h, v31.4s, #1 1743*c0909341SAndroid Build Coastguard Worker 1744*c0909341SAndroid Build Coastguard Worker transpose_4x8h v8, v9, v10, v11, v2, v3, v4, v5 1745*c0909341SAndroid Build Coastguard Worker transpose_4x8h v12, v13, v14, v15, v2, v3, v4, v5 1746*c0909341SAndroid Build Coastguard Worker 1747*c0909341SAndroid Build Coastguard Worker b 2f 1748*c0909341SAndroid Build Coastguard Worker1: 1749*c0909341SAndroid Build Coastguard Worker.irp i, v8.8h, v9.8h, v10.8h, v11.8h, v12.8h, v13.8h, v14.8h, v15.8h 1750*c0909341SAndroid Build Coastguard Worker movi \i, #0 1751*c0909341SAndroid Build Coastguard Worker.endr 1752*c0909341SAndroid Build Coastguard Worker2: 1753*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 1754*c0909341SAndroid Build Coastguard Worker dup v0.2s, w16 1755*c0909341SAndroid Build Coastguard Worker 1756*c0909341SAndroid Build Coastguard Worker movi v4.4s, #0 1757*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s 1758*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x2] 1759*c0909341SAndroid Build Coastguard Worker st1 {v4.4s}, [x2], x11 1760*c0909341SAndroid Build Coastguard Worker.endr 1761*c0909341SAndroid Build Coastguard Worker 
1762*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 1763*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31 1764*c0909341SAndroid Build Coastguard Worker blr x4 1765*c0909341SAndroid Build Coastguard Worker 1766*c0909341SAndroid Build Coastguard Worker sqrshrn v16.4h, v16.4s, #1 1767*c0909341SAndroid Build Coastguard Worker sqrshrn v17.4h, v17.4s, #1 1768*c0909341SAndroid Build Coastguard Worker sqrshrn v18.4h, v18.4s, #1 1769*c0909341SAndroid Build Coastguard Worker sqrshrn v19.4h, v19.4s, #1 1770*c0909341SAndroid Build Coastguard Worker sqrshrn2 v16.8h, v20.4s, #1 1771*c0909341SAndroid Build Coastguard Worker sqrshrn2 v17.8h, v21.4s, #1 1772*c0909341SAndroid Build Coastguard Worker sqrshrn2 v18.8h, v22.4s, #1 1773*c0909341SAndroid Build Coastguard Worker sqrshrn2 v19.8h, v23.4s, #1 1774*c0909341SAndroid Build Coastguard Worker 1775*c0909341SAndroid Build Coastguard Worker mov v20.16b, v8.16b 1776*c0909341SAndroid Build Coastguard Worker mov v21.16b, v9.16b 1777*c0909341SAndroid Build Coastguard Worker mov v22.16b, v10.16b 1778*c0909341SAndroid Build Coastguard Worker mov v23.16b, v11.16b 1779*c0909341SAndroid Build Coastguard Worker 1780*c0909341SAndroid Build Coastguard Worker transpose_4x8h v16, v17, v18, v19, v2, v3, v4, v5 1781*c0909341SAndroid Build Coastguard Worker 1782*c0909341SAndroid Build Coastguard Worker sqrshrn v8.4h, v24.4s, #1 1783*c0909341SAndroid Build Coastguard Worker sqrshrn v9.4h, v25.4s, #1 1784*c0909341SAndroid Build Coastguard Worker sqrshrn v10.4h, v26.4s, #1 1785*c0909341SAndroid Build Coastguard Worker sqrshrn v11.4h, v27.4s, #1 1786*c0909341SAndroid Build Coastguard Worker sqrshrn2 v8.8h, v28.4s, #1 1787*c0909341SAndroid Build Coastguard Worker sqrshrn2 v9.8h, v29.4s, #1 1788*c0909341SAndroid Build Coastguard Worker sqrshrn2 v10.8h, v30.4s, #1 1789*c0909341SAndroid Build Coastguard Worker sqrshrn2 v11.8h, v31.4s, #1 
1790*c0909341SAndroid Build Coastguard Worker 1791*c0909341SAndroid Build Coastguard Worker transpose_4x8h v8, v9, v10, v11, v2, v3, v4, v5 1792*c0909341SAndroid Build Coastguard Worker 1793*c0909341SAndroid Build Coastguard Worker blr x5 1794*c0909341SAndroid Build Coastguard Worker 1795*c0909341SAndroid Build Coastguard Worker mov x6, x0 1796*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x6, x7 1797*c0909341SAndroid Build Coastguard Worker 1798*c0909341SAndroid Build Coastguard Worker mov v16.16b, v8.16b 1799*c0909341SAndroid Build Coastguard Worker mov v17.16b, v9.16b 1800*c0909341SAndroid Build Coastguard Worker mov v18.16b, v10.16b 1801*c0909341SAndroid Build Coastguard Worker mov v19.16b, v11.16b 1802*c0909341SAndroid Build Coastguard Worker mov v20.16b, v12.16b 1803*c0909341SAndroid Build Coastguard Worker mov v21.16b, v13.16b 1804*c0909341SAndroid Build Coastguard Worker mov v22.16b, v14.16b 1805*c0909341SAndroid Build Coastguard Worker mov v23.16b, v15.16b 1806*c0909341SAndroid Build Coastguard Worker 1807*c0909341SAndroid Build Coastguard Worker blr x5 1808*c0909341SAndroid Build Coastguard Worker 1809*c0909341SAndroid Build Coastguard Worker add x0, x0, #16 1810*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x0, x7 1811*c0909341SAndroid Build Coastguard Worker 1812*c0909341SAndroid Build Coastguard Worker ldp d14, d15, [sp, #0x30] 1813*c0909341SAndroid Build Coastguard Worker ldp d12, d13, [sp, #0x20] 1814*c0909341SAndroid Build Coastguard Worker ldp d10, d11, [sp, #0x10] 1815*c0909341SAndroid Build Coastguard Worker ldp d8, d9, [sp], 0x40 1816*c0909341SAndroid Build Coastguard Worker ret x15 1817*c0909341SAndroid Build Coastguard Workerendfunc 1818*c0909341SAndroid Build Coastguard Worker 1819*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x16_neon 1820*c0909341SAndroid Build Coastguard Worker mov x15, x30 1821*c0909341SAndroid Build Coastguard Worker stp d8, d9, [sp, #-0x20]! 
1822*c0909341SAndroid Build Coastguard Worker stp d10, d11, [sp, #0x10] 1823*c0909341SAndroid Build Coastguard Worker ldrh w12, [x13, #4] 1824*c0909341SAndroid Build Coastguard Worker 1825*c0909341SAndroid Build Coastguard Worker mov x11, #64 1826*c0909341SAndroid Build Coastguard Worker 1827*c0909341SAndroid Build Coastguard Worker cmp w3, w12 1828*c0909341SAndroid Build Coastguard Worker ldrh w12, [x13, #2] 1829*c0909341SAndroid Build Coastguard Worker b.lt 1f 1830*c0909341SAndroid Build Coastguard Worker 1831*c0909341SAndroid Build Coastguard Worker add x6, x2, #48 1832*c0909341SAndroid Build Coastguard Worker movi v4.4s, #0 1833*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 1834*c0909341SAndroid Build Coastguard Worker dup v0.2s, w16 1835*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s 1836*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x6] 1837*c0909341SAndroid Build Coastguard Worker st1 {v4.4s}, [x6], x11 1838*c0909341SAndroid Build Coastguard Worker.endr 1839*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 1840*c0909341SAndroid Build Coastguard Worker blr x4 1841*c0909341SAndroid Build Coastguard Worker 1842*c0909341SAndroid Build Coastguard Worker sqrshrn v28.4h, v16.4s, #1 1843*c0909341SAndroid Build Coastguard Worker sqrshrn v29.4h, v17.4s, #1 1844*c0909341SAndroid Build Coastguard Worker sqrshrn v30.4h, v18.4s, #1 1845*c0909341SAndroid Build Coastguard Worker sqrshrn v31.4h, v19.4s, #1 1846*c0909341SAndroid Build Coastguard Worker sqrshrn2 v28.8h, v20.4s, #1 1847*c0909341SAndroid Build Coastguard Worker sqrshrn2 v29.8h, v21.4s, #1 1848*c0909341SAndroid Build Coastguard Worker sqrshrn2 v30.8h, v22.4s, #1 1849*c0909341SAndroid Build Coastguard Worker sqrshrn2 v31.8h, v23.4s, #1 1850*c0909341SAndroid Build Coastguard Worker transpose_4x8h v28, v29, v30, v31, v2, v3, v4, v5 1851*c0909341SAndroid Build Coastguard 
Worker 1852*c0909341SAndroid Build Coastguard Worker b 2f 1853*c0909341SAndroid Build Coastguard Worker 1854*c0909341SAndroid Build Coastguard Worker1: 1855*c0909341SAndroid Build Coastguard Worker.irp i, v28.8h, v29.8h, v30.8h, v31.8h 1856*c0909341SAndroid Build Coastguard Worker movi \i, #0 1857*c0909341SAndroid Build Coastguard Worker.endr 1858*c0909341SAndroid Build Coastguard Worker 1859*c0909341SAndroid Build Coastguard Worker2: 1860*c0909341SAndroid Build Coastguard Worker cmp w3, w12 1861*c0909341SAndroid Build Coastguard Worker ldrh w12, [x13, #0] 1862*c0909341SAndroid Build Coastguard Worker b.lt 1f 1863*c0909341SAndroid Build Coastguard Worker 1864*c0909341SAndroid Build Coastguard Worker add x6, x2, #32 1865*c0909341SAndroid Build Coastguard Worker movi v4.4s, #0 1866*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 1867*c0909341SAndroid Build Coastguard Worker dup v0.2s, w16 1868*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s 1869*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x6] 1870*c0909341SAndroid Build Coastguard Worker st1 {v4.4s}, [x6], x11 1871*c0909341SAndroid Build Coastguard Worker.endr 1872*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 1873*c0909341SAndroid Build Coastguard Worker blr x4 1874*c0909341SAndroid Build Coastguard Worker 1875*c0909341SAndroid Build Coastguard Worker sqrshrn v24.4h, v16.4s, #1 1876*c0909341SAndroid Build Coastguard Worker sqrshrn v25.4h, v17.4s, #1 1877*c0909341SAndroid Build Coastguard Worker sqrshrn v26.4h, v18.4s, #1 1878*c0909341SAndroid Build Coastguard Worker sqrshrn v27.4h, v19.4s, #1 1879*c0909341SAndroid Build Coastguard Worker sqrshrn2 v24.8h, v20.4s, #1 1880*c0909341SAndroid Build Coastguard Worker sqrshrn2 v25.8h, v21.4s, #1 1881*c0909341SAndroid Build Coastguard Worker sqrshrn2 v26.8h, v22.4s, #1 1882*c0909341SAndroid Build Coastguard Worker sqrshrn2 
v27.8h, v23.4s, #1 1883*c0909341SAndroid Build Coastguard Worker transpose_4x8h v24, v25, v26, v27, v2, v3, v4, v5 1884*c0909341SAndroid Build Coastguard Worker 1885*c0909341SAndroid Build Coastguard Worker b 2f 1886*c0909341SAndroid Build Coastguard Worker 1887*c0909341SAndroid Build Coastguard Worker1: 1888*c0909341SAndroid Build Coastguard Worker.irp i, v24.8h, v25.8h, v26.8h, v27.8h 1889*c0909341SAndroid Build Coastguard Worker movi \i, #0 1890*c0909341SAndroid Build Coastguard Worker.endr 1891*c0909341SAndroid Build Coastguard Worker 1892*c0909341SAndroid Build Coastguard Worker2: 1893*c0909341SAndroid Build Coastguard Worker cmp w3, w12 1894*c0909341SAndroid Build Coastguard Worker b.lt 1f 1895*c0909341SAndroid Build Coastguard Worker 1896*c0909341SAndroid Build Coastguard Worker add x6, x2, #16 1897*c0909341SAndroid Build Coastguard Worker movi v4.4s, #0 1898*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 1899*c0909341SAndroid Build Coastguard Worker dup v0.2s, w16 1900*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s 1901*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x6] 1902*c0909341SAndroid Build Coastguard Worker st1 {v4.4s}, [x6], x11 1903*c0909341SAndroid Build Coastguard Worker.endr 1904*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 1905*c0909341SAndroid Build Coastguard Worker blr x4 1906*c0909341SAndroid Build Coastguard Worker 1907*c0909341SAndroid Build Coastguard Worker sqrshrn v8.4h, v16.4s, #1 1908*c0909341SAndroid Build Coastguard Worker sqrshrn v9.4h, v17.4s, #1 1909*c0909341SAndroid Build Coastguard Worker sqrshrn v10.4h, v18.4s, #1 1910*c0909341SAndroid Build Coastguard Worker sqrshrn v11.4h, v19.4s, #1 1911*c0909341SAndroid Build Coastguard Worker sqrshrn2 v8.8h, v20.4s, #1 1912*c0909341SAndroid Build Coastguard Worker sqrshrn2 v9.8h, v21.4s, #1 1913*c0909341SAndroid Build Coastguard Worker 
sqrshrn2 v10.8h, v22.4s, #1 1914*c0909341SAndroid Build Coastguard Worker sqrshrn2 v11.8h, v23.4s, #1 1915*c0909341SAndroid Build Coastguard Worker transpose_4x8h v8, v9, v10, v11, v2, v3, v4, v5 1916*c0909341SAndroid Build Coastguard Worker 1917*c0909341SAndroid Build Coastguard Worker b 2f 1918*c0909341SAndroid Build Coastguard Worker 1919*c0909341SAndroid Build Coastguard Worker1: 1920*c0909341SAndroid Build Coastguard Worker.irp i, v8.8h, v9.8h, v10.8h, v11.8h 1921*c0909341SAndroid Build Coastguard Worker movi \i, #0 1922*c0909341SAndroid Build Coastguard Worker.endr 1923*c0909341SAndroid Build Coastguard Worker 1924*c0909341SAndroid Build Coastguard Worker2: 1925*c0909341SAndroid Build Coastguard Worker movi v4.4s, #0 1926*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 1927*c0909341SAndroid Build Coastguard Worker dup v0.2s, w16 1928*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s 1929*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [x2] 1930*c0909341SAndroid Build Coastguard Worker st1 {v4.4s}, [x2], x11 1931*c0909341SAndroid Build Coastguard Worker.endr 1932*c0909341SAndroid Build Coastguard Worker scale_input .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 1933*c0909341SAndroid Build Coastguard Worker blr x4 1934*c0909341SAndroid Build Coastguard Worker 1935*c0909341SAndroid Build Coastguard Worker sqrshrn v16.4h, v16.4s, #1 1936*c0909341SAndroid Build Coastguard Worker sqrshrn v17.4h, v17.4s, #1 1937*c0909341SAndroid Build Coastguard Worker sqrshrn v18.4h, v18.4s, #1 1938*c0909341SAndroid Build Coastguard Worker sqrshrn v19.4h, v19.4s, #1 1939*c0909341SAndroid Build Coastguard Worker sqrshrn2 v16.8h, v20.4s, #1 1940*c0909341SAndroid Build Coastguard Worker sqrshrn2 v17.8h, v21.4s, #1 1941*c0909341SAndroid Build Coastguard Worker sqrshrn2 v18.8h, v22.4s, #1 1942*c0909341SAndroid Build Coastguard Worker sqrshrn2 v19.8h, v23.4s, #1 1943*c0909341SAndroid Build 
// Closing part of the routine whose header lies above this excerpt: the last
// first-pass result is transposed in v16-v19, the rows an earlier pass left
// in v8-v11 are moved into v20-v23, and then the second transform and the
// add-to-destination helper run.
        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5

        mov             v20.16b, v8.16b
        mov             v21.16b, v9.16b
        mov             v22.16b, v10.16b
        mov             v23.16b, v11.16b

        blr             x5                      // second transform (pointer in x5)

        load_add_store_8x16 x0, x6

        // Reload d8-d11 and pop the 0x20 bytes of stack they were kept in.
        ldp             d10, d11, [sp, #0x10]
        ldp             d8,  d9,  [sp], 0x20

        ret             x15                     // presumably the saved return address; prologue not visible here
endfunc

// End-of-block threshold tables, four 16-bit entries each. def_fn_816 below
// places the address of one of them in x13 according to the transform pair.
const eob_8x16
        .short 10, 43, 75, 128
endconst

const eob_8x16_identity1
        .short 4, 64, 96, 128
endconst

const eob_8x16_identity2
        .short 4, 8, 12, 128
endconst

// def_fn_816 w, h, txfm1, txfm2
//
// Emits the exported entry point
//     inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon
// The entry point
//   - expands idct_dc first when both transforms are dct,
//   - puts the address of the first transform (4s variant, size w) in x4,
//   - puts the address of the second transform (8h variant, size h) in x5,
//   - selects the eob table in x13:
//         identity + identity   -> eob_8x16
//         identity + other      -> eob_8x16_identity1
//         other    + identity   -> eob_8x16_identity2
//         other    + other      -> eob_8x16
//   - for h == 8 replaces the table pointer by its first entry (in w13),
//   - branches to the shared inv_txfm_add_<w>x<h>_neon body.
.macro def_fn_816 w, h, txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         \w,  \h,  1
.endif
        adr             x4,  inv_\txfm1\()_4s_x\w\()_neon
        movrel          x5,  X(inv_\txfm2\()_8h_x\h\()_neon)
.ifc \txfm1\()_\txfm2, identity_identity
        movrel          x13, eob_8x16
.else
.ifc \txfm1, identity
        movrel          x13, eob_8x16_identity1
.else
.ifc \txfm2, identity
        movrel          x13, eob_8x16_identity2
.else
        movrel          x13, eob_8x16
.endif
.endif
.endif
.if \h == 8
        ldrh            w13, [x13]
.endif
        b               inv_txfm_add_\w\()x\h\()_neon
endfunc
.endm

// def_fns_816 w, h: instantiate def_fn_816 for every supported transform pair.
.macro def_fns_816 w, h
def_fn_816 \w, \h, dct, dct
def_fn_816 \w, \h, identity, identity
def_fn_816 \w, \h, dct, adst
def_fn_816 \w, \h, dct, flipadst
def_fn_816 \w, \h, dct, identity
def_fn_816 \w, \h, adst, dct
def_fn_816 \w, \h, adst, adst
def_fn_816 \w, \h, adst, flipadst
def_fn_816 \w, \h, flipadst, dct
def_fn_816 \w, \h, flipadst, adst
def_fn_816 \w, \h, flipadst, flipadst
def_fn_816 \w, \h, identity, dct
def_fn_816 \w, \h, adst, identity
def_fn_816 \w, \h, flipadst, identity
def_fn_816 \w, \h, identity, adst
def_fn_816 \w, \h, identity, flipadst
.endm

// Generate the entry points for both rectangular sizes.
def_fns_816 8, 16
def_fns_816 16, 8

// inv_dct32_odd_4s_x16_neon
//
// Odd half of a 32-point inverse DCT on four 32-bit lanes per register.
// Inputs are taken from v16-v31 and the results are left in v16-v31; the
// lines tagged "= outNN" mark values that are already final outputs.
// v0-v7 and x16 are overwritten. On return v5/v4 still hold the row clip
// maximum/minimum and v0/v1 hold the eight words at idct_coeffs.
//
// Comment convention: "-> tNN" marks the wide product from mul_mls/mul_mla
// (macros defined elsewhere), and the plain "tNN" on the following srshr is
// that value after the rounding shift by 12.
function inv_dct32_odd_4s_x16_neon
        // First eight coefficients, starting 4*16 bytes into idct_coeffs.
        movrel          x16, idct_coeffs, 4*16
        ld1             {v0.4s, v1.4s}, [x16], #32

        mul_mls         v2,  v16, v31, v0.s[0], v0.s[1] // -> t16a
        mul_mla         v4,  v16, v31, v0.s[1], v0.s[0] // -> t31a
        mul_mls         v6,  v24, v23, v0.s[2], v0.s[3] // -> t17a
        srshr           v16.4s, v2.4s,  #12             // t16a
        srshr           v31.4s, v4.4s,  #12             // t31a
        mul_mla         v2,  v24, v23, v0.s[3], v0.s[2] // -> t30a
        mul_mls         v4,  v20, v27, v1.s[0], v1.s[1] // -> t18a
        srshr           v24.4s, v6.4s,  #12             // t17a
        srshr           v23.4s, v2.4s,  #12             // t30a
        mul_mla         v6,  v20, v27, v1.s[1], v1.s[0] // -> t29a
        mul_mls         v2,  v28, v19, v1.s[2], v1.s[3] // -> t19a
        srshr           v20.4s, v4.4s,  #12             // t18a
        srshr           v27.4s, v6.4s,  #12             // t29a
        mul_mla         v4,  v28, v19, v1.s[3], v1.s[2] // -> t28a
        // Next eight coefficients; afterwards step x16 back by 4*24 bytes,
        // which is the 4*16 start offset plus the 32 bytes consumed above.
        ld1             {v0.4s, v1.4s}, [x16]
        sub             x16, x16, #4*24
        mul_mls         v6,  v18, v29, v0.s[0], v0.s[1] // -> t20a
        srshr           v28.4s, v2.4s,  #12             // t19a
        srshr           v19.4s, v4.4s,  #12             // t28a
        mul_mla         v2,  v18, v29, v0.s[1], v0.s[0] // -> t27a
        mul_mls         v4,  v26, v21, v0.s[2], v0.s[3] // -> t21a
        srshr           v18.4s, v6.4s,  #12             // t20a
        srshr           v29.4s, v2.4s,  #12             // t27a
        mul_mla         v6,  v26, v21, v0.s[3], v0.s[2] // -> t26a
        mul_mls         v2,  v22, v25, v1.s[0], v1.s[1] // -> t22a
        srshr           v26.4s, v4.4s,  #12             // t21a
        srshr           v21.4s, v6.4s,  #12             // t26a
        mul_mla         v4,  v22, v25, v1.s[1], v1.s[0] // -> t25a
        mul_mls         v6,  v30, v17, v1.s[2], v1.s[3] // -> t23a
        srshr           v22.4s, v2.4s,  #12             // t22a
        srshr           v25.4s, v4.4s,  #12             // t25a
        mul_mla         v2,  v30, v17, v1.s[3], v1.s[2] // -> t24a
        srshr           v30.4s, v6.4s,  #12             // t23a
        srshr           v17.4s, v2.4s,  #12             // t24a

        // Coefficients for the remaining stages (x16 was stepped back above).
        ld1             {v0.4s, v1.4s}, [x16]

        movi            v5.4s,  #1, msl #16     // row_clip_max = ~(~bdmax << 7), 0x1ffff
        mvni            v4.4s,  #1, msl #16     // row_clip_min = (~bdmax << 7), 0xfffe0000

        // Butterfly stage 1 (saturating add/sub).
        sqsub           v2.4s,  v16.4s, v24.4s  // t17
        sqadd           v16.4s, v16.4s, v24.4s  // t16
        sqsub           v3.4s,  v31.4s, v23.4s  // t30
        sqadd           v31.4s, v31.4s, v23.4s  // t31
        sqsub           v24.4s, v28.4s, v20.4s  // t18
        sqadd           v28.4s, v28.4s, v20.4s  // t19
        sqadd           v23.4s, v18.4s, v26.4s  // t20
        sqsub           v18.4s, v18.4s, v26.4s  // t21
        sqsub           v20.4s, v30.4s, v22.4s  // t22
        sqadd           v30.4s, v30.4s, v22.4s  // t23
        sqadd           v26.4s, v17.4s, v25.4s  // t24
        sqsub           v17.4s, v17.4s, v25.4s  // t25
        sqsub           v22.4s, v29.4s, v21.4s  // t26
        sqadd           v29.4s, v29.4s, v21.4s  // t27
        sqadd           v25.4s, v19.4s, v27.4s  // t28
        sqsub           v19.4s, v19.4s, v27.4s  // t29

        // Clamp every intermediate to [row_clip_min, row_clip_max].
.irp r, v2, v16, v3, v31, v24, v28, v23, v18, v20, v30, v26, v17, v22, v29, v25, v19
        smin            \r\().4s, \r\().4s, v5.4s
.endr
.irp r, v2, v16, v3, v31, v24, v28, v23, v18, v20, v30, v26, v17, v22, v29, v25, v19
        smax            \r\().4s, \r\().4s, v4.4s
.endr

        // Rotation stage 2; the negated products give the sign-flipped terms.
        mul_mls         v7,  v3,  v2,  v1.s[0], v1.s[1] // -> t17a
        mul_mla         v6,  v3,  v2,  v1.s[1], v1.s[0] // -> t30a
        mul_mla         v2,  v19, v24, v1.s[1], v1.s[0] // -> t18a
        srshr           v21.4s, v7.4s,  #12             // t17a
        srshr           v27.4s, v6.4s,  #12             // t30a
        neg             v2.4s,  v2.4s                   // -> t18a
        mul_mls         v7,  v19, v24, v1.s[0], v1.s[1] // -> t29a
        mul_mls         v6,  v22, v18, v1.s[2], v1.s[3] // -> t21a
        srshr           v19.4s, v2.4s,  #12             // t18a
        srshr           v24.4s, v7.4s,  #12             // t29a
        mul_mla         v2,  v22, v18, v1.s[3], v1.s[2] // -> t26a
        mul_mla         v7,  v17, v20, v1.s[3], v1.s[2] // -> t22a
        srshr           v22.4s, v6.4s,  #12             // t21a
        srshr           v18.4s, v2.4s,  #12             // t26a
        neg             v7.4s,  v7.4s                   // -> t22a
        mul_mls         v6,  v17, v20, v1.s[2], v1.s[3] // -> t25a
        srshr           v17.4s, v7.4s,  #12             // t22a
        srshr           v20.4s, v6.4s,  #12             // t25a

        // Butterfly stage 3.
        sqsub           v2.4s,  v27.4s, v24.4s  // t29
        sqadd           v27.4s, v27.4s, v24.4s  // t30
        sqsub           v3.4s,  v21.4s, v19.4s  // t18
        sqadd           v21.4s, v21.4s, v19.4s  // t17
        sqsub           v24.4s, v16.4s, v28.4s  // t19a
        sqadd           v16.4s, v16.4s, v28.4s  // t16a
        sqsub           v19.4s, v30.4s, v23.4s  // t20a
        sqadd           v30.4s, v30.4s, v23.4s  // t23a
        sqsub           v28.4s, v17.4s, v22.4s  // t21
        sqadd           v17.4s, v17.4s, v22.4s  // t22
        sqadd           v23.4s, v26.4s, v29.4s  // t24a
        sqsub           v26.4s, v26.4s, v29.4s  // t27a
        sqadd           v22.4s, v20.4s, v18.4s  // t25
        sqsub           v20.4s, v20.4s, v18.4s  // t26
        sqsub           v29.4s, v31.4s, v25.4s  // t28a
        sqadd           v31.4s, v31.4s, v25.4s  // t31a

.irp r, v2, v27, v3, v21, v24, v16, v19, v30, v28, v17, v23, v26, v22, v20, v29, v31
        smin            \r\().4s, \r\().4s, v5.4s
.endr
.irp r, v2, v27, v3, v21, v24, v16, v19, v30, v28, v17, v23, v26, v22, v20, v29, v31
        smax            \r\().4s, \r\().4s, v4.4s
.endr

        // Rotation stage 4.
        mul_mls         v7,  v2,  v3,  v0.s[2], v0.s[3] // -> t18a
        mul_mla         v6,  v2,  v3,  v0.s[3], v0.s[2] // -> t29a
        mul_mls         v2,  v29, v24, v0.s[2], v0.s[3] // -> t19
        srshr           v18.4s, v7.4s,  #12             // t18a
        srshr           v25.4s, v6.4s,  #12             // t29a
        mul_mla         v7,  v29, v24, v0.s[3], v0.s[2] // -> t28
        mul_mla         v6,  v26, v19, v0.s[3], v0.s[2] // -> t20
        srshr           v29.4s, v2.4s,  #12             // t19
        srshr           v24.4s, v7.4s,  #12             // t28
        neg             v6.4s,  v6.4s                   // -> t20
        mul_mls         v2,  v26, v19, v0.s[2], v0.s[3] // -> t27
        mul_mla         v7,  v20, v28, v0.s[3], v0.s[2] // -> t21a
        srshr           v26.4s, v6.4s,  #12             // t20
        srshr           v19.4s, v2.4s,  #12             // t27
        neg             v7.4s,  v7.4s                   // -> t21a
        mul_mls         v6,  v20, v28, v0.s[2], v0.s[3] // -> t26a
        srshr           v20.4s, v7.4s,  #12             // t21a
        srshr           v28.4s, v6.4s,  #12             // t26a

        // Butterfly stage 5; half of the results are final outputs.
        sqsub           v2.4s,  v16.4s, v30.4s  // t23
        sqadd           v16.4s, v16.4s, v30.4s  // t16 = out16
        sqsub           v3.4s,  v31.4s, v23.4s  // t24
        sqadd           v31.4s, v31.4s, v23.4s  // t31 = out31
        sqsub           v23.4s, v21.4s, v17.4s  // t22a
        sqadd           v17.4s, v21.4s, v17.4s  // t17a = out17
        sqadd           v30.4s, v27.4s, v22.4s  // t30a = out30
        sqsub           v21.4s, v27.4s, v22.4s  // t25a
        sqsub           v27.4s, v18.4s, v20.4s  // t21
        sqadd           v18.4s, v18.4s, v20.4s  // t18 = out18
        sqadd           v7.4s,  v29.4s, v26.4s  // t19a = out19
        sqsub           v26.4s, v29.4s, v26.4s  // t20a
        sqadd           v29.4s, v25.4s, v28.4s  // t29 = out29
        sqsub           v25.4s, v25.4s, v28.4s  // t26
        sqadd           v28.4s, v24.4s, v19.4s  // t28a = out28
        sqsub           v24.4s, v24.4s, v19.4s  // t27a
        mov             v19.16b, v7.16b         // out19

.irp r, v2, v16, v3, v31, v23, v17, v30, v21, v27, v18, v19, v26, v29, v25, v28, v24
        smin            \r\().4s, \r\().4s, v5.4s
.endr
.irp r, v2, v16, v3, v31, v23, v17, v30, v21, v27, v18, v19, v26, v29, v25, v28, v24
        smax            \r\().4s, \r\().4s, v4.4s
.endr

        // Final stage: sum/difference pairs all scaled by the same
        // coefficient v0.s[0].
        mul_mls         v7,  v24, v26, v0.s[0], v0.s[0] // -> t20
        mul_mla         v6,  v24, v26, v0.s[0], v0.s[0] // -> t27
        srshr           v20.4s, v7.4s,  #12             // t20
        srshr           v22.4s, v6.4s,  #12             // t27

        mul_mla         v7,  v25, v27, v0.s[0], v0.s[0] // -> t26a
        mul_mls         v6,  v25, v27, v0.s[0], v0.s[0] // -> t21a
        mov             v27.16b, v22.16b                // t27
        srshr           v26.4s, v7.4s,  #12             // t26a

        mul_mls         v24, v21, v23, v0.s[0], v0.s[0] // -> t22
        mul_mla         v7,  v21, v23, v0.s[0], v0.s[0] // -> t25
        srshr           v21.4s, v6.4s,  #12             // t21a
        srshr           v22.4s, v24.4s, #12             // t22
        srshr           v25.4s, v7.4s,  #12             // t25

        mul_mls         v7,  v3,  v2,  v0.s[0], v0.s[0] // -> t23a
        mul_mla         v6,  v3,  v2,  v0.s[0], v0.s[0] // -> t24a
        srshr           v23.4s, v7.4s,  #12             // t23a
        srshr           v24.4s, v6.4s,  #12             // t24a

        ret
endfunc

// def_horz_32 scale, shift, suffix
//
// Emits inv_txfm_horz<suffix>_dct_32x4_neon, a 32-point horizontal DCT pass
// over four lanes. x7 is the coefficient input, x6 the output buffer and x8
// the input stride. With scale set the inputs are first passed through
// scale_input; shift is the rounding shift used when narrowing to 16 bit.
.macro def_horz_32 scale=0, shift=2, suffix
function inv_txfm_horz\suffix\()_dct_32x4_neon
        mov             x14, x30                // keep the return address across the bl calls
        movi            v7.4s,  #0              // zero vector for clearing the input
        lsl             x8,  x8,  #1            // double the stride: visit every second row
// Continuation of the body of inv_txfm_horz<suffix>_dct_32x4_neon (still
// inside the def_horz_32 macro).
.if \scale
        // Scaling constant 2896*8 in the upper halfword of each word.
        movz            w16, #2896*8, lsl #16
        dup             v0.2s,   w16
.endif

        // Load the 16 even-numbered input rows and clear them in memory.
.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
        ld1             {\i}, [x7]
        st1             {v7.4s}, [x7], x8
.endr
        // Step back over the 16 rows just read, then forward by one original
        // stride so x7 points at the first odd-numbered row.
        sub             x7,  x7,  x8, lsl #4
        add             x7,  x7,  x8, lsr #1
.if \scale
        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
        scale_input     .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31
.endif
        bl              inv_dct_4s_x16_neon

        // idct_16 leaves the row_clip_max/min constants in v5 and v4
.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
        smin_4s         \r, \r, v5
.endr
.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
        smax_4s         \r, \r, v4
.endr

        transpose_4x4s  v16, v17, v18, v19, v2,  v3,  v4,  v5
        transpose_4x4s  v20, v21, v22, v23, v2,  v3,  v4,  v5
        transpose_4x4s  v24, v25, v26, v27, v2,  v3,  v4,  v5
        transpose_4x4s  v28, v29, v30, v31, v2,  v3,  v4,  v5

        // Park the even half in the output buffer: four rows of four
        // vectors, 64 bytes per row.
.macro store1 r0, r1, r2, r3
        st1             {\r0}, [x6], #16
        st1             {\r1}, [x6], #16
        st1             {\r2}, [x6], #16
        st1             {\r3}, [x6], #16
.endm
        store1          v16.4s,  v20.4s,  v24.4s,  v28.4s
        store1          v17.4s,  v21.4s,  v25.4s,  v29.4s
        store1          v18.4s,  v22.4s,  v26.4s,  v30.4s
        store1          v19.4s,  v23.4s,  v27.4s,  v31.4s
.purgem store1
        sub             x6,  x6,  #64*4         // back to the start of the parked rows

        // Load the 16 odd-numbered input rows and clear them in memory.
        movi            v7.4s,  #0
.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
        ld1             {\i}, [x7]
        st1             {v7.4s}, [x7], x8
.endr
.if \scale
        // This relies on the fact that the idct also leaves the right coeff in v0.s[1]
        scale_input     .4s, v0.s[1], v16, v17, v18, v19, v20, v21, v22, v23
        scale_input     .4s, v0.s[1], v24, v25, v26, v27, v28, v29, v30, v31
.endif
        bl              inv_dct32_odd_4s_x16_neon
        // The odd half is transposed with the register order reversed.
        transpose_4x4s  v31, v30, v29, v28, v2,  v3,  v4,  v5
        transpose_4x4s  v27, v26, v25, v24, v2,  v3,  v4,  v5
        transpose_4x4s  v23, v22, v21, v20, v2,  v3,  v4,  v5
        transpose_4x4s  v19, v18, v17, v16, v2,  v3,  v4,  v5
        // store2: reload one parked even row, form even+odd and even-odd,
        // narrow both to 16 bit with a rounding shift, and write 64 bytes:
        // the sums in order followed by the differences in reversed order.
.macro store2 r0, r1, r2, r3, shift
        ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x6]
        sqsub           v4.4s,   v0.4s,   \r0
        sqadd           v0.4s,   v0.4s,   \r0
        sqsub           v5.4s,   v1.4s,   \r1
        sqadd           v1.4s,   v1.4s,   \r1
        sqsub           v6.4s,   v2.4s,   \r2
        sqadd           v2.4s,   v2.4s,   \r2
        sqsub           v7.4s,   v3.4s,   \r3
        sqadd           v3.4s,   v3.4s,   \r3
        sqrshrn         v0.4h,   v0.4s,   #\shift
        sqrshrn2        v0.8h,   v1.4s,   #\shift
        sqrshrn         v1.4h,   v2.4s,   #\shift
        sqrshrn2        v1.8h,   v3.4s,   #\shift
        sqrshrn         v2.4h,   v7.4s,   #\shift
        sqrshrn2        v2.8h,   v6.4s,   #\shift
        sqrshrn         v3.4h,   v5.4s,   #\shift
        sqrshrn2        v3.8h,   v4.4s,   #\shift
        st1             {v0.8h, v1.8h}, [x6], #32
        rev64           v2.8h,   v2.8h
        rev64           v3.8h,   v3.8h
        st1             {v2.8h, v3.8h}, [x6], #32
.endm

        store2          v31.4s,  v27.4s,  v23.4s,  v19.4s,  \shift
        store2          v30.4s,  v26.4s,  v22.4s,  v18.4s,  \shift
        store2          v29.4s,  v25.4s,  v21.4s,  v17.4s,  \shift
        store2          v28.4s,  v24.4s,  v20.4s,  v16.4s,  \shift
.purgem store2
        ret             x14
endfunc
.endm

// Plain variant (shift 2) and input-scaling variant (shift 1, _scale suffix).
def_horz_32 scale=0, shift=2
def_horz_32 scale=1, shift=1, suffix=_scale

// inv_txfm_add_vert_dct_8x32_neon
//
// Vertical 32-point DCT over eight 16-bit columns, added to the destination.
// x7 is the coefficient buffer, x8 its row stride, x6 the destination and x1
// the destination stride. The even-half result is written back over the
// coefficient rows it was read from and re-read while combining with the odd
// half. Results are rounded by 4 bits, added to the pixels and limited to
// 0x3ff (presumably the 10-bit pixel maximum).
function inv_txfm_add_vert_dct_8x32_neon
        mov             x14, x30                // keep the return address across the bl calls
        lsl             x8,  x8,  #1            // double the stride: visit every second row

        // Even-numbered rows.
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
        ld1             {v\i\().8h}, [x7], x8
.endr
        sub             x7,  x7,  x8, lsl #4

        bl              X(inv_dct_8h_x16_neon)

        // Write the even-half result back over the rows just read.
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
        st1             {v\i\().8h}, [x7], x8
.endr
        sub             x7,  x7,  x8, lsl #4
        add             x7,  x7,  x8, lsr #1    // first odd-numbered row

        // Odd-numbered rows.
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
        ld1             {v\i\().8h}, [x7], x8
.endr
        sub             x7,  x7,  x8, lsl #4
        sub             x7,  x7,  x8, lsr #1    // back to the stored even-half rows
        bl              X(inv_dct32_odd_8h_x16_neon)

        neg             x9,  x8                 // negative stride for the backward walk
        mov             x10, x6                 // x10 reads pixels, x6 writes them
        mvni            v1.8h,   #0xfc, lsl #8 // 0x3ff
        // combine: four output rows per expansion. op (sqadd or sqsub) merges
        // a stored even-half row with an odd-half register; the result is
        // rounded, added to the destination pixels with usqadd, limited by v1
        // and stored. Loads, arithmetic and stores are interleaved.
.macro combine r0, r1, r2, r3, op, stride
        ld1             {v5.8h}, [x7],    \stride
        ld1             {v2.8h}, [x10],   x1
        ld1             {v6.8h}, [x7],    \stride
        ld1             {v3.8h}, [x10],   x1
        \op             v5.8h,   v5.8h,   \r0
        ld1             {v7.8h}, [x7],    \stride
        ld1             {v4.8h}, [x10],   x1
        srshr           v5.8h,   v5.8h,   #4
        \op             v6.8h,   v6.8h,   \r1
        usqadd          v2.8h,   v5.8h
        srshr           v6.8h,   v6.8h,   #4
        \op             v7.8h,   v7.8h,   \r2
        ld1             {v5.8h}, [x7],    \stride
        usqadd          v3.8h,   v6.8h
        smin            v2.8h,   v2.8h,   v1.8h
        srshr           v7.8h,   v7.8h,   #4
        \op             v5.8h,   v5.8h,   \r3
        st1             {v2.8h}, [x6],    x1
        ld1             {v2.8h}, [x10],   x1
        usqadd          v4.8h,   v7.8h
        smin            v3.8h,   v3.8h,   v1.8h
        srshr           v5.8h,   v5.8h,   #4
        st1             {v3.8h}, [x6],    x1
        usqadd          v2.8h,   v5.8h
        smin            v4.8h,   v4.8h,   v1.8h
        st1             {v4.8h}, [x6],    x1
        smin            v2.8h,   v2.8h,   v1.8h
        st1             {v2.8h}, [x6],    x1
.endm
        // Forward over the even rows with sqadd, odd registers from v31 down.
        combine         v31.8h, v30.8h, v29.8h, v28.8h, sqadd, x8
        combine         v27.8h, v26.8h, v25.8h, v24.8h, sqadd, x8
        combine         v23.8h, v22.8h, v21.8h, v20.8h, sqadd, x8
        combine         v19.8h, v18.8h, v17.8h, v16.8h, sqadd, x8
        // Step back onto the last even row, then walk backwards with sqsub.
        sub             x7,  x7,  x8
        combine         v16.8h, v17.8h, v18.8h, v19.8h, sqsub, x9
        combine         v20.8h, v21.8h, v22.8h, v23.8h, sqsub, x9
        combine         v24.8h, v25.8h, v26.8h, v27.8h, sqsub, x9
        combine         v28.8h, v29.8h, v30.8h, v31.8h, sqsub, x9
.purgem combine

        ret             x14
endfunc

// End-of-block threshold tables (16-bit entries) for the 32-point sizes.
const eob_32x32
        .short 10, 36, 78, 136, 210, 300, 406, 1024
endconst

const eob_16x32
        .short 10, 36, 78, 151, 215, 279, 343, 512
endconst

const eob_16x32_shortside
        .short 10, 36, 78, 512
endconst

const eob_8x32
        .short 10, 43, 75, 107, 139, 171, 203, 256
endconst

// Opening of the exported identity/identity 32x32 routine; its body
// continues beyond this point.
function inv_txfm_add_identity_identity_32x32_16bpc_neon, export=1
        movi            v0.8h,  #0
        movi            v1.8h,  #0
        movrel          x13, eob_32x32, 2

        mov             x8,  #4*32
1:
        mov             w9,  #0
        movrel          x12, eob_32x32, 2
2:
        add             w9,  w9,  #8
        ld1             {v16.4s, v17.4s}, [x2]
        st1             {v0.4s, v1.4s},   [x2], x8
Coastguard Worker ld1 {v18.4s, v19.4s}, [x2] 2400*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2401*c0909341SAndroid Build Coastguard Worker ld1 {v20.4s, v21.4s}, [x2] 2402*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2403*c0909341SAndroid Build Coastguard Worker ld1 {v22.4s, v23.4s}, [x2] 2404*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2405*c0909341SAndroid Build Coastguard Worker ld1 {v24.4s, v25.4s}, [x2] 2406*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2407*c0909341SAndroid Build Coastguard Worker ld1 {v26.4s, v27.4s}, [x2] 2408*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2409*c0909341SAndroid Build Coastguard Worker ld1 {v28.4s, v29.4s}, [x2] 2410*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2411*c0909341SAndroid Build Coastguard Worker ld1 {v30.4s, v31.4s}, [x2] 2412*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2413*c0909341SAndroid Build Coastguard Worker sqxtn v16.4h, v16.4s 2414*c0909341SAndroid Build Coastguard Worker sqxtn2 v16.8h, v17.4s 2415*c0909341SAndroid Build Coastguard Worker sqxtn v17.4h, v18.4s 2416*c0909341SAndroid Build Coastguard Worker sqxtn2 v17.8h, v19.4s 2417*c0909341SAndroid Build Coastguard Worker sqxtn v18.4h, v20.4s 2418*c0909341SAndroid Build Coastguard Worker sqxtn2 v18.8h, v21.4s 2419*c0909341SAndroid Build Coastguard Worker sqxtn v19.4h, v22.4s 2420*c0909341SAndroid Build Coastguard Worker sqxtn2 v19.8h, v23.4s 2421*c0909341SAndroid Build Coastguard Worker sqxtn v20.4h, v24.4s 2422*c0909341SAndroid Build Coastguard Worker sqxtn2 v20.8h, v25.4s 2423*c0909341SAndroid Build Coastguard Worker sqxtn v21.4h, v26.4s 2424*c0909341SAndroid Build Coastguard Worker sqxtn2 v21.8h, v27.4s 2425*c0909341SAndroid Build Coastguard Worker sqxtn v22.4h, v28.4s 2426*c0909341SAndroid Build Coastguard Worker sqxtn2 v22.8h, v29.4s 2427*c0909341SAndroid Build Coastguard Worker 
sqxtn v23.4h, v30.4s 2428*c0909341SAndroid Build Coastguard Worker sqxtn2 v23.8h, v31.4s 2429*c0909341SAndroid Build Coastguard Worker transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v4, v5 2430*c0909341SAndroid Build Coastguard Worker 2431*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x0, x7, shiftbits=2 2432*c0909341SAndroid Build Coastguard Worker ldrh w11, [x12], #4 2433*c0909341SAndroid Build Coastguard Worker sub x0, x0, x1, lsl #3 2434*c0909341SAndroid Build Coastguard Worker add x0, x0, #2*8 2435*c0909341SAndroid Build Coastguard Worker cmp w3, w11 2436*c0909341SAndroid Build Coastguard Worker b.ge 2b 2437*c0909341SAndroid Build Coastguard Worker 2438*c0909341SAndroid Build Coastguard Worker ldrh w11, [x13], #4 2439*c0909341SAndroid Build Coastguard Worker cmp w3, w11 2440*c0909341SAndroid Build Coastguard Worker b.lt 9f 2441*c0909341SAndroid Build Coastguard Worker 2442*c0909341SAndroid Build Coastguard Worker sub x0, x0, w9, uxtw #1 2443*c0909341SAndroid Build Coastguard Worker add x0, x0, x1, lsl #3 2444*c0909341SAndroid Build Coastguard Worker msub x2, x8, x9, x2 2445*c0909341SAndroid Build Coastguard Worker add x2, x2, #4*8 2446*c0909341SAndroid Build Coastguard Worker b 1b 2447*c0909341SAndroid Build Coastguard Worker9: 2448*c0909341SAndroid Build Coastguard Worker ret 2449*c0909341SAndroid Build Coastguard Workerendfunc 2450*c0909341SAndroid Build Coastguard Worker 2451*c0909341SAndroid Build Coastguard Worker.macro shift_16_regs op, shift 2452*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s 2453*c0909341SAndroid Build Coastguard Worker \op \i, \i, #\shift 2454*c0909341SAndroid Build Coastguard Worker.endr 2455*c0909341SAndroid Build Coastguard Worker.endm 2456*c0909341SAndroid Build Coastguard Worker 2457*c0909341SAndroid Build Coastguard Worker.macro def_identity_1632 w, h, wshort, hshort 
2458*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_16bpc_neon, export=1 2459*c0909341SAndroid Build Coastguard Worker movz w16, #2896*8, lsl #16 2460*c0909341SAndroid Build Coastguard Worker movz w17, #2*(5793-4096)*8, lsl #16 2461*c0909341SAndroid Build Coastguard Worker movi v0.4s, #0 2462*c0909341SAndroid Build Coastguard Worker movi v1.4s, #0 2463*c0909341SAndroid Build Coastguard Worker movrel x13, eob_16x32\hshort, 2 2464*c0909341SAndroid Build Coastguard Worker 2465*c0909341SAndroid Build Coastguard Worker mov x8, #4*\h 2466*c0909341SAndroid Build Coastguard Worker1: 2467*c0909341SAndroid Build Coastguard Worker mov w9, #0 2468*c0909341SAndroid Build Coastguard Worker movrel x12, eob_16x32\wshort, 2 2469*c0909341SAndroid Build Coastguard Worker2: 2470*c0909341SAndroid Build Coastguard Worker add w9, w9, #8 2471*c0909341SAndroid Build Coastguard Worker ld1 {v16.4s, v17.4s}, [x2] 2472*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2473*c0909341SAndroid Build Coastguard Worker dup v2.2s, w16 2474*c0909341SAndroid Build Coastguard Worker ld1 {v18.4s, v19.4s}, [x2] 2475*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2476*c0909341SAndroid Build Coastguard Worker mov v2.s[1], w17 2477*c0909341SAndroid Build Coastguard Worker ld1 {v20.4s, v21.4s}, [x2] 2478*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2479*c0909341SAndroid Build Coastguard Worker ld1 {v22.4s, v23.4s}, [x2] 2480*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2481*c0909341SAndroid Build Coastguard Worker ld1 {v24.4s, v25.4s}, [x2] 2482*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2483*c0909341SAndroid Build Coastguard Worker ld1 {v26.4s, v27.4s}, [x2] 2484*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2485*c0909341SAndroid Build Coastguard Worker ld1 {v28.4s, v29.4s}, [x2] 2486*c0909341SAndroid Build Coastguard 
Worker st1 {v0.4s, v1.4s}, [x2], x8 2487*c0909341SAndroid Build Coastguard Worker ld1 {v30.4s, v31.4s}, [x2] 2488*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2489*c0909341SAndroid Build Coastguard Worker scale_input .4s, v2.s[0], v16, v17, v18, v19, v20, v21, v22, v23 2490*c0909341SAndroid Build Coastguard Worker scale_input .4s, v2.s[0], v24, v25, v26, v27, v28, v29, v30, v31 2491*c0909341SAndroid Build Coastguard Worker 2492*c0909341SAndroid Build Coastguard Worker.if \w == 16 2493*c0909341SAndroid Build Coastguard Worker // 16x32 2494*c0909341SAndroid Build Coastguard Worker identity_4x16_shift1 v2.s[1] 2495*c0909341SAndroid Build Coastguard Worker.else 2496*c0909341SAndroid Build Coastguard Worker // 32x16 2497*c0909341SAndroid Build Coastguard Worker shift_16_regs sqshl, 1 2498*c0909341SAndroid Build Coastguard Worker identity_4x16 v2.s[1] 2499*c0909341SAndroid Build Coastguard Worker.endif 2500*c0909341SAndroid Build Coastguard Worker sqxtn v16.4h, v16.4s 2501*c0909341SAndroid Build Coastguard Worker sqxtn2 v16.8h, v17.4s 2502*c0909341SAndroid Build Coastguard Worker sqxtn v17.4h, v18.4s 2503*c0909341SAndroid Build Coastguard Worker sqxtn2 v17.8h, v19.4s 2504*c0909341SAndroid Build Coastguard Worker sqxtn v18.4h, v20.4s 2505*c0909341SAndroid Build Coastguard Worker sqxtn2 v18.8h, v21.4s 2506*c0909341SAndroid Build Coastguard Worker sqxtn v19.4h, v22.4s 2507*c0909341SAndroid Build Coastguard Worker sqxtn2 v19.8h, v23.4s 2508*c0909341SAndroid Build Coastguard Worker sqxtn v20.4h, v24.4s 2509*c0909341SAndroid Build Coastguard Worker sqxtn2 v20.8h, v25.4s 2510*c0909341SAndroid Build Coastguard Worker sqxtn v21.4h, v26.4s 2511*c0909341SAndroid Build Coastguard Worker sqxtn2 v21.8h, v27.4s 2512*c0909341SAndroid Build Coastguard Worker sqxtn v22.4h, v28.4s 2513*c0909341SAndroid Build Coastguard Worker sqxtn2 v22.8h, v29.4s 2514*c0909341SAndroid Build Coastguard Worker sqxtn v23.4h, v30.4s 2515*c0909341SAndroid Build Coastguard Worker sqxtn2 
v23.8h, v31.4s 2516*c0909341SAndroid Build Coastguard Worker 2517*c0909341SAndroid Build Coastguard Worker transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v4, v5 2518*c0909341SAndroid Build Coastguard Worker 2519*c0909341SAndroid Build Coastguard Worker.if \w == 16 2520*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x0, x7, shiftbits=2 2521*c0909341SAndroid Build Coastguard Worker.else 2522*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x0, x7, shiftbits=4 2523*c0909341SAndroid Build Coastguard Worker.endif 2524*c0909341SAndroid Build Coastguard Worker ldrh w11, [x12], #4 2525*c0909341SAndroid Build Coastguard Worker sub x0, x0, x1, lsl #3 2526*c0909341SAndroid Build Coastguard Worker add x0, x0, #16 2527*c0909341SAndroid Build Coastguard Worker cmp w3, w11 2528*c0909341SAndroid Build Coastguard Worker b.ge 2b 2529*c0909341SAndroid Build Coastguard Worker 2530*c0909341SAndroid Build Coastguard Worker ldrh w11, [x13], #4 2531*c0909341SAndroid Build Coastguard Worker cmp w3, w11 2532*c0909341SAndroid Build Coastguard Worker b.lt 9f 2533*c0909341SAndroid Build Coastguard Worker 2534*c0909341SAndroid Build Coastguard Worker sub x0, x0, w9, uxtw #1 2535*c0909341SAndroid Build Coastguard Worker add x0, x0, x1, lsl #3 2536*c0909341SAndroid Build Coastguard Worker msub x2, x8, x9, x2 2537*c0909341SAndroid Build Coastguard Worker add x2, x2, #4*8 2538*c0909341SAndroid Build Coastguard Worker b 1b 2539*c0909341SAndroid Build Coastguard Worker9: 2540*c0909341SAndroid Build Coastguard Worker ret 2541*c0909341SAndroid Build Coastguard Workerendfunc 2542*c0909341SAndroid Build Coastguard Worker.endm 2543*c0909341SAndroid Build Coastguard Worker 2544*c0909341SAndroid Build Coastguard Workerdef_identity_1632 16, 32, _shortside, 2545*c0909341SAndroid Build Coastguard Workerdef_identity_1632 32, 16, , _shortside 2546*c0909341SAndroid Build Coastguard Worker 2547*c0909341SAndroid Build Coastguard Worker.macro def_identity_832 w, h 
2548*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_16bpc_neon, export=1 2549*c0909341SAndroid Build Coastguard Worker movi v0.4s, #0 2550*c0909341SAndroid Build Coastguard Worker movi v1.4s, #0 2551*c0909341SAndroid Build Coastguard Worker // Working on 8x8 blocks, read every other entry from eob_8x32 2552*c0909341SAndroid Build Coastguard Worker movrel x13, eob_8x32, 2 2553*c0909341SAndroid Build Coastguard Worker 2554*c0909341SAndroid Build Coastguard Worker mov w8, #4*\h 2555*c0909341SAndroid Build Coastguard Worker1: 2556*c0909341SAndroid Build Coastguard Worker // Working on 8x8 blocks, read every other entry from eob_8x32 2557*c0909341SAndroid Build Coastguard Worker ldrh w12, [x13], #4 2558*c0909341SAndroid Build Coastguard Worker ld1 {v16.4s, v17.4s}, [x2] 2559*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2560*c0909341SAndroid Build Coastguard Worker ld1 {v18.4s, v19.4s}, [x2] 2561*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2562*c0909341SAndroid Build Coastguard Worker ld1 {v20.4s, v21.4s}, [x2] 2563*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2564*c0909341SAndroid Build Coastguard Worker ld1 {v22.4s, v23.4s}, [x2] 2565*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2566*c0909341SAndroid Build Coastguard Worker ld1 {v24.4s, v25.4s}, [x2] 2567*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2568*c0909341SAndroid Build Coastguard Worker ld1 {v26.4s, v27.4s}, [x2] 2569*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2570*c0909341SAndroid Build Coastguard Worker ld1 {v28.4s, v29.4s}, [x2] 2571*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2572*c0909341SAndroid Build Coastguard Worker ld1 {v30.4s, v31.4s}, [x2] 2573*c0909341SAndroid Build Coastguard Worker st1 {v0.4s, v1.4s}, [x2], x8 2574*c0909341SAndroid Build Coastguard Worker 2575*c0909341SAndroid 
Build Coastguard Worker.if \w == 8 2576*c0909341SAndroid Build Coastguard Worker sqrshrn v16.4h, v16.4s, #1 2577*c0909341SAndroid Build Coastguard Worker sqrshrn2 v16.8h, v17.4s, #1 2578*c0909341SAndroid Build Coastguard Worker sqrshrn v17.4h, v18.4s, #1 2579*c0909341SAndroid Build Coastguard Worker sqrshrn2 v17.8h, v19.4s, #1 2580*c0909341SAndroid Build Coastguard Worker sqrshrn v18.4h, v20.4s, #1 2581*c0909341SAndroid Build Coastguard Worker sqrshrn2 v18.8h, v21.4s, #1 2582*c0909341SAndroid Build Coastguard Worker sqrshrn v19.4h, v22.4s, #1 2583*c0909341SAndroid Build Coastguard Worker sqrshrn2 v19.8h, v23.4s, #1 2584*c0909341SAndroid Build Coastguard Worker sqrshrn v20.4h, v24.4s, #1 2585*c0909341SAndroid Build Coastguard Worker sqrshrn2 v20.8h, v25.4s, #1 2586*c0909341SAndroid Build Coastguard Worker sqrshrn v21.4h, v26.4s, #1 2587*c0909341SAndroid Build Coastguard Worker sqrshrn2 v21.8h, v27.4s, #1 2588*c0909341SAndroid Build Coastguard Worker sqrshrn v22.4h, v28.4s, #1 2589*c0909341SAndroid Build Coastguard Worker sqrshrn2 v22.8h, v29.4s, #1 2590*c0909341SAndroid Build Coastguard Worker sqrshrn v23.4h, v30.4s, #1 2591*c0909341SAndroid Build Coastguard Worker sqrshrn2 v23.8h, v31.4s, #1 2592*c0909341SAndroid Build Coastguard Worker.else 2593*c0909341SAndroid Build Coastguard Worker sqxtn v16.4h, v16.4s 2594*c0909341SAndroid Build Coastguard Worker sqxtn2 v16.8h, v17.4s 2595*c0909341SAndroid Build Coastguard Worker sqxtn v17.4h, v18.4s 2596*c0909341SAndroid Build Coastguard Worker sqxtn2 v17.8h, v19.4s 2597*c0909341SAndroid Build Coastguard Worker sqxtn v18.4h, v20.4s 2598*c0909341SAndroid Build Coastguard Worker sqxtn2 v18.8h, v21.4s 2599*c0909341SAndroid Build Coastguard Worker sqxtn v19.4h, v22.4s 2600*c0909341SAndroid Build Coastguard Worker sqxtn2 v19.8h, v23.4s 2601*c0909341SAndroid Build Coastguard Worker sqxtn v20.4h, v24.4s 2602*c0909341SAndroid Build Coastguard Worker sqxtn2 v20.8h, v25.4s 2603*c0909341SAndroid Build Coastguard Worker sqxtn v21.4h, 
v26.4s 2604*c0909341SAndroid Build Coastguard Worker sqxtn2 v21.8h, v27.4s 2605*c0909341SAndroid Build Coastguard Worker sqxtn v22.4h, v28.4s 2606*c0909341SAndroid Build Coastguard Worker sqxtn2 v22.8h, v29.4s 2607*c0909341SAndroid Build Coastguard Worker sqxtn v23.4h, v30.4s 2608*c0909341SAndroid Build Coastguard Worker sqxtn2 v23.8h, v31.4s 2609*c0909341SAndroid Build Coastguard Worker.endif 2610*c0909341SAndroid Build Coastguard Worker 2611*c0909341SAndroid Build Coastguard Worker transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v4, v5 2612*c0909341SAndroid Build Coastguard Worker 2613*c0909341SAndroid Build Coastguard Worker 2614*c0909341SAndroid Build Coastguard Worker cmp w3, w12 2615*c0909341SAndroid Build Coastguard Worker.if \w == 8 2616*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x0, x7, shiftbits=2 2617*c0909341SAndroid Build Coastguard Worker.else 2618*c0909341SAndroid Build Coastguard Worker load_add_store_8x8 x0, x7, shiftbits=3 2619*c0909341SAndroid Build Coastguard Worker.endif 2620*c0909341SAndroid Build Coastguard Worker 2621*c0909341SAndroid Build Coastguard Worker b.lt 9f 2622*c0909341SAndroid Build Coastguard Worker.if \w == 8 2623*c0909341SAndroid Build Coastguard Worker sub x2, x2, x8, lsl #3 2624*c0909341SAndroid Build Coastguard Worker add x2, x2, #4*8 2625*c0909341SAndroid Build Coastguard Worker.else 2626*c0909341SAndroid Build Coastguard Worker sub x0, x0, x1, lsl #3 2627*c0909341SAndroid Build Coastguard Worker add x0, x0, #2*8 2628*c0909341SAndroid Build Coastguard Worker.endif 2629*c0909341SAndroid Build Coastguard Worker b 1b 2630*c0909341SAndroid Build Coastguard Worker 2631*c0909341SAndroid Build Coastguard Worker9: 2632*c0909341SAndroid Build Coastguard Worker ret 2633*c0909341SAndroid Build Coastguard Workerendfunc 2634*c0909341SAndroid Build Coastguard Worker.endm 2635*c0909341SAndroid Build Coastguard Worker 2636*c0909341SAndroid Build Coastguard Workerdef_identity_832 8, 32 2637*c0909341SAndroid Build 
def_identity_832 32, 8

// DCT/DCT 32x32 transform, done in two passes via a 2048 byte scratch buffer
// on the stack (64 bytes per buffer row).
//   x0 = dst, x1 = dst_stride, x2 = coeff, w3 = eob
// Pass 1 calls inv_txfm_horz_dct_32x4_neon once per group of 4 (x7 = input
// at coeff + i*4 bytes, x6 = output at sp + i*64 bytes). As soon as eob is
// below the next eob_32x32 threshold, the rest of the scratch buffer is
// zero-filled instead (labels 1:/2:).
// Pass 2 calls inv_txfm_add_vert_dct_8x32_neon for each 8 column slice of
// dst and of the scratch buffer.
// NOTE(review): idct_dc presumably handles the dc-only case and returns
// early - confirm in the macro definition.
function inv_txfm_add_dct_dct_32x32_16bpc_neon, export=1
        idct_dc         32,  32,  2

        // Keep the return address in x15, the bl calls below overwrite x30
        mov             x15, x30
        sub             sp,  sp,  #2048
        movrel          x13, eob_32x32
        ldrh            w12, [x13], #2

.irp i, 0, 4, 8, 12, 16, 20, 24, 28
        add             x6,  sp,  #(\i*32*2)
.if \i > 0
        // w8 = number of buffer rows still to be produced; only used by the
        // zero-fill loop at 1:, x8 is overwritten again before the call
        mov             w8,  #(32 - \i)
        cmp             w3,  w12
        b.lt            1f
.if \i < 28
        ldrh            w12, [x13], #2
.endif
.endif
        add             x7,  x2,  #(\i*4)
        mov             x8,  #32*4
        bl              inv_txfm_horz_dct_32x4_neon
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // Each iteration clears 4 buffer rows (4 * 64 bytes)
        subs            w8,  w8,  #4
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
.irp i, 0, 8, 16, 24
        add             x6,  x0,  #(\i*2)
        add             x7,  sp,  #(\i*2)
        mov             x8,  #32*2
        bl              inv_txfm_add_vert_dct_8x32_neon
.endr

        add             sp,  sp,  #2048
        ret             x15
endfunc

// DCT/DCT 16x32 transform, done in two passes via a 1024 byte scratch buffer
// on the stack (32 bytes per buffer row).
//   x0 = dst, x1 = dst_stride, x2 = coeff, w3 = eob
// Pass 1 calls inv_txfm_horz_scale_16x4_neon with x4 = inv_dct_4s_x16_neon
// once per group of 4, or zero-fills the rest of the buffer once eob is below
// the next eob_16x32 threshold. Pass 2 calls inv_txfm_add_vert_dct_8x32_neon
// for the two 8 column slices.
function inv_txfm_add_dct_dct_16x32_16bpc_neon, export=1
        idct_dc         16,  32,  1

        // Keep the return address in x15, the bl calls below overwrite x30
        mov             x15, x30
        sub             sp,  sp,  #1024
        movrel          x13, eob_16x32
        ldrh            w12, [x13], #2
        adr             x4,  inv_dct_4s_x16_neon

.irp i, 0, 4, 8, 12, 16, 20, 24, 28
        add             x6,  sp,  #(\i*16*2)
        add             x7,  x2,  #(\i*4)
.if \i > 0
        // w8 = number of buffer rows still to be produced; only used by the
        // zero-fill loop at 1:, x8 is overwritten again before the call
        mov             w8,  #(32 - \i)
        cmp             w3,  w12
        b.lt            1f
.if \i < 28
        ldrh            w12, [x13], #2
.endif
.endif
        mov             x8,  #4*32
        bl              inv_txfm_horz_scale_16x4_neon
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // Each iteration clears 4 buffer rows (2 * 64 bytes)
        subs            w8,  w8,  #4
.rept 2
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
.irp i, 0, 8
        add             x6,  x0,  #(\i*2)
        add             x7,  sp,  #(\i*2)
        mov             x8,  #16*2
        bl              inv_txfm_add_vert_dct_8x32_neon
.endr

        add             sp,  sp,  #1024
        ret             x15
endfunc

// DCT/DCT 32x16 transform, done in two passes via a 1024 byte scratch buffer
// on the stack (64 bytes per buffer row).
//   x0 = dst, x1 = dst_stride, x2 = coeff, w3 = eob
// Pass 1 calls inv_txfm_horz_scale_dct_32x4_neon once per group of 4, or
// zero-fills the rest of the buffer once eob is below the next eob_16x32
// threshold. Pass 2 calls inv_txfm_add_vert_8x16_neon with
// x5 = inv_dct_8h_x16_neon for each of the four 8 column slices.
function inv_txfm_add_dct_dct_32x16_16bpc_neon, export=1
        idct_dc         32,  16,  1

        // Keep the return address in x15, the bl calls below overwrite x30
        mov             x15, x30
        sub             sp,  sp,  #1024

        movrel          x13, eob_16x32
        movrel          x5,  X(inv_dct_8h_x16_neon)
        ldrh            w12, [x13], #2

.irp i, 0, 4, 8, 12
        add             x6,  sp,  #(\i*32*2)
        add             x7,  x2,  #(\i*4)
.if \i > 0
        // w8 = number of buffer rows still to be produced; only used by the
        // zero-fill loop at 1:, x8 is overwritten again before the call
        mov             w8,  #(16 - \i)
        cmp             w3,  w12
        b.lt            1f
.if \i < 12
        ldrh            w12, [x13], #2
.endif
.endif
        mov             x8,  #4*16
        bl              inv_txfm_horz_scale_dct_32x4_neon
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // Each iteration clears 4 buffer rows (4 * 64 bytes)
        subs            w8,  w8,  #4
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
.irp i, 0, 8, 16, 24
        add             x6,  x0,  #(\i*2)
        add             x7,  sp,  #(\i*2)
        mov             x8,  #32*2
        bl              inv_txfm_add_vert_8x16_neon
.endr

        add             sp,  sp,  #1024
        ret             x15
endfunc

// DCT/DCT 8x32 transform, done in two passes via a 512 byte scratch buffer on
// the stack.
//   x0 = dst, x1 = dst_stride, x2 = coeff, w3 = eob
// Pass 1 (loop 1:) loads eight vectors of four 32 bit coefficients (stride
// x8 = 4*32 bytes), clears them in the coefficient buffer, runs
// inv_dct_4s_x8_neon, narrows with a rounding shift by 2, transposes and
// stores 64 bytes to the scratch buffer. It repeats while eob is at least the
// next eob_8x32 threshold; whatever is left of the buffer (w9 != 0) is then
// zero-filled. Pass 2 is a single call to inv_txfm_add_vert_dct_8x32_neon.
function inv_txfm_add_dct_dct_8x32_16bpc_neon, export=1
        idct_dc         8,   32,  2

        // Keep the return address in x15, the bl calls below overwrite x30
        mov             x15, x30
        sub             sp,  sp,  #512

        movrel          x13, eob_8x32

        // v28 = zero, used for clearing coefficients and for the zero-fill
        movi            v28.4s,  #0
        mov             x8,  #4*32
        // w9 = remaining count, decremented by 4 per 64 byte group written
        mov             w9,  #32
        mov             x6,  sp
        mov             x7,  x2
1:
.irp i, 16, 17, 18, 19, 20, 21, 22, 23
        ld1             {v\i\().4s}, [x7]
        st1             {v28.4s}, [x7], x8
.endr
        ldrh            w12, [x13], #2
        sub             w9,  w9,  #4
        // Rewind the 8 strides just consumed, then advance by 4*4 bytes
        sub             x7,  x7,  x8, lsl #3
        add             x7,  x7,  #4*4

        bl              inv_dct_4s_x8_neon

        sqrshrn         v16.4h,  v16.4s,  #2
        sqrshrn         v17.4h,  v17.4s,  #2
        sqrshrn         v18.4h,  v18.4s,  #2
        sqrshrn         v19.4h,  v19.4s,  #2
        sqrshrn2        v16.8h,  v20.4s,  #2
        sqrshrn2        v17.8h,  v21.4s,  #2
        sqrshrn2        v18.8h,  v22.4s,  #2
        sqrshrn2        v19.8h,  v23.4s,  #2

        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5

        // The st1 does not affect the flags tested by b.ge below
        cmp             w3,  w12
        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x6], #64

        b.ge            1b
        // Nothing left to clear if the whole buffer has been written
        cbz             w9,  3f

        movi            v29.8h,  #0
        movi            v30.8h,  #0
        movi            v31.8h,  #0
2:
        subs            w9,  w9,  #4
        st1             {v28.8h,v29.8h,v30.8h,v31.8h}, [x6], #64
        b.gt            2b

3:
        mov             x6,  x0
        mov             x7,  sp
        mov             x8,  #8*2
        bl              inv_txfm_add_vert_dct_8x32_neon

        add             sp,  sp,  #512
        ret             x15
endfunc

// DCT/DCT 32x8 transform, done in two passes via a 512 byte scratch buffer on
// the stack (64 bytes per buffer row).
//   x0 = dst, x1 = dst_stride, x2 = coeff, w3 = eob
// Pass 1 calls inv_txfm_horz_dct_32x4_neon for the first group of 4 and, if
// eob >= 10, for the second one; otherwise the second half of the buffer is
// zero-filled. Pass 2 loops over the four 8 column slices: loads 8 rows of
// 8 values, runs inv_dct_8h_x8_neon and passes the result to
// load_add_store_8x8.
function inv_txfm_add_dct_dct_32x8_16bpc_neon, export=1
        idct_dc         32,  8,   2

        // Keep the return address in x15, the bl calls below overwrite x30
        mov             x15, x30
        sub             sp,  sp,  #512

.irp i, 0, 4
        add             x6,  sp,  #(\i*32*2)
        add             x7,  x2,  #(\i*4)
.if \i > 0
        cmp             w3,  #10
        b.lt            1f
.endif
        mov             x8,  #8*4
        bl              inv_txfm_horz_dct_32x4_neon
.endr
        b               2f

1:
        // Clear the second half of the scratch buffer (4 * 64 bytes)
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr

2:
        // x8 = scratch buffer row stride in bytes, w9 = column offset
        mov             x8,  #2*32
        mov             w9,  #0
1:
        add             x6,  x0,  x9, lsl #1
        add             x7,  sp,  x9, lsl #1 // #(\i*2)

.irp i, 16, 17, 18, 19, 20, 21, 22, 23
        ld1             {v\i\().8h}, [x7], x8
.endr
        add             w9,  w9,  #8

        bl              X(inv_dct_8h_x8_neon)

        // The flags set here are consumed by the b.lt after the macro below,
        // so load_add_store_8x8 must leave the condition flags untouched.
        cmp             w9,  #32

        load_add_store_8x8 x6, x7

        b.lt            1b

        add             sp,  sp,  #512
        ret             x15
endfunc

function inv_dct64_step1_neon
        // in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a
        // in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a
        // in5/27/21/11 ->
t40a/41/42a/43/52/53a/54/55a 2901*c0909341SAndroid Build Coastguard Worker // in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a 2902*c0909341SAndroid Build Coastguard Worker 2903*c0909341SAndroid Build Coastguard Worker ld1 {v0.4s, v1.4s}, [x17], #32 2904*c0909341SAndroid Build Coastguard Worker 2905*c0909341SAndroid Build Coastguard Worker sqrdmulh v23.4s, v16.4s, v0.s[1] // t63a 2906*c0909341SAndroid Build Coastguard Worker sqrdmulh v16.4s, v16.4s, v0.s[0] // t32a 2907*c0909341SAndroid Build Coastguard Worker sqrdmulh v22.4s, v17.4s, v0.s[2] // t62a 2908*c0909341SAndroid Build Coastguard Worker sqrdmulh v17.4s, v17.4s, v0.s[3] // t33a 2909*c0909341SAndroid Build Coastguard Worker sqrdmulh v21.4s, v18.4s, v1.s[1] // t61a 2910*c0909341SAndroid Build Coastguard Worker sqrdmulh v18.4s, v18.4s, v1.s[0] // t34a 2911*c0909341SAndroid Build Coastguard Worker sqrdmulh v20.4s, v19.4s, v1.s[2] // t60a 2912*c0909341SAndroid Build Coastguard Worker sqrdmulh v19.4s, v19.4s, v1.s[3] // t35a 2913*c0909341SAndroid Build Coastguard Worker 2914*c0909341SAndroid Build Coastguard Worker ld1 {v0.4s}, [x17], #16 2915*c0909341SAndroid Build Coastguard Worker 2916*c0909341SAndroid Build Coastguard Worker sqadd v24.4s, v16.4s, v17.4s // t32 2917*c0909341SAndroid Build Coastguard Worker sqsub v25.4s, v16.4s, v17.4s // t33 2918*c0909341SAndroid Build Coastguard Worker sqsub v26.4s, v19.4s, v18.4s // t34 2919*c0909341SAndroid Build Coastguard Worker sqadd v27.4s, v19.4s, v18.4s // t35 2920*c0909341SAndroid Build Coastguard Worker sqadd v28.4s, v20.4s, v21.4s // t60 2921*c0909341SAndroid Build Coastguard Worker sqsub v29.4s, v20.4s, v21.4s // t61 2922*c0909341SAndroid Build Coastguard Worker sqsub v30.4s, v23.4s, v22.4s // t62 2923*c0909341SAndroid Build Coastguard Worker sqadd v31.4s, v23.4s, v22.4s // t63 2924*c0909341SAndroid Build Coastguard Worker 2925*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31 2926*c0909341SAndroid Build Coastguard Worker smin_4s 
\r, \r, v5 2927*c0909341SAndroid Build Coastguard Worker.endr 2928*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31 2929*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v4 2930*c0909341SAndroid Build Coastguard Worker.endr 2931*c0909341SAndroid Build Coastguard Worker 2932*c0909341SAndroid Build Coastguard Worker mul_mla v2, v29, v26, v0.s[0], v0.s[1] // -> t34a 2933*c0909341SAndroid Build Coastguard Worker mul_mls v7, v29, v26, v0.s[1], v0.s[0] // -> t61a 2934*c0909341SAndroid Build Coastguard Worker neg v2.4s, v2.4s // t34a 2935*c0909341SAndroid Build Coastguard Worker mul_mls v6, v30, v25, v0.s[1], v0.s[0] // -> t33a 2936*c0909341SAndroid Build Coastguard Worker srshr v26.4s, v2.4s, #12 // t34a 2937*c0909341SAndroid Build Coastguard Worker mul_mla v2, v30, v25, v0.s[0], v0.s[1] // -> t62a 2938*c0909341SAndroid Build Coastguard Worker srshr v29.4s, v7.4s, #12 // t61a 2939*c0909341SAndroid Build Coastguard Worker srshr v25.4s, v6.4s, #12 // t33a 2940*c0909341SAndroid Build Coastguard Worker srshr v30.4s, v2.4s, #12 // t62a 2941*c0909341SAndroid Build Coastguard Worker 2942*c0909341SAndroid Build Coastguard Worker sqadd v16.4s, v24.4s, v27.4s // t32a 2943*c0909341SAndroid Build Coastguard Worker sqsub v19.4s, v24.4s, v27.4s // t35a 2944*c0909341SAndroid Build Coastguard Worker sqadd v17.4s, v25.4s, v26.4s // t33 2945*c0909341SAndroid Build Coastguard Worker sqsub v18.4s, v25.4s, v26.4s // t34 2946*c0909341SAndroid Build Coastguard Worker sqsub v20.4s, v31.4s, v28.4s // t60a 2947*c0909341SAndroid Build Coastguard Worker sqadd v23.4s, v31.4s, v28.4s // t63a 2948*c0909341SAndroid Build Coastguard Worker sqsub v21.4s, v30.4s, v29.4s // t61 2949*c0909341SAndroid Build Coastguard Worker sqadd v22.4s, v30.4s, v29.4s // t62 2950*c0909341SAndroid Build Coastguard Worker 2951*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22 2952*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v5 
2953*c0909341SAndroid Build Coastguard Worker.endr 2954*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22 2955*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v4 2956*c0909341SAndroid Build Coastguard Worker.endr 2957*c0909341SAndroid Build Coastguard Worker 2958*c0909341SAndroid Build Coastguard Worker mul_mla v2, v21, v18, v0.s[2], v0.s[3] // -> t61a 2959*c0909341SAndroid Build Coastguard Worker mul_mls v7, v21, v18, v0.s[3], v0.s[2] // -> t34a 2960*c0909341SAndroid Build Coastguard Worker mul_mla v6, v20, v19, v0.s[2], v0.s[3] // -> t60 2961*c0909341SAndroid Build Coastguard Worker srshr v21.4s, v2.4s, #12 // t61a 2962*c0909341SAndroid Build Coastguard Worker srshr v18.4s, v7.4s, #12 // t34a 2963*c0909341SAndroid Build Coastguard Worker mul_mls v2, v20, v19, v0.s[3], v0.s[2] // -> t35 2964*c0909341SAndroid Build Coastguard Worker srshr v20.4s, v6.4s, #12 // t60 2965*c0909341SAndroid Build Coastguard Worker srshr v19.4s, v2.4s, #12 // t35 2966*c0909341SAndroid Build Coastguard Worker 2967*c0909341SAndroid Build Coastguard Worker st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x6], #64 2968*c0909341SAndroid Build Coastguard Worker st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x6], #64 2969*c0909341SAndroid Build Coastguard Worker 2970*c0909341SAndroid Build Coastguard Worker ret 2971*c0909341SAndroid Build Coastguard Workerendfunc 2972*c0909341SAndroid Build Coastguard Worker 2973*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step2_neon 2974*c0909341SAndroid Build Coastguard Worker movrel x16, idct_coeffs 2975*c0909341SAndroid Build Coastguard Worker ld1 {v0.4s}, [x16] 2976*c0909341SAndroid Build Coastguard Worker1: 2977*c0909341SAndroid Build Coastguard Worker // t32a/33/34a/35/60/61a/62/63a 2978*c0909341SAndroid Build Coastguard Worker // t56a/57/58a/59/36/37a/38/39a 2979*c0909341SAndroid Build Coastguard Worker // t40a/41/42a/43/52/53a/54/55a 2980*c0909341SAndroid Build Coastguard Worker // t48a/49/50a/51/44/45a/46/47a 
2981*c0909341SAndroid Build Coastguard Worker ldr q16, [x6, #4*4*0] // t32a 2982*c0909341SAndroid Build Coastguard Worker ldr q17, [x9, #4*4*8] // t39a 2983*c0909341SAndroid Build Coastguard Worker ldr q18, [x9, #4*4*0] // t63a 2984*c0909341SAndroid Build Coastguard Worker ldr q19, [x6, #4*4*8] // t56a 2985*c0909341SAndroid Build Coastguard Worker ldr q20, [x6, #4*4*16] // t40a 2986*c0909341SAndroid Build Coastguard Worker ldr q21, [x9, #4*4*24] // t47a 2987*c0909341SAndroid Build Coastguard Worker ldr q22, [x9, #4*4*16] // t55a 2988*c0909341SAndroid Build Coastguard Worker ldr q23, [x6, #4*4*24] // t48a 2989*c0909341SAndroid Build Coastguard Worker 2990*c0909341SAndroid Build Coastguard Worker sqadd v24.4s, v16.4s, v17.4s // t32 2991*c0909341SAndroid Build Coastguard Worker sqsub v25.4s, v16.4s, v17.4s // t39 2992*c0909341SAndroid Build Coastguard Worker sqadd v26.4s, v18.4s, v19.4s // t63 2993*c0909341SAndroid Build Coastguard Worker sqsub v27.4s, v18.4s, v19.4s // t56 2994*c0909341SAndroid Build Coastguard Worker sqsub v28.4s, v21.4s, v20.4s // t40 2995*c0909341SAndroid Build Coastguard Worker sqadd v29.4s, v21.4s, v20.4s // t47 2996*c0909341SAndroid Build Coastguard Worker sqadd v30.4s, v23.4s, v22.4s // t48 2997*c0909341SAndroid Build Coastguard Worker sqsub v31.4s, v23.4s, v22.4s // t55 2998*c0909341SAndroid Build Coastguard Worker 2999*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31 3000*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v5 3001*c0909341SAndroid Build Coastguard Worker.endr 3002*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31 3003*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v4 3004*c0909341SAndroid Build Coastguard Worker.endr 3005*c0909341SAndroid Build Coastguard Worker 3006*c0909341SAndroid Build Coastguard Worker mul_mla v2, v27, v25, v0.s[3], v0.s[2] // -> t56a 3007*c0909341SAndroid Build Coastguard Worker mul_mls v7, v27, v25, v0.s[2], 
v0.s[3] // -> t39a 3008*c0909341SAndroid Build Coastguard Worker mul_mla v6, v31, v28, v0.s[3], v0.s[2] // -> t40a 3009*c0909341SAndroid Build Coastguard Worker srshr v25.4s, v2.4s, #12 // t56a 3010*c0909341SAndroid Build Coastguard Worker srshr v27.4s, v7.4s, #12 // t39a 3011*c0909341SAndroid Build Coastguard Worker neg v6.4s, v6.4s // t40a 3012*c0909341SAndroid Build Coastguard Worker mul_mls v2, v31, v28, v0.s[2], v0.s[3] // -> t55a 3013*c0909341SAndroid Build Coastguard Worker srshr v31.4s, v6.4s, #12 // t40a 3014*c0909341SAndroid Build Coastguard Worker srshr v28.4s, v2.4s, #12 // t55a 3015*c0909341SAndroid Build Coastguard Worker 3016*c0909341SAndroid Build Coastguard Worker sqadd v16.4s, v24.4s, v29.4s // t32a 3017*c0909341SAndroid Build Coastguard Worker sqsub v19.4s, v24.4s, v29.4s // t47a 3018*c0909341SAndroid Build Coastguard Worker sqadd v17.4s, v27.4s, v31.4s // t39 3019*c0909341SAndroid Build Coastguard Worker sqsub v18.4s, v27.4s, v31.4s // t40 3020*c0909341SAndroid Build Coastguard Worker sqsub v20.4s, v26.4s, v30.4s // t48a 3021*c0909341SAndroid Build Coastguard Worker sqadd v23.4s, v26.4s, v30.4s // t63a 3022*c0909341SAndroid Build Coastguard Worker sqsub v21.4s, v25.4s, v28.4s // t55 3023*c0909341SAndroid Build Coastguard Worker sqadd v22.4s, v25.4s, v28.4s // t56 3024*c0909341SAndroid Build Coastguard Worker 3025*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22 3026*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v5 3027*c0909341SAndroid Build Coastguard Worker.endr 3028*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22 3029*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v4 3030*c0909341SAndroid Build Coastguard Worker.endr 3031*c0909341SAndroid Build Coastguard Worker 3032*c0909341SAndroid Build Coastguard Worker mul_mls v2, v21, v18, v0.s[0], v0.s[0] // -> t40a 3033*c0909341SAndroid Build Coastguard Worker mul_mla v7, v21, v18, v0.s[0], v0.s[0] // -> 
t55a 3034*c0909341SAndroid Build Coastguard Worker mul_mls v6, v20, v19, v0.s[0], v0.s[0] // -> t47 3035*c0909341SAndroid Build Coastguard Worker srshr v18.4s, v2.4s, #12 // t40a 3036*c0909341SAndroid Build Coastguard Worker srshr v21.4s, v7.4s, #12 // t55a 3037*c0909341SAndroid Build Coastguard Worker mul_mla v2, v20, v19, v0.s[0], v0.s[0] // -> t48 3038*c0909341SAndroid Build Coastguard Worker srshr v19.4s, v6.4s, #12 // t47 3039*c0909341SAndroid Build Coastguard Worker srshr v20.4s, v2.4s, #12 // t48 3040*c0909341SAndroid Build Coastguard Worker 3041*c0909341SAndroid Build Coastguard Worker str q16, [x6, #4*4*0] // t32a 3042*c0909341SAndroid Build Coastguard Worker str q17, [x9, #4*4*0] // t39 3043*c0909341SAndroid Build Coastguard Worker str q18, [x6, #4*4*8] // t40a 3044*c0909341SAndroid Build Coastguard Worker str q19, [x9, #4*4*8] // t47 3045*c0909341SAndroid Build Coastguard Worker str q20, [x6, #4*4*16] // t48 3046*c0909341SAndroid Build Coastguard Worker str q21, [x9, #4*4*16] // t55a 3047*c0909341SAndroid Build Coastguard Worker str q22, [x6, #4*4*24] // t56 3048*c0909341SAndroid Build Coastguard Worker str q23, [x9, #4*4*24] // t63a 3049*c0909341SAndroid Build Coastguard Worker 3050*c0909341SAndroid Build Coastguard Worker add x6, x6, #4*4 3051*c0909341SAndroid Build Coastguard Worker sub x9, x9, #4*4 3052*c0909341SAndroid Build Coastguard Worker cmp x6, x9 3053*c0909341SAndroid Build Coastguard Worker b.lt 1b 3054*c0909341SAndroid Build Coastguard Worker ret 3055*c0909341SAndroid Build Coastguard Workerendfunc 3056*c0909341SAndroid Build Coastguard Worker 3057*c0909341SAndroid Build Coastguard Worker.macro load8 src, strd, zero, clear 3058*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s 3059*c0909341SAndroid Build Coastguard Worker.if \clear 3060*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [\src] 3061*c0909341SAndroid Build Coastguard Worker st1 {\zero}, [\src], \strd 
3062*c0909341SAndroid Build Coastguard Worker.else 3063*c0909341SAndroid Build Coastguard Worker ld1 {\i}, [\src], \strd 3064*c0909341SAndroid Build Coastguard Worker.endif 3065*c0909341SAndroid Build Coastguard Worker.endr 3066*c0909341SAndroid Build Coastguard Worker.endm 3067*c0909341SAndroid Build Coastguard Worker 3068*c0909341SAndroid Build Coastguard Worker.macro store16 dst 3069*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s 3070*c0909341SAndroid Build Coastguard Worker st1 {\i}, [\dst], #16 3071*c0909341SAndroid Build Coastguard Worker.endr 3072*c0909341SAndroid Build Coastguard Worker.endm 3073*c0909341SAndroid Build Coastguard Worker 3074*c0909341SAndroid Build Coastguard Worker.macro clear_upper8 3075*c0909341SAndroid Build Coastguard Worker.irp i, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s 3076*c0909341SAndroid Build Coastguard Worker movi \i, #0 3077*c0909341SAndroid Build Coastguard Worker.endr 3078*c0909341SAndroid Build Coastguard Worker.endm 3079*c0909341SAndroid Build Coastguard Worker 3080*c0909341SAndroid Build Coastguard Worker.macro movi_if reg, val, cond 3081*c0909341SAndroid Build Coastguard Worker.if \cond 3082*c0909341SAndroid Build Coastguard Worker movi \reg, \val 3083*c0909341SAndroid Build Coastguard Worker.endif 3084*c0909341SAndroid Build Coastguard Worker.endm 3085*c0909341SAndroid Build Coastguard Worker 3086*c0909341SAndroid Build Coastguard Worker.macro movz16dup_if reg, gpr, val, cond 3087*c0909341SAndroid Build Coastguard Worker.if \cond 3088*c0909341SAndroid Build Coastguard Worker movz \gpr, \val, lsl #16 3089*c0909341SAndroid Build Coastguard Worker dup \reg, \gpr 3090*c0909341SAndroid Build Coastguard Worker.endif 3091*c0909341SAndroid Build Coastguard Worker.endm 3092*c0909341SAndroid Build Coastguard Worker 3093*c0909341SAndroid Build Coastguard Worker.macro st1_if regs, dst, 
cond 3094*c0909341SAndroid Build Coastguard Worker.if \cond 3095*c0909341SAndroid Build Coastguard Worker st1 \regs, \dst 3096*c0909341SAndroid Build Coastguard Worker.endif 3097*c0909341SAndroid Build Coastguard Worker.endm 3098*c0909341SAndroid Build Coastguard Worker 3099*c0909341SAndroid Build Coastguard Worker.macro str_if reg, dst, cond 3100*c0909341SAndroid Build Coastguard Worker.if \cond 3101*c0909341SAndroid Build Coastguard Worker str \reg, \dst 3102*c0909341SAndroid Build Coastguard Worker.endif 3103*c0909341SAndroid Build Coastguard Worker.endm 3104*c0909341SAndroid Build Coastguard Worker 3105*c0909341SAndroid Build Coastguard Worker.macro stroff_if reg, dst, dstoff, cond 3106*c0909341SAndroid Build Coastguard Worker.if \cond 3107*c0909341SAndroid Build Coastguard Worker str \reg, \dst, \dstoff 3108*c0909341SAndroid Build Coastguard Worker.endif 3109*c0909341SAndroid Build Coastguard Worker.endm 3110*c0909341SAndroid Build Coastguard Worker 3111*c0909341SAndroid Build Coastguard Worker.macro scale_if cond, c, r0, r1, r2, r3, r4, r5, r6, r7 3112*c0909341SAndroid Build Coastguard Worker.if \cond 3113*c0909341SAndroid Build Coastguard Worker scale_input .4s, \c, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7 3114*c0909341SAndroid Build Coastguard Worker.endif 3115*c0909341SAndroid Build Coastguard Worker.endm 3116*c0909341SAndroid Build Coastguard Worker 3117*c0909341SAndroid Build Coastguard Worker.macro def_dct64_func suffix, clear=0, scale=0 3118*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_dct\suffix\()_4s_x64_neon 3119*c0909341SAndroid Build Coastguard Worker mov x14, x30 3120*c0909341SAndroid Build Coastguard Worker mov x6, sp 3121*c0909341SAndroid Build Coastguard Worker lsl x8, x8, #2 3122*c0909341SAndroid Build Coastguard Worker 3123*c0909341SAndroid Build Coastguard Worker movz16dup_if v0.2s, w16, #2896*8, \scale 3124*c0909341SAndroid Build Coastguard Worker movi_if v7.4s, #0, \clear 3125*c0909341SAndroid Build Coastguard Worker load8 x7, 
x8, v7.4s, \clear 3126*c0909341SAndroid Build Coastguard Worker clear_upper8 3127*c0909341SAndroid Build Coastguard Worker sub x7, x7, x8, lsl #3 3128*c0909341SAndroid Build Coastguard Worker add x7, x7, x8, lsr #1 3129*c0909341SAndroid Build Coastguard Worker scale_if \scale, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 3130*c0909341SAndroid Build Coastguard Worker 3131*c0909341SAndroid Build Coastguard Worker bl inv_dct_4s_x16_neon 3132*c0909341SAndroid Build Coastguard Worker 3133*c0909341SAndroid Build Coastguard Worker // idct_16 leaves the row_clip_max/min constants in v5 and v4 3134*c0909341SAndroid Build Coastguard Worker.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31 3135*c0909341SAndroid Build Coastguard Worker smin_4s \r, \r, v5 3136*c0909341SAndroid Build Coastguard Worker.endr 3137*c0909341SAndroid Build Coastguard Worker.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31 3138*c0909341SAndroid Build Coastguard Worker smax_4s \r, \r, v4 3139*c0909341SAndroid Build Coastguard Worker.endr 3140*c0909341SAndroid Build Coastguard Worker 3141*c0909341SAndroid Build Coastguard Worker store16 x6 3142*c0909341SAndroid Build Coastguard Worker 3143*c0909341SAndroid Build Coastguard Worker movz16dup_if v0.2s, w16, #2896*8, \scale 3144*c0909341SAndroid Build Coastguard Worker movi_if v7.8h, #0, \clear 3145*c0909341SAndroid Build Coastguard Worker load8 x7, x8, v7.4s, \clear 3146*c0909341SAndroid Build Coastguard Worker clear_upper8 3147*c0909341SAndroid Build Coastguard Worker sub x7, x7, x8, lsl #3 3148*c0909341SAndroid Build Coastguard Worker lsr x8, x8, #1 3149*c0909341SAndroid Build Coastguard Worker sub x7, x7, x8, lsr #1 3150*c0909341SAndroid Build Coastguard Worker scale_if \scale, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23 3151*c0909341SAndroid Build Coastguard Worker 3152*c0909341SAndroid Build Coastguard Worker bl inv_dct32_odd_4s_x16_neon 3153*c0909341SAndroid Build 
Coastguard Worker 3154*c0909341SAndroid Build Coastguard Worker add x10, x6, #16*15 3155*c0909341SAndroid Build Coastguard Worker sub x6, x6, #16*16 3156*c0909341SAndroid Build Coastguard Worker 3157*c0909341SAndroid Build Coastguard Worker mov x9, #-16 3158*c0909341SAndroid Build Coastguard Worker 3159*c0909341SAndroid Build Coastguard Worker movi v1.4s, #1, msl #16 // row_clip_max = ~(~bdmax << 7), 0x1ffff 3160*c0909341SAndroid Build Coastguard Worker mvni v0.4s, #1, msl #16 // row_clip_min = (~bdmax << 7), 0xfffe0000 3161*c0909341SAndroid Build Coastguard Worker 3162*c0909341SAndroid Build Coastguard Worker.macro store_addsub r0, r1, r2, r3 3163*c0909341SAndroid Build Coastguard Worker ld1 {v2.4s}, [x6], #16 3164*c0909341SAndroid Build Coastguard Worker ld1 {v3.4s}, [x6], #16 3165*c0909341SAndroid Build Coastguard Worker sqadd v6.4s, v2.4s, \r0 3166*c0909341SAndroid Build Coastguard Worker sqsub \r0, v2.4s, \r0 3167*c0909341SAndroid Build Coastguard Worker ld1 {v4.4s}, [x6], #16 3168*c0909341SAndroid Build Coastguard Worker sqadd v7.4s, v3.4s, \r1 3169*c0909341SAndroid Build Coastguard Worker sqsub \r1, v3.4s, \r1 3170*c0909341SAndroid Build Coastguard Worker smin v6.4s, v6.4s, v1.4s 3171*c0909341SAndroid Build Coastguard Worker smin \r0, \r0, v1.4s 3172*c0909341SAndroid Build Coastguard Worker ld1 {v5.4s}, [x6], #16 3173*c0909341SAndroid Build Coastguard Worker sqadd v2.4s, v4.4s, \r2 3174*c0909341SAndroid Build Coastguard Worker sub x6, x6, #16*4 3175*c0909341SAndroid Build Coastguard Worker smax v6.4s, v6.4s, v0.4s 3176*c0909341SAndroid Build Coastguard Worker smax \r0, \r0, v0.4s 3177*c0909341SAndroid Build Coastguard Worker sqsub \r2, v4.4s, \r2 3178*c0909341SAndroid Build Coastguard Worker smin v7.4s, v7.4s, v1.4s 3179*c0909341SAndroid Build Coastguard Worker smin \r1, \r1, v1.4s 3180*c0909341SAndroid Build Coastguard Worker st1 {v6.4s}, [x6], #16 3181*c0909341SAndroid Build Coastguard Worker st1 {\r0}, [x10], x9 3182*c0909341SAndroid Build Coastguard 
Worker smin v2.4s, v2.4s, v1.4s 3183*c0909341SAndroid Build Coastguard Worker smin \r2, \r2, v1.4s 3184*c0909341SAndroid Build Coastguard Worker smax v7.4s, v7.4s, v0.4s 3185*c0909341SAndroid Build Coastguard Worker smax \r1, \r1, v0.4s 3186*c0909341SAndroid Build Coastguard Worker sqadd v3.4s, v5.4s, \r3 3187*c0909341SAndroid Build Coastguard Worker sqsub \r3, v5.4s, \r3 3188*c0909341SAndroid Build Coastguard Worker smax v2.4s, v2.4s, v0.4s 3189*c0909341SAndroid Build Coastguard Worker smax \r2, \r2, v0.4s 3190*c0909341SAndroid Build Coastguard Worker smin v3.4s, v3.4s, v1.4s 3191*c0909341SAndroid Build Coastguard Worker smin \r3, \r3, v1.4s 3192*c0909341SAndroid Build Coastguard Worker st1 {v7.4s}, [x6], #16 3193*c0909341SAndroid Build Coastguard Worker st1 {\r1}, [x10], x9 3194*c0909341SAndroid Build Coastguard Worker smax v3.4s, v3.4s, v0.4s 3195*c0909341SAndroid Build Coastguard Worker smax \r3, \r3, v0.4s 3196*c0909341SAndroid Build Coastguard Worker st1 {v2.4s}, [x6], #16 3197*c0909341SAndroid Build Coastguard Worker st1 {\r2}, [x10], x9 3198*c0909341SAndroid Build Coastguard Worker st1 {v3.4s}, [x6], #16 3199*c0909341SAndroid Build Coastguard Worker st1 {\r3}, [x10], x9 3200*c0909341SAndroid Build Coastguard Worker.endm 3201*c0909341SAndroid Build Coastguard Worker store_addsub v31.4s, v30.4s, v29.4s, v28.4s 3202*c0909341SAndroid Build Coastguard Worker store_addsub v27.4s, v26.4s, v25.4s, v24.4s 3203*c0909341SAndroid Build Coastguard Worker store_addsub v23.4s, v22.4s, v21.4s, v20.4s 3204*c0909341SAndroid Build Coastguard Worker store_addsub v19.4s, v18.4s, v17.4s, v16.4s 3205*c0909341SAndroid Build Coastguard Worker.purgem store_addsub 3206*c0909341SAndroid Build Coastguard Worker 3207*c0909341SAndroid Build Coastguard Worker add x6, x6, #4*4*16 3208*c0909341SAndroid Build Coastguard Worker 3209*c0909341SAndroid Build Coastguard Worker movrel x17, idct64_coeffs 3210*c0909341SAndroid Build Coastguard Worker movi v5.4s, #1, msl #16 // row_clip_max = 
~(~bdmax << 7), 0x1ffff 3211*c0909341SAndroid Build Coastguard Worker mvni v4.4s, #1, msl #16 // row_clip_min = (~bdmax << 7), 0xfffe0000 3212*c0909341SAndroid Build Coastguard Worker movz16dup_if v0.2s, w16, #2896*8, \scale 3213*c0909341SAndroid Build Coastguard Worker movi_if v7.4s, #0, \clear 3214*c0909341SAndroid Build Coastguard Worker add x9, x7, x8, lsl #4 // offset 16 3215*c0909341SAndroid Build Coastguard Worker add x10, x7, x8, lsl #3 // offset 8 3216*c0909341SAndroid Build Coastguard Worker sub x9, x9, x8 // offset 15 3217*c0909341SAndroid Build Coastguard Worker sub x11, x10, x8 // offset 7 3218*c0909341SAndroid Build Coastguard Worker ld1 {v16.4s}, [x7] // in1 (offset 0) 3219*c0909341SAndroid Build Coastguard Worker ld1 {v17.4s}, [x9] // in31 (offset 15) 3220*c0909341SAndroid Build Coastguard Worker ld1 {v18.4s}, [x10] // in17 (offset 8) 3221*c0909341SAndroid Build Coastguard Worker ld1 {v19.4s}, [x11] // in15 (offset 7) 3222*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x7], \clear 3223*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x9], \clear 3224*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x10], \clear 3225*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x11], \clear 3226*c0909341SAndroid Build Coastguard Worker scale_if \scale, v0.s[0], v16, v17, v18, v19 3227*c0909341SAndroid Build Coastguard Worker bl inv_dct64_step1_neon 3228*c0909341SAndroid Build Coastguard Worker movz16dup_if v0.2s, w16, #2896*8, \scale 3229*c0909341SAndroid Build Coastguard Worker movi_if v7.4s, #0, \clear 3230*c0909341SAndroid Build Coastguard Worker add x7, x7, x8, lsl #2 // offset 4 3231*c0909341SAndroid Build Coastguard Worker sub x9, x9, x8, lsl #2 // offset 11 3232*c0909341SAndroid Build Coastguard Worker sub x10, x7, x8 // offset 3 3233*c0909341SAndroid Build Coastguard Worker add x11, x9, x8 // offset 12 3234*c0909341SAndroid Build Coastguard Worker ld1 {v16.4s}, [x10] // in7 (offset 3) 3235*c0909341SAndroid Build 
Coastguard Worker ld1 {v17.4s}, [x11] // in25 (offset 12) 3236*c0909341SAndroid Build Coastguard Worker ld1 {v18.4s}, [x9] // in23 (offset 11) 3237*c0909341SAndroid Build Coastguard Worker ld1 {v19.4s}, [x7] // in9 (offset 4) 3238*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x7], \clear 3239*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x9], \clear 3240*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x10], \clear 3241*c0909341SAndroid Build Coastguard Worker st1_if {v7.4s}, [x11], \clear 3242*c0909341SAndroid Build Coastguard Worker scale_if \scale, v0.s[0], v16, v17, v18, v19 3243*c0909341SAndroid Build Coastguard Worker bl inv_dct64_step1_neon 3244*c0909341SAndroid Build Coastguard Worker movz16dup_if v0.2s, w16, #2896*8, \scale 3245*c0909341SAndroid Build Coastguard Worker movi_if v7.4s, #0, \clear 3246*c0909341SAndroid Build Coastguard Worker sub x10, x10, x8, lsl #1 // offset 1 3247*c0909341SAndroid Build Coastguard Worker sub x9, x9, x8, lsl #1 // offset 9 3248*c0909341SAndroid Build Coastguard Worker add x7, x7, x8 // offset 5 3249*c0909341SAndroid Build Coastguard Worker add x11, x11, x8 // offset 13 3250*c0909341SAndroid Build Coastguard Worker ldr q16, [x10, x8] // in5 (offset 2) 3251*c0909341SAndroid Build Coastguard Worker ldr q17, [x11] // in27 (offset 13) 3252*c0909341SAndroid Build Coastguard Worker ldr q18, [x9, x8] // in21 (offset 10) 3253*c0909341SAndroid Build Coastguard Worker ldr q19, [x7] // in11 (offset 5) 3254*c0909341SAndroid Build Coastguard Worker stroff_if q7, [x10, x8], \clear 3255*c0909341SAndroid Build Coastguard Worker str_if q7, [x11], \clear 3256*c0909341SAndroid Build Coastguard Worker stroff_if q7, [x9, x8], \clear 3257*c0909341SAndroid Build Coastguard Worker str_if q7, [x7], \clear 3258*c0909341SAndroid Build Coastguard Worker scale_if \scale, v0.s[0], v16, v17, v18, v19 3259*c0909341SAndroid Build Coastguard Worker bl inv_dct64_step1_neon 3260*c0909341SAndroid Build Coastguard Worker 
// Tail of the def_dct64_func macro body. The macro header and the earlier
// part of the generated function are above this point in the file.
// NOTE(review): the scale/clear macro arguments are assumed to be the
// def_dct64_func parameters used by the invocations below - confirm
// against the macro header.
        movz16dup_if    v0.2s,  w16, #2896*8, \scale
        movi_if         v7.4s,  #0,  \clear
        ldr             q16, [x10]     // in3 (offset 1)
        ldr             q17, [x11, x8] // in29 (offset 14)
        ldr             q18, [x9]      // in19 (offset 9)
        ldr             q19, [x7, x8]  // in13 (offset 6)
        str_if          q7,  [x10],     \clear
        stroff_if       q7,  [x11, x8], \clear
        str_if          q7,  [x9],      \clear
        stroff_if       q7,  [x7, x8],  \clear
        scale_if        \scale, v0.s[0], v16, v17, v18, v19
        bl              inv_dct64_step1_neon

        sub             x6,  x6,  #4*4*32
        add             x9,  x6,  #4*4*7

        bl              inv_dct64_step2_neon

        ret             x14
endfunc
.endm

def_dct64_func _clear, clear=1
def_dct64_func _clear_scale, clear=1, scale=1


// Horizontal output stage for a 64-wide transform, four rows at a time.
//
// Inputs:
//   sp   32-bit intermediate values; read forwards from sp and backwards
//        from sp + 4*4*(64 - 4)
//   x6   destination for the 16-bit output
//   w12  per-lane shift amount handed to srshl (callers in this file pass
//        a negative value, i.e. a rounding right shift)
//   x30  return address (kept in x14 for the duration of the function)
//
// Each loop iteration loads eight vectors from the front of the buffer and
// eight from the back, transposes them as 4x4 blocks, forms saturating sums
// and differences of front/back pairs, applies the rounding shift, narrows
// to 16 bit with saturation and stores the sums through x6 and the
// element-reversed differences through x9 (which starts at x6 + 2*56).
// Writes x6-x11, v0-v5, v7 and v16-v31.
function inv_txfm_horz_dct_64x4_neon
        mov             x14, x30

        mov             x7,  sp                    // forward read pointer
        add             x8,  sp,  #4*4*(64 - 4)    // backward read pointer
        add             x9,  x6,  #2*56            // destination for the differences
        mov             x10, #2*64                 // output row stride in bytes
        mov             x11, #-4*4*4               // post-index step for x8

        dup             v7.4s,  w12                // broadcast the shift amount
1:
        ld1             {v16.4s, v17.4s, v18.4s, v19.4s}, [x7], #64
        ld1             {v28.4s, v29.4s, v30.4s, v31.4s}, [x8], x11
        ld1             {v20.4s, v21.4s, v22.4s, v23.4s}, [x7], #64
        ld1             {v24.4s, v25.4s, v26.4s, v27.4s}, [x8], x11
        transpose_4x4s  v16, v17, v18, v19, v2,  v3,  v4,  v5
        transpose_4x4s  v20, v21, v22, v23, v2,  v3,  v4,  v5
        transpose_4x4s  v31, v30, v29, v28, v2,  v3,  v4,  v5
        transpose_4x4s  v27, v26, v25, v24, v2,  v3,  v4,  v5

// One output row: v0 receives the two narrowed sums, v3 the two narrowed
// differences with their 16-bit elements reversed within each 64-bit half.
.macro store_addsub src0, src1, src2, src3
        sqsub           v1.4s,   \src0,   \src1
        sqadd           v0.4s,   \src0,   \src1
        sqsub           v3.4s,   \src2,   \src3
        srshl           v1.4s,   v1.4s,   v7.4s
        sqadd           v2.4s,   \src2,   \src3
        srshl           v3.4s,   v3.4s,   v7.4s
        srshl           v0.4s,   v0.4s,   v7.4s
        srshl           v2.4s,   v2.4s,   v7.4s
        sqxtn           v3.4h,   v3.4s
        sqxtn2          v3.8h,   v1.4s
        sqxtn           v0.4h,   v0.4s
        sqxtn2          v0.8h,   v2.4s
        rev64           v3.8h,   v3.8h
        st1             {v0.8h},  [x6], x10
        st1             {v3.8h},  [x9], x10
.endm
        store_addsub    v16.4s,  v31.4s,  v20.4s,  v27.4s
        store_addsub    v17.4s,  v30.4s,  v21.4s,  v26.4s
        store_addsub    v18.4s,  v29.4s,  v22.4s,  v25.4s
        store_addsub    v19.4s,  v28.4s,  v23.4s,  v24.4s
.purgem store_addsub
        // Rewind the four row strides applied by the stores, then step
        // x6 forwards and x9 backwards by 16 bytes within the rows.
        sub             x6,  x6,  x10, lsl #2
        sub             x9,  x9,  x10, lsl #2
        add             x6,  x6,  #16
        sub             x9,  x9,  #16

        cmp             x7,  x8
        b.lt            1b
        ret             x14
endfunc

// Vertical output stage for an 8-wide, 64-tall column: combine and add to
// the destination.
//
// Inputs:
//   sp   16-bit intermediate values; read forwards from sp and backwards
//        from sp + 2*8*(64 - 4)
//   x6   destination pointer (first row of the column)
//   x1   destination stride in bytes
//   x30  return address (kept in x14 for the duration of the function)
//
// The incoming value of x8 is not used; x8 is set up below as the backward
// read pointer. Each add_dest_addsub expansion updates two rows walking
// down from x6 and two rows walking up from x9 (x6 + 63*x1): saturating
// sum/difference, rounding shift right by 4, unsigned saturating
// accumulate onto the loaded pixels, clamp to 0x3ff, store.
// Writes x6-x11, v0-v5, v7 and v16-v31.
function inv_txfm_add_vert_dct_8x64_neon
        mov             x14, x30

        mov             x7,  sp                    // forward read pointer
        add             x8,  sp,  #2*8*(64 - 4)    // backward read pointer
        add             x9,  x6,  x1, lsl #6
        sub             x9,  x9,  x1               // x9 = x6 + 63*x1
        neg             x10, x1                    // stride for walking upwards
        mov             x11, #-2*8*4               // post-index step for x8

        // Upper clamp value. Nothing inside the loop writes v7, so it is
        // materialised once here rather than on every iteration.
        mvni            v7.8h,   #0xfc, lsl #8 // 0x3ff
1:
        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x7], #64
        ld1             {v28.8h, v29.8h, v30.8h, v31.8h}, [x8], x11
        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x7], #64
        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x8], x11

.macro add_dest_addsub src0, src1, src2, src3
        ld1             {v0.8h}, [x6], x1
        ld1             {v1.8h}, [x9], x10
        sqadd           v4.8h,   \src0,   \src1
        ld1             {v2.8h}, [x6]
        sqsub           \src0,   \src0,   \src1
        ld1             {v3.8h}, [x9]
        sqadd           v5.8h,   \src2,   \src3
        sqsub           \src2,   \src2,   \src3
        sub             x6,  x6,  x1
        sub             x9,  x9,  x10
        srshr           v4.8h,   v4.8h,   #4
        srshr           v5.8h,   v5.8h,   #4
        srshr           \src0,   \src0,   #4
        usqadd          v0.8h,   v4.8h
        srshr           \src2,   \src2,   #4
        usqadd          v1.8h,   \src0
        usqadd          v2.8h,   v5.8h
        smin            v0.8h,   v0.8h,   v7.8h
        usqadd          v3.8h,   \src2
        smin            v1.8h,   v1.8h,   v7.8h
        st1             {v0.8h}, [x6], x1
        smin            v2.8h,   v2.8h,   v7.8h
        st1             {v1.8h}, [x9], x10
        smin            v3.8h,   v3.8h,   v7.8h
        st1             {v2.8h}, [x6], x1
        st1             {v3.8h}, [x9], x10
.endm
        add_dest_addsub v16.8h,  v31.8h,  v17.8h,  v30.8h
        add_dest_addsub v18.8h,  v29.8h,  v19.8h,  v28.8h
        add_dest_addsub v20.8h,  v27.8h,  v21.8h,  v26.8h
        add_dest_addsub v22.8h,  v25.8h,  v23.8h,  v24.8h
.purgem add_dest_addsub
        cmp             x7,  x8
        b.lt            1b

        ret             x14
endfunc

// 64x64 DCT_DCT inverse transform and add, 16 bpc.
// Arguments follow the itxfm_add signature documented at the top of the
// file: x0 = dst, x1 = dst_stride, x2 = coeff, w3 = eob.
//
// Stack layout after sub_sp: 64*4*4 bytes at sp, followed by a 64*32*2 byte
// intermediate buffer addressed through x5.
// First pass: coefficient rows are processed in groups of four. Before each
// group after the first, eob is compared with a threshold read from
// eob_32x32; if it is lower, the remaining rows of the intermediate buffer
// are zero-filled instead of being transformed.
// Second pass: eight columns at a time through the 64-point vertical
// transform and inv_txfm_add_vert_dct_8x64_neon.
// NOTE(review): idct_dc is defined outside this chunk; presumably it
// handles the DC-only case - confirm.
function inv_txfm_add_dct_dct_64x64_16bpc_neon, export=1
        idct_dc         64,  64,  2

        mov             x15, x30

        sub_sp          64*32*2+64*4*4
        add             x5,  sp, #64*4*4

        movrel          x13, eob_32x32

.irp i, 0, 4, 8, 12, 16, 20, 24, 28
        add             x6,  x5,  #(\i*64*2)
.if \i > 0
        mov             w8,  #(32 - \i)            // rows left to zero-fill
        cmp             w3,  w12
        b.lt            1f
.endif
        add             x7,  x2,  #(\i*4)
        mov             x8,  #32*4
        mov             x12, #-2 // shift
        bl              inv_txfm_dct_clear_4s_x64_neon
        add             x6,  x5,  #(\i*64*2)
        bl              inv_txfm_horz_dct_64x4_neon
.if \i < 28
        ldrh            w12, [x13], #2             // threshold for the next group
.endif
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // 4 stores of 64 bytes = two rows of 64 16-bit values per iteration.
        subs            w8,  w8,  #2
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
.irp i, 0, 8, 16, 24, 32, 40, 48, 56
        add             x7,  x5,  #(\i*2)
        mov             x8,  #64*2
        bl              X(inv_txfm_dct_8h_x64_neon)
        add             x6,  x0,  #(\i*2)
        bl              inv_txfm_add_vert_dct_8x64_neon
.endr

        add             sp,  x5,  #64*32*2
        ret             x15
endfunc

// 64x32 DCT_DCT inverse transform and add, 16 bpc.
// Same structure as the 64x64 function, except that the first pass calls
// the _clear_scale variant of the 64-point transform with a shift of -1 and
// the second pass calls inv_txfm_add_vert_dct_8x32_neon.
function inv_txfm_add_dct_dct_64x32_16bpc_neon, export=1
        idct_dc         64,  32,  1

        mov             x15, x30

        sub_sp          64*32*2+64*4*4
        add             x5,  sp, #64*4*4

        movrel          x13, eob_32x32

.irp i, 0, 4, 8, 12, 16, 20, 24, 28
        add             x6,  x5,  #(\i*64*2)
.if \i > 0
        mov             w8,  #(32 - \i)            // rows left to zero-fill
        cmp             w3,  w12
        b.lt            1f
.endif
        add             x7,  x2,  #(\i*4)
        mov             x8,  #32*4
        mov             x12, #-1 // shift
        bl              inv_txfm_dct_clear_scale_4s_x64_neon
        add             x6,  x5,  #(\i*64*2)
        bl              inv_txfm_horz_dct_64x4_neon
.if \i < 28
        ldrh            w12, [x13], #2             // threshold for the next group
.endif
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // 4 stores of 64 bytes = two rows of 64 16-bit values per iteration.
        subs            w8,  w8,  #2
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
.irp i, 0, 8, 16, 24, 32, 40, 48, 56
        add             x6,  x0,  #(\i*2)
        add             x7,  x5,  #(\i*2)
        mov             x8,  #64*2
        bl              inv_txfm_add_vert_dct_8x32_neon
.endr

        add             sp,  x5,  #64*32*2
        ret             x15
endfunc

// 32x64 DCT_DCT inverse transform and add, 16 bpc.
// Stack layout after sub_sp: 64*8*2 bytes at sp, followed by a 32*32*2 byte
// intermediate buffer addressed through x5.
// First pass: groups of four coefficient rows through
// inv_txfm_horz_scale_dct_32x4_neon, with the same eob early-out as the
// other functions here. The first threshold is loaded before the loop and
// the next one after each successful comparison; no load is issued in the
// last group because no comparison follows it (same guard as in the 16x64
// function).
// Second pass: eight columns at a time through the 64-point vertical
// transform and inv_txfm_add_vert_dct_8x64_neon.
function inv_txfm_add_dct_dct_32x64_16bpc_neon, export=1
        idct_dc         32,  64,  1

        mov             x15, x30

        sub_sp          32*32*2+64*8*2
        add             x5,  sp, #64*8*2

        movrel          x13, eob_32x32
        ldrh            w12, [x13], #2

.irp i, 0, 4, 8, 12, 16, 20, 24, 28
        add             x6,  x5,  #(\i*32*2)
.if \i > 0
        mov             w8,  #(32 - \i)            // rows left to zero-fill
        cmp             w3,  w12
        b.lt            1f
.if \i < 28
        ldrh            w12, [x13], #2             // threshold for the next group
.endif
.endif
        add             x7,  x2,  #(\i*4)
        mov             x8,  #32*4
        bl              inv_txfm_horz_scale_dct_32x4_neon
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // 4 stores of 64 bytes = four rows of 32 16-bit values per iteration.
        subs            w8,  w8,  #4
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
.irp i, 0, 8, 16, 24
        add             x7,  x5,  #(\i*2)
        mov             x8,  #32*2
        bl              X(inv_txfm_dct_8h_x64_neon)
        add             x6,  x0,  #(\i*2)
        bl              inv_txfm_add_vert_dct_8x64_neon
.endr

        add             sp,  x5,  #32*32*2
        ret             x15
endfunc

// 64x16 DCT_DCT inverse transform and add, 16 bpc.
// Stack layout after sub_sp: 64*4*4 bytes at sp, followed by a 64*16*2 byte
// intermediate buffer addressed through x4.
// First pass: four groups of four rows through the 64-point horizontal
// transform, with eob thresholds read from eob_16x32.
// Second pass: eight columns at a time through inv_txfm_add_vert_8x16_neon,
// with x5 holding the address of inv_dct_8h_x16_neon.
function inv_txfm_add_dct_dct_64x16_16bpc_neon, export=1
        idct_dc         64,  16,  2

        mov             x15, x30

        sub_sp          64*16*2+64*4*4
        add             x4,  sp, #64*4*4

        movrel          x13, eob_16x32

.irp i, 0, 4, 8, 12
        add             x6,  x4,  #(\i*64*2)
.if \i > 0
        mov             w8,  #(16 - \i)            // rows left to zero-fill
        cmp             w3,  w12
        b.lt            1f
.endif
        add             x7,  x2,  #(\i*4)
        mov             x8,  #16*4
        mov             x12, #-2 // shift
        bl              inv_txfm_dct_clear_4s_x64_neon
        add             x6,  x4,  #(\i*64*2)
        bl              inv_txfm_horz_dct_64x4_neon
.if \i < 12
        ldrh            w12, [x13], #2             // threshold for the next group
.endif
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // 4 stores of 64 bytes = two rows of 64 16-bit values per iteration.
        subs            w8,  w8,  #2
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
        movrel          x5,  X(inv_dct_8h_x16_neon)
.irp i, 0, 8, 16, 24, 32, 40, 48, 56
        add             x6,  x0,  #(\i*2)
        add             x7,  x4,  #(\i*2)
        mov             x8,  #64*2
        bl              inv_txfm_add_vert_8x16_neon
.endr

        add             sp,  x4,  #64*16*2
        ret             x15
endfunc

// 16x64 DCT_DCT inverse transform and add, 16 bpc.
// Stack layout after sub_sp: 64*8*2 bytes at sp, followed by a 16*32*2 byte
// intermediate buffer addressed through x5.
// First pass: groups of four coefficient rows through
// inv_txfm_horz_16x4_neon with x4 holding the address of
// inv_dct_4s_x16_neon, and eob thresholds read from eob_16x32.
// Second pass: two groups of eight columns through the 64-point vertical
// transform and inv_txfm_add_vert_dct_8x64_neon.
function inv_txfm_add_dct_dct_16x64_16bpc_neon, export=1
        idct_dc         16,  64,  2

        mov             x15, x30

        sub_sp          16*32*2+64*8*2
        add             x5,  sp, #64*8*2

        movrel          x13, eob_16x32
        ldrh            w12, [x13], #2

        adr             x4,  inv_dct_4s_x16_neon
.irp i, 0, 4, 8, 12, 16, 20, 24, 28
        add             x6,  x5,  #(\i*16*2)
.if \i > 0
        mov             w8,  #(32 - \i)            // rows left to zero-fill
        cmp             w3,  w12
        b.lt            1f
.if \i < 28
        ldrh            w12, [x13], #2             // threshold for the next group
.endif
.endif
        add             x7,  x2,  #(\i*4)
        mov             x8,  #32*4
        bl              inv_txfm_horz_16x4_neon
.endr
        b               3f

1:
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        // 2 stores of 64 bytes = four rows of 16 16-bit values per iteration.
        subs            w8,  w8,  #4
.rept 2
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
.irp i, 0, 8
        add             x7,  x5,  #(\i*2)
        mov             x8,  #16*2
        bl              X(inv_txfm_dct_8h_x64_neon)
        add             x6,  x0,  #(\i*2)
        bl              inv_txfm_add_vert_dct_8x64_neon
.endr

        add             sp,  x5,  #16*32*2
        ret             x15
endfunc