; Copyright © 2023, VideoLAN and dav1d authors
; Copyright © 2023, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%include "ext/x86/x86inc.asm"

SECTION_RODATA 64

; Identity byte indices 0..63. The pal_idx_finish variants clamp this table
; with pminub against a broadcast "last valid column" index to build shuffle
; masks that replicate the last valid byte to the right.
; Only the first 16 entries are emitted when ARCH_X86_64 is not set.
const pb_0to63, db  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15
%if ARCH_X86_64
                db 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
                db 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47
                db 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
%endif
; pshufb mask for the width-8 path: within each 8-byte group, bytes 0-3 are
; kept and byte 3 is replicated into bytes 4-7.
pal_idx_w8_padh: db  0,  1,  2,  3,  3,  3,  3,  3,  8,  9, 10, 11, 11, 11, 11, 11

; pmaddubsw multipliers: each byte pair (a, b) becomes the word a*1 + b*16.
pb_1_16: times 4 db 1, 16
%if ARCH_X86_64
; Byte constant 32 (x86-64 only); added to the upper half of a broadcast index
; by the AVX-512 width-32 setup.
pb_32: times 4 db 32
%endif

; JMP_TABLE <function>_<isa>, <width>, <width>, ...
; Emits one 32-bit entry per listed width, each holding the offset of the
; function's .w<width> label relative to (<table> - 2*4). <name>_table is
; defined as that biased address, so indexing it with tzcnt(bw)*4 lands on the
; first entry for bw == 4 (tzcnt == 2).
%macro JMP_TABLE 2-*
    %xdefine %1_table (%%table - 2*4)
    %xdefine %%base mangle(private_prefix %+ _%1)
    %%table:
    %rep %0 - 1
        dd %%base %+ .w%2 - (%%table - 2*4)
        %rotate 1
    %endrep
%endmacro

JMP_TABLE pal_idx_finish_ssse3,     4, 8, 16, 32, 64
%if ARCH_X86_64
JMP_TABLE pal_idx_finish_avx2,      4, 8, 16, 32, 64
JMP_TABLE pal_idx_finish_avx512icl, 4, 8, 16, 32, 64
%endif

SECTION .text

;-----------------------------------------------------------------------
; pal_idx_finish (SSSE3), arguments named dst, src, bw, bh, w, h.
;
; Packs every pair of consecutive src bytes into one dst byte:
;     dst[i] = src[2*i] + 16 * src[2*i+1]
; (pmaddubsw with pb_1_16 followed by packuswb, which saturates to 255).
; NOTE(review): presumably src holds 4-bit palette indices so the sum
; always fits in a byte - confirm against the C caller.
;
; bw selects the code path through the jump table (4, 8, 16, 32 or 64).
; For bw >= 8, when w < bw the columns at index >= w of every row are
; filled with the src byte at column w-1 before packing; the bw == 4 path
; performs no horizontal padding. After h rows have been converted, the
; last packed row is stored repeatedly to cover the remaining bh - h rows.
;
; src is read with mova (aligned loads), dst is written with movu/movq.
; Register roles: m3 = pb_1_16 multipliers, r6 = jump table base (reused
; as scratch by .setup_padh), bhd = bh - h after the prologue.
;-----------------------------------------------------------------------
INIT_XMM ssse3
cglobal pal_idx_finish, 2, 7, 6, dst, src, bw, bh, w, h
%define base r6-pal_idx_finish_ssse3_table
    LEA             r6, pal_idx_finish_ssse3_table
    tzcnt           bwd, bwm
    movifnidn       bhd, bhm
    movifnidn       wd, wm
    movifnidn       hd, hm
    movsxd          bwq, [r6+bwq*4]
    movddup         m3, [base+pb_1_16]
    add             bwq, r6
    sub             bhd, hd                 ; bhd = number of padding rows
    jmp             bwq
.w4:                                        ; 4 rows of 4 bytes per iteration
    mova            m0, [srcq]
    add             srcq, 16
    pmaddubsw       m0, m3
    packuswb        m0, m0
    movq            [dstq], m0
    add             dstq, 8
    sub             hd, 4
    jg              .w4
    test            bhd, bhd
    jz              .w4_end
    pshuflw         m0, m0, q3333           ; replicate the last row (word 3)
.w4_padv:
    movq            [dstq], m0
    add             dstq, 8
    sub             bhd, 4
    jg              .w4_padv
.w4_end:
    RET
.w8_padh:
    pshufb          m0, m2
    pshufb          m1, m2
    jmp             .w8_main
.w8:
    mova            m2, [base+pal_idx_w8_padh]
.w8_loop:                                   ; 4 rows of 8 bytes per iteration
    mova            m0, [srcq+16*0]
    mova            m1, [srcq+16*1]
    cmp             wd, 8
    jl              .w8_padh
.w8_main:
    pmaddubsw       m0, m3
    pmaddubsw       m1, m3
    add             srcq, 16*2
    packuswb        m0, m1
    movu            [dstq], m0
    add             dstq, 16
    sub             hd, 4
    jg              .w8_loop
    test            bhd, bhd
    jz              .w8_end
    pshufd          m0, m0, q3333           ; replicate the last row (dword 3)
.w8_padv:
    movu            [dstq], m0
    add             dstq, 16
    sub             bhd, 4
    jg              .w8_padv
.w8_end:
    RET
.w16_padh:
    pshufb          m0, m4
    pshufb          m1, m4
    jmp             .w16_main
.w16:
    cmp             wd, 16
    je              .w16_loop
    call            .setup_padh
.w16_loop:                                  ; 2 rows of 16 bytes per iteration
    mova            m0, [srcq+16*0]
    mova            m1, [srcq+16*1]
    cmp             wd, 16
    jl              .w16_padh
.w16_main:
    pmaddubsw       m0, m3
    pmaddubsw       m1, m3
    add             srcq, 16*2
    packuswb        m0, m1
    movu            [dstq], m0
    add             dstq, 16
    sub             hd, 2
    jg              .w16_loop
    test            bhd, bhd
    jz              .w16_end
    punpckhqdq      m0, m0                  ; replicate the last row (high qword)
.w16_padv:                                  ; 4 padding rows per iteration
    movu            [dstq+16*0], m0
    movu            [dstq+16*1], m0
    add             dstq, 16*2
    sub             bhd, 4
    jg              .w16_padv
.w16_end:
    RET
.w32_padh:
    cmp             wd, 16
    jg              .w32_padh2
    pshufb          m1, m0, m5              ; w <= 16: second half is all padding
    pshufb          m0, m4
    jmp             .w32_main
.w32_padh2:
    pshufb          m1, m4                  ; w > 16: only the second half is padded
    jmp             .w32_main
.w32:
    cmp             wd, 32
    je              .w32_loop
    call            .setup_padh
.w32_loop:                                  ; 1 row of 32 bytes per iteration
    mova            m0, [srcq+16*0]
    mova            m1, [srcq+16*1]
    cmp             wd, 32
    jl              .w32_padh
.w32_main:
    pmaddubsw       m0, m3
    pmaddubsw       m1, m3
    add             srcq, 16*2
    packuswb        m0, m1
    movu            [dstq], m0
    add             dstq, 16
    dec             hd
    jg              .w32_loop
    test            bhd, bhd
    jz              .w32_end
.w32_padv:                                  ; m0 = last row; 4 padding rows per iteration
    movu            [dstq+16*0], m0
    movu            [dstq+16*1], m0
    movu            [dstq+16*2], m0
    movu            [dstq+16*3], m0
    add             dstq, 16*4
    sub             bhd, 4
    jg              .w32_padv
.w32_end:
    RET
.w64_padh:                                  ; reached when w <= 32
    cmp             wd, 16
    jg              .w64_padh2
    pshufb          m1, m0, m5
    pshufb          m0, m4
    pmaddubsw       m0, m3
    pmaddubsw       m1, m3
    packuswb        m0, m1
    packuswb        m1, m1
    jmp             .w64_main
.w64_padh2:
    pshufb          m1, m4
    pmaddubsw       m0, m3
    pmaddubsw       m2, m1, m3
    pshufb          m1, m5
    pmaddubsw       m1, m3
    packuswb        m0, m2
    packuswb        m1, m1
    jmp             .w64_main
.w64_padh3:                                 ; reached when 32 < w < 64
    cmp             wd, 48
    jg              .w64_padh4
    pshufb          m2, m1, m5
    pshufb          m1, m4
    jmp             .w64_main2
.w64_padh4:
    pshufb          m2, m4
    jmp             .w64_main2
.w64:
    cmp             wd, 64
    je              .w64_loop
    call            .setup_padh
.w64_loop:                                  ; 1 row of 64 bytes per iteration
    mova            m0, [srcq+16*0]
    mova            m1, [srcq+16*1]
    cmp             wd, 32
    jle             .w64_padh
    pmaddubsw       m0, m3
    pmaddubsw       m1, m3
    packuswb        m0, m1
    mova            m1, [srcq+16*2]
    mova            m2, [srcq+16*3]
    cmp             wd, 64
    jl              .w64_padh3
.w64_main2:
    pmaddubsw       m1, m3
    pmaddubsw       m2, m3
    packuswb        m1, m2
.w64_main:                                  ; m0/m1 = first/second 16 output bytes
    add             srcq, 16*4
    movu            [dstq+16*0], m0
    movu            [dstq+16*1], m1
    add             dstq, 16*2
    dec             hd
    jg              .w64_loop
    test            bhd, bhd
    jz              .w64_end
.w64_padv:                                  ; 2 padding rows per iteration
    movu            [dstq+16*0], m0
    movu            [dstq+16*1], m1
    movu            [dstq+16*2], m0
    movu            [dstq+16*3], m1
    add             dstq, 16*4
    sub             bhd, 2
    jg              .w64_padv
.w64_end:
    RET
; Builds the horizontal-padding shuffle masks for the w16/w32/w64 paths:
;   m5 = (w-1) & 15 broadcast to every byte
;   m4 = min(0..15, m5), i.e. identity up to the last valid byte, then clamped
; Overwrites r6 (so `base` is no longer usable afterwards) and m0.
.setup_padh:
    mova            m4, [base+pb_0to63]
    lea             r6d, [wq-1]
    and             r6d, 15
    movd            m5, r6d
    pxor            m0, m0
    pshufb          m5, m0                  ; broadcast byte 0
    pminub          m4, m5
    ret

%if ARCH_X86_64

INIT_YMM avx2
;-----------------------------------------------------------------------
; pal_idx_finish (AVX2), arguments named dst, src, bw, bh, w, h.
;
; Packs every pair of consecutive src bytes into one dst byte:
;     dst[i] = src[2*i] + 16 * src[2*i+1]
; (pmaddubsw with pb_1_16 followed by packuswb).
; bw selects the code path through the jump table (4, 8, 16, 32 or 64).
; For bw >= 8, columns at index >= w are filled with the src byte at
; column w-1; the bw == 4 path performs no horizontal padding. The last
; packed row is then repeated for the remaining bh - h rows.
;
; Register roles: m2 = pb_1_16 multipliers, wd = w - 1 after the prologue
; (index of the last valid column, hence the compares against bw - 1),
; bhd = bh - h, m3/m4 = horizontal-padding shuffle masks.
;-----------------------------------------------------------------------
cglobal pal_idx_finish, 4, 7, 5, dst, src, bw, bh, w, h
%define base r6-pal_idx_finish_avx2_table
    lea             r6, [pal_idx_finish_avx2_table]
    tzcnt           bwd, bwd
    movifnidn       wd, wm
    movifnidn       hd, hm
    movsxd          bwq, [r6+bwq*4]
    vpbroadcastd    m2, [base+pb_1_16]
    dec             wd                      ; wd = index of the last valid column
    add             bwq, r6
    sub             bhd, hd                 ; bhd = number of padding rows
    jmp             bwq
.w4:                                        ; 4 rows of 4 bytes per iteration
    mova            xm0, [srcq]
    add             srcq, 16
    pmaddubsw       xm0, xm2
    packuswb        xm0, xm0
    movq            [dstq], xm0
    add             dstq, 8
    sub             hd, 4
    jg              .w4
    test            bhd, bhd
    jz              .w4_end
    pshuflw         xm0, xm0, q3333         ; replicate the last row (word 3)
.w4_padv:
    movq            [dstq], xm0
    add             dstq, 8
    sub             bhd, 4
    jg              .w4_padv
.w4_end:
    RET
.w8_padh:
    pshufb          xm0, xm3
    pshufb          xm1, xm3
    jmp             .w8_main
.w8:
    mova            xm3, [base+pal_idx_w8_padh]
.w8_loop:                                   ; 4 rows of 8 bytes per iteration
    mova            xm0, [srcq+16*0]
    mova            xm1, [srcq+16*1]
    cmp             wd, 7
    jl              .w8_padh
.w8_main:
    pmaddubsw       xm0, xm2
    pmaddubsw       xm1, xm2
    add             srcq, 16*2
    packuswb        xm0, xm1
    movu            [dstq], xm0
    add             dstq, 16
    sub             hd, 4
    jg              .w8_loop
    test            bhd, bhd
    jz              .w8_end
    pshufd          xm0, xm0, q3333         ; replicate the last row (dword 3)
.w8_padv:
    movu            [dstq], xm0
    add             dstq, 16
    sub             bhd, 4
    jg              .w8_padv
.w8_end:
    RET
.w16_padh:
    pshufb          m0, m3
    pshufb          m1, m3
    jmp             .w16_main
.w16:
    cmp             wd, 15
    je              .w16_loop
    ; m3 = min(w-1, 0..15) in both 128-bit lanes
    vbroadcasti128  m0, [base+pb_0to63]
    movd            xm3, wd
    vpbroadcastb    m3, xm3
    pminub          m3, m0
.w16_loop:                                  ; 4 rows of 16 bytes per iteration
    mova            m0, [srcq+32*0]
    mova            m1, [srcq+32*1]
    cmp             wd, 15
    jl              .w16_padh
.w16_main:
    pmaddubsw       m0, m2
    pmaddubsw       m1, m2
    add             srcq, 32*2
    packuswb        m0, m1
    vpermq          m1, m0, q3120           ; undo the per-lane interleave of packuswb
    movu            [dstq], m1
    add             dstq, 32
    sub             hd, 4
    jg              .w16_loop
    test            bhd, bhd
    jz              .w16_end
    vpermq          m0, m0, q3333           ; replicate the last row (qword 3)
.w16_padv:
    movu            [dstq], m0
    add             dstq, 32
    sub             bhd, 4
    jg              .w16_padv
.w16_end:
    RET
.w32_padh:
    cmp             wd, 15
    jg              .w32_padh2
    ; last valid column is in the low lane: copy it to the high lane so the
    ; lane-local pshufb can reach it
    vinserti128     m0, xm0, 1
    vinserti128     m1, xm1, 1
.w32_padh2:
    pshufb          m0, m3
    pshufb          m1, m3
    jmp             .w32_main
.w32:
    cmp             wd, 31
    je              .w32_loop
    movd            xm3, wd
    vpbroadcastb    m3, xm3
    pminub          m3, [base+pb_0to63]     ; m3 = min(w-1, 0..31)
.w32_loop:                                  ; 2 rows of 32 bytes per iteration
    mova            m0, [srcq+32*0]
    mova            m1, [srcq+32*1]
    cmp             wd, 31
    jl              .w32_padh
.w32_main:
    pmaddubsw       m0, m2
    pmaddubsw       m1, m2
    add             srcq, 32*2
    packuswb        m0, m1
    vpermq          m1, m0, q3120           ; undo the per-lane interleave of packuswb
    movu            [dstq], m1
    add             dstq, 32
    sub             hd, 2
    jg              .w32_loop
    test            bhd, bhd
    jz              .w32_end
    vpermq          m0, m0, q3131           ; replicate the last row (qwords 1 and 3)
.w32_padv:                                  ; 4 padding rows per iteration
    movu            [dstq+32*0], m0
    movu            [dstq+32*1], m0
    add             dstq, 32*2
    sub             bhd, 4
    jg              .w32_padv
.w32_end:
    RET
.w64_padh:                                  ; w-1 <= 15
    cmp             wd, 15
    jg              .w64_padh2
    vinserti128     m1, m0, xm0, 1
    pshufb          m0, m1, m3
    pshufb          m1, m4
    jmp             .w64_main
.w64_padh2:                                 ; 16 <= w-1 <= 31
    cmp             wd, 31
    jg              .w64_padh3
    vperm2i128      m1, m0, m0, 0x11
    pshufb          m0, m3
    pshufb          m1, m4
    jmp             .w64_main
.w64_padh3:                                 ; 32 <= w-1 <= 47
    cmp             wd, 47
    jg              .w64_padh4
    vinserti128     m1, xm1, 1
.w64_padh4:                                 ; 48 <= w-1 <= 62 (or fall-through)
    pshufb          m1, m3
    jmp             .w64_main
.w64:
    cmp             wd, 63
    je              .w64_loop
    ; m4 = (w-1) & 31 broadcast, m3 = min(m4, 0..31).
    ; r6 is reused as scratch here, so pb_0to63 is addressed without `base`.
    mov             r6d, wd
    and             r6d, 31
    movd            xm4, r6d
    vpbroadcastb    m4, xm4
    pminub          m3, m4, [pb_0to63]
.w64_loop:                                  ; 1 row of 64 bytes per iteration
    mova            m0, [srcq+32*0]
    mova            m1, [srcq+32*1]
    cmp             wd, 63
    jl              .w64_padh
.w64_main:
    pmaddubsw       m0, m2
    pmaddubsw       m1, m2
    add             srcq, 32*2
    packuswb        m0, m1
    vpermq          m0, m0, q3120           ; undo the per-lane interleave of packuswb
    movu            [dstq], m0
    add             dstq, 32
    dec             hd
    jg              .w64_loop
    test            bhd, bhd
    jz              .w64_end
.w64_padv:                                  ; m0 = last row; 4 padding rows per iteration
    movu            [dstq+32*0], m0
    movu            [dstq+32*1], m0
    movu            [dstq+32*2], m0
    movu            [dstq+32*3], m0
    add             dstq, 32*4
    sub             bhd, 4
    jg              .w64_padv
.w64_end:
    RET

; AVX-512 variant: only its opening lines fall inside this span. The memory
; operand of the `lea` below sits on the following source line, as in the
; original text.
INIT_ZMM avx512icl
cglobal pal_idx_finish, 4, 7, 7, dst, src, bw, bh, w, h
%define base r6-pal_idx_finish_avx512icl_table
    lea             r6,
[pal_idx_finish_avx512icl_table] 475*c0909341SAndroid Build Coastguard Worker tzcnt bwd, bwd 476*c0909341SAndroid Build Coastguard Worker movifnidn wd, wm 477*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 478*c0909341SAndroid Build Coastguard Worker movsxd bwq, [r6+bwq*4] 479*c0909341SAndroid Build Coastguard Worker vpbroadcastd m4, [base+pb_1_16] 480*c0909341SAndroid Build Coastguard Worker dec wd 481*c0909341SAndroid Build Coastguard Worker add bwq, r6 482*c0909341SAndroid Build Coastguard Worker sub bhd, hd 483*c0909341SAndroid Build Coastguard Worker jmp bwq 484*c0909341SAndroid Build Coastguard Worker.w4: 485*c0909341SAndroid Build Coastguard Worker mova xmm0, [srcq] 486*c0909341SAndroid Build Coastguard Worker add srcq, 16 487*c0909341SAndroid Build Coastguard Worker pmaddubsw xmm0, xm4 488*c0909341SAndroid Build Coastguard Worker packuswb xmm0, xmm0 489*c0909341SAndroid Build Coastguard Worker movq [dstq], xmm0 490*c0909341SAndroid Build Coastguard Worker add dstq, 8 491*c0909341SAndroid Build Coastguard Worker sub hd, 4 492*c0909341SAndroid Build Coastguard Worker jg .w4 493*c0909341SAndroid Build Coastguard Worker test bhd, bhd 494*c0909341SAndroid Build Coastguard Worker jz .w4_end 495*c0909341SAndroid Build Coastguard Worker pshuflw xmm0, xmm0, q3333 496*c0909341SAndroid Build Coastguard Worker.w4_padv: 497*c0909341SAndroid Build Coastguard Worker movq [dstq], xmm0 498*c0909341SAndroid Build Coastguard Worker add dstq, 8 499*c0909341SAndroid Build Coastguard Worker sub bhd, 4 500*c0909341SAndroid Build Coastguard Worker jg .w4_padv 501*c0909341SAndroid Build Coastguard Worker.w4_end: 502*c0909341SAndroid Build Coastguard Worker RET 503*c0909341SAndroid Build Coastguard Worker.w8_padh: 504*c0909341SAndroid Build Coastguard Worker pshufb xmm0, xmm2 505*c0909341SAndroid Build Coastguard Worker pshufb xmm1, xmm2 506*c0909341SAndroid Build Coastguard Worker jmp .w8_main 507*c0909341SAndroid Build Coastguard Worker.w8: 508*c0909341SAndroid Build 
Coastguard Worker mova xmm2, [base+pal_idx_w8_padh] 509*c0909341SAndroid Build Coastguard Worker.w8_loop: 510*c0909341SAndroid Build Coastguard Worker mova xmm0, [srcq+16*0] 511*c0909341SAndroid Build Coastguard Worker mova xmm1, [srcq+16*1] 512*c0909341SAndroid Build Coastguard Worker cmp wd, 7 513*c0909341SAndroid Build Coastguard Worker jl .w8_padh 514*c0909341SAndroid Build Coastguard Worker.w8_main: 515*c0909341SAndroid Build Coastguard Worker pmaddubsw xmm0, xm4 516*c0909341SAndroid Build Coastguard Worker pmaddubsw xmm1, xm4 517*c0909341SAndroid Build Coastguard Worker add srcq, 16*2 518*c0909341SAndroid Build Coastguard Worker packuswb xmm0, xmm1 519*c0909341SAndroid Build Coastguard Worker movu [dstq], xmm0 520*c0909341SAndroid Build Coastguard Worker add dstq, 16 521*c0909341SAndroid Build Coastguard Worker sub hd, 4 522*c0909341SAndroid Build Coastguard Worker jg .w8_loop 523*c0909341SAndroid Build Coastguard Worker test bhd, bhd 524*c0909341SAndroid Build Coastguard Worker jz .w8_end 525*c0909341SAndroid Build Coastguard Worker pshufd xmm0, xmm0, q3333 526*c0909341SAndroid Build Coastguard Worker.w8_padv: 527*c0909341SAndroid Build Coastguard Worker movu [dstq], xmm0 528*c0909341SAndroid Build Coastguard Worker add dstq, 16 529*c0909341SAndroid Build Coastguard Worker sub bhd, 4 530*c0909341SAndroid Build Coastguard Worker jg .w8_padv 531*c0909341SAndroid Build Coastguard Worker.w8_end: 532*c0909341SAndroid Build Coastguard Worker RET 533*c0909341SAndroid Build Coastguard Worker.w16_padh: 534*c0909341SAndroid Build Coastguard Worker pshufb m0, m2 535*c0909341SAndroid Build Coastguard Worker jmp .w16_main 536*c0909341SAndroid Build Coastguard Worker.w16: 537*c0909341SAndroid Build Coastguard Worker cmp wd, 15 538*c0909341SAndroid Build Coastguard Worker je .w16_loop 539*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m2, [base+pb_0to63] 540*c0909341SAndroid Build Coastguard Worker vpbroadcastb m0, wd 541*c0909341SAndroid Build Coastguard Worker 
pminub m2, m0 542*c0909341SAndroid Build Coastguard Worker.w16_loop: 543*c0909341SAndroid Build Coastguard Worker mova m0, [srcq] 544*c0909341SAndroid Build Coastguard Worker cmp wd, 15 545*c0909341SAndroid Build Coastguard Worker jl .w16_padh 546*c0909341SAndroid Build Coastguard Worker.w16_main: 547*c0909341SAndroid Build Coastguard Worker pmaddubsw m0, m4 548*c0909341SAndroid Build Coastguard Worker add srcq, 64 549*c0909341SAndroid Build Coastguard Worker vpmovwb ym0, m0 550*c0909341SAndroid Build Coastguard Worker movu [dstq], ym0 551*c0909341SAndroid Build Coastguard Worker add dstq, 32 552*c0909341SAndroid Build Coastguard Worker sub hd, 4 553*c0909341SAndroid Build Coastguard Worker jg .w16_loop 554*c0909341SAndroid Build Coastguard Worker test bhd, bhd 555*c0909341SAndroid Build Coastguard Worker jz .w16_end 556*c0909341SAndroid Build Coastguard Worker vpermq ym0, ym0, q3333 557*c0909341SAndroid Build Coastguard Worker.w16_padv: 558*c0909341SAndroid Build Coastguard Worker movu [dstq], ym0 559*c0909341SAndroid Build Coastguard Worker add dstq, 32 560*c0909341SAndroid Build Coastguard Worker sub bhd, 4 561*c0909341SAndroid Build Coastguard Worker jg .w16_padv 562*c0909341SAndroid Build Coastguard Worker.w16_end: 563*c0909341SAndroid Build Coastguard Worker RET 564*c0909341SAndroid Build Coastguard Worker.w32_padh: 565*c0909341SAndroid Build Coastguard Worker vpermb m0, m2, m0 566*c0909341SAndroid Build Coastguard Worker vpermb m1, m2, m1 567*c0909341SAndroid Build Coastguard Worker jmp .w32_main 568*c0909341SAndroid Build Coastguard Worker.w32: 569*c0909341SAndroid Build Coastguard Worker mova m2, [base+pb_0to63] 570*c0909341SAndroid Build Coastguard Worker paddb m3, m2, m2 571*c0909341SAndroid Build Coastguard Worker cmp wd, 31 572*c0909341SAndroid Build Coastguard Worker je .w32_loop 573*c0909341SAndroid Build Coastguard Worker vpbroadcastb m0, wd 574*c0909341SAndroid Build Coastguard Worker mov r6d, 0xff00 575*c0909341SAndroid Build Coastguard Worker 
kmovw k1, r6d 576*c0909341SAndroid Build Coastguard Worker vpaddd m0{k1}, [pb_32] {1to16} 577*c0909341SAndroid Build Coastguard Worker pminub m2, m0 578*c0909341SAndroid Build Coastguard Worker.w32_loop: 579*c0909341SAndroid Build Coastguard Worker mova m0, [srcq+64*0] 580*c0909341SAndroid Build Coastguard Worker mova m1, [srcq+64*1] 581*c0909341SAndroid Build Coastguard Worker cmp wd, 31 582*c0909341SAndroid Build Coastguard Worker jl .w32_padh 583*c0909341SAndroid Build Coastguard Worker.w32_main: 584*c0909341SAndroid Build Coastguard Worker pmaddubsw m0, m4 585*c0909341SAndroid Build Coastguard Worker pmaddubsw m1, m4 586*c0909341SAndroid Build Coastguard Worker add srcq, 64*2 587*c0909341SAndroid Build Coastguard Worker vpermt2b m0, m3, m1 588*c0909341SAndroid Build Coastguard Worker movu [dstq], m0 589*c0909341SAndroid Build Coastguard Worker add dstq, 64 590*c0909341SAndroid Build Coastguard Worker sub hd, 4 591*c0909341SAndroid Build Coastguard Worker jg .w32_loop 592*c0909341SAndroid Build Coastguard Worker test bhd, bhd 593*c0909341SAndroid Build Coastguard Worker jz .w32_end 594*c0909341SAndroid Build Coastguard Worker vshufi32x4 m0, m0, q3333 595*c0909341SAndroid Build Coastguard Worker.w32_padv: 596*c0909341SAndroid Build Coastguard Worker movu [dstq], m0 597*c0909341SAndroid Build Coastguard Worker add dstq, 64 598*c0909341SAndroid Build Coastguard Worker sub bhd, 4 599*c0909341SAndroid Build Coastguard Worker jg .w32_padv 600*c0909341SAndroid Build Coastguard Worker.w32_end: 601*c0909341SAndroid Build Coastguard Worker RET 602*c0909341SAndroid Build Coastguard Worker.w64_padh: 603*c0909341SAndroid Build Coastguard Worker REPX {vpermb x, m5, x}, m0, m1, m2, m3 604*c0909341SAndroid Build Coastguard Worker jmp .w64_main 605*c0909341SAndroid Build Coastguard Worker.w64: 606*c0909341SAndroid Build Coastguard Worker mova m5, [base+pb_0to63] 607*c0909341SAndroid Build Coastguard Worker paddb m6, m5, m5 608*c0909341SAndroid Build Coastguard Worker cmp wd, 63 
609*c0909341SAndroid Build Coastguard Worker je .w64_loop 610*c0909341SAndroid Build Coastguard Worker vpbroadcastb m0, wd 611*c0909341SAndroid Build Coastguard Worker pminub m5, m0 612*c0909341SAndroid Build Coastguard Worker.w64_loop: 613*c0909341SAndroid Build Coastguard Worker mova m0, [srcq+64*0] 614*c0909341SAndroid Build Coastguard Worker mova m1, [srcq+64*1] 615*c0909341SAndroid Build Coastguard Worker mova m2, [srcq+64*2] 616*c0909341SAndroid Build Coastguard Worker mova m3, [srcq+64*3] 617*c0909341SAndroid Build Coastguard Worker cmp wd, 63 618*c0909341SAndroid Build Coastguard Worker jl .w64_padh 619*c0909341SAndroid Build Coastguard Worker.w64_main: 620*c0909341SAndroid Build Coastguard Worker REPX {pmaddubsw x, m4}, m0, m1, m2, m3 621*c0909341SAndroid Build Coastguard Worker add srcq, 64*4 622*c0909341SAndroid Build Coastguard Worker vpermt2b m0, m6, m1 623*c0909341SAndroid Build Coastguard Worker vpermt2b m2, m6, m3 624*c0909341SAndroid Build Coastguard Worker movu [dstq+64*0], m0 625*c0909341SAndroid Build Coastguard Worker movu [dstq+64*1], m2 626*c0909341SAndroid Build Coastguard Worker add dstq, 64*2 627*c0909341SAndroid Build Coastguard Worker sub hd, 4 628*c0909341SAndroid Build Coastguard Worker jg .w64_loop 629*c0909341SAndroid Build Coastguard Worker test bhd, bhd 630*c0909341SAndroid Build Coastguard Worker jz .w64_end 631*c0909341SAndroid Build Coastguard Worker vshufi32x4 m2, m2, q3232 632*c0909341SAndroid Build Coastguard Worker.w64_padv: 633*c0909341SAndroid Build Coastguard Worker movu [dstq+64*0], m2 634*c0909341SAndroid Build Coastguard Worker movu [dstq+64*1], m2 635*c0909341SAndroid Build Coastguard Worker add dstq, 64*2 636*c0909341SAndroid Build Coastguard Worker sub bhd, 4 637*c0909341SAndroid Build Coastguard Worker jg .w64_padv 638*c0909341SAndroid Build Coastguard Worker.w64_end: 639*c0909341SAndroid Build Coastguard Worker RET 640*c0909341SAndroid Build Coastguard Worker 641*c0909341SAndroid Build Coastguard Worker%endif ; 
ARCH_X86_64 642