1*c0909341SAndroid Build Coastguard Worker; Copyright © 2019, VideoLAN and dav1d authors 2*c0909341SAndroid Build Coastguard Worker; Copyright © 2019, Two Orioles, LLC 3*c0909341SAndroid Build Coastguard Worker; All rights reserved. 4*c0909341SAndroid Build Coastguard Worker; 5*c0909341SAndroid Build Coastguard Worker; Redistribution and use in source and binary forms, with or without 6*c0909341SAndroid Build Coastguard Worker; modification, are permitted provided that the following conditions are met: 7*c0909341SAndroid Build Coastguard Worker; 8*c0909341SAndroid Build Coastguard Worker; 1. Redistributions of source code must retain the above copyright notice, this 9*c0909341SAndroid Build Coastguard Worker; list of conditions and the following disclaimer. 10*c0909341SAndroid Build Coastguard Worker; 11*c0909341SAndroid Build Coastguard Worker; 2. Redistributions in binary form must reproduce the above copyright notice, 12*c0909341SAndroid Build Coastguard Worker; this list of conditions and the following disclaimer in the documentation 13*c0909341SAndroid Build Coastguard Worker; and/or other materials provided with the distribution. 14*c0909341SAndroid Build Coastguard Worker; 15*c0909341SAndroid Build Coastguard Worker; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16*c0909341SAndroid Build Coastguard Worker; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17*c0909341SAndroid Build Coastguard Worker; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18*c0909341SAndroid Build Coastguard Worker; DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19*c0909341SAndroid Build Coastguard Worker; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20*c0909341SAndroid Build Coastguard Worker; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21*c0909341SAndroid Build Coastguard Worker; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22*c0909341SAndroid Build Coastguard Worker; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23*c0909341SAndroid Build Coastguard Worker; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24*c0909341SAndroid Build Coastguard Worker; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25*c0909341SAndroid Build Coastguard Worker 26*c0909341SAndroid Build Coastguard Worker%include "config.asm" 27*c0909341SAndroid Build Coastguard Worker%include "ext/x86/x86inc.asm" 28*c0909341SAndroid Build Coastguard Worker 29*c0909341SAndroid Build Coastguard WorkerSECTION_RODATA 64 ; avoids cacheline splits 30*c0909341SAndroid Build Coastguard Worker 31*c0909341SAndroid Build Coastguard Workermin_prob: dw 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0 32*c0909341SAndroid Build Coastguard Workerpw_0xff00: times 8 dw 0xff00 33*c0909341SAndroid Build Coastguard Workerpw_32: times 8 dw 32 34*c0909341SAndroid Build Coastguard Worker 35*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 36*c0909341SAndroid Build Coastguard Worker%define resp resq 37*c0909341SAndroid Build Coastguard Worker%define movp movq 38*c0909341SAndroid Build Coastguard Worker%define c_shuf q3333 39*c0909341SAndroid Build Coastguard Worker%macro DECODE_SYMBOL_ADAPT_INIT 0-1 40*c0909341SAndroid Build Coastguard Worker%endmacro 41*c0909341SAndroid Build Coastguard Worker%else 42*c0909341SAndroid Build Coastguard Worker%define resp resd 43*c0909341SAndroid Build Coastguard Worker%define movp movd 
44*c0909341SAndroid Build Coastguard Worker%define c_shuf q1111 45*c0909341SAndroid Build Coastguard Worker%macro DECODE_SYMBOL_ADAPT_INIT 0-1 0 ; hi_tok 46*c0909341SAndroid Build Coastguard Worker mov t0, r0m 47*c0909341SAndroid Build Coastguard Worker mov t1, r1m 48*c0909341SAndroid Build Coastguard Worker%if %1 == 0 49*c0909341SAndroid Build Coastguard Worker mov t2, r2m 50*c0909341SAndroid Build Coastguard Worker%endif 51*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT >= 16 52*c0909341SAndroid Build Coastguard Worker sub esp, 40-%1*4 53*c0909341SAndroid Build Coastguard Worker%else 54*c0909341SAndroid Build Coastguard Worker mov eax, esp 55*c0909341SAndroid Build Coastguard Worker and esp, ~15 56*c0909341SAndroid Build Coastguard Worker sub esp, 40-%1*4 57*c0909341SAndroid Build Coastguard Worker mov [esp], eax 58*c0909341SAndroid Build Coastguard Worker%endif 59*c0909341SAndroid Build Coastguard Worker%endmacro 60*c0909341SAndroid Build Coastguard Worker%endif 61*c0909341SAndroid Build Coastguard Worker 62*c0909341SAndroid Build Coastguard Workerstruc msac 63*c0909341SAndroid Build Coastguard Worker .buf: resp 1 64*c0909341SAndroid Build Coastguard Worker .end: resp 1 65*c0909341SAndroid Build Coastguard Worker .dif: resp 1 66*c0909341SAndroid Build Coastguard Worker .rng: resd 1 67*c0909341SAndroid Build Coastguard Worker .cnt: resd 1 68*c0909341SAndroid Build Coastguard Worker .update_cdf: resd 1 69*c0909341SAndroid Build Coastguard Workerendstruc 70*c0909341SAndroid Build Coastguard Worker 71*c0909341SAndroid Build Coastguard Worker%define m(x, y) mangle(private_prefix %+ _ %+ x %+ y) 72*c0909341SAndroid Build Coastguard Worker 73*c0909341SAndroid Build Coastguard WorkerSECTION .text 74*c0909341SAndroid Build Coastguard Worker 75*c0909341SAndroid Build Coastguard Worker%if WIN64 76*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 0, 1, 2, 3, 4, 5, 7, 3, 8 77*c0909341SAndroid Build Coastguard Worker%define buf rsp+stack_offset+8 ; shadow 
space 78*c0909341SAndroid Build Coastguard Worker%elif UNIX64 79*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 0, 1, 2, 3, 4, 5, 7, 0, 8 80*c0909341SAndroid Build Coastguard Worker%define buf rsp-40 ; red zone 81*c0909341SAndroid Build Coastguard Worker%else 82*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 2, 3, 4, 1, 5, 6, 5, 2, 3 83*c0909341SAndroid Build Coastguard Worker%define buf esp+8 84*c0909341SAndroid Build Coastguard Worker%endif 85*c0909341SAndroid Build Coastguard Worker 86*c0909341SAndroid Build Coastguard WorkerINIT_XMM sse2 87*c0909341SAndroid Build Coastguard Workercglobal msac_decode_symbol_adapt4, 0, 6, 6 88*c0909341SAndroid Build Coastguard Worker DECODE_SYMBOL_ADAPT_INIT 89*c0909341SAndroid Build Coastguard Worker LEA rax, pw_0xff00 90*c0909341SAndroid Build Coastguard Worker movd m2, [t0+msac.rng] 91*c0909341SAndroid Build Coastguard Worker movq m1, [t1] 92*c0909341SAndroid Build Coastguard Worker movp m3, [t0+msac.dif] 93*c0909341SAndroid Build Coastguard Worker mov t3d, [t0+msac.update_cdf] 94*c0909341SAndroid Build Coastguard Worker mov t4d, t2d 95*c0909341SAndroid Build Coastguard Worker not t2 ; -(n_symbols + 1) 96*c0909341SAndroid Build Coastguard Worker pshuflw m2, m2, q0000 97*c0909341SAndroid Build Coastguard Worker movd [buf+12], m2 98*c0909341SAndroid Build Coastguard Worker pand m2, [rax] 99*c0909341SAndroid Build Coastguard Worker mova m0, m1 100*c0909341SAndroid Build Coastguard Worker psrlw m1, 6 101*c0909341SAndroid Build Coastguard Worker psllw m1, 7 102*c0909341SAndroid Build Coastguard Worker pmulhuw m1, m2 103*c0909341SAndroid Build Coastguard Worker movq m2, [rax+t2*2] 104*c0909341SAndroid Build Coastguard Worker pshuflw m3, m3, c_shuf 105*c0909341SAndroid Build Coastguard Worker paddw m1, m2 106*c0909341SAndroid Build Coastguard Worker mova [buf+16], m1 107*c0909341SAndroid Build Coastguard Worker psubusw m1, m3 108*c0909341SAndroid Build Coastguard Worker pxor m2, m2 109*c0909341SAndroid Build 
Coastguard Worker pcmpeqw m1, m2 ; c >= v 110*c0909341SAndroid Build Coastguard Worker pmovmskb eax, m1 111*c0909341SAndroid Build Coastguard Worker test t3d, t3d 112*c0909341SAndroid Build Coastguard Worker jz .renorm ; !allow_update_cdf 113*c0909341SAndroid Build Coastguard Worker 114*c0909341SAndroid Build Coastguard Worker; update_cdf: 115*c0909341SAndroid Build Coastguard Worker movzx t3d, word [t1+t4*2] ; count 116*c0909341SAndroid Build Coastguard Worker pcmpeqw m2, m2 117*c0909341SAndroid Build Coastguard Worker mov t2d, t3d 118*c0909341SAndroid Build Coastguard Worker shr t3d, 4 119*c0909341SAndroid Build Coastguard Worker cmp t4d, 3 120*c0909341SAndroid Build Coastguard Worker sbb t3d, -5 ; (count >> 4) + (n_symbols > 2) + 4 121*c0909341SAndroid Build Coastguard Worker cmp t2d, 32 122*c0909341SAndroid Build Coastguard Worker adc t2d, 0 ; count + (count < 32) 123*c0909341SAndroid Build Coastguard Worker movd m3, t3d 124*c0909341SAndroid Build Coastguard Worker pavgw m2, m1 ; i >= val ? 
-1 : 32768 125*c0909341SAndroid Build Coastguard Worker psubw m2, m0 ; for (i = 0; i < val; i++) 126*c0909341SAndroid Build Coastguard Worker psubw m0, m1 ; cdf[i] += (32768 - cdf[i]) >> rate; 127*c0909341SAndroid Build Coastguard Worker psraw m2, m3 ; for (; i < n_symbols; i++) 128*c0909341SAndroid Build Coastguard Worker paddw m0, m2 ; cdf[i] += (( -1 - cdf[i]) >> rate) + 1; 129*c0909341SAndroid Build Coastguard Worker movq [t1], m0 130*c0909341SAndroid Build Coastguard Worker mov [t1+t4*2], t2w 131*c0909341SAndroid Build Coastguard Worker 132*c0909341SAndroid Build Coastguard Worker.renorm: 133*c0909341SAndroid Build Coastguard Worker tzcnt eax, eax 134*c0909341SAndroid Build Coastguard Worker mov t4, [t0+msac.dif] 135*c0909341SAndroid Build Coastguard Worker movzx t1d, word [buf+rax+16] ; v 136*c0909341SAndroid Build Coastguard Worker movzx t2d, word [buf+rax+14] ; u 137*c0909341SAndroid Build Coastguard Worker shr eax, 1 138*c0909341SAndroid Build Coastguard Worker.renorm2: 139*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 140*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT >= 16 141*c0909341SAndroid Build Coastguard Worker add esp, 40 142*c0909341SAndroid Build Coastguard Worker%else 143*c0909341SAndroid Build Coastguard Worker mov esp, [esp] 144*c0909341SAndroid Build Coastguard Worker%endif 145*c0909341SAndroid Build Coastguard Worker%endif 146*c0909341SAndroid Build Coastguard Worker sub t2d, t1d ; rng 147*c0909341SAndroid Build Coastguard Worker shl t1, gprsize*8-16 148*c0909341SAndroid Build Coastguard Worker sub t4, t1 ; dif - v 149*c0909341SAndroid Build Coastguard Worker.renorm3: 150*c0909341SAndroid Build Coastguard Worker mov t1d, [t0+msac.cnt] 151*c0909341SAndroid Build Coastguard Worker movifnidn t7, t0 152*c0909341SAndroid Build Coastguard Worker.renorm4: 153*c0909341SAndroid Build Coastguard Worker bsr ecx, t2d 154*c0909341SAndroid Build Coastguard Worker xor ecx, 15 ; d 155*c0909341SAndroid Build Coastguard 
Worker.renorm5: 156*c0909341SAndroid Build Coastguard Worker shl t2d, cl 157*c0909341SAndroid Build Coastguard Worker shl t4, cl 158*c0909341SAndroid Build Coastguard Worker mov [t7+msac.rng], t2d 159*c0909341SAndroid Build Coastguard Worker sub t1d, ecx 160*c0909341SAndroid Build Coastguard Worker jae .end ; no refill required 161*c0909341SAndroid Build Coastguard Worker 162*c0909341SAndroid Build Coastguard Worker; refill: 163*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 164*c0909341SAndroid Build Coastguard Worker push t5 165*c0909341SAndroid Build Coastguard Worker%endif 166*c0909341SAndroid Build Coastguard Worker mov t2, [t7+msac.buf] 167*c0909341SAndroid Build Coastguard Worker mov t5, [t7+msac.end] 168*c0909341SAndroid Build Coastguard Worker lea rcx, [t2+gprsize] 169*c0909341SAndroid Build Coastguard Worker sub rcx, t5 170*c0909341SAndroid Build Coastguard Worker ja .refill_eob 171*c0909341SAndroid Build Coastguard Worker mov t5, [t2] 172*c0909341SAndroid Build Coastguard Worker lea ecx, [t1+16-gprsize*8] 173*c0909341SAndroid Build Coastguard Worker not t5 174*c0909341SAndroid Build Coastguard Worker bswap t5 175*c0909341SAndroid Build Coastguard Worker shr t5, cl 176*c0909341SAndroid Build Coastguard Worker neg ecx 177*c0909341SAndroid Build Coastguard Worker shr ecx, 3 ; num_bytes_read 178*c0909341SAndroid Build Coastguard Worker or t4, t5 179*c0909341SAndroid Build Coastguard Worker.refill_end: 180*c0909341SAndroid Build Coastguard Worker add t2, rcx 181*c0909341SAndroid Build Coastguard Worker lea t1d, [t1+rcx*8] ; cnt += num_bits_read 182*c0909341SAndroid Build Coastguard Worker mov [t7+msac.buf], t2 183*c0909341SAndroid Build Coastguard Worker.refill_end2: 184*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 185*c0909341SAndroid Build Coastguard Worker pop t5 186*c0909341SAndroid Build Coastguard Worker%endif 187*c0909341SAndroid Build Coastguard Worker.end: 188*c0909341SAndroid Build Coastguard Worker mov [t7+msac.cnt], 
t1d 189*c0909341SAndroid Build Coastguard Worker mov [t7+msac.dif], t4 190*c0909341SAndroid Build Coastguard Worker RET 191*c0909341SAndroid Build Coastguard Worker.pad_with_ones: 192*c0909341SAndroid Build Coastguard Worker lea ecx, [t1-16] 193*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 194*c0909341SAndroid Build Coastguard Worker ror rcx, cl 195*c0909341SAndroid Build Coastguard Worker%else 196*c0909341SAndroid Build Coastguard Worker shr ecx, cl 197*c0909341SAndroid Build Coastguard Worker%endif 198*c0909341SAndroid Build Coastguard Worker or t4, rcx 199*c0909341SAndroid Build Coastguard Worker jmp .refill_end2 200*c0909341SAndroid Build Coastguard Worker.refill_eob: ; avoid overreading the input buffer 201*c0909341SAndroid Build Coastguard Worker cmp t2, t5 202*c0909341SAndroid Build Coastguard Worker jae .pad_with_ones ; eob reached 203*c0909341SAndroid Build Coastguard Worker ; We can safely do a register-sized load of the last bytes of the buffer 204*c0909341SAndroid Build Coastguard Worker ; as this code is only reached if the msac buffer size is >= gprsize. 
205*c0909341SAndroid Build Coastguard Worker mov t5, [t5-gprsize] 206*c0909341SAndroid Build Coastguard Worker shl ecx, 3 207*c0909341SAndroid Build Coastguard Worker shr t5, cl 208*c0909341SAndroid Build Coastguard Worker lea ecx, [t1+16-gprsize*8] 209*c0909341SAndroid Build Coastguard Worker not t5 210*c0909341SAndroid Build Coastguard Worker bswap t5 211*c0909341SAndroid Build Coastguard Worker shr t5, cl 212*c0909341SAndroid Build Coastguard Worker neg ecx 213*c0909341SAndroid Build Coastguard Worker or t4, t5 214*c0909341SAndroid Build Coastguard Worker mov t5d, [t7+msac.end] 215*c0909341SAndroid Build Coastguard Worker shr ecx, 3 216*c0909341SAndroid Build Coastguard Worker sub t5d, t2d ; num_bytes_left 217*c0909341SAndroid Build Coastguard Worker cmp ecx, t5d 218*c0909341SAndroid Build Coastguard Worker cmovae ecx, t5d ; num_bytes_read 219*c0909341SAndroid Build Coastguard Worker jmp .refill_end 220*c0909341SAndroid Build Coastguard Worker 221*c0909341SAndroid Build Coastguard Workercglobal msac_decode_symbol_adapt8, 0, 6, 6 222*c0909341SAndroid Build Coastguard Worker DECODE_SYMBOL_ADAPT_INIT 223*c0909341SAndroid Build Coastguard Worker LEA rax, pw_0xff00 224*c0909341SAndroid Build Coastguard Worker movd m2, [t0+msac.rng] 225*c0909341SAndroid Build Coastguard Worker mova m1, [t1] 226*c0909341SAndroid Build Coastguard Worker movp m3, [t0+msac.dif] 227*c0909341SAndroid Build Coastguard Worker mov t3d, [t0+msac.update_cdf] 228*c0909341SAndroid Build Coastguard Worker mov t4d, t2d 229*c0909341SAndroid Build Coastguard Worker not t2 230*c0909341SAndroid Build Coastguard Worker pshuflw m2, m2, q0000 231*c0909341SAndroid Build Coastguard Worker movd [buf+12], m2 232*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m2 233*c0909341SAndroid Build Coastguard Worker mova m0, m1 234*c0909341SAndroid Build Coastguard Worker psrlw m1, 6 235*c0909341SAndroid Build Coastguard Worker pand m2, [rax] 236*c0909341SAndroid Build Coastguard Worker psllw m1, 7 
237*c0909341SAndroid Build Coastguard Worker pmulhuw m1, m2 238*c0909341SAndroid Build Coastguard Worker movu m2, [rax+t2*2] 239*c0909341SAndroid Build Coastguard Worker pshuflw m3, m3, c_shuf 240*c0909341SAndroid Build Coastguard Worker paddw m1, m2 241*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m3 242*c0909341SAndroid Build Coastguard Worker mova [buf+16], m1 243*c0909341SAndroid Build Coastguard Worker psubusw m1, m3 244*c0909341SAndroid Build Coastguard Worker pxor m2, m2 245*c0909341SAndroid Build Coastguard Worker pcmpeqw m1, m2 246*c0909341SAndroid Build Coastguard Worker pmovmskb eax, m1 247*c0909341SAndroid Build Coastguard Worker test t3d, t3d 248*c0909341SAndroid Build Coastguard Worker jz m(msac_decode_symbol_adapt4, SUFFIX).renorm 249*c0909341SAndroid Build Coastguard Worker movzx t3d, word [t1+t4*2] 250*c0909341SAndroid Build Coastguard Worker pcmpeqw m2, m2 251*c0909341SAndroid Build Coastguard Worker mov t2d, t3d 252*c0909341SAndroid Build Coastguard Worker shr t3d, 4 253*c0909341SAndroid Build Coastguard Worker cmp t4d, 3 ; may be called with n_symbols <= 2 254*c0909341SAndroid Build Coastguard Worker sbb t3d, -5 255*c0909341SAndroid Build Coastguard Worker cmp t2d, 32 256*c0909341SAndroid Build Coastguard Worker adc t2d, 0 257*c0909341SAndroid Build Coastguard Worker movd m3, t3d 258*c0909341SAndroid Build Coastguard Worker pavgw m2, m1 259*c0909341SAndroid Build Coastguard Worker psubw m2, m0 260*c0909341SAndroid Build Coastguard Worker psubw m0, m1 261*c0909341SAndroid Build Coastguard Worker psraw m2, m3 262*c0909341SAndroid Build Coastguard Worker paddw m0, m2 263*c0909341SAndroid Build Coastguard Worker mova [t1], m0 264*c0909341SAndroid Build Coastguard Worker mov [t1+t4*2], t2w 265*c0909341SAndroid Build Coastguard Worker jmp m(msac_decode_symbol_adapt4, SUFFIX).renorm 266*c0909341SAndroid Build Coastguard Worker 267*c0909341SAndroid Build Coastguard Workercglobal msac_decode_symbol_adapt16, 0, 6, 6 268*c0909341SAndroid Build 
Coastguard Worker DECODE_SYMBOL_ADAPT_INIT 269*c0909341SAndroid Build Coastguard Worker LEA rax, pw_0xff00 270*c0909341SAndroid Build Coastguard Worker movd m4, [t0+msac.rng] 271*c0909341SAndroid Build Coastguard Worker mova m2, [t1] 272*c0909341SAndroid Build Coastguard Worker mova m3, [t1+16] 273*c0909341SAndroid Build Coastguard Worker movp m5, [t0+msac.dif] 274*c0909341SAndroid Build Coastguard Worker mov t3d, [t0+msac.update_cdf] 275*c0909341SAndroid Build Coastguard Worker mov t4d, t2d 276*c0909341SAndroid Build Coastguard Worker not t2 277*c0909341SAndroid Build Coastguard Worker%if WIN64 278*c0909341SAndroid Build Coastguard Worker sub rsp, 48 ; need 36 bytes, shadow space is only 32 279*c0909341SAndroid Build Coastguard Worker%endif 280*c0909341SAndroid Build Coastguard Worker pshuflw m4, m4, q0000 281*c0909341SAndroid Build Coastguard Worker movd [buf-4], m4 282*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m4 283*c0909341SAndroid Build Coastguard Worker mova m0, m2 284*c0909341SAndroid Build Coastguard Worker psrlw m2, 6 285*c0909341SAndroid Build Coastguard Worker mova m1, m3 286*c0909341SAndroid Build Coastguard Worker psrlw m3, 6 287*c0909341SAndroid Build Coastguard Worker pand m4, [rax] 288*c0909341SAndroid Build Coastguard Worker psllw m2, 7 289*c0909341SAndroid Build Coastguard Worker psllw m3, 7 290*c0909341SAndroid Build Coastguard Worker pmulhuw m2, m4 291*c0909341SAndroid Build Coastguard Worker pmulhuw m3, m4 292*c0909341SAndroid Build Coastguard Worker movu m4, [rax+t2*2] 293*c0909341SAndroid Build Coastguard Worker pshuflw m5, m5, c_shuf 294*c0909341SAndroid Build Coastguard Worker paddw m2, m4 295*c0909341SAndroid Build Coastguard Worker psubw m4, [rax-pw_0xff00+pw_32] 296*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m5 297*c0909341SAndroid Build Coastguard Worker paddw m3, m4 298*c0909341SAndroid Build Coastguard Worker mova [buf], m2 299*c0909341SAndroid Build Coastguard Worker psubusw m2, m5 300*c0909341SAndroid 
Build Coastguard Worker mova [buf+16], m3 301*c0909341SAndroid Build Coastguard Worker psubusw m3, m5 302*c0909341SAndroid Build Coastguard Worker pxor m4, m4 303*c0909341SAndroid Build Coastguard Worker pcmpeqw m2, m4 304*c0909341SAndroid Build Coastguard Worker pcmpeqw m3, m4 305*c0909341SAndroid Build Coastguard Worker packsswb m5, m2, m3 306*c0909341SAndroid Build Coastguard Worker pmovmskb eax, m5 307*c0909341SAndroid Build Coastguard Worker test t3d, t3d 308*c0909341SAndroid Build Coastguard Worker jz .renorm 309*c0909341SAndroid Build Coastguard Worker movzx t3d, word [t1+t4*2] 310*c0909341SAndroid Build Coastguard Worker pcmpeqw m4, m4 311*c0909341SAndroid Build Coastguard Worker mova m5, m4 312*c0909341SAndroid Build Coastguard Worker lea t2d, [t3+80] ; only support n_symbols > 2 313*c0909341SAndroid Build Coastguard Worker shr t2d, 4 314*c0909341SAndroid Build Coastguard Worker cmp t3d, 32 315*c0909341SAndroid Build Coastguard Worker adc t3d, 0 316*c0909341SAndroid Build Coastguard Worker pavgw m4, m2 317*c0909341SAndroid Build Coastguard Worker pavgw m5, m3 318*c0909341SAndroid Build Coastguard Worker psubw m4, m0 319*c0909341SAndroid Build Coastguard Worker psubw m0, m2 320*c0909341SAndroid Build Coastguard Worker movd m2, t2d 321*c0909341SAndroid Build Coastguard Worker psubw m5, m1 322*c0909341SAndroid Build Coastguard Worker psubw m1, m3 323*c0909341SAndroid Build Coastguard Worker psraw m4, m2 324*c0909341SAndroid Build Coastguard Worker psraw m5, m2 325*c0909341SAndroid Build Coastguard Worker paddw m0, m4 326*c0909341SAndroid Build Coastguard Worker paddw m1, m5 327*c0909341SAndroid Build Coastguard Worker mova [t1], m0 328*c0909341SAndroid Build Coastguard Worker mova [t1+16], m1 329*c0909341SAndroid Build Coastguard Worker mov [t1+t4*2], t3w 330*c0909341SAndroid Build Coastguard Worker.renorm: 331*c0909341SAndroid Build Coastguard Worker tzcnt eax, eax 332*c0909341SAndroid Build Coastguard Worker mov t4, [t0+msac.dif] 333*c0909341SAndroid Build 
Coastguard Worker movzx t1d, word [buf+rax*2] 334*c0909341SAndroid Build Coastguard Worker movzx t2d, word [buf+rax*2-2] 335*c0909341SAndroid Build Coastguard Worker%if WIN64 336*c0909341SAndroid Build Coastguard Worker add rsp, 48 337*c0909341SAndroid Build Coastguard Worker%endif 338*c0909341SAndroid Build Coastguard Worker jmp m(msac_decode_symbol_adapt4, SUFFIX).renorm2 339*c0909341SAndroid Build Coastguard Worker 340*c0909341SAndroid Build Coastguard Workercglobal msac_decode_bool_adapt, 0, 6, 0 341*c0909341SAndroid Build Coastguard Worker movifnidn t1, r1mp 342*c0909341SAndroid Build Coastguard Worker movifnidn t0, r0mp 343*c0909341SAndroid Build Coastguard Worker movzx eax, word [t1] 344*c0909341SAndroid Build Coastguard Worker movzx t3d, byte [t0+msac.rng+1] 345*c0909341SAndroid Build Coastguard Worker mov t4, [t0+msac.dif] 346*c0909341SAndroid Build Coastguard Worker mov t2d, [t0+msac.rng] 347*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 348*c0909341SAndroid Build Coastguard Worker mov t5d, eax 349*c0909341SAndroid Build Coastguard Worker%endif 350*c0909341SAndroid Build Coastguard Worker and eax, ~63 351*c0909341SAndroid Build Coastguard Worker imul eax, t3d 352*c0909341SAndroid Build Coastguard Worker%if UNIX64 353*c0909341SAndroid Build Coastguard Worker mov t6, t4 354*c0909341SAndroid Build Coastguard Worker%endif 355*c0909341SAndroid Build Coastguard Worker shr eax, 7 356*c0909341SAndroid Build Coastguard Worker add eax, 4 ; v 357*c0909341SAndroid Build Coastguard Worker mov t3d, eax 358*c0909341SAndroid Build Coastguard Worker shl rax, gprsize*8-16 ; vw 359*c0909341SAndroid Build Coastguard Worker sub t2d, t3d ; r - v 360*c0909341SAndroid Build Coastguard Worker sub t4, rax ; dif - vw 361*c0909341SAndroid Build Coastguard Worker setb al 362*c0909341SAndroid Build Coastguard Worker cmovb t2d, t3d 363*c0909341SAndroid Build Coastguard Worker mov t3d, [t0+msac.update_cdf] 364*c0909341SAndroid Build Coastguard Worker%if UNIX64 
365*c0909341SAndroid Build Coastguard Worker cmovb t4, t6 366*c0909341SAndroid Build Coastguard Worker%else 367*c0909341SAndroid Build Coastguard Worker cmovb t4, [t0+msac.dif] 368*c0909341SAndroid Build Coastguard Worker%endif 369*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 370*c0909341SAndroid Build Coastguard Worker movzx eax, al 371*c0909341SAndroid Build Coastguard Worker%endif 372*c0909341SAndroid Build Coastguard Worker test t3d, t3d 373*c0909341SAndroid Build Coastguard Worker jz m(msac_decode_symbol_adapt4, SUFFIX).renorm3 374*c0909341SAndroid Build Coastguard Worker%if UNIX64 == 0 375*c0909341SAndroid Build Coastguard Worker push t6 376*c0909341SAndroid Build Coastguard Worker%endif 377*c0909341SAndroid Build Coastguard Worker movzx t6d, word [t1+2] 378*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 379*c0909341SAndroid Build Coastguard Worker push t5 380*c0909341SAndroid Build Coastguard Worker movzx t5d, word [t1] 381*c0909341SAndroid Build Coastguard Worker%endif 382*c0909341SAndroid Build Coastguard Worker movifnidn t7, t0 383*c0909341SAndroid Build Coastguard Worker lea ecx, [t6+64] 384*c0909341SAndroid Build Coastguard Worker cmp t6d, 32 385*c0909341SAndroid Build Coastguard Worker adc t6d, 0 386*c0909341SAndroid Build Coastguard Worker mov [t1+2], t6w 387*c0909341SAndroid Build Coastguard Worker imul t6d, eax, -32769 388*c0909341SAndroid Build Coastguard Worker shr ecx, 4 ; rate 389*c0909341SAndroid Build Coastguard Worker add t6d, t5d ; if (bit) 390*c0909341SAndroid Build Coastguard Worker sub t5d, eax ; cdf[0] -= ((cdf[0] - 32769) >> rate) + 1; 391*c0909341SAndroid Build Coastguard Worker sar t6d, cl ; else 392*c0909341SAndroid Build Coastguard Worker sub t5d, t6d ; cdf[0] -= cdf[0] >> rate; 393*c0909341SAndroid Build Coastguard Worker mov [t1], t5w 394*c0909341SAndroid Build Coastguard Worker%if WIN64 395*c0909341SAndroid Build Coastguard Worker mov t1d, [t7+msac.cnt] 396*c0909341SAndroid Build Coastguard Worker 
pop t6 397*c0909341SAndroid Build Coastguard Worker jmp m(msac_decode_symbol_adapt4, SUFFIX).renorm4 398*c0909341SAndroid Build Coastguard Worker%else 399*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 400*c0909341SAndroid Build Coastguard Worker pop t5 401*c0909341SAndroid Build Coastguard Worker pop t6 402*c0909341SAndroid Build Coastguard Worker%endif 403*c0909341SAndroid Build Coastguard Worker jmp m(msac_decode_symbol_adapt4, SUFFIX).renorm3 404*c0909341SAndroid Build Coastguard Worker%endif 405*c0909341SAndroid Build Coastguard Worker 406*c0909341SAndroid Build Coastguard Workercglobal msac_decode_bool_equi, 0, 6, 0 407*c0909341SAndroid Build Coastguard Worker movifnidn t0, r0mp 408*c0909341SAndroid Build Coastguard Worker mov t1d, [t0+msac.rng] 409*c0909341SAndroid Build Coastguard Worker mov t4, [t0+msac.dif] 410*c0909341SAndroid Build Coastguard Worker mov t2d, t1d 411*c0909341SAndroid Build Coastguard Worker mov t1b, 8 412*c0909341SAndroid Build Coastguard Worker mov t3, t4 413*c0909341SAndroid Build Coastguard Worker mov eax, t1d 414*c0909341SAndroid Build Coastguard Worker shr t1d, 1 ; v 415*c0909341SAndroid Build Coastguard Worker shl rax, gprsize*8-17 ; vw 416*c0909341SAndroid Build Coastguard Worker sub t2d, t1d ; r - v 417*c0909341SAndroid Build Coastguard Worker sub t4, rax ; dif - vw 418*c0909341SAndroid Build Coastguard Worker cmovb t2d, t1d 419*c0909341SAndroid Build Coastguard Worker mov t1d, [t0+msac.cnt] 420*c0909341SAndroid Build Coastguard Worker cmovb t4, t3 421*c0909341SAndroid Build Coastguard Worker movifnidn t7, t0 422*c0909341SAndroid Build Coastguard Worker mov ecx, 0xbfff 423*c0909341SAndroid Build Coastguard Worker setb al ; the upper 32 bits contains garbage but that's OK 424*c0909341SAndroid Build Coastguard Worker sub ecx, t2d 425*c0909341SAndroid Build Coastguard Worker ; In this case of this function, (d =) 16 - clz(v) = 2 - (v >> 14) 426*c0909341SAndroid Build Coastguard Worker ; i.e. 
(0 <= d <= 2) and v < (3 << 14) 427*c0909341SAndroid Build Coastguard Worker shr ecx, 14 ; d 428*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 429*c0909341SAndroid Build Coastguard Worker movzx eax, al 430*c0909341SAndroid Build Coastguard Worker%endif 431*c0909341SAndroid Build Coastguard Worker jmp m(msac_decode_symbol_adapt4, SUFFIX).renorm5 432*c0909341SAndroid Build Coastguard Worker 433*c0909341SAndroid Build Coastguard Workercglobal msac_decode_bool, 0, 6, 0 434*c0909341SAndroid Build Coastguard Worker movifnidn t0, r0mp 435*c0909341SAndroid Build Coastguard Worker movifnidn t1d, r1m 436*c0909341SAndroid Build Coastguard Worker movzx eax, byte [t0+msac.rng+1] ; r >> 8 437*c0909341SAndroid Build Coastguard Worker mov t4, [t0+msac.dif] 438*c0909341SAndroid Build Coastguard Worker mov t2d, [t0+msac.rng] 439*c0909341SAndroid Build Coastguard Worker and t1d, ~63 440*c0909341SAndroid Build Coastguard Worker imul eax, t1d 441*c0909341SAndroid Build Coastguard Worker mov t3, t4 442*c0909341SAndroid Build Coastguard Worker shr eax, 7 443*c0909341SAndroid Build Coastguard Worker add eax, 4 ; v 444*c0909341SAndroid Build Coastguard Worker mov t1d, eax 445*c0909341SAndroid Build Coastguard Worker shl rax, gprsize*8-16 ; vw 446*c0909341SAndroid Build Coastguard Worker sub t2d, t1d ; r - v 447*c0909341SAndroid Build Coastguard Worker sub t4, rax ; dif - vw 448*c0909341SAndroid Build Coastguard Worker cmovb t2d, t1d 449*c0909341SAndroid Build Coastguard Worker cmovb t4, t3 450*c0909341SAndroid Build Coastguard Worker setb al 451*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 452*c0909341SAndroid Build Coastguard Worker movzx eax, al 453*c0909341SAndroid Build Coastguard Worker%endif 454*c0909341SAndroid Build Coastguard Worker jmp m(msac_decode_symbol_adapt4, SUFFIX).renorm3 455*c0909341SAndroid Build Coastguard Worker 456*c0909341SAndroid Build Coastguard Worker%macro HI_TOK 1 ; update_cdf 457*c0909341SAndroid Build Coastguard Worker%if 
ARCH_X86_64 == 0 458*c0909341SAndroid Build Coastguard Worker mov eax, -24 459*c0909341SAndroid Build Coastguard Worker%endif 460*c0909341SAndroid Build Coastguard Worker%%loop: 461*c0909341SAndroid Build Coastguard Worker%if %1 462*c0909341SAndroid Build Coastguard Worker movzx t2d, word [t1+3*2] 463*c0909341SAndroid Build Coastguard Worker%endif 464*c0909341SAndroid Build Coastguard Worker mova m1, m0 465*c0909341SAndroid Build Coastguard Worker pshuflw m2, m2, q0000 466*c0909341SAndroid Build Coastguard Worker psrlw m1, 6 467*c0909341SAndroid Build Coastguard Worker movd [buf+12], m2 468*c0909341SAndroid Build Coastguard Worker pand m2, m4 469*c0909341SAndroid Build Coastguard Worker psllw m1, 7 470*c0909341SAndroid Build Coastguard Worker pmulhuw m1, m2 471*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 == 0 472*c0909341SAndroid Build Coastguard Worker add eax, 5 473*c0909341SAndroid Build Coastguard Worker mov [buf+8], eax 474*c0909341SAndroid Build Coastguard Worker%endif 475*c0909341SAndroid Build Coastguard Worker pshuflw m3, m3, c_shuf 476*c0909341SAndroid Build Coastguard Worker paddw m1, m5 477*c0909341SAndroid Build Coastguard Worker movq [buf+16], m1 478*c0909341SAndroid Build Coastguard Worker psubusw m1, m3 479*c0909341SAndroid Build Coastguard Worker pxor m2, m2 480*c0909341SAndroid Build Coastguard Worker pcmpeqw m1, m2 481*c0909341SAndroid Build Coastguard Worker pmovmskb eax, m1 482*c0909341SAndroid Build Coastguard Worker%if %1 483*c0909341SAndroid Build Coastguard Worker lea ecx, [t2+80] 484*c0909341SAndroid Build Coastguard Worker pcmpeqw m2, m2 485*c0909341SAndroid Build Coastguard Worker shr ecx, 4 486*c0909341SAndroid Build Coastguard Worker cmp t2d, 32 487*c0909341SAndroid Build Coastguard Worker adc t2d, 0 488*c0909341SAndroid Build Coastguard Worker movd m3, ecx 489*c0909341SAndroid Build Coastguard Worker pavgw m2, m1 490*c0909341SAndroid Build Coastguard Worker psubw m2, m0 491*c0909341SAndroid Build Coastguard Worker psubw 
m0, m1 492*c0909341SAndroid Build Coastguard Worker psraw m2, m3 493*c0909341SAndroid Build Coastguard Worker paddw m0, m2 494*c0909341SAndroid Build Coastguard Worker movq [t1], m0 495*c0909341SAndroid Build Coastguard Worker mov [t1+3*2], t2w 496*c0909341SAndroid Build Coastguard Worker%endif 497*c0909341SAndroid Build Coastguard Worker tzcnt eax, eax 498*c0909341SAndroid Build Coastguard Worker movzx ecx, word [buf+rax+16] 499*c0909341SAndroid Build Coastguard Worker movzx t2d, word [buf+rax+14] 500*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 501*c0909341SAndroid Build Coastguard Worker add t6d, 5 502*c0909341SAndroid Build Coastguard Worker%endif 503*c0909341SAndroid Build Coastguard Worker sub eax, 5 ; setup for merging the tok_br and tok branches 504*c0909341SAndroid Build Coastguard Worker sub t2d, ecx 505*c0909341SAndroid Build Coastguard Worker shl rcx, gprsize*8-16 506*c0909341SAndroid Build Coastguard Worker sub t4, rcx 507*c0909341SAndroid Build Coastguard Worker bsr ecx, t2d 508*c0909341SAndroid Build Coastguard Worker xor ecx, 15 509*c0909341SAndroid Build Coastguard Worker shl t2d, cl 510*c0909341SAndroid Build Coastguard Worker shl t4, cl 511*c0909341SAndroid Build Coastguard Worker movd m2, t2d 512*c0909341SAndroid Build Coastguard Worker mov [t7+msac.rng], t2d 513*c0909341SAndroid Build Coastguard Worker sub t5d, ecx 514*c0909341SAndroid Build Coastguard Worker jae %%end 515*c0909341SAndroid Build Coastguard Worker%if UNIX64 == 0 516*c0909341SAndroid Build Coastguard Worker push t8 517*c0909341SAndroid Build Coastguard Worker%endif 518*c0909341SAndroid Build Coastguard Worker mov t2, [t7+msac.buf] 519*c0909341SAndroid Build Coastguard Worker mov t8, [t7+msac.end] 520*c0909341SAndroid Build Coastguard Worker lea rcx, [t2+gprsize] 521*c0909341SAndroid Build Coastguard Worker sub rcx, t8 522*c0909341SAndroid Build Coastguard Worker ja %%refill_eob 523*c0909341SAndroid Build Coastguard Worker mov t8, [t2] 524*c0909341SAndroid Build 
Coastguard Worker lea ecx, [t5+16-gprsize*8] 525*c0909341SAndroid Build Coastguard Worker not t8 526*c0909341SAndroid Build Coastguard Worker bswap t8 527*c0909341SAndroid Build Coastguard Worker shr t8, cl 528*c0909341SAndroid Build Coastguard Worker neg ecx 529*c0909341SAndroid Build Coastguard Worker shr ecx, 3 530*c0909341SAndroid Build Coastguard Worker or t4, t8 531*c0909341SAndroid Build Coastguard Worker%%refill_end: 532*c0909341SAndroid Build Coastguard Worker add t2, rcx 533*c0909341SAndroid Build Coastguard Worker lea t5d, [t5+rcx*8] 534*c0909341SAndroid Build Coastguard Worker mov [t7+msac.buf], t2 535*c0909341SAndroid Build Coastguard Worker%%refill_end2: 536*c0909341SAndroid Build Coastguard Worker%if UNIX64 == 0 537*c0909341SAndroid Build Coastguard Worker pop t8 538*c0909341SAndroid Build Coastguard Worker%endif 539*c0909341SAndroid Build Coastguard Worker%%end: 540*c0909341SAndroid Build Coastguard Worker movp m3, t4 541*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 542*c0909341SAndroid Build Coastguard Worker add t6d, eax ; CF = tok_br < 3 || tok == 15 543*c0909341SAndroid Build Coastguard Worker jnc %%loop 544*c0909341SAndroid Build Coastguard Worker lea eax, [t6+30] 545*c0909341SAndroid Build Coastguard Worker%else 546*c0909341SAndroid Build Coastguard Worker add eax, [buf+8] 547*c0909341SAndroid Build Coastguard Worker jnc %%loop 548*c0909341SAndroid Build Coastguard Worker add eax, 30 549*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT >= 16 550*c0909341SAndroid Build Coastguard Worker add esp, 36 551*c0909341SAndroid Build Coastguard Worker%else 552*c0909341SAndroid Build Coastguard Worker mov esp, [esp] 553*c0909341SAndroid Build Coastguard Worker%endif 554*c0909341SAndroid Build Coastguard Worker%endif 555*c0909341SAndroid Build Coastguard Worker mov [t7+msac.dif], t4 556*c0909341SAndroid Build Coastguard Worker shr eax, 1 557*c0909341SAndroid Build Coastguard Worker mov [t7+msac.cnt], t5d 558*c0909341SAndroid Build 
; Tail of the macro that begins earlier in the file. The instruction sequence
; is untouched; only the line-number/revision residue that had been fused into
; every source line was removed so the text assembles again.
    RET
%%pad_with_ones:
    ; ensure that dif is padded with at least 15 bits of ones at the end
    lea            ecx, [t5-16]
%if ARCH_X86_64
    ror            rcx, cl
%else
    shr            ecx, cl
%endif
    or              t4, rcx
    jmp %%refill_end2
%%refill_eob:
    cmp             t2, t8
    jae %%pad_with_ones
    mov             t8, [t8-gprsize]
    shl            ecx, 3
    shr             t8, cl
    lea            ecx, [t5+16-gprsize*8]
    not             t8
    bswap           t8
    shr             t8, cl
    neg            ecx
    or              t4, t8
    mov            t8d, [t7+msac.end]
    shr            ecx, 3
    sub            t8d, t2d
    cmp            ecx, t8d
    cmovae         ecx, t8d
    jmp %%refill_end
%endmacro

;-----------------------------------------------------------------------
; msac_decode_hi_tok
;
; cglobal operands: 0 arguments auto-loaded, 7 GPRs (8 on x86-64),
; 6 vector registers. t0..t7 are register aliases set up elsewhere in
; this file.
;
; Loads the decoder state (rng, dif, cnt, update_cdf) from the msac
; context at t0 and the first four 16-bit words at t1, then dispatches
; to one of two HI_TOK expansions:
;   HI_TOK 1  when update_cdf != 0 (the %1 paths write back to [t1])
;   HI_TOK 0  when update_cdf == 0
; NOTE(review): t1 is presumably the cdf pointer; confirm against the
; C prototype.
;
; Register setup performed here:
;   m0  = 4 words loaded from [t1]
;   m2  = rng (low dword)
;   m3  = dif (movp: movq on x86-64, movd on x86-32)
;   m4  = pw_0xff00 (low 4 words)
;   m5  = min_prob[12..15] = {12, 8, 4, 0}
;   t4  = dif, t5d = cnt, t7 = context pointer
;   t6d = -24 (x86-64 only)
;-----------------------------------------------------------------------
cglobal msac_decode_hi_tok, 0, 7 + ARCH_X86_64, 6
    ; expands to nothing on x86-64 (see its definition near the top of the file)
    DECODE_SYMBOL_ADAPT_INIT 1
%if ARCH_X86_64 == 0 && PIC
    ; 32-bit PIC: address the constants relative to a register instead
    ; of using absolute addresses
    LEA             t2, min_prob+12*2
    %define base t2-(min_prob+12*2)
%else
    %define base 0
%endif
    movq            m0, [t1]
    movd            m2, [t0+msac.rng]
    mov            eax, [t0+msac.update_cdf]
    movq            m4, [base+pw_0xff00]
    movp            m3, [t0+msac.dif]
    movq            m5, [base+min_prob+12*2]
    mov             t4, [t0+msac.dif]
    mov            t5d, [t0+msac.cnt]
%if ARCH_X86_64
    mov            t6d, -24
%endif
    movifnidn       t7, t0                 ; no-op when t7 and t0 are the same register
    test           eax, eax
    jz .no_update_cdf
    ; NOTE(review): relies on the HI_TOK expansion never falling through
    ; (the macro tail visible above ends in RET / jumps back into itself)
    HI_TOK           1
.no_update_cdf:
    HI_TOK           0

%if ARCH_X86_64
;-----------------------------------------------------------------------
; msac_decode_symbol_adapt16 (AVX2, x86-64 only)
;
; cglobal operands: 3 arguments auto-loaded (t0, t1, t2), 6 GPRs,
; 6 vector registers.
;   t0 = msac context (rng, dif, update_cdf are read from it)
;   t1 = pointer to 16 words loaded with an aligned 32-byte load; the
;        word at [t1+t2*2] is additionally read/written as a counter
;        when update_cdf != 0
;   t2 = index used for the min_prob offset and for the counter word
;        NOTE(review): presumably the symbol count; confirm against the
;        C prototype.
;
; The per-lane thresholds are spilled to a 32-byte-aligned scratch
; buffer, the first matching lane is located with pmovmskb/tzcnt, and
; control is handed to the shared SSE2 renormalisation code via a tail
; jump (this function has no RET of its own).
;-----------------------------------------------------------------------
INIT_YMM avx2
cglobal msac_decode_symbol_adapt16, 3, 6, 6
    lea            rax, [pw_0xff00]
    vpbroadcastw    m2, [t0+msac.rng]      ; low word of rng in every lane
    mova            m0, [t1]
    vpbroadcastw    m3, [t0+msac.dif+6]    ; word at byte offset 6 of dif (top 16 bits of the 64-bit value)
    vbroadcasti128  m4, [rax]              ; 0xff00 in every word lane
    mov            t3d, [t0+msac.update_cdf]
    mov            t4d, t2d                ; keep the original index for the counter access
    not             t2                     ; t2 = -(t2+1), used as a negative table offset below
    mov             r5, rsp
%if WIN64
    ; buf keeps whatever definition is in effect from earlier in the file
    and            rsp, ~31
    sub            rsp, 40
%else
    ; scratch buffer sits below the 32-byte-aligned-down stack pointer;
    ; the lowest byte touched is buf-4, at most 67 bytes below rsp
    ; NOTE(review): presumably relies on the SysV red zone
    and             r5, ~31
    %define buf r5-32
%endif
    psrlw           m1, m0, 6
    movd      [buf-4], xm2                 ; unmasked rng directly below buf, read as [buf+rax-2] when rax == 0
    pand            m2, m4                 ; rng & 0xff00
    psllw           m1, 7
    pmulhuw         m1, m2                 ; (((w >> 6) << 7) * (rng & 0xff00)) >> 16
    ; min_prob's 16 words are laid out immediately before pw_0xff00, so
    ; rax + t2*2 == &min_prob[15 - original t2]
    paddw           m1, [rax+t2*2]
    mova         [buf], m1
    pmaxuw          m1, m3
    pcmpeqw         m1, m3                 ; lane mask: threshold <= dif word (unsigned)
    pmovmskb       eax, m1
    test           t3d, t3d
    jz .renorm
    ; --- adaptation path (update_cdf != 0) ---
    movzx          t3d, word [t1+t4*2]     ; counter word
    pcmpeqw         m2, m2                 ; all ones
    lea            t2d, [t3+80]
    shr            t2d, 4                  ; shift amount = (counter + 80) >> 4
    cmp            t3d, 32
    adc            t3d, 0                  ; counter += (counter < 32)
    movd           xm3, t2d
    pavgw           m2, m1                 ; 0xffff where the mask is set, 0x8000 elsewhere
    psubw           m2, m0
    psubw           m0, m1                 ; subtracting the -1 mask adds 1 in matching lanes
    psraw           m2, xm3
    paddw           m0, m2
    mova          [t1], m0
    mov     [t1+t4*2], t3w
.renorm:
    tzcnt          eax, eax                ; byte offset of the first matching lane
    mov             t4, [t0+msac.dif]
    movzx          t1d, word [buf+rax-0]
    movzx          t2d, word [buf+rax-2]
    shr            eax, 1                  ; byte offset -> word index
%if WIN64
    mov            rsp, r5
%endif
    vzeroupper                             ; leave AVX state clean before entering SSE2 code
    jmp m(msac_decode_symbol_adapt4, _sse2).renorm2
%endif