; xref: /aosp_15_r20/external/libvpx/vp8/common/x86/dequantize_mmx.asm (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; The code in this file uses sym(), globalsym(), arg(), SHADOW_ARGS_TO_STACK,
; UNSHADOW_ARGS, GET_GOT, RESTORE_GOT, GLOBAL() and SECTION_RODATA, none of
; which are defined here; presumably they all come from this include.
%include "vpx_ports/x86_abi_support.asm"

SECTION .text

;-----------------------------------------------------------------------
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
;
; Multiplies 16 consecutive 16-bit words element by element:
;     dq[i] = low16(sq[i] * q[i])        for i = 0..15
; Note the data flow: the result is stored through arg(1) (dq) and the
; multipliers are read from arg(2) (q).
;
; In:    arg(0) = sq   32 bytes read
;        arg(1) = dq   32 bytes written
;        arg(2) = q    32 bytes read
; Clobb: rax, mm1 (rsi and rdi are saved and restored here)
; Note:  no emms is executed in this routine.
;-----------------------------------------------------------------------
globalsym(vp8_dequantize_b_impl_mmx)
sym(vp8_dequantize_b_impl_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    push        rsi
    push        rdi
    ; end prolog

        mov       rsi, arg(0) ;sq
        mov       rdi, arg(1) ;dq
        mov       rax, arg(2) ;q

        ; Four groups of four words each: load, multiply (keeping the
        ; low 16 bits of every product), store.
        movq      mm1, [rsi]
        pmullw    mm1, [rax+0]            ; words 0..3:   sq[i] * q[i]
        movq      [rdi], mm1

        movq      mm1, [rsi+8]
        pmullw    mm1, [rax+8]            ; words 4..7:   sq[i] * q[i]
        movq      [rdi+8], mm1

        movq      mm1, [rsi+16]
        pmullw    mm1, [rax+16]           ; words 8..11:  sq[i] * q[i]
        movq      [rdi+16], mm1

        movq      mm1, [rsi+24]
        pmullw    mm1, [rax+24]           ; words 12..15: sq[i] * q[i]
        movq      [rdi+24], mm1

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;void dequant_idct_add_mmx(
;short *input,            0
;short *dq,               1
;unsigned char *dest,     2
;int stride)              3
;
; Exported as vp8_dequant_idct_add_mmx.
;
; Steps performed:
;   1. Multiply the 16 words at input by the 16 words at dq (low 16 bits).
;   2. Overwrite the 32 bytes at input with zeros.
;   3. Run two butterfly passes over the 4x4 block with a 4x4 word
;      transpose after each pass. The second pass adds 4 and shifts
;      right arithmetically by 3.
;   4. Add each resulting row to 4 bytes of dest (rows are stride bytes
;      apart), saturate to 0..255 and store the 4 bytes back.
;
; Clobb: rax, rdx, mm0-mm7 (rdi is saved and restored here; rbx is handed
;        to GET_GOT / RESTORE_GOT, whose expansion is not visible here).
; Note:  no emms is executed in this routine.
;-----------------------------------------------------------------------
globalsym(vp8_dequant_idct_add_mmx)
sym(vp8_dequant_idct_add_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    GET_GOT     rbx
    push        rdi
    ; end prolog

        mov         rax,    arg(0) ;input
        mov         rdx,    arg(1) ;dq

        ; Dequantize: mm0..mm3 = rows 0..3 (four words each) of input * dq.
        movq        mm0,    [rax   ]
        pmullw      mm0,    [rdx]

        movq        mm1,    [rax +8]
        pmullw      mm1,    [rdx +8]

        movq        mm2,    [rax+16]
        pmullw      mm2,    [rdx+16]

        movq        mm3,    [rax+24]
        pmullw      mm3,    [rdx+24]

        mov         rdx,    arg(2) ;dest

        pxor        mm7,    mm7

        ; Side effect: the caller's input block is cleared (32 bytes).
        movq        [rax],   mm7
        movq        [rax+8], mm7

        movq        [rax+16],mm7
        movq        [rax+24],mm7


        movsxd      rdi,            dword ptr arg(3) ;stride (sign-extended)

        ; ---- first pass -------------------------------------------------
        ; ip0..ip3 below are the contents of mm0..mm3 on entry to the pass.
        psubw       mm0,            mm2             ; b1 = ip0 - ip2
        paddw       mm2,            mm2             ; 2 * ip2

        movq        mm5,            mm1
        paddw       mm2,            mm0             ; a1 = ip0 + ip2

        ; x_s1sqr2 (0x8A8C) is negative as a signed word, so pmulhw yields
        ; (x * (35468 - 65536)) >> 16; adding x back gives x * 35468 / 65536.
        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

        movq        mm7,            mm3             ;
        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];

        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
        psubw       mm7,            mm5             ; c1 = ip3*cos - ip1*sin (note the order)

        movq        mm5,            mm1
        movq        mm4,            mm3

        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
        paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
        paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

        paddw       mm3,            mm5             ; d1
        movq        mm6,            mm2             ; a1

        movq        mm4,            mm0             ; b1
        paddw       mm2,            mm3             ; row 0 = a1 + d1

        paddw       mm4,            mm7             ; row 2 = b1 + c1
        psubw       mm0,            mm7             ; row 1 = b1 - c1

        psubw       mm6,            mm3             ; row 3 = a1 - d1

        ; ---- 4x4 word transpose -----------------------------------------
        ; Rows 0,1,2,3 are in mm2, mm0, mm4, mm6. Lane labels are
        ; "row col", highest word first.
        movq        mm1,            mm2             ; 03 02 01 00
        movq        mm3,            mm4             ; 23 22 21 20

        punpcklwd   mm1,            mm0             ; 11 01 10 00
        punpckhwd   mm2,            mm0             ; 13 03 12 02

        punpcklwd   mm3,            mm6             ; 31 21 30 20
        punpckhwd   mm4,            mm6             ; 33 23 32 22

        movq        mm0,            mm1             ; 11 01 10 00
        movq        mm5,            mm2             ; 13 03 12 02

        punpckldq   mm0,            mm3             ; 30 20 10 00
        punpckhdq   mm1,            mm3             ; 31 21 11 01

        punpckldq   mm2,            mm4             ; 32 22 12 02
        punpckhdq   mm5,            mm4             ; 33 23 13 03

        movq        mm3,            mm5             ; 33 23 13 03

        ; ---- second pass ------------------------------------------------
        ; Same butterfly as the first pass, now on the transposed data,
        ; with +4 folded into a1 and b1 ahead of the final >> 3.
        psubw       mm0,            mm2             ; b1 = ip0 - ip2
        paddw       mm2,            mm2             ; 2 * ip2

        movq        mm5,            mm1
        paddw       mm2,            mm0             ; a1 = ip0 + ip2

        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

        movq        mm7,            mm3             ;
        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];

        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
        psubw       mm7,            mm5             ; c1 = ip3*cos - ip1*sin (note the order)

        movq        mm5,            mm1
        movq        mm4,            mm3

        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
        paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
        paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

        paddw       mm3,            mm5             ; d1
        paddw       mm0,            [GLOBAL(fours)] ; b1 + 4

        paddw       mm2,            [GLOBAL(fours)] ; a1 + 4
        movq        mm6,            mm2             ; a1 + 4

        movq        mm4,            mm0             ; b1 + 4
        paddw       mm2,            mm3             ; (a1 + 4) + d1

        paddw       mm4,            mm7             ; (b1 + 4) + c1
        psubw       mm0,            mm7             ; (b1 + 4) - c1

        psubw       mm6,            mm3             ; (a1 + 4) - d1
        psraw       mm2,            3

        psraw       mm0,            3
        psraw       mm4,            3

        psraw       mm6,            3

        ; ---- transpose back ---------------------------------------------
        ; Same shuffle as above, applied to mm2, mm0, mm4, mm6. Afterwards
        ; mm0, mm1, mm2, mm5 are the four words added to dest rows 0..3.
        movq        mm1,            mm2
        movq        mm3,            mm4

        punpcklwd   mm1,            mm0
        punpckhwd   mm2,            mm0

        punpcklwd   mm3,            mm6
        punpckhwd   mm4,            mm6

        movq        mm0,            mm1
        movq        mm5,            mm2

        punpckldq   mm0,            mm3
        punpckhdq   mm1,            mm3

        punpckldq   mm2,            mm4
        punpckhdq   mm5,            mm4

        pxor        mm7,            mm7             ; zero, for byte<->word unpack/pack

        ; ---- add to destination -----------------------------------------
        ; Per row: load 4 dest bytes, widen to words, saturating signed
        ; add of the residual, pack with unsigned saturation, store 4 bytes.
        movd        mm4,            [rdx]
        punpcklbw   mm4,            mm7
        paddsw      mm0,            mm4
        packuswb    mm0,            mm7
        movd        [rdx],          mm0             ; dest row 0

        movd        mm4,            [rdx+rdi]
        punpcklbw   mm4,            mm7
        paddsw      mm1,            mm4
        packuswb    mm1,            mm7
        movd        [rdx+rdi],      mm1             ; dest row 1

        movd        mm4,            [rdx+2*rdi]
        punpcklbw   mm4,            mm7
        paddsw      mm2,            mm4
        packuswb    mm2,            mm7
        movd        [rdx+rdi*2],    mm2             ; dest row 2

        add         rdx,            rdi             ; so [rdx+2*rdi] is dest + 3*stride

        movd        mm4,            [rdx+2*rdi]
        punpcklbw   mm4,            mm7
        paddsw      mm5,            mm4
        packuswb    mm5,            mm7
        movd        [rdx+rdi*2],    mm5             ; dest row 3

    ; begin epilog
    pop rdi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

; Constants for vp8_dequant_idct_add_mmx. Each is one word repeated four
; times (8 bytes), so it can be used directly as a pmulhw / paddw operand.
SECTION_RODATA
align 16
; 0x8A8C = 35468; 35468 / 65536 ~= 0.5412 = sqrt(2) * sin(pi/8).
; As a signed word this value is negative, which is why the code adds the
; multiplicand back after pmulhw.
x_s1sqr2:
    times 4 dw 0x8A8C
align 16
; 0x4E7B = 20091; 20091 / 65536 ~= 0.3066 = sqrt(2) * cos(pi/8) - 1.
; The code adds the multiplicand back after pmulhw to restore the "+ 1".
x_c1sqr2less1:
    times 4 dw 0x4E7B
align 16
; Added to the second-pass terms ahead of the arithmetic shift right by 3.
fours:
    times 4 dw 0x0004