xref: /aosp_15_r20/external/libvpx/vp8/common/x86/idctllm_mmx.asm (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1*fb1b10abSAndroid Build Coastguard Worker;
2*fb1b10abSAndroid Build Coastguard Worker;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker;
4*fb1b10abSAndroid Build Coastguard Worker;  Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker;  that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker;  tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker;  in the file PATENTS.  All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker;  be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker;
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker
12*fb1b10abSAndroid Build Coastguard Worker%include "vpx_ports/x86_abi_support.asm"
13*fb1b10abSAndroid Build Coastguard Worker
14*fb1b10abSAndroid Build Coastguard Worker; /****************************************************************************
15*fb1b10abSAndroid Build Coastguard Worker; * Notes:
16*fb1b10abSAndroid Build Coastguard Worker; *
17*fb1b10abSAndroid Build Coastguard Worker; * This implementation makes use of 16-bit fixed-point versions of two multiply
18*fb1b10abSAndroid Build Coastguard Worker; * constants:
19*fb1b10abSAndroid Build Coastguard Worker; *        1.   sqrt(2) * cos (pi/8)
20*fb1b10abSAndroid Build Coastguard Worker; *        2.   sqrt(2) * sin (pi/8)
21*fb1b10abSAndroid Build Coastguard Worker; * Because the first constant is bigger than 1, to maintain the same 16 bit
22*fb1b10abSAndroid Build Coastguard Worker; * fixed point precision as the second one, we use a trick of
23*fb1b10abSAndroid Build Coastguard Worker; *        x * a = x + x*(a-1)
24*fb1b10abSAndroid Build Coastguard Worker; * so
25*fb1b10abSAndroid Build Coastguard Worker; *        x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
26*fb1b10abSAndroid Build Coastguard Worker; *
27*fb1b10abSAndroid Build Coastguard Worker; * For the second constant, because the 16-bit version is 35468, which
28*fb1b10abSAndroid Build Coastguard Worker; * is bigger than 32767, a signed 16-bit multiply treats it as a negative
29*fb1b10abSAndroid Build Coastguard Worker; * number, so x has to be added back after the multiply:
30*fb1b10abSAndroid Build Coastguard Worker; *        (x * (unsigned)35468) >> 16 = ((x * (signed)35468) >> 16) + x
31*fb1b10abSAndroid Build Coastguard Worker; *
32*fb1b10abSAndroid Build Coastguard Worker; **************************************************************************/
33*fb1b10abSAndroid Build Coastguard Worker
34*fb1b10abSAndroid Build Coastguard WorkerSECTION .text
35*fb1b10abSAndroid Build Coastguard Worker
;-----------------------------------------------------------------------------
; vp8_short_idct4x4llm_mmx
;
; Applies the same four-input butterfly twice to a 4x4 block of 16-bit
; values, transposing the block after each pass. The second pass rounds
; every result with (x + 4) >> 3. Each resulting row of four words is then
; added to four prediction bytes and stored as four unsigned-saturated bytes.
;
; Arguments (all read through the arg() macro after SHADOW_ARGS_TO_STACK 5):
;   arg(0) input  - 32 bytes are read: four movq loads at +0, +8, +16, +24.
;                   The block is not written back; the zeroing stores below
;                   are compiled out with %if 0.
;   arg(1) pred   - 4 bytes are read at pred + k*pitch, k = 0..3
;   arg(2) pitch  - read as a dword and sign-extended
;   arg(3) dest   - 4 bytes are written at dest + k*stride, k = 0..3
;   arg(4) stride - read as a dword and sign-extended
;
; Registers: rax, rdx and mm0-mm7 are overwritten. rsi and rdi are pushed in
; the prolog and popped in the epilog.
; SHADOW_ARGS_TO_STACK, GET_GOT, RESTORE_GOT, UNSHADOW_ARGS, arg() and
; GLOBAL() are macros from the included x86_abi_support.asm; their expansion
; is not visible in this file.
; NOTE(review): no emms is executed here; presumably the caller clears the
; MMX state before using x87 code - confirm against the callers.
;-----------------------------------------------------------------------------
36*fb1b10abSAndroid Build Coastguard Worker;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred,
37*fb1b10abSAndroid Build Coastguard Worker;int pitch, unsigned char *dest,int stride)
38*fb1b10abSAndroid Build Coastguard Workerglobalsym(vp8_short_idct4x4llm_mmx)
39*fb1b10abSAndroid Build Coastguard Workersym(vp8_short_idct4x4llm_mmx):
40*fb1b10abSAndroid Build Coastguard Worker    push        rbp
41*fb1b10abSAndroid Build Coastguard Worker    mov         rbp, rsp
42*fb1b10abSAndroid Build Coastguard Worker    SHADOW_ARGS_TO_STACK 5
43*fb1b10abSAndroid Build Coastguard Worker    GET_GOT     rbx
44*fb1b10abSAndroid Build Coastguard Worker    push        rsi
45*fb1b10abSAndroid Build Coastguard Worker    push        rdi
46*fb1b10abSAndroid Build Coastguard Worker    ; end prolog
47*fb1b10abSAndroid Build Coastguard Worker
48*fb1b10abSAndroid Build Coastguard Worker    mov         rax,    arg(0)              ;input
49*fb1b10abSAndroid Build Coastguard Worker    mov         rsi,    arg(1)              ;pred
50*fb1b10abSAndroid Build Coastguard Worker
    ; Load the block: one row of four 16-bit words per register
    ; (mm0 = row 0 ... mm3 = row 3; called ip0..ip3 in the comments below).
51*fb1b10abSAndroid Build Coastguard Worker    movq        mm0,    [rax   ]
52*fb1b10abSAndroid Build Coastguard Worker    movq        mm1,    [rax+ 8]
53*fb1b10abSAndroid Build Coastguard Worker    movq        mm2,    [rax+16]
54*fb1b10abSAndroid Build Coastguard Worker    movq        mm3,    [rax+24]
55*fb1b10abSAndroid Build Coastguard Worker
    ; Disabled code: would zero the 32 input bytes after they are loaded.
56*fb1b10abSAndroid Build Coastguard Worker%if 0
57*fb1b10abSAndroid Build Coastguard Worker    pxor        mm7,    mm7
58*fb1b10abSAndroid Build Coastguard Worker    movq        [rax],   mm7
59*fb1b10abSAndroid Build Coastguard Worker    movq        [rax+8], mm7
60*fb1b10abSAndroid Build Coastguard Worker    movq        [rax+16],mm7
61*fb1b10abSAndroid Build Coastguard Worker    movq        [rax+24],mm7
62*fb1b10abSAndroid Build Coastguard Worker%endif
    ; rax is reused from here on: the input pointer is no longer needed.
63*fb1b10abSAndroid Build Coastguard Worker    movsxd      rax,    dword ptr arg(2)    ;pitch (sign-extended)
64*fb1b10abSAndroid Build Coastguard Worker    mov         rdx,    arg(3)              ;dest
65*fb1b10abSAndroid Build Coastguard Worker    movsxd      rdi,    dword ptr arg(4)    ;stride (sign-extended)
66*fb1b10abSAndroid Build Coastguard Worker
67*fb1b10abSAndroid Build Coastguard Worker
    ; ---- Pass 1 -----------------------------------------------------------
    ; The butterfly combines the four registers; each of the four word lanes
    ; is processed independently. All arithmetic is wrapping 16-bit
    ; (paddw/psubw). a1 is formed as b1 + 2*ip2 so ip0 need not be copied.
68*fb1b10abSAndroid Build Coastguard Worker    psubw       mm0,            mm2             ; b1 = ip0 - ip2
69*fb1b10abSAndroid Build Coastguard Worker    paddw       mm2,            mm2             ; mm2 = 2 * ip2
70*fb1b10abSAndroid Build Coastguard Worker
71*fb1b10abSAndroid Build Coastguard Worker    movq        mm5,            mm1
72*fb1b10abSAndroid Build Coastguard Worker    paddw       mm2,            mm0             ; a1 = ip0 + ip2 (= b1 + 2*ip2)
73*fb1b10abSAndroid Build Coastguard Worker
    ; pmulhw keeps the high word of the signed product; adding the operand
    ; back afterwards completes the multiply (see the notes at the top of
    ; the file for both constants).
74*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
75*fb1b10abSAndroid Build Coastguard Worker    paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
76*fb1b10abSAndroid Build Coastguard Worker
77*fb1b10abSAndroid Build Coastguard Worker    movq        mm7,            mm3             ;
78*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
79*fb1b10abSAndroid Build Coastguard Worker
80*fb1b10abSAndroid Build Coastguard Worker    paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
81*fb1b10abSAndroid Build Coastguard Worker    psubw       mm7,            mm5             ; mm7 = ip3*cos(pi/8)*sqrt(2) - ip1*sin(pi/8)*sqrt(2)
82*fb1b10abSAndroid Build Coastguard Worker
83*fb1b10abSAndroid Build Coastguard Worker    movq        mm5,            mm1
84*fb1b10abSAndroid Build Coastguard Worker    movq        mm4,            mm3
85*fb1b10abSAndroid Build Coastguard Worker
86*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
87*fb1b10abSAndroid Build Coastguard Worker    paddw       mm5,            mm1
88*fb1b10abSAndroid Build Coastguard Worker
89*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
90*fb1b10abSAndroid Build Coastguard Worker    paddw       mm3,            mm4
91*fb1b10abSAndroid Build Coastguard Worker
92*fb1b10abSAndroid Build Coastguard Worker    paddw       mm3,            mm5             ; d1 = ip1*cos(pi/8)*sqrt(2) + ip3*sin(pi/8)*sqrt(2)
93*fb1b10abSAndroid Build Coastguard Worker    movq        mm6,            mm2             ; a1
94*fb1b10abSAndroid Build Coastguard Worker
95*fb1b10abSAndroid Build Coastguard Worker    movq        mm4,            mm0             ; b1
96*fb1b10abSAndroid Build Coastguard Worker    paddw       mm2,            mm3             ; mm2 = a1 + d1 (row 0 of the transpose input)
97*fb1b10abSAndroid Build Coastguard Worker
98*fb1b10abSAndroid Build Coastguard Worker    paddw       mm4,            mm7             ; mm4 = b1 + mm7 (row 2 of the transpose input)
99*fb1b10abSAndroid Build Coastguard Worker    psubw       mm0,            mm7             ; mm0 = b1 - mm7 (row 1 of the transpose input)
100*fb1b10abSAndroid Build Coastguard Worker
101*fb1b10abSAndroid Build Coastguard Worker    psubw       mm6,            mm3             ; mm6 = a1 - d1 (row 3 of the transpose input)
102*fb1b10abSAndroid Build Coastguard Worker
    ; 4x4 word transpose. Layout comments list the lanes from the most- to
    ; the least-significant word; each two-digit tag is <row><lane>, with
    ; rows 0, 1, 2, 3 held in mm2, mm0, mm4, mm6 respectively.
103*fb1b10abSAndroid Build Coastguard Worker    movq        mm1,            mm2             ; 03 02 01 00
104*fb1b10abSAndroid Build Coastguard Worker    movq        mm3,            mm4             ; 23 22 21 20
105*fb1b10abSAndroid Build Coastguard Worker
106*fb1b10abSAndroid Build Coastguard Worker    punpcklwd   mm1,            mm0             ; 11 01 10 00
107*fb1b10abSAndroid Build Coastguard Worker    punpckhwd   mm2,            mm0             ; 13 03 12 02
108*fb1b10abSAndroid Build Coastguard Worker
109*fb1b10abSAndroid Build Coastguard Worker    punpcklwd   mm3,            mm6             ; 31 21 30 20
110*fb1b10abSAndroid Build Coastguard Worker    punpckhwd   mm4,            mm6             ; 33 23 32 22
111*fb1b10abSAndroid Build Coastguard Worker
112*fb1b10abSAndroid Build Coastguard Worker    movq        mm0,            mm1             ; 11 01 10 00
113*fb1b10abSAndroid Build Coastguard Worker    movq        mm5,            mm2             ; 13 03 12 02
114*fb1b10abSAndroid Build Coastguard Worker
115*fb1b10abSAndroid Build Coastguard Worker    punpckldq   mm0,            mm3             ; 30 20 10 00
116*fb1b10abSAndroid Build Coastguard Worker    punpckhdq   mm1,            mm3             ; 31 21 11 01
117*fb1b10abSAndroid Build Coastguard Worker
118*fb1b10abSAndroid Build Coastguard Worker    punpckldq   mm2,            mm4             ; 32 22 12 02
119*fb1b10abSAndroid Build Coastguard Worker    punpckhdq   mm5,            mm4             ; 33 23 13 03
120*fb1b10abSAndroid Build Coastguard Worker
    ; Move the fourth transposed vector into mm3 so that mm0..mm3 again feed
    ; the butterfly in the same register assignment as pass 1.
121*fb1b10abSAndroid Build Coastguard Worker    movq        mm3,            mm5             ; 33 23 13 03
122*fb1b10abSAndroid Build Coastguard Worker
    ; ---- Pass 2 -----------------------------------------------------------
    ; Same butterfly as pass 1, applied to the transposed data (ip0..ip3 now
    ; name the transposed vectors in mm0..mm3), followed by rounding.
123*fb1b10abSAndroid Build Coastguard Worker    psubw       mm0,            mm2             ; b1 = ip0 - ip2
124*fb1b10abSAndroid Build Coastguard Worker    paddw       mm2,            mm2             ; mm2 = 2 * ip2
125*fb1b10abSAndroid Build Coastguard Worker
126*fb1b10abSAndroid Build Coastguard Worker    movq        mm5,            mm1
127*fb1b10abSAndroid Build Coastguard Worker    paddw       mm2,            mm0             ; a1 = ip0 + ip2 (= b1 + 2*ip2)
128*fb1b10abSAndroid Build Coastguard Worker
129*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
130*fb1b10abSAndroid Build Coastguard Worker    paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
131*fb1b10abSAndroid Build Coastguard Worker
132*fb1b10abSAndroid Build Coastguard Worker    movq        mm7,            mm3             ;
133*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
134*fb1b10abSAndroid Build Coastguard Worker
135*fb1b10abSAndroid Build Coastguard Worker    paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
136*fb1b10abSAndroid Build Coastguard Worker    psubw       mm7,            mm5             ; mm7 = ip3*cos(pi/8)*sqrt(2) - ip1*sin(pi/8)*sqrt(2)
137*fb1b10abSAndroid Build Coastguard Worker
138*fb1b10abSAndroid Build Coastguard Worker    movq        mm5,            mm1
139*fb1b10abSAndroid Build Coastguard Worker    movq        mm4,            mm3
140*fb1b10abSAndroid Build Coastguard Worker
141*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
142*fb1b10abSAndroid Build Coastguard Worker    paddw       mm5,            mm1
143*fb1b10abSAndroid Build Coastguard Worker
144*fb1b10abSAndroid Build Coastguard Worker    pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
145*fb1b10abSAndroid Build Coastguard Worker    paddw       mm3,            mm4
146*fb1b10abSAndroid Build Coastguard Worker
    ; The rounding bias (+4 per lane) is folded into a1 and b1 once, so all
    ; four sums/differences below carry it before the >> 3.
147*fb1b10abSAndroid Build Coastguard Worker    paddw       mm3,            mm5             ; d1 = ip1*cos(pi/8)*sqrt(2) + ip3*sin(pi/8)*sqrt(2)
148*fb1b10abSAndroid Build Coastguard Worker    paddw       mm0,            [GLOBAL(fours)]
149*fb1b10abSAndroid Build Coastguard Worker
150*fb1b10abSAndroid Build Coastguard Worker    paddw       mm2,            [GLOBAL(fours)]
151*fb1b10abSAndroid Build Coastguard Worker    movq        mm6,            mm2             ; a1 + 4
152*fb1b10abSAndroid Build Coastguard Worker
153*fb1b10abSAndroid Build Coastguard Worker    movq        mm4,            mm0             ; b1 + 4
154*fb1b10abSAndroid Build Coastguard Worker    paddw       mm2,            mm3             ; mm2 = a1 + d1 + 4 (row 0 of the transpose input)
155*fb1b10abSAndroid Build Coastguard Worker
156*fb1b10abSAndroid Build Coastguard Worker    paddw       mm4,            mm7             ; mm4 = b1 + mm7 + 4 (row 2 of the transpose input)
157*fb1b10abSAndroid Build Coastguard Worker    psubw       mm0,            mm7             ; mm0 = b1 - mm7 + 4 (row 1 of the transpose input)
158*fb1b10abSAndroid Build Coastguard Worker
159*fb1b10abSAndroid Build Coastguard Worker    psubw       mm6,            mm3             ; mm6 = a1 - d1 + 4 (row 3 of the transpose input)
    ; Arithmetic shift right by 3 completes the (x + 4) >> 3 rounding.
160*fb1b10abSAndroid Build Coastguard Worker    psraw       mm2,            3
161*fb1b10abSAndroid Build Coastguard Worker
162*fb1b10abSAndroid Build Coastguard Worker    psraw       mm0,            3
163*fb1b10abSAndroid Build Coastguard Worker    psraw       mm4,            3
164*fb1b10abSAndroid Build Coastguard Worker
165*fb1b10abSAndroid Build Coastguard Worker    psraw       mm6,            3
166*fb1b10abSAndroid Build Coastguard Worker
    ; Second 4x4 word transpose, same instruction sequence and tag notation
    ; as after pass 1 (rows 0, 1, 2, 3 in mm2, mm0, mm4, mm6).
167*fb1b10abSAndroid Build Coastguard Worker    movq        mm1,            mm2             ; 03 02 01 00
168*fb1b10abSAndroid Build Coastguard Worker    movq        mm3,            mm4             ; 23 22 21 20
169*fb1b10abSAndroid Build Coastguard Worker
170*fb1b10abSAndroid Build Coastguard Worker    punpcklwd   mm1,            mm0             ; 11 01 10 00
171*fb1b10abSAndroid Build Coastguard Worker    punpckhwd   mm2,            mm0             ; 13 03 12 02
172*fb1b10abSAndroid Build Coastguard Worker
173*fb1b10abSAndroid Build Coastguard Worker    punpcklwd   mm3,            mm6             ; 31 21 30 20
174*fb1b10abSAndroid Build Coastguard Worker    punpckhwd   mm4,            mm6             ; 33 23 32 22
175*fb1b10abSAndroid Build Coastguard Worker
176*fb1b10abSAndroid Build Coastguard Worker    movq        mm0,            mm1             ; 11 01 10 00
177*fb1b10abSAndroid Build Coastguard Worker    movq        mm5,            mm2             ; 13 03 12 02
178*fb1b10abSAndroid Build Coastguard Worker
179*fb1b10abSAndroid Build Coastguard Worker    punpckldq   mm0,            mm3             ; 30 20 10 00
180*fb1b10abSAndroid Build Coastguard Worker    punpckhdq   mm1,            mm3             ; 31 21 11 01
181*fb1b10abSAndroid Build Coastguard Worker
182*fb1b10abSAndroid Build Coastguard Worker    punpckldq   mm2,            mm4             ; 32 22 12 02
183*fb1b10abSAndroid Build Coastguard Worker    punpckhdq   mm5,            mm4             ; 33 23 13 03
184*fb1b10abSAndroid Build Coastguard Worker
    ; ---- Add prediction and store -----------------------------------------
    ; mm0, mm1, mm2, mm5 are added to prediction lines 0, 1, 2, 3. For each
    ; line: load 4 pred bytes, zero-extend them to words (mm7 = 0), add with
    ; signed saturation, pack to bytes with unsigned saturation, store 4 bytes.
185*fb1b10abSAndroid Build Coastguard Worker    pxor        mm7,            mm7
186*fb1b10abSAndroid Build Coastguard Worker
187*fb1b10abSAndroid Build Coastguard Worker    movd        mm4,            [rsi]
188*fb1b10abSAndroid Build Coastguard Worker    punpcklbw   mm4,            mm7
189*fb1b10abSAndroid Build Coastguard Worker    paddsw      mm0,            mm4
190*fb1b10abSAndroid Build Coastguard Worker    packuswb    mm0,            mm7
191*fb1b10abSAndroid Build Coastguard Worker    movd        [rdx],          mm0
192*fb1b10abSAndroid Build Coastguard Worker
193*fb1b10abSAndroid Build Coastguard Worker    movd        mm4,            [rsi+rax]
194*fb1b10abSAndroid Build Coastguard Worker    punpcklbw   mm4,            mm7
195*fb1b10abSAndroid Build Coastguard Worker    paddsw      mm1,            mm4
196*fb1b10abSAndroid Build Coastguard Worker    packuswb    mm1,            mm7
197*fb1b10abSAndroid Build Coastguard Worker    movd        [rdx+rdi],      mm1
198*fb1b10abSAndroid Build Coastguard Worker
199*fb1b10abSAndroid Build Coastguard Worker    movd        mm4,            [rsi+2*rax]
200*fb1b10abSAndroid Build Coastguard Worker    punpcklbw   mm4,            mm7
201*fb1b10abSAndroid Build Coastguard Worker    paddsw      mm2,            mm4
202*fb1b10abSAndroid Build Coastguard Worker    packuswb    mm2,            mm7
203*fb1b10abSAndroid Build Coastguard Worker    movd        [rdx+rdi*2],    mm2
204*fb1b10abSAndroid Build Coastguard Worker
    ; Advance both pointers by one line so that base + 2*step addresses
    ; line 3 (a *3 scale cannot be encoded in one addressing mode).
205*fb1b10abSAndroid Build Coastguard Worker    add         rdx,            rdi
206*fb1b10abSAndroid Build Coastguard Worker    add         rsi,            rax
207*fb1b10abSAndroid Build Coastguard Worker
208*fb1b10abSAndroid Build Coastguard Worker    movd        mm4,            [rsi+2*rax]
209*fb1b10abSAndroid Build Coastguard Worker    punpcklbw   mm4,            mm7
210*fb1b10abSAndroid Build Coastguard Worker    paddsw      mm5,            mm4
211*fb1b10abSAndroid Build Coastguard Worker    packuswb    mm5,            mm7
212*fb1b10abSAndroid Build Coastguard Worker    movd        [rdx+rdi*2],    mm5
213*fb1b10abSAndroid Build Coastguard Worker
214*fb1b10abSAndroid Build Coastguard Worker    ; begin epilog
    ; Undo the prolog in reverse order.
215*fb1b10abSAndroid Build Coastguard Worker    pop rdi
216*fb1b10abSAndroid Build Coastguard Worker    pop rsi
217*fb1b10abSAndroid Build Coastguard Worker    RESTORE_GOT
218*fb1b10abSAndroid Build Coastguard Worker    UNSHADOW_ARGS
219*fb1b10abSAndroid Build Coastguard Worker    pop         rbp
220*fb1b10abSAndroid Build Coastguard Worker    ret
221*fb1b10abSAndroid Build Coastguard Worker
;-----------------------------------------------------------------------------
; vp8_dc_only_idct_add_mmx
;
; Computes dc = (input_dc + 4) >> 3 as a 16-bit arithmetic shift, broadcasts
; it to four word lanes, and adds it to a 4x4 block of prediction bytes.
; Each sum uses signed-saturating word addition and is packed to bytes with
; unsigned saturation before being stored to the destination.
;
; Arguments (all read through the arg() macro after SHADOW_ARGS_TO_STACK 5):
;   arg(0) input_dc    - loaded with movd; only the low 16 bits reach the result
;   arg(1) pred_ptr    - 4 bytes are read at pred_ptr + k*pred_stride, k = 0..3
;   arg(2) pred_stride - read as a dword and sign-extended
;   arg(3) dst_ptr     - 4 bytes are written at dst_ptr + k*stride, k = 0..3
;   arg(4) stride      - read as a dword and sign-extended
;
; Registers: rax, rcx, rdx and mm0-mm5 are overwritten. All four prediction
; lines are loaded before rax/rdx are reloaded with the destination values.
; NOTE(review): no emms is executed here; presumably the caller clears the
; MMX state before using x87 code - confirm against the callers.
;-----------------------------------------------------------------------------
222*fb1b10abSAndroid Build Coastguard Worker;void vp8_dc_only_idct_add_mmx(
223*fb1b10abSAndroid Build Coastguard Worker;short input_dc,
224*fb1b10abSAndroid Build Coastguard Worker;unsigned char *pred_ptr,
225*fb1b10abSAndroid Build Coastguard Worker;int pred_stride,
226*fb1b10abSAndroid Build Coastguard Worker;unsigned char *dst_ptr,
227*fb1b10abSAndroid Build Coastguard Worker;int stride)
228*fb1b10abSAndroid Build Coastguard Workerglobalsym(vp8_dc_only_idct_add_mmx)
229*fb1b10abSAndroid Build Coastguard Workersym(vp8_dc_only_idct_add_mmx):
230*fb1b10abSAndroid Build Coastguard Worker    push        rbp
231*fb1b10abSAndroid Build Coastguard Worker    mov         rbp, rsp
232*fb1b10abSAndroid Build Coastguard Worker    SHADOW_ARGS_TO_STACK 5
233*fb1b10abSAndroid Build Coastguard Worker    GET_GOT     rbx
234*fb1b10abSAndroid Build Coastguard Worker    ; end prolog
235*fb1b10abSAndroid Build Coastguard Worker
236*fb1b10abSAndroid Build Coastguard Worker        movd        mm5,            arg(0) ;input_dc (low word is the value used)
237*fb1b10abSAndroid Build Coastguard Worker        mov         rax,            arg(1) ;pred_ptr
238*fb1b10abSAndroid Build Coastguard Worker        movsxd      rdx,            dword ptr arg(2) ;pred_stride (sign-extended)
239*fb1b10abSAndroid Build Coastguard Worker
        ; mm0 = 0: zero source for the byte->word unpacks and for the upper
        ; half of every pack below.
240*fb1b10abSAndroid Build Coastguard Worker        pxor        mm0,            mm0
241*fb1b10abSAndroid Build Coastguard Worker
        ; Rounding bias: +4 in every word lane, then >> 3 (arithmetic).
        ; rcx = 3 * pred_stride, the offset of prediction line 3.
242*fb1b10abSAndroid Build Coastguard Worker        paddw       mm5,            [GLOBAL(fours)]
243*fb1b10abSAndroid Build Coastguard Worker        lea         rcx,            [rdx + rdx*2]
244*fb1b10abSAndroid Build Coastguard Worker
245*fb1b10abSAndroid Build Coastguard Worker        psraw       mm5,            3
246*fb1b10abSAndroid Build Coastguard Worker
        ; Broadcast word 0 of mm5 to all four word lanes: duplicate it into
        ; the low dword, then duplicate that dword into the high dword.
247*fb1b10abSAndroid Build Coastguard Worker        punpcklwd   mm5,            mm5
248*fb1b10abSAndroid Build Coastguard Worker
249*fb1b10abSAndroid Build Coastguard Worker        punpckldq   mm5,            mm5
250*fb1b10abSAndroid Build Coastguard Worker
        ; Load the four prediction lines, 4 bytes each.
251*fb1b10abSAndroid Build Coastguard Worker        movd        mm1,            [rax]
252*fb1b10abSAndroid Build Coastguard Worker        movd        mm2,            [rax+rdx]
253*fb1b10abSAndroid Build Coastguard Worker        movd        mm3,            [rax+2*rdx]
254*fb1b10abSAndroid Build Coastguard Worker        movd        mm4,            [rax+rcx]
255*fb1b10abSAndroid Build Coastguard Worker
        ; rax/rdx are reused for the destination now that all prediction
        ; bytes are in registers.
256*fb1b10abSAndroid Build Coastguard Worker        mov         rax,            arg(3) ;dst_ptr -- destination
257*fb1b10abSAndroid Build Coastguard Worker        movsxd      rdx,            dword ptr arg(4) ;stride of the destination (sign-extended)
258*fb1b10abSAndroid Build Coastguard Worker
        ; Per line: bytes -> words, add dc with signed saturation, pack back
        ; to bytes with unsigned saturation (upper 4 bytes come from mm0 = 0).
259*fb1b10abSAndroid Build Coastguard Worker        punpcklbw   mm1,            mm0
260*fb1b10abSAndroid Build Coastguard Worker        paddsw      mm1,            mm5
261*fb1b10abSAndroid Build Coastguard Worker        packuswb    mm1,            mm0              ; saturate to unsigned bytes while packing
262*fb1b10abSAndroid Build Coastguard Worker        lea         rcx,            [rdx + rdx*2]
263*fb1b10abSAndroid Build Coastguard Worker
264*fb1b10abSAndroid Build Coastguard Worker        punpcklbw   mm2,            mm0
265*fb1b10abSAndroid Build Coastguard Worker        paddsw      mm2,            mm5
266*fb1b10abSAndroid Build Coastguard Worker        packuswb    mm2,            mm0              ; saturate to unsigned bytes while packing
267*fb1b10abSAndroid Build Coastguard Worker
268*fb1b10abSAndroid Build Coastguard Worker        punpcklbw   mm3,            mm0
269*fb1b10abSAndroid Build Coastguard Worker        paddsw      mm3,            mm5
270*fb1b10abSAndroid Build Coastguard Worker        packuswb    mm3,            mm0              ; saturate to unsigned bytes while packing
271*fb1b10abSAndroid Build Coastguard Worker
272*fb1b10abSAndroid Build Coastguard Worker        punpcklbw   mm4,            mm0
273*fb1b10abSAndroid Build Coastguard Worker        paddsw      mm4,            mm5
274*fb1b10abSAndroid Build Coastguard Worker        packuswb    mm4,            mm0              ; saturate to unsigned bytes while packing
275*fb1b10abSAndroid Build Coastguard Worker
        ; Store the four result lines, 4 bytes each; rcx = 3 * stride
        ; (recomputed above from the destination stride).
276*fb1b10abSAndroid Build Coastguard Worker        movd        [rax],          mm1
277*fb1b10abSAndroid Build Coastguard Worker        movd        [rax+rdx],      mm2
278*fb1b10abSAndroid Build Coastguard Worker        movd        [rax+2*rdx],    mm3
279*fb1b10abSAndroid Build Coastguard Worker        movd        [rax+rcx],      mm4
280*fb1b10abSAndroid Build Coastguard Worker
281*fb1b10abSAndroid Build Coastguard Worker    ; begin epilog
282*fb1b10abSAndroid Build Coastguard Worker    RESTORE_GOT
283*fb1b10abSAndroid Build Coastguard Worker    UNSHADOW_ARGS
284*fb1b10abSAndroid Build Coastguard Worker    pop         rbp
285*fb1b10abSAndroid Build Coastguard Worker    ret
286*fb1b10abSAndroid Build Coastguard Worker
; Read-only constants. Each is four identical 16-bit words (8 bytes) so it can
; be used directly as a 64-bit MMX memory operand through GLOBAL(); each label
; is aligned to 16 bytes.
287*fb1b10abSAndroid Build Coastguard WorkerSECTION_RODATA
288*fb1b10abSAndroid Build Coastguard Workeralign 16
; 0x8A8C = 35468 ~= sqrt(2) * sin(pi/8) * 65536. As a signed word this is
; 35468 - 65536 = -30068, which is why every pmulhw with this constant is
; followed by a paddw that adds the multiplicand back.
289*fb1b10abSAndroid Build Coastguard Workerx_s1sqr2:
290*fb1b10abSAndroid Build Coastguard Worker    times 4 dw 0x8A8C
291*fb1b10abSAndroid Build Coastguard Workeralign 16
; 0x4E7B = 20091 ~= (sqrt(2) * cos(pi/8) - 1) * 65536. The "- 1" part is
; restored by the paddw that follows every pmulhw with this constant.
292*fb1b10abSAndroid Build Coastguard Workerx_c1sqr2less1:
293*fb1b10abSAndroid Build Coastguard Worker    times 4 dw 0x4E7B
294*fb1b10abSAndroid Build Coastguard Workeralign 16
; Rounding bias added per word lane before the arithmetic shift right by 3.
295*fb1b10abSAndroid Build Coastguard Workerfours:
296*fb1b10abSAndroid Build Coastguard Worker    times 4 dw 0x0004
297