xref: /aosp_15_r20/external/libffi/src/x86/win64.S (revision 1fd5a2e1d639cd1ddf29dd0c484c123bbd850c21)
1*1fd5a2e1SPrashanth Swaminathan#ifdef __x86_64__
2*1fd5a2e1SPrashanth Swaminathan#define LIBFFI_ASM
3*1fd5a2e1SPrashanth Swaminathan#include <fficonfig.h>
4*1fd5a2e1SPrashanth Swaminathan#include <ffi.h>
5*1fd5a2e1SPrashanth Swaminathan#include <ffi_cfi.h>
6*1fd5a2e1SPrashanth Swaminathan#include "asmnames.h"
7*1fd5a2e1SPrashanth Swaminathan
8*1fd5a2e1SPrashanth Swaminathan#if defined(HAVE_AS_CFI_PSEUDO_OP)
9*1fd5a2e1SPrashanth Swaminathan        .cfi_sections   .debug_frame
10*1fd5a2e1SPrashanth Swaminathan#endif
11*1fd5a2e1SPrashanth Swaminathan
/* Select the names used for this file's incoming arguments and decide
   whether the .seh_* unwind directives are emitted.  With X86_WIN64
   defined, SEH(...) expands to its arguments and arg0..arg3 are
   rcx/rdx/r8/r9 (the Microsoft x64 argument registers).  Otherwise
   SEH(...) expands to nothing and arg0..arg3 are rdi/rsi/rdx/rcx (the
   first four System V AMD64 integer argument registers).  */
12*1fd5a2e1SPrashanth Swaminathan#ifdef X86_WIN64
13*1fd5a2e1SPrashanth Swaminathan#define SEH(...) __VA_ARGS__
14*1fd5a2e1SPrashanth Swaminathan#define arg0	%rcx
15*1fd5a2e1SPrashanth Swaminathan#define arg1	%rdx
16*1fd5a2e1SPrashanth Swaminathan#define arg2	%r8
17*1fd5a2e1SPrashanth Swaminathan#define arg3	%r9
18*1fd5a2e1SPrashanth Swaminathan#else
19*1fd5a2e1SPrashanth Swaminathan#define SEH(...)
20*1fd5a2e1SPrashanth Swaminathan#define arg0	%rdi
21*1fd5a2e1SPrashanth Swaminathan#define arg1	%rsi
22*1fd5a2e1SPrashanth Swaminathan#define arg2	%rdx
23*1fd5a2e1SPrashanth Swaminathan#define arg3	%rcx
24*1fd5a2e1SPrashanth Swaminathan#endif
25*1fd5a2e1SPrashanth Swaminathan
26*1fd5a2e1SPrashanth Swaminathan/* This macro allows the safe creation of jump tables without an
27*1fd5a2e1SPrashanth Swaminathan   actual table.  The entry points into the table are all 8 bytes.
28*1fd5a2e1SPrashanth Swaminathan   The use of ORG asserts that we're at the correct location.  */
29*1fd5a2e1SPrashanth Swaminathan/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
/* NOTE: the fallback form (clang, Apple, Solaris) only aligns to 8 bytes
   and ignores BASE and X, so in that configuration nothing verifies
   that the preceding entry fit within its 8-byte slot.  */
30*1fd5a2e1SPrashanth Swaminathan#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
31*1fd5a2e1SPrashanth Swaminathan# define E(BASE, X)	.balign 8
32*1fd5a2e1SPrashanth Swaminathan#else
33*1fd5a2e1SPrashanth Swaminathan# define E(BASE, X)	.balign 8; .org BASE + X * 8
34*1fd5a2e1SPrashanth Swaminathan#endif
35*1fd5a2e1SPrashanth Swaminathan
36*1fd5a2e1SPrashanth Swaminathan	.text
37*1fd5a2e1SPrashanth Swaminathan
38*1fd5a2e1SPrashanth Swaminathan/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
39*1fd5a2e1SPrashanth Swaminathan
40*1fd5a2e1SPrashanth Swaminathan   A bit of trickiness here -- FRAME is the base of the stack frame
41*1fd5a2e1SPrashanth Swaminathan   for this function.  This has been allocated by ffi_call.  We also
42*1fd5a2e1SPrashanth Swaminathan   deallocate some of the stack that has been alloca'd.  */
43*1fd5a2e1SPrashanth Swaminathan
/* Frame layout as accessed below (offsets from FRAME, which becomes rbp):
      0  saved caller rbp
      8  return address, copied from the top of the entry stack
     16  address of the function to call
     24  32-bit selector (an FFI_TYPE_* value) choosing how the result
         is stored
     32  address the result is stored to
   STACK becomes rsp for the call; its first four 8-byte slots are loaded
   into both the integer and the xmm argument registers.  */
44*1fd5a2e1SPrashanth Swaminathan	.align	8
45*1fd5a2e1SPrashanth Swaminathan	.globl	C(ffi_call_win64)
46*1fd5a2e1SPrashanth Swaminathan	FFI_HIDDEN(C(ffi_call_win64))
47*1fd5a2e1SPrashanth Swaminathan
48*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_proc ffi_call_win64)
49*1fd5a2e1SPrashanth SwaminathanC(ffi_call_win64):
50*1fd5a2e1SPrashanth Swaminathan	cfi_startproc
51*1fd5a2e1SPrashanth Swaminathan	/* Set up the local stack frame and install it in rbp/rsp.  */
	/* rax = return address.  The old rbp goes to FRAME+0 and the return
	   address to FRAME+8, so FRAME has the shape of an ordinary rbp
	   frame and "leave; ret" in the epilogue returns to our caller.  */
52*1fd5a2e1SPrashanth Swaminathan	movq	(%rsp), %rax
53*1fd5a2e1SPrashanth Swaminathan	movq	%rbp, (arg1)
54*1fd5a2e1SPrashanth Swaminathan	movq	%rax, 8(arg1)
55*1fd5a2e1SPrashanth Swaminathan	movq	arg1, %rbp
56*1fd5a2e1SPrashanth Swaminathan	cfi_def_cfa(%rbp, 16)
57*1fd5a2e1SPrashanth Swaminathan	cfi_rel_offset(%rbp, 0)
58*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_pushreg %rbp)
59*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_setframe %rbp, 0)
60*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_endprologue)
	/* Switch rsp to the argument area passed as STACK.  */
61*1fd5a2e1SPrashanth Swaminathan	movq	arg0, %rsp
62*1fd5a2e1SPrashanth Swaminathan
	/* Copy the third argument into r10 now: under either definition of
	   arg2 its register is overwritten by the slot loads below.  */
63*1fd5a2e1SPrashanth Swaminathan	movq	arg2, %r10
64*1fd5a2e1SPrashanth Swaminathan
65*1fd5a2e1SPrashanth Swaminathan	/* Load all slots into both general and xmm registers.  */
66*1fd5a2e1SPrashanth Swaminathan	movq	(%rsp), %rcx
67*1fd5a2e1SPrashanth Swaminathan	movsd	(%rsp), %xmm0
68*1fd5a2e1SPrashanth Swaminathan	movq	8(%rsp), %rdx
69*1fd5a2e1SPrashanth Swaminathan	movsd	8(%rsp), %xmm1
70*1fd5a2e1SPrashanth Swaminathan	movq	16(%rsp), %r8
71*1fd5a2e1SPrashanth Swaminathan	movsd	16(%rsp), %xmm2
72*1fd5a2e1SPrashanth Swaminathan	movq	24(%rsp), %r9
73*1fd5a2e1SPrashanth Swaminathan	movsd	24(%rsp), %xmm3
74*1fd5a2e1SPrashanth Swaminathan
	/* Call the function whose address is stored at FRAME+16.  */
75*1fd5a2e1SPrashanth Swaminathan	call	*16(%rbp)
76*1fd5a2e1SPrashanth Swaminathan
	/* ecx = selector from FRAME+24, r8 = result address from FRAME+32.
	   r10 = 0b + selector * 8 is the table entry to run.  leaq does not
	   modify the flags, so the ja still acts on the cmpl and sends any
	   selector above FFI_TYPE_SMALL_STRUCT_4B to the abort at 99.  */
77*1fd5a2e1SPrashanth Swaminathan	movl	24(%rbp), %ecx
78*1fd5a2e1SPrashanth Swaminathan	movq	32(%rbp), %r8
79*1fd5a2e1SPrashanth Swaminathan	leaq	0f(%rip), %r10
80*1fd5a2e1SPrashanth Swaminathan	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx
81*1fd5a2e1SPrashanth Swaminathan	leaq	(%r10, %rcx, 8), %r10
82*1fd5a2e1SPrashanth Swaminathan	ja	99f
83*1fd5a2e1SPrashanth Swaminathan	jmp	*%r10
84*1fd5a2e1SPrashanth Swaminathan
85*1fd5a2e1SPrashanth Swaminathan/* Below, we're space constrained most of the time.  Thus we eschew the
86*1fd5a2e1SPrashanth Swaminathan   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */
/* The epilogue is expanded in the middle of the procedure, once per table
   entry, so the unwind state is remembered before describing the
   post-leave state and restored after the ret for the code that follows.
   NOTE(review): the cfi_* macros come from ffi_cfi.h, which is not
   visible here.  */
87*1fd5a2e1SPrashanth Swaminathan.macro epilogue
88*1fd5a2e1SPrashanth Swaminathan	leaveq
89*1fd5a2e1SPrashanth Swaminathan	cfi_remember_state
90*1fd5a2e1SPrashanth Swaminathan	cfi_def_cfa(%rsp, 8)
91*1fd5a2e1SPrashanth Swaminathan	cfi_restore(%rbp)
92*1fd5a2e1SPrashanth Swaminathan	ret
93*1fd5a2e1SPrashanth Swaminathan	cfi_restore_state
94*1fd5a2e1SPrashanth Swaminathan.endm
95*1fd5a2e1SPrashanth Swaminathan
	/* Result-store table: one 8-byte entry per FFI_TYPE_* value.  Each
	   entry is reached with the integer result in rax, the
	   floating-point result in xmm0 and the destination address in r8.
	   Integer results narrower than 64 bits are extended and stored as
	   a full 8-byte value.  */
96*1fd5a2e1SPrashanth Swaminathan	.align	8
97*1fd5a2e1SPrashanth Swaminathan0:
98*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_VOID)
99*1fd5a2e1SPrashanth Swaminathan	epilogue
100*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_INT)
101*1fd5a2e1SPrashanth Swaminathan	movslq	%eax, %rax
102*1fd5a2e1SPrashanth Swaminathan	movq	%rax, (%r8)
103*1fd5a2e1SPrashanth Swaminathan	epilogue
104*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_FLOAT)
105*1fd5a2e1SPrashanth Swaminathan	movss	%xmm0, (%r8)
106*1fd5a2e1SPrashanth Swaminathan	epilogue
107*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_DOUBLE)
108*1fd5a2e1SPrashanth Swaminathan	movsd	%xmm0, (%r8)
109*1fd5a2e1SPrashanth Swaminathan	epilogue
110*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_LONGDOUBLE)
111*1fd5a2e1SPrashanth Swaminathan	call	PLT(C(abort))
112*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_UINT8)
113*1fd5a2e1SPrashanth Swaminathan	movzbl	%al, %eax
114*1fd5a2e1SPrashanth Swaminathan	movq	%rax, (%r8)
115*1fd5a2e1SPrashanth Swaminathan	epilogue
116*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_SINT8)
	/* Uses the store and epilogue at 98 in the FFI_TYPE_UINT64 entry.
	   NOTE(review): presumably because the sign extension, store and
	   epilogue together would exceed the 8-byte slot.  */
117*1fd5a2e1SPrashanth Swaminathan	movsbq	%al, %rax
118*1fd5a2e1SPrashanth Swaminathan	jmp	98f
119*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_UINT16)
120*1fd5a2e1SPrashanth Swaminathan	movzwl	%ax, %eax
121*1fd5a2e1SPrashanth Swaminathan	movq	%rax, (%r8)
122*1fd5a2e1SPrashanth Swaminathan	epilogue
123*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_SINT16)
	/* Same shared tail at 98 as FFI_TYPE_SINT8.  */
124*1fd5a2e1SPrashanth Swaminathan	movswq	%ax, %rax
125*1fd5a2e1SPrashanth Swaminathan	jmp	98f
126*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_UINT32)
	/* Writing eax zeroes the upper half of rax (zero extension).  */
127*1fd5a2e1SPrashanth Swaminathan	movl	%eax, %eax
128*1fd5a2e1SPrashanth Swaminathan	movq	%rax, (%r8)
129*1fd5a2e1SPrashanth Swaminathan	epilogue
130*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_SINT32)
131*1fd5a2e1SPrashanth Swaminathan	movslq	%eax, %rax
132*1fd5a2e1SPrashanth Swaminathan	movq	%rax, (%r8)
133*1fd5a2e1SPrashanth Swaminathan	epilogue
134*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_UINT64)
135*1fd5a2e1SPrashanth Swaminathan98:	movq	%rax, (%r8)
136*1fd5a2e1SPrashanth Swaminathan	epilogue
137*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_SINT64)
138*1fd5a2e1SPrashanth Swaminathan	movq	%rax, (%r8)
139*1fd5a2e1SPrashanth Swaminathan	epilogue
140*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_STRUCT)
	/* Nothing is stored through r8 for this selector; just return.  */
141*1fd5a2e1SPrashanth Swaminathan	epilogue
142*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_POINTER)
143*1fd5a2e1SPrashanth Swaminathan	movq	%rax, (%r8)
144*1fd5a2e1SPrashanth Swaminathan	epilogue
145*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_COMPLEX)
146*1fd5a2e1SPrashanth Swaminathan	call	PLT(C(abort))
147*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_SMALL_STRUCT_1B)
	/* The SMALL_STRUCT entries store only the low 1, 2 or 4 bytes.  */
148*1fd5a2e1SPrashanth Swaminathan	movb	%al, (%r8)
149*1fd5a2e1SPrashanth Swaminathan	epilogue
150*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_SMALL_STRUCT_2B)
151*1fd5a2e1SPrashanth Swaminathan	movw	%ax, (%r8)
152*1fd5a2e1SPrashanth Swaminathan	epilogue
153*1fd5a2e1SPrashanth SwaminathanE(0b, FFI_TYPE_SMALL_STRUCT_4B)
154*1fd5a2e1SPrashanth Swaminathan	movl	%eax, (%r8)
155*1fd5a2e1SPrashanth Swaminathan	epilogue
156*1fd5a2e1SPrashanth Swaminathan
	/* Target of the range check above: the selector was greater than
	   FFI_TYPE_SMALL_STRUCT_4B.  */
157*1fd5a2e1SPrashanth Swaminathan	.align	8
158*1fd5a2e1SPrashanth Swaminathan99:	call	PLT(C(abort))
159*1fd5a2e1SPrashanth Swaminathan
160*1fd5a2e1SPrashanth Swaminathan	epilogue
161*1fd5a2e1SPrashanth Swaminathan
162*1fd5a2e1SPrashanth Swaminathan	cfi_endproc
163*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_endproc)
164*1fd5a2e1SPrashanth Swaminathan
165*1fd5a2e1SPrashanth Swaminathan
166*1fd5a2e1SPrashanth Swaminathan/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
167*1fd5a2e1SPrashanth Swaminathan   16 bytes of result, 32 bytes of xmm registers.  */
/* ffi_clo_FS is the total frame size subtracted from rsp (88 bytes).
   ffi_clo_OFF_R is the rsp-relative offset of the result area, and
   ffi_clo_OFF_X is the rsp-relative offset of the area where
   xmm0-xmm3 are saved.  */
168*1fd5a2e1SPrashanth Swaminathan#define ffi_clo_FS	(32+8+16+32)
169*1fd5a2e1SPrashanth Swaminathan#define ffi_clo_OFF_R	(32+8)
170*1fd5a2e1SPrashanth Swaminathan#define ffi_clo_OFF_X	(32+8+16)
171*1fd5a2e1SPrashanth Swaminathan
/* ffi_go_closure_win64: closure entry point that finds its closure
   through r10.  It spills the four integer argument registers to
   8..32(%rsp), loads cif from 8(%r10) and fun from 16(%r10), passes r10
   itself as user_data, and then jumps to local label 0 inside
   ffi_closure_win64 so the rest of that routine is shared.  */
172*1fd5a2e1SPrashanth Swaminathan	.align	8
173*1fd5a2e1SPrashanth Swaminathan	.globl	C(ffi_go_closure_win64)
174*1fd5a2e1SPrashanth Swaminathan	FFI_HIDDEN(C(ffi_go_closure_win64))
175*1fd5a2e1SPrashanth Swaminathan
176*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_proc ffi_go_closure_win64)
177*1fd5a2e1SPrashanth SwaminathanC(ffi_go_closure_win64):
178*1fd5a2e1SPrashanth Swaminathan	cfi_startproc
179*1fd5a2e1SPrashanth Swaminathan	/* Save all integer arguments into the incoming reg stack space.  */
180*1fd5a2e1SPrashanth Swaminathan	movq	%rcx, 8(%rsp)
181*1fd5a2e1SPrashanth Swaminathan	movq	%rdx, 16(%rsp)
182*1fd5a2e1SPrashanth Swaminathan	movq	%r8, 24(%rsp)
183*1fd5a2e1SPrashanth Swaminathan	movq	%r9, 32(%rsp)
184*1fd5a2e1SPrashanth Swaminathan
185*1fd5a2e1SPrashanth Swaminathan	movq	8(%r10), %rcx			/* load cif */
186*1fd5a2e1SPrashanth Swaminathan	movq	16(%r10), %rdx			/* load fun */
187*1fd5a2e1SPrashanth Swaminathan	movq	%r10, %r8			/* closure is user_data */
	/* 0f is the next "0:" label, which lies in ffi_closure_win64; rsp
	   has not been adjusted, so the frame is allocated there.  */
188*1fd5a2e1SPrashanth Swaminathan	jmp	0f
189*1fd5a2e1SPrashanth Swaminathan	cfi_endproc
190*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_endproc)
191*1fd5a2e1SPrashanth Swaminathan
/* ffi_closure_win64: closure entry point that finds its closure through
   r10.  cif, fun and user_data are loaded from the three 8-byte fields
   starting FFI_TRAMPOLINE_SIZE bytes into the object r10 points to.
   They are passed, together with a pointer to a result area in this
   frame, to ffi_closure_win64_inner; the result is then returned in
   both rax and xmm0.  Local label 0 is also the join point for
   ffi_go_closure_win64.  */
192*1fd5a2e1SPrashanth Swaminathan	.align	8
193*1fd5a2e1SPrashanth Swaminathan	.globl	C(ffi_closure_win64)
194*1fd5a2e1SPrashanth Swaminathan	FFI_HIDDEN(C(ffi_closure_win64))
195*1fd5a2e1SPrashanth Swaminathan
196*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_proc ffi_closure_win64)
197*1fd5a2e1SPrashanth SwaminathanC(ffi_closure_win64):
198*1fd5a2e1SPrashanth Swaminathan	cfi_startproc
199*1fd5a2e1SPrashanth Swaminathan	/* Save all integer arguments into the incoming reg stack space.  */
200*1fd5a2e1SPrashanth Swaminathan	movq	%rcx, 8(%rsp)
201*1fd5a2e1SPrashanth Swaminathan	movq	%rdx, 16(%rsp)
202*1fd5a2e1SPrashanth Swaminathan	movq	%r8, 24(%rsp)
203*1fd5a2e1SPrashanth Swaminathan	movq	%r9, 32(%rsp)
204*1fd5a2e1SPrashanth Swaminathan
205*1fd5a2e1SPrashanth Swaminathan	movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */
206*1fd5a2e1SPrashanth Swaminathan	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */
207*1fd5a2e1SPrashanth Swaminathan	movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */
	/* Both entry points arrive here with rcx = cif, rdx = fun,
	   r8 = user_data and the integer arguments already spilled above
	   the return address.  ffi_clo_FS (88) plus the 8-byte return
	   address is a multiple of 16, so rsp is 16-byte aligned at the
	   call below, assuming the usual stack alignment on entry.  */
208*1fd5a2e1SPrashanth Swaminathan0:
209*1fd5a2e1SPrashanth Swaminathan	subq	$ffi_clo_FS, %rsp
210*1fd5a2e1SPrashanth Swaminathan	cfi_adjust_cfa_offset(ffi_clo_FS)
211*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_stackalloc ffi_clo_FS)
212*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_endprologue)
213*1fd5a2e1SPrashanth Swaminathan
214*1fd5a2e1SPrashanth Swaminathan	/* Save all sse arguments into the stack frame.  */
215*1fd5a2e1SPrashanth Swaminathan	movsd	%xmm0, ffi_clo_OFF_X(%rsp)
216*1fd5a2e1SPrashanth Swaminathan	movsd	%xmm1, ffi_clo_OFF_X+8(%rsp)
217*1fd5a2e1SPrashanth Swaminathan	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp)
218*1fd5a2e1SPrashanth Swaminathan	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp)
219*1fd5a2e1SPrashanth Swaminathan
	/* r9 (fourth argument) = address of the result area in this
	   frame.  */
220*1fd5a2e1SPrashanth Swaminathan	leaq	ffi_clo_OFF_R(%rsp), %r9
221*1fd5a2e1SPrashanth Swaminathan	call	PLT(C(ffi_closure_win64_inner))
222*1fd5a2e1SPrashanth Swaminathan
223*1fd5a2e1SPrashanth Swaminathan	/* Load the result into both possible result registers.  */
224*1fd5a2e1SPrashanth Swaminathan	movq    ffi_clo_OFF_R(%rsp), %rax
225*1fd5a2e1SPrashanth Swaminathan	movsd   ffi_clo_OFF_R(%rsp), %xmm0
226*1fd5a2e1SPrashanth Swaminathan
	/* Release the frame and return to the closure's caller.  */
227*1fd5a2e1SPrashanth Swaminathan	addq	$ffi_clo_FS, %rsp
228*1fd5a2e1SPrashanth Swaminathan	cfi_adjust_cfa_offset(-ffi_clo_FS)
229*1fd5a2e1SPrashanth Swaminathan	ret
230*1fd5a2e1SPrashanth Swaminathan
231*1fd5a2e1SPrashanth Swaminathan	cfi_endproc
232*1fd5a2e1SPrashanth Swaminathan	SEH(.seh_endproc)
233*1fd5a2e1SPrashanth Swaminathan#endif /* __x86_64__ */
234*1fd5a2e1SPrashanth Swaminathan
235*1fd5a2e1SPrashanth Swaminathan#if defined __ELF__ && defined __linux__
236*1fd5a2e1SPrashanth Swaminathan	.section	.note.GNU-stack,"",@progbits
237*1fd5a2e1SPrashanth Swaminathan#endif
238