xref: /aosp_15_r20/external/boringssl/src/crypto/test/asm/trampoline-x86_64.pl (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1#!/usr/bin/env perl
2# Copyright (c) 2018, Google Inc.
3#
4# Permission to use, copy, modify, and/or distribute this software for any
5# purpose with or without fee is hereby granted, provided that the above
6# copyright notice and this permission notice appear in all copies.
7#
8# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
11# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
13# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
14# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16# This file defines helper functions for crypto/test/abi_test.h on x86_64. See
17# that header for details on how to use this.
18#
19# For convenience, this file is linked into libcrypto, where consuming builds
20# already support architecture-specific sources. The static linker should drop
21# this code in non-test binaries. This includes a shared library build of
22# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
23# used.
24#
25# References:
26#
27# SysV ABI: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
28# Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017
29
30use strict;
31
# The first command-line argument is the flavour and the second is the output
# file. If the first argument contains a dot, it is treated as the output file
# instead and the flavour is left undefined.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Target Win64 when the flavour names nasm, masm or mingw64, or when the
# output file ends in .asm.
my $win64 = 0;
$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl: first in the directory containing this script, then
# in ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Pipe everything printed to STDOUT through the translator. Check that the
# pipe was actually created: without the check, a failure here would leave
# STDOUT aliased to an unopened handle and the generated code would be lost.
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT = *OUT;
48
# @inp lists, in order, the registers that carry function inputs. The SysV
# list is the default; Win64 uses a shorter, different list.
my @inp = ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");
@inp = ("%rcx", "%rdx", "%r8", "%r9") if ($win64);

# @caller_state lists the registers that the callee must preserve for the
# caller. This must match the definition of CallerState in abi_test.h.
my @caller_state = $win64
  ? ("%rbx", "%rbp", "%rdi", "%rsi", "%r12", "%r13", "%r14", "%r15",
     "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12",
     "%xmm13", "%xmm14", "%xmm15")
  : ("%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15");

# $caller_state_size is the size of CallerState, in bytes: 8 bytes for every
# general-purpose register and 16 bytes for every XMM register.
my $caller_state_size = 0;
foreach my $saved (@caller_state) {
  die "unknown register $saved" unless ($saved =~ /^%(r|xmm)/);
  $caller_state_size += ($saved =~ /^%xmm/) ? 16 : 8;
}
73
# load_caller_state returns assembly which reads a CallerState structure
# located at $off($reg) into the registers listed in @caller_state. Only those
# registers are written, so $reg must not itself be a register in CallerState.
# If the optional callback $cb is supplied, it is called after each movq or
# movdqa with the offset (relative to $reg) and the name of the register just
# handled; whatever it returns is appended to the output.
sub load_caller_state {
  my ($off, $reg, $cb) = @_;
  my $asm = "";
  foreach my $saved (@caller_state) {
    # Pick the move instruction and slot width for this register class.
    my ($insn, $width);
    if ($saved =~ /^%xmm/) {
      ($insn, $width) = ("movdqa", 16);
    } elsif ($saved =~ /^%r/) {
      ($insn, $width) = ("movq", 8);
    } else {
      die "unknown register $saved";
    }

    # Remember this register's slot before advancing to the next one.
    my $slot = $off;
    $off += $width;

    $asm .= "\t$insn\t$slot($reg), $saved\n";
    $asm .= $cb->($slot, $saved) if (defined($cb));
  }
  return $asm;
}
97
# store_caller_state is the mirror image of load_caller_state: it returns
# assembly which writes the current values of the registers listed in
# @caller_state into the CallerState structure at $off($reg). The optional
# callback $cb is called after each store with the slot offset and register
# name, and its return value is appended to the output.
sub store_caller_state {
  my ($off, $reg, $cb) = @_;
  my $asm = "";
  foreach my $saved (@caller_state) {
    # Pick the move instruction and slot width for this register class.
    my ($insn, $width);
    if ($saved =~ /^%xmm/) {
      ($insn, $width) = ("movdqa", 16);
    } elsif ($saved =~ /^%r/) {
      ($insn, $width) = ("movq", 8);
    } else {
      die "unknown register $saved";
    }

    # Remember this register's slot before advancing to the next one.
    my $slot = $off;
    $off += $width;

    $asm .= "\t$insn\t$saved, $slot($reg)\n";
    $asm .= $cb->($slot, $saved) if (defined($cb));
  }
  return $asm;
}
118
# $max_params is the maximum number of parameters abi_test_trampoline supports.
my $max_params = 10;

# Windows reserves stack space for the register-based parameters, while SysV
# only reserves space for the overflow ones. Start from the SysV values and
# override them for Win64.
my $stack_params_skip = 0;
my $num_stack_params = $max_params - scalar(@inp);
if ($win64) {
  $stack_params_skip = scalar(@inp);
  $num_stack_params = $max_params;
}

# Give the trampoline's own arguments names. Win64 has only four input
# registers, so $unwind stays undefined there.
my ($func, $state, $argv, $argc, $unwind) = @inp;

# $code accumulates the generated assembly, starting with the trampoline's
# header and a description of its stack frame.
my $code = <<____;
.text

# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
# with |argv|, then saves the callee-saved registers into |state|. It returns
# the result of |func|. If |unwind| is non-zero, this function triggers unwind
# instrumentation.
# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
#                              const uint64_t *argv, size_t argc,
#                              int unwind);
.type	abi_test_trampoline, \@abi-omnipotent
.globl	abi_test_trampoline
.align	16
abi_test_trampoline:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	# Stack layout:
	#   8 bytes - align
	#   $caller_state_size bytes - saved caller registers
	#   8 bytes - scratch space
	#   8 bytes - saved copy of \$unwind (SysV-only)
	#   8 bytes - saved copy of \$state
	#   8 bytes - saved copy of \$func
	#   8 bytes - if needed for stack alignment
	#   8*$num_stack_params bytes - parameters for \$func
____
# Total frame size: the alignment slot, the saved caller registers, the fixed
# 8-byte slots (scratch, \$state and \$func, plus \$unwind on SysV only), and
# the outgoing parameter area.
my $fixed_slots = $win64 ? 3 : 4;
my $stack_alloc_size = 8 + $caller_state_size + 8*$fixed_slots +
                       8*$num_stack_params;

# SysV and Windows both require the stack to be 16-byte-aligned. The call
# instruction offsets it by 8, so stack allocations must be 8 mod 16. When
# padding is needed, it is added as one extra slot in the parameter area.
unless ($stack_alloc_size % 16 == 8) {
  $num_stack_params += 1;
  $stack_alloc_size += 8;
}

# Offsets of each frame slot relative to %rsp, lowest address first.
my $stack_params_offset = 8 * $stack_params_skip;
my $func_offset = 8 * $num_stack_params;
my $state_offset = $func_offset + 8;

# On Win64, unwind is already passed in memory: it sits above our frame, the
# return address, and the space reserved for the four register parameters. On
# SysV, it is passed in a register and gets its own slot in the frame.
my $unwind_offset = $win64 ? $stack_alloc_size + 5*8 : $state_offset + 8;
my $scratch_offset = $win64 ? $state_offset + 8 : $unwind_offset + 8;
my $caller_state_offset = $scratch_offset + 8;
# Allocate the frame and describe the allocation to both unwinders.
$code .= <<____;
	subq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	$stack_alloc_size
.seh_stackalloc	$stack_alloc_size
____

# SysV only: spill \$unwind from its register into the frame.
unless ($win64) {
  $code .= <<____;
	movq	$unwind, $unwind_offset(%rsp)
____
}

# Store our caller's state. This is needed because we modify it ourselves, and
# also to isolate the test infrastructure from the function under test failing
# to save some register. The callback annotates every store for CFI and SEH.
$code .= store_caller_state($caller_state_offset, "%rsp", sub {
  my ($rsp_off, $name) = @_;
  # Drop the leading '%' for the .cfi_* directives.
  my $bare = substr($name, 1);
  # SEH records offsets relative to %rsp (when there is no frame pointer), while
  # CFI records them relative to the CFA, the value of the parent's stack
  # pointer just before the call.
  my $cfa_off = $rsp_off - $stack_alloc_size - 8;
  my $seh_dir = ($bare =~ /^xmm/) ? ".seh_savexmm" : ".seh_savereg";
  return ".cfi_offset\t$bare, $cfa_off\n" .
         "$seh_dir\t\%$bare, $rsp_off\n";
});
$code .= ".seh_endprologue\n";
205
# Load the registers listed in @caller_state from the CallerState structure
# that \$state points to.
$code .= load_caller_state(0, $state);
$code .= <<____;
	# Stash \$func and \$state, so they are available after the call returns.
	movq	$func, $func_offset(%rsp)
	movq	$state, $state_offset(%rsp)

	# Load parameters. Note this will clobber \$argv and \$argc, so we can
	# only use non-parameter volatile registers. There are three, and they
	# are the same between SysV and Win64: %rax, %r10, and %r11.
	movq	$argv, %r10
	movq	$argc, %r11
____

# Fill the input registers in order, one argument each, jumping to
# .Largs_done as soon as the argument count in %r11 is exhausted.
foreach my $arg_reg (@inp) {
  $code .= <<____;
	dec	%r11
	js	.Largs_done
	movq	(%r10), $arg_reg
	addq	\$8, %r10
____
}
# Copy any arguments that did not fit in registers into the stack parameter
# area, one 8-byte word per iteration.
$code .= <<____;
	leaq	$stack_params_offset(%rsp), %rax
.Largs_loop:
	dec	%r11
	js	.Largs_done

	# This block should be:
	#    movq (%r10), %rtmp
	#    movq %rtmp, (%rax)
	# There are no spare registers available, so we spill into the scratch
	# space.
	movq	%r11, $scratch_offset(%rsp)
	movq	(%r10), %r11
	movq	%r11, (%rax)
	movq	$scratch_offset(%rsp), %r11

	addq	\$8, %r10
	addq	\$8, %rax
	jmp	.Largs_loop

____

# Instrumented path: when the saved \$unwind value is non-zero, the trap flag
# is set around the call and the abi_test_unwind_* labels mark the region.
$code .= <<____;
.Largs_done:
	movq	$func_offset(%rsp), %rax
	movq	$unwind_offset(%rsp), %r10
	testq	%r10, %r10
	jz	.Lno_unwind

	# Set the trap flag.
	pushfq
	orq	\$0x100, 0(%rsp)
	popfq

	# Run an instruction to trigger a breakpoint immediately before the
	# call.
	nop
.globl	abi_test_unwind_start
abi_test_unwind_start:

	call	*%rax
.globl	abi_test_unwind_return
abi_test_unwind_return:

	# Clear the trap flag. Note this assumes the trap flag was clear on
	# entry. We do not support instrumenting an unwind-instrumented
	# |abi_test_trampoline|.
	pushfq
	andq	\$-0x101, 0(%rsp)	# -0x101 is ~0x100
	popfq
.globl	abi_test_unwind_stop
abi_test_unwind_stop:

	jmp	.Lcall_done

____

# Plain path, then the point where both paths meet: reload \$state from the
# frame so the resulting register values can be written back to it.
$code .= <<____;
.Lno_unwind:
	call	*%rax

.Lcall_done:
	# Store what \$func did our state, so our caller can check.
	movq  $state_offset(%rsp), $state
____
# Write the registers listed in @caller_state, as they are after the call,
# into the CallerState structure that \$state points to.
$code .= store_caller_state(0, $state);

# Restore our caller's state from the frame, telling CFI after each load that
# the register holds its original value again.
$code .= load_caller_state($caller_state_offset, "%rsp", sub {
  my (undef, $name) = @_;
  # .cfi_restore takes the register name without the leading '%'.
  return ".cfi_restore\t" . substr($name, 1) . "\n";
});

# Release the frame and return.
$code .= <<____;
	addq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	-$stack_alloc_size

	# %rax already contains \$func's return value, unmodified.
	ret
.cfi_endproc
.seh_endproc
.size	abi_test_trampoline,.-abi_test_trampoline
____
303
# abi_test_clobber_* zeros the corresponding register. These are used to test
# the ABI-testing framework. One function is generated per general-purpose
# register named below, and one per XMM register 0 through 15.
foreach my $suffix ("ax", "bx", "cx", "dx", "di", "si", "bp", 8..15) {
  my $symbol = "abi_test_clobber_r$suffix";
  $code .= <<____;
.type	$symbol, \@abi-omnipotent
.globl	$symbol
.align	16
${symbol}:
	_CET_ENDBR
	xorq	%r$suffix, %r$suffix
	ret
.size	$symbol,.-$symbol
____
}

foreach my $index (0..15) {
  my $symbol = "abi_test_clobber_xmm$index";
  $code .= <<____;
.type	$symbol, \@abi-omnipotent
.globl	$symbol
.align	16
${symbol}:
	_CET_ENDBR
	pxor	%xmm$index, %xmm$index
	ret
.size	$symbol,.-$symbol
____
}
331
# Emit the remaining helpers: two functions whose unwind information is
# deliberately wrong (for testing the unwind tester), and two functions that
# read/clear and set the direction flag. Each .type directive names the symbol
# defined immediately after it, and the comments on the deliberately wrong
# annotations name the register that was really pushed (%r12).
$code .= <<____;
# abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
# register in unwind metadata.
# void abi_test_bad_unwind_wrong_register(void);
.type	abi_test_bad_unwind_wrong_register, \@abi-omnipotent
.globl	abi_test_bad_unwind_wrong_register
.align	16
abi_test_bad_unwind_wrong_register:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	pushq	%r12
.cfi_push	%r13	# This should be %r12
.seh_pushreg	%r13	# This should be %r12
.seh_endprologue
	# Windows evaluates epilogs directly in the unwinder, rather than using
	# unwind codes. Add a nop so there is one non-epilog point (immediately
	# before the nop) where the unwinder can observe the mistake.
	nop
	popq	%r12
.cfi_pop	%r12
	ret
.seh_endproc
.cfi_endproc
.size	abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register

# abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
# storage space for a saved register, breaking unwind.
# void abi_test_bad_unwind_temporary(void);
.type	abi_test_bad_unwind_temporary, \@abi-omnipotent
.globl	abi_test_bad_unwind_temporary
.align	16
abi_test_bad_unwind_temporary:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	pushq	%r12
.cfi_push	%r12
.seh_pushreg	%r12
.seh_endprologue

	movq	%r12, %rax
	inc	%rax
	movq	%rax, (%rsp)
	# Unwinding from here is incorrect. Although %r12 itself has not been
	# changed, the unwind codes say to look in (%rsp) instead.

	movq	%r12, (%rsp)
	# Unwinding is now fixed.

	popq	%r12
.cfi_pop	%r12
	ret
.cfi_endproc
.seh_endproc
.size	abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary

# abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
# was previously set, it returns one. Otherwise, it returns zero.
# int abi_test_get_and_clear_direction_flag(void);
.type	abi_test_get_and_clear_direction_flag, \@abi-omnipotent
.globl	abi_test_get_and_clear_direction_flag
abi_test_get_and_clear_direction_flag:
	_CET_ENDBR
	pushfq
	popq	%rax
	andq	\$0x400, %rax
	shrq	\$10, %rax
	cld
	ret
.size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag

# abi_test_set_direction_flag sets the direction flag.
# void abi_test_set_direction_flag(void);
.type	abi_test_set_direction_flag, \@abi-omnipotent
.globl	abi_test_set_direction_flag
abi_test_set_direction_flag:
	_CET_ENDBR
	std
	ret
.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
____
414
# Win64 only: a helper whose prolog is annotated correctly but whose epilog
# breaks Win64's epilog rules.
$code .= <<____ if ($win64);
# abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
# prolog, but the epilog does not match Win64's rules, breaking unwind during
# the epilog.
# void abi_test_bad_unwind_epilog(void);
.type	abi_test_bad_unwind_epilog, \@abi-omnipotent
.globl	abi_test_bad_unwind_epilog
.align	16
abi_test_bad_unwind_epilog:
.seh_startproc
	pushq	%r12
.seh_pushreg	%r12
.seh_endprologue

	nop

	# The epilog should begin here, but the nop makes it invalid.
	popq	%r12
	nop
	ret
.seh_endproc
.size	abi_test_bad_unwind_epilog,.-abi_test_bad_unwind_epilog
____

# Write the accumulated assembly to STDOUT and fail if the handle cannot be
# closed cleanly.
print $code;
close STDOUT or die "error closing STDOUT: $!";
443