#!/usr/bin/env perl
# Copyright (c) 2018, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# This file defines helper functions for crypto/test/abi_test.h on x86_64. See
# that header for details on how to use this.
#
# For convenience, this file is linked into libcrypto, where consuming builds
# already support architecture-specific sources. The static linker should drop
# this code in non-test binaries. This includes a shared library build of
# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
# used.
#
# References:
#
# SysV ABI: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
# Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017

use strict;

# Usage: <script> [flavour] output. If the first argument contains a '.', it
# is treated as the output file name and no flavour is given.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 is selected either by an assembler flavour (nasm, masm, mingw64) or by
# an output file ending in ".asm".
my $win64 = 0;
$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl, first next to this script and then in the shared
# perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# Fail immediately if the pipe cannot be created, rather than printing into an
# unopened handle and only noticing at close time.
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT = *OUT;

# @inp is the registers used for function inputs, in order.
my @inp = $win64 ? ("%rcx", "%rdx", "%r8", "%r9") :
                   ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");

# @caller_state is the list of registers that the callee must preserve for the
# caller. This must match the definition of CallerState in abi_test.h.
my @caller_state = ("%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15");
if ($win64) {
  @caller_state = ("%rbx", "%rbp", "%rdi", "%rsi", "%r12", "%r13", "%r14",
                   "%r15", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10",
                   "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15");
}

# $caller_state_size is the size of CallerState, in bytes. General-purpose
# registers take 8 bytes each and XMM registers take 16.
my $caller_state_size = 0;
foreach (@caller_state) {
  if (/^%r/) {
    $caller_state_size += 8;
  } elsif (/^%xmm/) {
    $caller_state_size += 16;
  } else {
    die "unknown register $_";
  }
}

# load_caller_state returns code which loads a CallerState structure at
# $off($reg) into the respective registers. No other registers are touched, but
# $reg may not be a register in CallerState. $cb is an optional callback to
# add extra lines after each movq or movdqa. $cb is passed the offset, relative
# to $reg, and name of each register.
sub load_caller_state {
  # $off: byte offset of the CallerState structure relative to $reg.
  # $reg: base register holding the address; must not be in @caller_state.
  # $cb:  optional callback, invoked as $cb->($offset, $register) after each
  #       move; its return value is appended to the generated code.
  my ($off, $reg, $cb) = @_;
  return _caller_state_moves($off, $reg, $cb, 1);
}

# store_caller_state behaves like load_caller_state, except that it writes the
# current values of the registers into $off($reg).
sub store_caller_state {
  my ($off, $reg, $cb) = @_;
  return _caller_state_moves($off, $reg, $cb, 0);
}

# _caller_state_moves is the shared implementation of load_caller_state and
# store_caller_state. It emits one move per register in @caller_state, using
# movq (8 bytes) for general-purpose registers and movdqa (16 bytes) for XMM
# registers. If $is_load is true, memory is the source; otherwise it is the
# destination. It dies on a register name it does not recognize.
sub _caller_state_moves {
  my ($off, $reg, $cb, $is_load) = @_;
  my $ret = "";
  foreach my $saved (@caller_state) {
    my ($insn, $size);
    if ($saved =~ /^%r/) {
      ($insn, $size) = ("movq", 8);
    } elsif ($saved =~ /^%xmm/) {
      ($insn, $size) = ("movdqa", 16);
    } else {
      die "unknown register $saved";
    }
    my $mem = "$off($reg)";
    my ($src, $dst) = $is_load ? ($mem, $saved) : ($saved, $mem);
    $ret .= "\t$insn\t$src, $dst\n";
    # The callback sees the offset of the slot that was just accessed.
    $ret .= $cb->($off, $saved) if (defined($cb));
    $off += $size;
  }
  return $ret;
}

# $max_params is the maximum number of parameters abi_test_trampoline supports.
my $max_params = 10;

# Windows reserves stack space for the register-based parameters, while SysV
# only reserves space for the overflow ones.
my $stack_params_skip = $win64 ? scalar(@inp) : 0;
my $num_stack_params = $win64 ? $max_params : $max_params - scalar(@inp);

# On Win64 only four registers carry arguments, so $unwind is undefined there
# and is read from the caller's stack instead (see $unwind_offset below).
my ($func, $state, $argv, $argc, $unwind) = @inp;
my $code = <<____;
.text

# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
# with |argv|, then saves the callee-saved registers into |state|. It returns
# the result of |func|. If |unwind| is non-zero, this function triggers unwind
# instrumentation.
# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
#                              const uint64_t *argv, size_t argc,
#                              int unwind);
.type	abi_test_trampoline, \@abi-omnipotent
.globl	abi_test_trampoline
.align	16
abi_test_trampoline:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	# Stack layout:
	#   8 bytes - align
	#   $caller_state_size bytes - saved caller registers
	#   8 bytes - scratch space
	#   8 bytes - saved copy of \$unwind (SysV-only)
	#   8 bytes - saved copy of \$state
	#   8 bytes - saved copy of \$func
	#   8 bytes - if needed for stack alignment
	#   8*$num_stack_params bytes - parameters for \$func
____
my $stack_alloc_size = 8 + $caller_state_size + 8*3 + 8*$num_stack_params;
if (!$win64) {
  $stack_alloc_size += 8;
}
# SysV and Windows both require the stack to be 16-byte-aligned. The call
# instruction offsets it by 8, so stack allocations must be 8 mod 16.
if ($stack_alloc_size % 16 != 8) {
  $num_stack_params++;
  $stack_alloc_size += 8;
}
my $stack_params_offset = 8 * $stack_params_skip;
my $func_offset = 8 * $num_stack_params;
my $state_offset = $func_offset + 8;
# On Win64, unwind is already passed in memory. On SysV, it is passed in as
# register and we must reserve stack space for it.
my ($unwind_offset, $scratch_offset);
if ($win64) {
  $unwind_offset = $stack_alloc_size + 5*8;
  $scratch_offset = $state_offset + 8;
} else {
  $unwind_offset = $state_offset + 8;
  $scratch_offset = $unwind_offset + 8;
}
my $caller_state_offset = $scratch_offset + 8;
$code .= <<____;
	subq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	$stack_alloc_size
.seh_stackalloc	$stack_alloc_size
____
$code .= <<____ if (!$win64);
	movq	$unwind, $unwind_offset(%rsp)
____
# Store our caller's state. This is needed because we modify it ourselves, and
# also to isolate the test infrastructure from the function under test failing
# to save some register.
$code .= store_caller_state($caller_state_offset, "%rsp", sub {
  my ($off, $reg) = @_;
  $reg = substr($reg, 1);
  # SEH records offsets relative to %rsp (when there is no frame pointer), while
  # CFI records them relative to the CFA, the value of the parent's stack
  # pointer just before the call.
  my $cfi_off = $off - $stack_alloc_size - 8;
  my $seh_dir = ".seh_savereg";
  $seh_dir = ".seh_savexmm" if ($reg =~ /^xmm/);
  return <<____;
.cfi_offset	$reg, $cfi_off
$seh_dir	\%$reg, $off
____
});
$code .= ".seh_endprologue\n";

$code .= load_caller_state(0, $state);
$code .= <<____;
	# Stash \$func and \$state, so they are available after the call returns.
	movq	$func, $func_offset(%rsp)
	movq	$state, $state_offset(%rsp)

	# Load parameters. Note this will clobber \$argv and \$argc, so we can
	# only use non-parameter volatile registers. There are three, and they
	# are the same between SysV and Win64: %rax, %r10, and %r11.
	movq	$argv, %r10
	movq	$argc, %r11
____
# Fill the register-based parameters first, stopping early once argc runs out.
foreach my $param_reg (@inp) {
  $code .= <<____;
	dec	%r11
	js	.Largs_done
	movq	(%r10), $param_reg
	addq	\$8, %r10
____
}
$code .= <<____;
	leaq	$stack_params_offset(%rsp), %rax
.Largs_loop:
	dec	%r11
	js	.Largs_done

	# This block should be:
	#    movq (%r10), %rtmp
	#    movq %rtmp, (%rax)
	# There are no spare registers available, so we spill into the scratch
	# space.
	movq	%r11, $scratch_offset(%rsp)
	movq	(%r10), %r11
	movq	%r11, (%rax)
	movq	$scratch_offset(%rsp), %r11

	addq	\$8, %r10
	addq	\$8, %rax
	jmp	.Largs_loop

.Largs_done:
	movq	$func_offset(%rsp), %rax
	movq	$unwind_offset(%rsp), %r10
	testq	%r10, %r10
	jz	.Lno_unwind

	# Set the trap flag.
	pushfq
	orq	\$0x100, 0(%rsp)
	popfq

	# Run an instruction to trigger a breakpoint immediately before the
	# call.
	nop
.globl	abi_test_unwind_start
abi_test_unwind_start:

	call	*%rax
.globl	abi_test_unwind_return
abi_test_unwind_return:

	# Clear the trap flag. Note this assumes the trap flag was clear on
	# entry. We do not support instrumenting an unwind-instrumented
	# |abi_test_trampoline|.
	pushfq
	andq	\$-0x101, 0(%rsp)	# -0x101 is ~0x100
	popfq
.globl	abi_test_unwind_stop
abi_test_unwind_stop:

	jmp	.Lcall_done

.Lno_unwind:
	call	*%rax

.Lcall_done:
	# Store what \$func did to our state, so our caller can check.
	movq	$state_offset(%rsp), $state
____
$code .= store_caller_state(0, $state);

# Restore our caller's state.
$code .= load_caller_state($caller_state_offset, "%rsp", sub {
  my ($off, $reg) = @_;
  $reg = substr($reg, 1);
  return ".cfi_restore\t$reg\n";
});
$code .= <<____;
	addq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	-$stack_alloc_size

	# %rax already contains \$func's return value, unmodified.
	ret
.cfi_endproc
.seh_endproc
.size	abi_test_trampoline,.-abi_test_trampoline
____

# abi_test_clobber_* zeros the corresponding register. These are used to test
# the ABI-testing framework.
foreach my $gpr ("ax", "bx", "cx", "dx", "di", "si", "bp", 8..15) {
  # One function per general-purpose register: abi_test_clobber_rax, ...,
  # abi_test_clobber_r15. %rsp is intentionally not in the list.
  $code .= <<____;
.type	abi_test_clobber_r${gpr}, \@abi-omnipotent
.globl	abi_test_clobber_r${gpr}
.align	16
abi_test_clobber_r${gpr}:
	_CET_ENDBR
	xorq	%r${gpr}, %r${gpr}
	ret
.size	abi_test_clobber_r${gpr},.-abi_test_clobber_r${gpr}
____
}

foreach my $xmm (0..15) {
  # One function per XMM register: abi_test_clobber_xmm0, ...,
  # abi_test_clobber_xmm15.
  $code .= <<____;
.type	abi_test_clobber_xmm${xmm}, \@abi-omnipotent
.globl	abi_test_clobber_xmm${xmm}
.align	16
abi_test_clobber_xmm${xmm}:
	_CET_ENDBR
	pxor	%xmm${xmm}, %xmm${xmm}
	ret
.size	abi_test_clobber_xmm${xmm},.-abi_test_clobber_xmm${xmm}
____
}

$code .= <<____;
# abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
# register in unwind metadata.
# void abi_test_bad_unwind_wrong_register(void);
.type	abi_test_bad_unwind_wrong_register, \@abi-omnipotent
.globl	abi_test_bad_unwind_wrong_register
.align	16
abi_test_bad_unwind_wrong_register:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	pushq	%r12
.cfi_push	%r13	# This should be %r12
.seh_pushreg	%r13	# This should be %r12
.seh_endprologue
	# Windows evaluates epilogs directly in the unwinder, rather than using
	# unwind codes. Add a nop so there is one non-epilog point (immediately
	# before the nop) where the unwinder can observe the mistake.
	nop
	popq	%r12
.cfi_pop	%r12
	ret
.seh_endproc
.cfi_endproc
.size	abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register

# abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
# storage space for a saved register, breaking unwind.
# void abi_test_bad_unwind_temporary(void);
.type	abi_test_bad_unwind_temporary, \@abi-omnipotent
.globl	abi_test_bad_unwind_temporary
.align	16
abi_test_bad_unwind_temporary:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	pushq	%r12
.cfi_push	%r12
.seh_pushreg	%r12
.seh_endprologue

	movq	%r12, %rax
	inc	%rax
	movq	%rax, (%rsp)
	# Unwinding from here is incorrect. Although %r12 itself has not been
	# changed, the unwind codes say to look in (%rsp) instead.

	movq	%r12, (%rsp)
	# Unwinding is now fixed.

	popq	%r12
.cfi_pop	%r12
	ret
.cfi_endproc
.seh_endproc
.size	abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary

# abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
# was previously set, it returns one. Otherwise, it returns zero.
# int abi_test_get_and_clear_direction_flag(void);
.type	abi_test_get_and_clear_direction_flag, \@abi-omnipotent
.globl	abi_test_get_and_clear_direction_flag
abi_test_get_and_clear_direction_flag:
	_CET_ENDBR
	pushfq
	popq	%rax
	andq	\$0x400, %rax
	shrq	\$10, %rax
	cld
	ret
.size	abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag

# abi_test_set_direction_flag sets the direction flag.
# void abi_test_set_direction_flag(void);
.type	abi_test_set_direction_flag, \@abi-omnipotent
.globl	abi_test_set_direction_flag
abi_test_set_direction_flag:
	_CET_ENDBR
	std
	ret
.size	abi_test_set_direction_flag,.-abi_test_set_direction_flag
____

if ($win64) {
  $code .= <<____;
# abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
# prolog, but the epilog does not match Win64's rules, breaking unwind during
# the epilog.
# void abi_test_bad_unwind_epilog(void);
.type	abi_test_bad_unwind_epilog, \@abi-omnipotent
.globl	abi_test_bad_unwind_epilog
.align	16
abi_test_bad_unwind_epilog:
.seh_startproc
	pushq	%r12
.seh_pushreg	%r12
.seh_endprologue

	nop

	# The epilog should begin here, but the nop makes it invalid.
	popq	%r12
	nop
	ret
.seh_endproc
.size	abi_test_bad_unwind_epilog,.-abi_test_bad_unwind_epilog
____
}

# Emit the accumulated assembly. Closing STDOUT is checked so that a failed
# write is reported instead of silently producing truncated output.
print $code;
close STDOUT or die "error closing STDOUT: $!";