#!/usr/bin/env perl
# Copyright (c) 2023, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# Generates AArch64 assembly for bn_add_words and bn_sub_words.
#
# Usage: <this script> [flavour] output
#
# The generated text is piped through arm-xlate.pl, which receives the flavour
# and the output file name on its command line.

use strict;

# Command-line handling: the flavour argument is optional. If the first
# argument contains a dot it is taken to be the output file name and the
# flavour is left undefined (it then interpolates as an empty string below).
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Locate arm-xlate.pl, first in the directory containing this script, then in
# ../../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Route everything printed to STDOUT through the translator. Fail loudly if
# the translator process cannot be started rather than silently producing no
# output.
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT = *OUT;

# Register names interpolated into the assembly template below.
# x0-x3 carry the four function arguments (rp, ap, bp, num); x4-x8 are
# scratch registers for the loaded words and the pair counter.
my ($rp, $ap, $bp, $num) = ("x0", "x1", "x2", "x3");
my ($a0, $a1, $b0, $b1, $num_pairs) = ("x4", "x5", "x6", "x7", "x8");
my $code = <<____;
#include <openssl/arm_arch.h>

.text

// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type	bn_add_words, %function
.globl	bn_add_words
.align	4
bn_add_words:
	AARCH64_VALID_CALL_TARGET
	# Clear the carry flag.
	cmn	xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations
	# at a time. Split $num = 2 * $num_pairs + $num. This allows loop
	# operations to use CBNZ without clobbering the carry flag.
	lsr	$num_pairs, $num, #1
	and	$num, $num, #1

	cbz	$num_pairs, .Ladd_tail
.Ladd_loop:
	ldp	$a0, $a1, [$ap], #16
	ldp	$b0, $b1, [$bp], #16
	sub	$num_pairs, $num_pairs, #1
	adcs	$a0, $a0, $b0
	adcs	$a1, $a1, $b1
	stp	$a0, $a1, [$rp], #16
	cbnz	$num_pairs, .Ladd_loop

.Ladd_tail:
	cbz	$num, .Ladd_exit
	ldr	$a0, [$ap], #8
	ldr	$b0, [$bp], #8
	adcs	$a0, $a0, $b0
	str	$a0, [$rp], #8

.Ladd_exit:
	cset	x0, cs
	ret
.size	bn_add_words,.-bn_add_words

// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type	bn_sub_words, %function
.globl	bn_sub_words
.align	4
bn_sub_words:
	AARCH64_VALID_CALL_TARGET
	# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
	# so we want C = 1 here.
	cmp	xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations
	# at a time. Split $num = 2 * $num_pairs + $num. This allows loop
	# operations to use CBNZ without clobbering the carry flag.
	lsr	$num_pairs, $num, #1
	and	$num, $num, #1

	cbz	$num_pairs, .Lsub_tail
.Lsub_loop:
	ldp	$a0, $a1, [$ap], #16
	ldp	$b0, $b1, [$bp], #16
	sub	$num_pairs, $num_pairs, #1
	sbcs	$a0, $a0, $b0
	sbcs	$a1, $a1, $b1
	stp	$a0, $a1, [$rp], #16
	cbnz	$num_pairs, .Lsub_loop

.Lsub_tail:
	cbz	$num, .Lsub_exit
	ldr	$a0, [$ap], #8
	ldr	$b0, [$bp], #8
	sbcs	$a0, $a0, $b0
	str	$a0, [$rp], #8

.Lsub_exit:
	cset	x0, cc
	ret
.size	bn_sub_words,.-bn_sub_words
____

print $code;
# Closing the pipe waits for the translator; a failure here surfaces write
# errors or a non-zero translator exit.
close STDOUT or die "error closing STDOUT: $!";