#!/usr/bin/env perl
# Copyright (c) 2023, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use strict;

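# The script is invoked as "bn-armv8.pl <flavour> <output>". If the first
# argument contains a dot, it is taken to be the output path and no perlasm
# flavour is set.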
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

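# Redirect STDOUT through arm-xlate.pl, which translates the perlasm source
# into assembly for the requested flavour.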
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT = *OUT;

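# Argument registers per the AAPCS64 calling convention: rp, ap, bp and num
# arrive in x0-x3. x4-x8 are used as call-clobbered temporaries.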
my ($rp, $ap, $bp, $num) = ("x0", "x1", "x2", "x3");
my ($a0, $a1, $b0, $b1, $num_pairs) = ("x4", "x5", "x6", "x7", "x8");
my $code = <<____;
#include <openssl/arm_arch.h>

.text

// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type	bn_add_words, %function
.globl	bn_add_words
.align	4
bn_add_words:
	AARCH64_VALID_CALL_TARGET
	# Clear the carry flag.
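	# (Adding xzr to xzr cannot carry, so CMN leaves C = 0.)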
	cmn	xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations at
	# a time. Split $num = 2 * $num_pairs + $num. This allows loop
	# operations to use CBNZ without clobbering the carry flag.
	lsr	$num_pairs, $num, #1
	and	$num, $num, #1

	cbz	$num_pairs, .Ladd_tail
.Ladd_loop:
	ldp	$a0, $a1, [$ap], #16
	ldp	$b0, $b1, [$bp], #16
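	# SUB (unlike SUBS) does not touch the flags, so the carry from the
	# previous ADCS survives into the ADCS below.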
	sub	$num_pairs, $num_pairs, #1
	adcs	$a0, $a0, $b0
	adcs	$a1, $a1, $b1
	stp	$a0, $a1, [$rp], #16
	cbnz	$num_pairs, .Ladd_loop

.Ladd_tail:
	cbz	$num, .Ladd_exit
	ldr	$a0, [$ap], #8
	ldr	$b0, [$bp], #8
	adcs	$a0, $a0, $b0
	str	$a0, [$rp], #8

.Ladd_exit:
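	# Return the carry out of the addition: 1 if the carry flag is set.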
	cset	x0, cs
	ret
.size	bn_add_words,.-bn_add_words

// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type	bn_sub_words, %function
.globl	bn_sub_words
.align	4
bn_sub_words:
	AARCH64_VALID_CALL_TARGET
	# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
	# so we want C = 1 here.
	cmp	xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations at
	# a time. Split $num = 2 * $num_pairs + $num. This allows loop
	# operations to use CBNZ without clobbering the carry flag.
	lsr	$num_pairs, $num, #1
	and	$num, $num, #1

	cbz	$num_pairs, .Lsub_tail
.Lsub_loop:
	ldp	$a0, $a1, [$ap], #16
	ldp	$b0, $b1, [$bp], #16
	sub	$num_pairs, $num_pairs, #1
	sbcs	$a0, $a0, $b0
	sbcs	$a1, $a1, $b1
	stp	$a0, $a1, [$rp], #16
	cbnz	$num_pairs, .Lsub_loop

.Lsub_tail:
	cbz	$num, .Lsub_exit
	ldr	$a0, [$ap], #8
	ldr	$b0, [$bp], #8
	sbcs	$a0, $a0, $b0
	str	$a0, [$rp], #8

.Lsub_exit:
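	# A clear carry flag after SBCS means the subtraction borrowed, so return
	# 1 in that case.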
	cset	x0, cc
	ret
.size	bn_sub_words,.-bn_sub_words
____

print $code;
close STDOUT or die "error closing STDOUT: $!";