xref: /aosp_15_r20/external/boringssl/src/crypto/fipsmodule/bn/asm/bn-586.pl (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1#! /usr/bin/env perl
2# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
# Make the perlasm support code reachable relative to this script's own
# location, so the generator works from any working directory.  When $0
# carries no directory component the prefix is simply empty.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../../perlasm");
require "x86asm.pl";

# The last command-line argument names the file that receives the generated
# assembly.  Fail immediately, and with a useful message, when it is missing
# or cannot be created; otherwise the problem would only show up at the final
# close.  (A three-argument open with an undefined name would silently write
# to an anonymous temporary file, hence the explicit check.)
$output = pop;
defined($output) or die "$0: no output file given\n";
open(STDOUT,">",$output) or die "can't open $output: $!";

# The first remaining argument is handed to asm_init.
&asm_init($ARGV[0]);

# Selects the MMX/SSE2 bodies in the multiply and square generators below.
$sse2=1;

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");

&asm_finish();

# Buffered write errors are only reported when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";
31
# bn_mul_add_words emits the function NAME, which implements
#
#	r[i] += a[i]*w	for 0 <= i < num
#
# and returns the carry out of the last word in eax.  The generated
# function reads four stack arguments, in order: r, a, num and w.
#
# Only an MMX/SSE2 body is generated, and only when the file-level $sse2
# flag is set.  Words are processed eight at a time by an unrolled loop
# while at least eight remain, then one at a time.  A num of zero returns
# 0 without touching r or a; without that guard the bottom-tested
# single-word loop would run once and then wrap the counter.
sub bn_mul_add_words
	{
	my ($name)=@_;

	&function_begin_B($name);

	my $rp="eax";	# r, advanced as words are written
	my $ap="edx";	# a, advanced as words are read
	my $num="ecx";	# words still to process

	if ($sse2) {
		&mov($rp,&wparam(0));
		&mov($ap,&wparam(1));
		&mov($num,&wparam(2));
		&movd("mm0",&wparam(3));	# mm0 = w
		&pxor("mm1","mm1");		# mm1 = carry_in
		&test($num,$num);
		&jz(&label("maw_sse2_exit"));	# num == 0: return carry 0
		&jmp(&label("maw_sse2_entry"));

	# Eight words per iteration.  Loads, multiplies and carry propagation
	# are interleaved; mm1 always holds the running 64-bit sum whose low
	# half is stored and whose high half becomes the next carry.
	&set_label("maw_sse2_unrolled",16);
		&movd("mm3",&DWP(0,$rp,"",0));	# mm3 = r[0]
		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]
		&movd("mm2",&DWP(0,$ap,"",0));	# mm2 = a[0]
		&pmuludq("mm2","mm0");		# mm2 = w*a[0]
		&movd("mm4",&DWP(4,$ap,"",0));	# mm4 = a[1]
		&pmuludq("mm4","mm0");		# mm4 = w*a[1]
		&movd("mm6",&DWP(8,$ap,"",0));	# mm6 = a[2]
		&pmuludq("mm6","mm0");		# mm6 = w*a[2]
		&movd("mm7",&DWP(12,$ap,"",0));	# mm7 = a[3]
		&pmuludq("mm7","mm0");		# mm7 = w*a[3]
		&paddq("mm1","mm2");		# mm1 = carry_in + r[0] + w*a[0]
		&movd("mm3",&DWP(4,$rp,"",0));	# mm3 = r[1]
		&paddq("mm3","mm4");		# mm3 = r[1] + w*a[1]
		&movd("mm5",&DWP(8,$rp,"",0));	# mm5 = r[2]
		&paddq("mm5","mm6");		# mm5 = r[2] + w*a[2]
		&movd("mm4",&DWP(12,$rp,"",0));	# mm4 = r[3]
		&paddq("mm7","mm4");		# mm7 = r[3] + w*a[3]
		&movd(&DWP(0,$rp,"",0),"mm1");	# store r[0]
		&movd("mm2",&DWP(16,$ap,"",0));	# mm2 = a[4]
		&pmuludq("mm2","mm0");		# mm2 = w*a[4]
		&psrlq("mm1",32);		# mm1 = carry0
		&movd("mm4",&DWP(20,$ap,"",0));	# mm4 = a[5]
		&pmuludq("mm4","mm0");		# mm4 = w*a[5]
		&paddq("mm1","mm3");		# mm1 = carry0 + r[1] + w*a[1]
		&movd("mm6",&DWP(24,$ap,"",0));	# mm6 = a[6]
		&pmuludq("mm6","mm0");		# mm6 = w*a[6]
		&movd(&DWP(4,$rp,"",0),"mm1");	# store r[1]
		&psrlq("mm1",32);		# mm1 = carry1
		&movd("mm3",&DWP(28,$ap,"",0));	# mm3 = a[7]
		&add($ap,32);			# a += 8 words
		&pmuludq("mm3","mm0");		# mm3 = w*a[7]
		&paddq("mm1","mm5");		# mm1 = carry1 + r[2] + w*a[2]
		&movd("mm5",&DWP(16,$rp,"",0));	# mm5 = r[4]
		&paddq("mm2","mm5");		# mm2 = r[4] + w*a[4]
		&movd(&DWP(8,$rp,"",0),"mm1");	# store r[2]
		&psrlq("mm1",32);		# mm1 = carry2
		&paddq("mm1","mm7");		# mm1 = carry2 + r[3] + w*a[3]
		&movd("mm5",&DWP(20,$rp,"",0));	# mm5 = r[5]
		&paddq("mm4","mm5");		# mm4 = r[5] + w*a[5]
		&movd(&DWP(12,$rp,"",0),"mm1");	# store r[3]
		&psrlq("mm1",32);		# mm1 = carry3
		&paddq("mm1","mm2");		# mm1 = carry3 + r[4] + w*a[4]
		&movd("mm5",&DWP(24,$rp,"",0));	# mm5 = r[6]
		&paddq("mm6","mm5");		# mm6 = r[6] + w*a[6]
		&movd(&DWP(16,$rp,"",0),"mm1");	# store r[4]
		&psrlq("mm1",32);		# mm1 = carry4
		&paddq("mm1","mm4");		# mm1 = carry4 + r[5] + w*a[5]
		&movd("mm5",&DWP(28,$rp,"",0));	# mm5 = r[7]
		&paddq("mm3","mm5");		# mm3 = r[7] + w*a[7]
		&movd(&DWP(20,$rp,"",0),"mm1");	# store r[5]
		&psrlq("mm1",32);		# mm1 = carry5
		&paddq("mm1","mm6");		# mm1 = carry5 + r[6] + w*a[6]
		&movd(&DWP(24,$rp,"",0),"mm1");	# store r[6]
		&psrlq("mm1",32);		# mm1 = carry6
		&paddq("mm1","mm3");		# mm1 = carry6 + r[7] + w*a[7]
		&movd(&DWP(28,$rp,"",0),"mm1");	# store r[7]
		&lea($rp,&DWP(32,$rp));		# r += 8 words
		&psrlq("mm1",32);		# mm1 = carry_out

		&sub($num,8);
		&jz(&label("maw_sse2_exit"));
	&set_label("maw_sse2_entry");
		&test($num,0xfffffff8);		# at least eight words left?
		&jnz(&label("maw_sse2_unrolled"));

	# One word per iteration; only entered with 1 <= num <= 7.
	&set_label("maw_sse2_loop",4);
		&movd("mm2",&DWP(0,$ap));	# mm2 = a[i]
		&movd("mm3",&DWP(0,$rp));	# mm3 = r[i]
		&pmuludq("mm2","mm0");		# a[i] *= w
		&lea($ap,&DWP(4,$ap));
		&paddq("mm1","mm3");		# carry += r[i]
		&paddq("mm1","mm2");		# carry += a[i]*w
		&movd(&DWP(0,$rp),"mm1");	# r[i] = carry_low
		&sub($num,1);
		&psrlq("mm1",32);		# carry = carry_high
		&lea($rp,&DWP(4,$rp));		# lea leaves the flags from sub intact
		&jnz(&label("maw_sse2_loop"));
	&set_label("maw_sse2_exit");
		&movd("eax","mm1");		# c = carry_out
		&emms();
		&ret();
	}
	# NOTE(review): the body above returns through its own ret, so whatever
	# epilogue function_end emits is presumably never executed - confirm
	# against x86asm.pl before changing this to function_end_B.
	&function_end($name);
	}
135
# bn_mul_words emits the function NAME, which implements
#
#	r[i] = low word of (a[i]*w + carry)	for 0 <= i < num
#
# and returns the final carry in eax.  The generated function reads four
# stack arguments, in order: r, a, num and w.
#
# Only an MMX/SSE2 body is generated, and only when the file-level $sse2
# flag is set.  A num of zero returns 0 without touching r or a; without
# that guard the bottom-tested loop would run once and then wrap the
# counter.
sub bn_mul_words
	{
	my ($name)=@_;

	&function_begin_B($name);

	my $rp="eax";	# r, advanced one word per iteration
	my $ap="edx";	# a, advanced one word per iteration
	my $num="ecx";	# words still to process

	if ($sse2) {
		&mov($rp,&wparam(0));
		&mov($ap,&wparam(1));
		&mov($num,&wparam(2));
		&movd("mm0",&wparam(3));	# mm0 = w
		&pxor("mm1","mm1");		# mm1 = carry = 0
		&test($num,$num);
		&jz(&label("mw_sse2_exit"));	# num == 0: return carry 0

	&set_label("mw_sse2_loop",16);
		&movd("mm2",&DWP(0,$ap));	# mm2 = a[i]
		&pmuludq("mm2","mm0");		# a[i] *= w
		&lea($ap,&DWP(4,$ap));
		&paddq("mm1","mm2");		# carry += a[i]*w
		&movd(&DWP(0,$rp),"mm1");	# r[i] = carry_low
		&sub($num,1);
		&psrlq("mm1",32);		# carry = carry_high
		&lea($rp,&DWP(4,$rp));		# lea leaves the flags from sub intact
		&jnz(&label("mw_sse2_loop"));

	&set_label("mw_sse2_exit");
		&movd("eax","mm1");		# return carry
		&emms();
		&ret();
	}
	# NOTE(review): the body above returns through its own ret, so whatever
	# epilogue function_end emits is presumably never executed - confirm
	# against x86asm.pl.
	&function_end($name);
	}
170
# bn_sqr_words emits the function NAME, which stores the 64-bit square of
# each a[i] into the word pair r[2*i], r[2*i+1] for 0 <= i < num.  The
# generated function reads three stack arguments, in order: r, a and num.
#
# Only an MMX/SSE2 body is generated, and only when the file-level $sse2
# flag is set.  A num of zero returns without touching r or a; without
# that guard the bottom-tested loop would run once and then wrap the
# counter.
sub bn_sqr_words
	{
	my ($name)=@_;

	&function_begin_B($name);

	my $rp="eax";	# r, advanced two words per iteration
	my $ap="edx";	# a, advanced one word per iteration
	my $num="ecx";	# words still to process

	if ($sse2) {
		&mov($rp,&wparam(0));
		&mov($ap,&wparam(1));
		&mov($num,&wparam(2));
		&test($num,$num);
		&jz(&label("sqr_sse2_exit"));	# num == 0: nothing to do

	&set_label("sqr_sse2_loop",16);
		&movd("mm0",&DWP(0,$ap));	# mm0 = a[i]
		&pmuludq("mm0","mm0");		# a[i] *= a[i]
		&lea($ap,&DWP(4,$ap));		# a++
		&movq(&QWP(0,$rp),"mm0");	# r[i] = a[i]*a[i]
		&sub($num,1);
		&lea($rp,&DWP(8,$rp));		# r += 2; lea leaves the flags from sub intact
		&jnz(&label("sqr_sse2_loop"));

	&set_label("sqr_sse2_exit");
		&emms();
		&ret();
	}
	# NOTE(review): the body above returns through its own ret, so whatever
	# epilogue function_end emits is presumably never executed - confirm
	# against x86asm.pl.
	&function_end($name);
	}
200
# bn_div_words emits the function NAME, which loads its three stack
# arguments into edx, eax and ecx (in that order), executes "div ecx" and
# returns.  On x86 that divides the 64-bit value edx:eax by ecx and leaves
# the quotient in eax, so the arguments are the high word, the low word
# and the divisor.  No check for a zero divisor or an oversized quotient
# is emitted.
sub bn_div_words
	{
	my ($name)=@_;

	# Stack argument N is loaded into the Nth register of this list.
	my @operand=("edx","eax","ecx");

	&function_begin_B($name,"");
	for my $slot (0..$#operand)
		{
		&mov($operand[$slot],&wparam($slot));
		}
	&div($operand[2]);
	&ret();
	&function_end_B($name);
	}
213
# bn_add_words emits the function NAME, which computes
#
#	r[i] = a[i] + b[i] + carry	for 0 <= i < num
#
# and leaves the final carry (0 or 1) in eax.  The generated function
# reads four stack arguments, in order: r, a, b and num.
#
# The bulk of the work is an eight-word unrolled loop; the remaining
# num % 8 words are handled by up to seven straight-line tail rounds that
# bail out as soon as the count is exhausted.
sub bn_add_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my ($rp,$ap,$bp)=("ebx","esi","edi");	# r, a and b pointers
	my $carry="eax";			# running carry, also the result
	my ($lhs,$rhs)=("ecx","edx");		# scratch for a[i] and b[i]
	my $left="ebp";				# words still to process

	# Emits the arithmetic for the word at byte offset $off: afterwards
	# $lhs holds a + b + carry_in and $carry holds the carry out.  The two
	# additions cannot both carry, so summing both carry flags into
	# $carry gives 0 or 1.  The store is left to the caller.
	my $sum_word = sub
		{
		my ($off)=@_;
		&mov($lhs,&DWP($off,$ap,"",0)); 	# *a
		 &mov($rhs,&DWP($off,$bp,"",0)); 	# *b
		&add($lhs,$carry);
		 &mov($carry,0);			# mov does not touch the flags
		&adc($carry,$carry);
		 &add($lhs,$rhs);
		&adc($carry,0);
		};

	&mov($rp,&wparam(0));	# get r
	 &mov($ap,&wparam(1));	# get a
	&mov($bp,&wparam(2));	# get b
	 &mov($left,&wparam(3));	# get num
	&xor($carry,$carry);	# clear carry
	 &and($left,0xfffffff8);	# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	foreach my $word (0..7)
		{
		&comment("Round $word");

		$sum_word->($word*4);
		 &mov(&DWP($word*4,$rp,"",0),$lhs); 	# *r
		}

	&comment("");
	&add($ap,32);
	 &add($bp,32);
	&add($rp,32);
	 &sub($left,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($left,&wparam(3));	# get num
	&and($left,7);		# words left over after the unrolled loop
	 &jz(&label("aw_end"));

	foreach my $word (0..6)
		{
		# The seventh tail round is always the last one, so it
		# needs neither the countdown nor the early exit.
		my $may_continue=($word<6);

		&comment("Tail Round $word");
		$sum_word->($word*4);
		 &dec($left) if $may_continue;
		&mov(&DWP($word*4,$rp,"",0),$lhs);	# *r; mov keeps dec's flags
		 &jz(&label("aw_end")) if $may_continue;
		}
	&set_label("aw_end",0);

	# The carry already lives in eax, so no move into the return
	# register is needed.

	&function_end($name);
	}
285
# bn_sub_words emits the function NAME, which computes
#
#	r[i] = a[i] - b[i] - borrow	for 0 <= i < num
#
# and leaves the final borrow (0 or 1) in eax.  The generated function
# reads four stack arguments, in order: r, a, b and num.
#
# The structure mirrors the addition routine: an eight-word unrolled loop
# followed by up to seven straight-line tail rounds for the remaining
# num % 8 words.  The label names keep the "aw_" prefix the original used.
sub bn_sub_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my ($rp,$ap,$bp)=("ebx","esi","edi");	# r, a and b pointers
	my $borrow="eax";			# running borrow, also the result
	my ($lhs,$rhs)=("ecx","edx");		# scratch for a[i] and b[i]
	my $left="ebp";				# words still to process

	# Emits the arithmetic for the word at byte offset $off: afterwards
	# $lhs holds a - b - borrow_in and $borrow holds the borrow out.  The
	# two subtractions cannot both borrow, so summing both carry flags
	# into $borrow gives 0 or 1.  The store is left to the caller.
	my $diff_word = sub
		{
		my ($off)=@_;
		&mov($lhs,&DWP($off,$ap,"",0)); 	# *a
		 &mov($rhs,&DWP($off,$bp,"",0)); 	# *b
		&sub($lhs,$borrow);
		 &mov($borrow,0);			# mov does not touch the flags
		&adc($borrow,$borrow);
		 &sub($lhs,$rhs);
		&adc($borrow,0);
		};

	&mov($rp,&wparam(0));	# get r
	 &mov($ap,&wparam(1));	# get a
	&mov($bp,&wparam(2));	# get b
	 &mov($left,&wparam(3));	# get num
	&xor($borrow,$borrow);	# clear borrow
	 &and($left,0xfffffff8);	# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	foreach my $word (0..7)
		{
		&comment("Round $word");

		$diff_word->($word*4);
		 &mov(&DWP($word*4,$rp,"",0),$lhs); 	# *r
		}

	&comment("");
	&add($ap,32);
	 &add($bp,32);
	&add($rp,32);
	 &sub($left,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($left,&wparam(3));	# get num
	&and($left,7);		# words left over after the unrolled loop
	 &jz(&label("aw_end"));

	foreach my $word (0..6)
		{
		# The seventh tail round is always the last one, so it
		# needs neither the countdown nor the early exit.
		my $may_continue=($word<6);

		&comment("Tail Round $word");
		$diff_word->($word*4);
		 &dec($left) if $may_continue;
		&mov(&DWP($word*4,$rp,"",0),$lhs);	# *r; mov keeps dec's flags
		 &jz(&label("aw_end")) if $may_continue;
		}
	&set_label("aw_end",0);

	# The borrow already lives in eax, so no move into the return
	# register is needed.

	&function_end($name);
	}
357