xref: /aosp_15_r20/external/boringssl/src/gen/bcm/bn-586-apple.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
7.text
# bn_mul_add_words: multiply a word array by one 32-bit word and accumulate
# the products into a second array, propagating a carry between words.
# Per word: (carry, r[i]) = r[i] + a[i] * w + carry.
# Stack arguments (return address sits at the top of the stack on entry):
#   %eax <- 4(%esp)   array that is both read and written (r)
#   %edx <- 8(%esp)   array that is only read (a)
#   %ecx <- 12(%esp)  word count
#   %mm0 <- 16(%esp)  32-bit multiplier (w)
# Returns the final carry word in %eax. Uses MMX registers mm0-mm7 and
# executes emms before returning.
# NOTE(review): a count of 0 is not special-cased. The entry test falls
# through to the bottom-tested single-word loop, which handles one word
# before it decrements the count. Presumably callers never pass 0 - confirm.
8.globl	_bn_mul_add_words
9.private_extern	_bn_mul_add_words
10.align	4
11_bn_mul_add_words:
12L_bn_mul_add_words_begin:
13	movl	4(%esp),%eax	# eax = r
14	movl	8(%esp),%edx	# edx = a
15	movl	12(%esp),%ecx	# ecx = words remaining
16	movd	16(%esp),%mm0	# mm0 = w, zero-extended to 64 bits
17	pxor	%mm1,%mm1	# mm1 = running carry, starts at 0
18	jmp	L000maw_sse2_entry	# choose between the unrolled and single-word paths
19.align	4,0x90
# Unrolled path: 8 words per iteration. Loads and multiplies for later
# words are interleaved with the add/store/shift carry chain in mm1.
20L001maw_sse2_unrolled:
21	movd	(%eax),%mm3	# r[0]
22	paddq	%mm3,%mm1	# carry += r[0]
23	movd	(%edx),%mm2
24	pmuludq	%mm0,%mm2	# mm2 = a[0] * w (64-bit product)
25	movd	4(%edx),%mm4
26	pmuludq	%mm0,%mm4	# mm4 = a[1] * w
27	movd	8(%edx),%mm6
28	pmuludq	%mm0,%mm6	# mm6 = a[2] * w
29	movd	12(%edx),%mm7
30	pmuludq	%mm0,%mm7	# mm7 = a[3] * w
31	paddq	%mm2,%mm1	# carry += a[0] * w
32	movd	4(%eax),%mm3
33	paddq	%mm4,%mm3	# mm3 = r[1] + a[1] * w
34	movd	8(%eax),%mm5
35	paddq	%mm6,%mm5	# mm5 = r[2] + a[2] * w
36	movd	12(%eax),%mm4
37	paddq	%mm4,%mm7	# mm7 = r[3] + a[3] * w
38	movd	%mm1,(%eax)	# r[0] = low 32 bits of the sum
39	movd	16(%edx),%mm2
40	pmuludq	%mm0,%mm2	# mm2 = a[4] * w
41	psrlq	$32,%mm1	# carry = high 32 bits
42	movd	20(%edx),%mm4
43	pmuludq	%mm0,%mm4	# mm4 = a[5] * w
44	paddq	%mm3,%mm1	# carry += word 1 sum
45	movd	24(%edx),%mm6
46	pmuludq	%mm0,%mm6	# mm6 = a[6] * w
47	movd	%mm1,4(%eax)	# r[1]
48	psrlq	$32,%mm1
49	movd	28(%edx),%mm3
50	addl	$32,%edx	# last load from this group of a[] is done; advance a by 8 words
51	pmuludq	%mm0,%mm3	# mm3 = a[7] * w
52	paddq	%mm5,%mm1	# carry += word 2 sum
53	movd	16(%eax),%mm5
54	paddq	%mm5,%mm2	# mm2 = r[4] + a[4] * w
55	movd	%mm1,8(%eax)	# r[2]
56	psrlq	$32,%mm1
57	paddq	%mm7,%mm1	# carry += word 3 sum
58	movd	20(%eax),%mm5
59	paddq	%mm5,%mm4	# mm4 = r[5] + a[5] * w
60	movd	%mm1,12(%eax)	# r[3]
61	psrlq	$32,%mm1
62	paddq	%mm2,%mm1	# carry += word 4 sum
63	movd	24(%eax),%mm5
64	paddq	%mm5,%mm6	# mm6 = r[6] + a[6] * w
65	movd	%mm1,16(%eax)	# r[4]
66	psrlq	$32,%mm1
67	paddq	%mm4,%mm1	# carry += word 5 sum
68	movd	28(%eax),%mm5
69	paddq	%mm5,%mm3	# mm3 = r[7] + a[7] * w
70	movd	%mm1,20(%eax)	# r[5]
71	psrlq	$32,%mm1
72	paddq	%mm6,%mm1	# carry += word 6 sum
73	movd	%mm1,24(%eax)	# r[6]
74	psrlq	$32,%mm1
75	paddq	%mm3,%mm1	# carry += word 7 sum
76	movd	%mm1,28(%eax)	# r[7]
77	leal	32(%eax),%eax	# advance r by 8 words
78	psrlq	$32,%mm1
79	subl	$8,%ecx
80	jz	L002maw_sse2_exit	# nothing left: return the carry
81L000maw_sse2_entry:
82	testl	$4294967288,%ecx	# mask 0xFFFFFFF8: nonzero when 8 or more words remain
83	jnz	L001maw_sse2_unrolled
84.align	2,0x90
# Single-word path: one word per iteration, handles the words left over
# after the unrolled path (or all of them when fewer than 8 were passed).
85L003maw_sse2_loop:
86	movd	(%edx),%mm2
87	movd	(%eax),%mm3
88	pmuludq	%mm0,%mm2	# mm2 = a[i] * w
89	leal	4(%edx),%edx	# advance a
90	paddq	%mm3,%mm1	# carry += r[i]
91	paddq	%mm2,%mm1	# carry += a[i] * w
92	movd	%mm1,(%eax)	# r[i] = low 32 bits
93	subl	$1,%ecx	# sets ZF for the jnz below
94	psrlq	$32,%mm1	# carry = high 32 bits; MMX shift leaves EFLAGS alone
95	leal	4(%eax),%eax	# advance r; lea leaves EFLAGS alone
96	jnz	L003maw_sse2_loop
97L002maw_sse2_exit:
98	movd	%mm1,%eax	# return value = final carry
99	emms	# leave MMX state before returning
100	ret
# Unreachable: the instructions below follow an unconditional ret, carry no
# label, and have no matching pushes in this function. Presumably residue of
# a shared epilogue in the generator - confirm.
101	popl	%edi
102	popl	%esi
103	popl	%ebx
104	popl	%ebp
105	ret
# bn_mul_words: multiply a word array by one 32-bit word, writing the
# products to a separate array and propagating a carry between words.
# Per word: (carry, r[i]) = a[i] * w + carry.
# Stack arguments (return address sits at the top of the stack on entry):
#   %eax <- 4(%esp)   array that is written (r)
#   %edx <- 8(%esp)   array that is read (a)
#   %ecx <- 12(%esp)  word count
#   %mm0 <- 16(%esp)  32-bit multiplier (w)
# Returns the final carry word in %eax. Uses MMX registers mm0-mm2 and
# executes emms before returning.
# NOTE(review): the loop is bottom-tested, so a count of 0 is not handled;
# one word is processed before the count is decremented. Presumably callers
# never pass 0 - confirm.
106.globl	_bn_mul_words
107.private_extern	_bn_mul_words
108.align	4
109_bn_mul_words:
110L_bn_mul_words_begin:
111	movl	4(%esp),%eax	# eax = r
112	movl	8(%esp),%edx	# edx = a
113	movl	12(%esp),%ecx	# ecx = words remaining
114	movd	16(%esp),%mm0	# mm0 = w, zero-extended to 64 bits
115	pxor	%mm1,%mm1	# mm1 = running carry, starts at 0
116.align	4,0x90
117L004mw_sse2_loop:
118	movd	(%edx),%mm2
119	pmuludq	%mm0,%mm2	# mm2 = a[i] * w (64-bit product)
120	leal	4(%edx),%edx	# advance a
121	paddq	%mm2,%mm1	# carry += a[i] * w
122	movd	%mm1,(%eax)	# r[i] = low 32 bits
123	subl	$1,%ecx	# sets ZF for the jnz below
124	psrlq	$32,%mm1	# carry = high 32 bits; MMX shift leaves EFLAGS alone
125	leal	4(%eax),%eax	# advance r; lea leaves EFLAGS alone
126	jnz	L004mw_sse2_loop
127	movd	%mm1,%eax	# return value = final carry
128	emms	# leave MMX state before returning
129	ret
# Unreachable: the instructions below follow an unconditional ret, carry no
# label, and have no matching pushes in this function. Presumably residue of
# a shared epilogue in the generator - confirm.
130	popl	%edi
131	popl	%esi
132	popl	%ebx
133	popl	%ebp
134	ret
# bn_sqr_words: square each 32-bit word of an array independently and store
# each 64-bit square to the output array. No carry links the words.
# Per word: the 8 bytes at r + 8*i receive a[i] * a[i].
# Stack arguments (return address sits at the top of the stack on entry):
#   %eax <- 4(%esp)   output array; 8 bytes are written per input word
#   %edx <- 8(%esp)   input array; 4 bytes are read per word
#   %ecx <- 12(%esp)  number of input words
# No return value is set up: %eax is left holding the advanced output
# pointer. Uses MMX register mm0 and executes emms before returning.
# NOTE(review): the loop is bottom-tested, so a count of 0 is not handled;
# one word is processed before the count is decremented. Presumably callers
# never pass 0 - confirm.
135.globl	_bn_sqr_words
136.private_extern	_bn_sqr_words
137.align	4
138_bn_sqr_words:
139L_bn_sqr_words_begin:
140	movl	4(%esp),%eax	# eax = output pointer
141	movl	8(%esp),%edx	# edx = input pointer
142	movl	12(%esp),%ecx	# ecx = words remaining
143.align	4,0x90
144L005sqr_sse2_loop:
145	movd	(%edx),%mm0	# mm0 = a[i], zero-extended to 64 bits
146	pmuludq	%mm0,%mm0	# mm0 = a[i] * a[i] (64-bit product)
147	leal	4(%edx),%edx	# advance input by one word
148	movq	%mm0,(%eax)	# store the full 64-bit square
149	subl	$1,%ecx	# sets ZF for the jnz below
150	leal	8(%eax),%eax	# advance output by two words; lea leaves EFLAGS alone
151	jnz	L005sqr_sse2_loop
152	emms	# leave MMX state before returning
153	ret
# Unreachable: the instructions below follow an unconditional ret, carry no
# label, and have no matching pushes in this function. Presumably residue of
# a shared epilogue in the generator - confirm.
154	popl	%edi
155	popl	%esi
156	popl	%ebx
157	popl	%ebp
158	ret
# bn_div_words: divide a 64-bit value, passed as two 32-bit halves, by a
# 32-bit divisor using a single divl.
# Stack arguments (return address sits at the top of the stack on entry):
#   %edx <- 4(%esp)   high 32 bits of the dividend
#   %eax <- 8(%esp)   low 32 bits of the dividend
#   %ecx <- 12(%esp)  divisor
# Returns the quotient in %eax; divl also leaves the remainder in %edx.
# NOTE(review): divl raises a divide error when the divisor is 0 or when the
# quotient does not fit in 32 bits (high half >= divisor). Nothing here
# checks for that; presumably callers guarantee it - confirm.
159.globl	_bn_div_words
160.private_extern	_bn_div_words
161.align	4
162_bn_div_words:
163L_bn_div_words_begin:
164	movl	4(%esp),%edx	# edx = dividend high half
165	movl	8(%esp),%eax	# eax = dividend low half
166	movl	12(%esp),%ecx	# ecx = divisor
167	divl	%ecx	# eax = edx:eax / ecx, edx = remainder
168	ret
# bn_add_words: word-wise addition of two arrays with carry propagation.
# Per word: (carry, r[i]) = a[i] + b[i] + carry.
# Stack arguments, at the offsets used after the four pushes below:
#   %ebx <- 20(%esp)  destination array (r)
#   %esi <- 24(%esp)  first source array (a)
#   %edi <- 28(%esp)  second source array (b)
#   %ebp <- 32(%esp)  word count
# Returns the final carry in %eax (0 or 1: at most one of the two adds in a
# round can carry). %ebp, %ebx, %esi and %edi are saved on entry and
# restored before returning; %ecx and %edx are used as scratch.
# A count of 0 skips both the unrolled loop and the tail and returns 0.
169.globl	_bn_add_words
170.private_extern	_bn_add_words
171.align	4
172_bn_add_words:
173L_bn_add_words_begin:
174	pushl	%ebp
175	pushl	%ebx
176	pushl	%esi
177	pushl	%edi
178
179	movl	20(%esp),%ebx	# ebx = r
180	movl	24(%esp),%esi	# esi = a
181	movl	28(%esp),%edi	# edi = b
182	movl	32(%esp),%ebp	# ebp = word count
183	xorl	%eax,%eax	# eax = carry, starts at 0
184	andl	$4294967288,%ebp	# mask 0xFFFFFFF8: words covered by the 8-way unrolled loop
185	jz	L006aw_finish	# fewer than 8 words: go straight to the tail
186L007aw_loop:
187	# Round 0
188	movl	(%esi),%ecx	# ecx = a[i]
189	movl	(%edi),%edx	# edx = b[i]
190	addl	%eax,%ecx	# ecx += carry in; CF = carry out of this add
191	movl	$0,%eax	# clear eax with mov so CF from the add survives
192	adcl	%eax,%eax	# eax = CF
193	addl	%edx,%ecx	# ecx += b[i]
194	adcl	$0,%eax	# eax += CF of the second add
195	movl	%ecx,(%ebx)	# r[i] = sum
196	# Round 1
197	movl	4(%esi),%ecx
198	movl	4(%edi),%edx
199	addl	%eax,%ecx
200	movl	$0,%eax
201	adcl	%eax,%eax
202	addl	%edx,%ecx
203	adcl	$0,%eax
204	movl	%ecx,4(%ebx)
205	# Round 2
206	movl	8(%esi),%ecx
207	movl	8(%edi),%edx
208	addl	%eax,%ecx
209	movl	$0,%eax
210	adcl	%eax,%eax
211	addl	%edx,%ecx
212	adcl	$0,%eax
213	movl	%ecx,8(%ebx)
214	# Round 3
215	movl	12(%esi),%ecx
216	movl	12(%edi),%edx
217	addl	%eax,%ecx
218	movl	$0,%eax
219	adcl	%eax,%eax
220	addl	%edx,%ecx
221	adcl	$0,%eax
222	movl	%ecx,12(%ebx)
223	# Round 4
224	movl	16(%esi),%ecx
225	movl	16(%edi),%edx
226	addl	%eax,%ecx
227	movl	$0,%eax
228	adcl	%eax,%eax
229	addl	%edx,%ecx
230	adcl	$0,%eax
231	movl	%ecx,16(%ebx)
232	# Round 5
233	movl	20(%esi),%ecx
234	movl	20(%edi),%edx
235	addl	%eax,%ecx
236	movl	$0,%eax
237	adcl	%eax,%eax
238	addl	%edx,%ecx
239	adcl	$0,%eax
240	movl	%ecx,20(%ebx)
241	# Round 6
242	movl	24(%esi),%ecx
243	movl	24(%edi),%edx
244	addl	%eax,%ecx
245	movl	$0,%eax
246	adcl	%eax,%eax
247	addl	%edx,%ecx
248	adcl	$0,%eax
249	movl	%ecx,24(%ebx)
250	# Round 7
251	movl	28(%esi),%ecx
252	movl	28(%edi),%edx
253	addl	%eax,%ecx
254	movl	$0,%eax
255	adcl	%eax,%eax
256	addl	%edx,%ecx
257	adcl	$0,%eax
258	movl	%ecx,28(%ebx)
259
260	addl	$32,%esi	# advance a, b and r by 8 words
261	addl	$32,%edi
262	addl	$32,%ebx
263	subl	$8,%ebp	# the carry lives in eax, so clobbering flags here is harmless
264	jnz	L007aw_loop
265L006aw_finish:
266	movl	32(%esp),%ebp	# reload the original count
267	andl	$7,%ebp	# ebp = leftover words (0 to 7)
268	jz	L008aw_end
269	# Tail Round 0
270	movl	(%esi),%ecx
271	movl	(%edi),%edx
272	addl	%eax,%ecx
273	movl	$0,%eax
274	adcl	%eax,%eax
275	addl	%edx,%ecx
276	adcl	$0,%eax
277	decl	%ebp	# sets ZF for the jz below; the carry is already in eax
278	movl	%ecx,(%ebx)	# mov leaves ZF untouched
279	jz	L008aw_end
280	# Tail Round 1
281	movl	4(%esi),%ecx
282	movl	4(%edi),%edx
283	addl	%eax,%ecx
284	movl	$0,%eax
285	adcl	%eax,%eax
286	addl	%edx,%ecx
287	adcl	$0,%eax
288	decl	%ebp
289	movl	%ecx,4(%ebx)
290	jz	L008aw_end
291	# Tail Round 2
292	movl	8(%esi),%ecx
293	movl	8(%edi),%edx
294	addl	%eax,%ecx
295	movl	$0,%eax
296	adcl	%eax,%eax
297	addl	%edx,%ecx
298	adcl	$0,%eax
299	decl	%ebp
300	movl	%ecx,8(%ebx)
301	jz	L008aw_end
302	# Tail Round 3
303	movl	12(%esi),%ecx
304	movl	12(%edi),%edx
305	addl	%eax,%ecx
306	movl	$0,%eax
307	adcl	%eax,%eax
308	addl	%edx,%ecx
309	adcl	$0,%eax
310	decl	%ebp
311	movl	%ecx,12(%ebx)
312	jz	L008aw_end
313	# Tail Round 4
314	movl	16(%esi),%ecx
315	movl	16(%edi),%edx
316	addl	%eax,%ecx
317	movl	$0,%eax
318	adcl	%eax,%eax
319	addl	%edx,%ecx
320	adcl	$0,%eax
321	decl	%ebp
322	movl	%ecx,16(%ebx)
323	jz	L008aw_end
324	# Tail Round 5
325	movl	20(%esi),%ecx
326	movl	20(%edi),%edx
327	addl	%eax,%ecx
328	movl	$0,%eax
329	adcl	%eax,%eax
330	addl	%edx,%ecx
331	adcl	$0,%eax
332	decl	%ebp
333	movl	%ecx,20(%ebx)
334	jz	L008aw_end
335	# Tail Round 6
336	movl	24(%esi),%ecx	# seventh and last possible tail word, so no count check follows
337	movl	24(%edi),%edx
338	addl	%eax,%ecx
339	movl	$0,%eax
340	adcl	%eax,%eax
341	addl	%edx,%ecx
342	adcl	$0,%eax
343	movl	%ecx,24(%ebx)
344L008aw_end:
345	popl	%edi	# restore saved registers in reverse push order
346	popl	%esi
347	popl	%ebx
348	popl	%ebp
349	ret
# bn_sub_words: word-wise subtraction of two arrays with borrow propagation.
# Per word: r[i] = a[i] - b[i] - borrow, and borrow becomes 1 when the
# subtraction wraps.
# Stack arguments, at the offsets used after the four pushes below:
#   %ebx <- 20(%esp)  destination array (r)
#   %esi <- 24(%esp)  minuend array (a)
#   %edi <- 28(%esp)  subtrahend array (b)
#   %ebp <- 32(%esp)  word count
# Returns the final borrow in %eax (0 or 1: at most one of the two subtracts
# in a round can borrow). %ebp, %ebx, %esi and %edi are saved on entry and
# restored before returning; %ecx and %edx are used as scratch.
# A count of 0 skips both the unrolled loop and the tail and returns 0.
350.globl	_bn_sub_words
351.private_extern	_bn_sub_words
352.align	4
353_bn_sub_words:
354L_bn_sub_words_begin:
355	pushl	%ebp
356	pushl	%ebx
357	pushl	%esi
358	pushl	%edi
359
360	movl	20(%esp),%ebx	# ebx = r
361	movl	24(%esp),%esi	# esi = a
362	movl	28(%esp),%edi	# edi = b
363	movl	32(%esp),%ebp	# ebp = word count
364	xorl	%eax,%eax	# eax = borrow, starts at 0
365	andl	$4294967288,%ebp	# mask 0xFFFFFFF8: words covered by the 8-way unrolled loop
366	jz	L009aw_finish	# fewer than 8 words: go straight to the tail
367L010aw_loop:
368	# Round 0
369	movl	(%esi),%ecx	# ecx = a[i]
370	movl	(%edi),%edx	# edx = b[i]
371	subl	%eax,%ecx	# ecx -= borrow in; CF = borrow out of this subtract
372	movl	$0,%eax	# clear eax with mov so CF from the subtract survives
373	adcl	%eax,%eax	# eax = CF
374	subl	%edx,%ecx	# ecx -= b[i]
375	adcl	$0,%eax	# eax += CF of the second subtract
376	movl	%ecx,(%ebx)	# r[i] = difference
377	# Round 1
378	movl	4(%esi),%ecx
379	movl	4(%edi),%edx
380	subl	%eax,%ecx
381	movl	$0,%eax
382	adcl	%eax,%eax
383	subl	%edx,%ecx
384	adcl	$0,%eax
385	movl	%ecx,4(%ebx)
386	# Round 2
387	movl	8(%esi),%ecx
388	movl	8(%edi),%edx
389	subl	%eax,%ecx
390	movl	$0,%eax
391	adcl	%eax,%eax
392	subl	%edx,%ecx
393	adcl	$0,%eax
394	movl	%ecx,8(%ebx)
395	# Round 3
396	movl	12(%esi),%ecx
397	movl	12(%edi),%edx
398	subl	%eax,%ecx
399	movl	$0,%eax
400	adcl	%eax,%eax
401	subl	%edx,%ecx
402	adcl	$0,%eax
403	movl	%ecx,12(%ebx)
404	# Round 4
405	movl	16(%esi),%ecx
406	movl	16(%edi),%edx
407	subl	%eax,%ecx
408	movl	$0,%eax
409	adcl	%eax,%eax
410	subl	%edx,%ecx
411	adcl	$0,%eax
412	movl	%ecx,16(%ebx)
413	# Round 5
414	movl	20(%esi),%ecx
415	movl	20(%edi),%edx
416	subl	%eax,%ecx
417	movl	$0,%eax
418	adcl	%eax,%eax
419	subl	%edx,%ecx
420	adcl	$0,%eax
421	movl	%ecx,20(%ebx)
422	# Round 6
423	movl	24(%esi),%ecx
424	movl	24(%edi),%edx
425	subl	%eax,%ecx
426	movl	$0,%eax
427	adcl	%eax,%eax
428	subl	%edx,%ecx
429	adcl	$0,%eax
430	movl	%ecx,24(%ebx)
431	# Round 7
432	movl	28(%esi),%ecx
433	movl	28(%edi),%edx
434	subl	%eax,%ecx
435	movl	$0,%eax
436	adcl	%eax,%eax
437	subl	%edx,%ecx
438	adcl	$0,%eax
439	movl	%ecx,28(%ebx)
440
441	addl	$32,%esi	# advance a, b and r by 8 words
442	addl	$32,%edi
443	addl	$32,%ebx
444	subl	$8,%ebp	# the borrow lives in eax, so clobbering flags here is harmless
445	jnz	L010aw_loop
446L009aw_finish:
447	movl	32(%esp),%ebp	# reload the original count
448	andl	$7,%ebp	# ebp = leftover words (0 to 7)
449	jz	L011aw_end
450	# Tail Round 0
451	movl	(%esi),%ecx
452	movl	(%edi),%edx
453	subl	%eax,%ecx
454	movl	$0,%eax
455	adcl	%eax,%eax
456	subl	%edx,%ecx
457	adcl	$0,%eax
458	decl	%ebp	# sets ZF for the jz below; the borrow is already in eax
459	movl	%ecx,(%ebx)	# mov leaves ZF untouched
460	jz	L011aw_end
461	# Tail Round 1
462	movl	4(%esi),%ecx
463	movl	4(%edi),%edx
464	subl	%eax,%ecx
465	movl	$0,%eax
466	adcl	%eax,%eax
467	subl	%edx,%ecx
468	adcl	$0,%eax
469	decl	%ebp
470	movl	%ecx,4(%ebx)
471	jz	L011aw_end
472	# Tail Round 2
473	movl	8(%esi),%ecx
474	movl	8(%edi),%edx
475	subl	%eax,%ecx
476	movl	$0,%eax
477	adcl	%eax,%eax
478	subl	%edx,%ecx
479	adcl	$0,%eax
480	decl	%ebp
481	movl	%ecx,8(%ebx)
482	jz	L011aw_end
483	# Tail Round 3
484	movl	12(%esi),%ecx
485	movl	12(%edi),%edx
486	subl	%eax,%ecx
487	movl	$0,%eax
488	adcl	%eax,%eax
489	subl	%edx,%ecx
490	adcl	$0,%eax
491	decl	%ebp
492	movl	%ecx,12(%ebx)
493	jz	L011aw_end
494	# Tail Round 4
495	movl	16(%esi),%ecx
496	movl	16(%edi),%edx
497	subl	%eax,%ecx
498	movl	$0,%eax
499	adcl	%eax,%eax
500	subl	%edx,%ecx
501	adcl	$0,%eax
502	decl	%ebp
503	movl	%ecx,16(%ebx)
504	jz	L011aw_end
505	# Tail Round 5
506	movl	20(%esi),%ecx
507	movl	20(%edi),%edx
508	subl	%eax,%ecx
509	movl	$0,%eax
510	adcl	%eax,%eax
511	subl	%edx,%ecx
512	adcl	$0,%eax
513	decl	%ebp
514	movl	%ecx,20(%ebx)
515	jz	L011aw_end
516	# Tail Round 6
517	movl	24(%esi),%ecx	# seventh and last possible tail word, so no count check follows
518	movl	24(%edi),%edx
519	subl	%eax,%ecx
520	movl	$0,%eax
521	adcl	%eax,%eax
522	subl	%edx,%ecx
523	adcl	$0,%eax
524	movl	%ecx,24(%ebx)
525L011aw_end:
526	popl	%edi	# restore saved registers in reverse push order
527	popl	%esi
528	popl	%ebx
529	popl	%ebp
530	ret
531#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
532