xref: /aosp_15_r20/external/boringssl/src/gen/bcm/x86-mont-apple.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
// _bn_mul_mont: 32-bit x86 routine (AT&T syntax) built on MMX registers and
// pmuludq. The identification string that follows the function in this file
// names it "Montgomery Multiplication for x86".
//
// Stack arguments as the code reads them (offsets are relative to %esp after
// the four register pushes below):
//   20(%esp) arg0  pointer, written through in the tail  (called rp below)
//   24(%esp) arg1  pointer, read word by word            (called ap below)
//   28(%esp) arg2  pointer, one word read per outer pass (called bp below)
//   32(%esp) arg3  pointer, read word by word            (called np below)
//   36(%esp) arg4  pointer, only its first word is read  (called n0 below)
//   40(%esp) arg5  word count                            (called num below)
// NOTE(review): the names rp/ap/bp/np/n0/num are presumed to match the C
// prototype of bn_mul_mont - confirm against the declaring header.
//
// Returns in %eax: 0 when num < 4 (signed compare, nothing is computed),
// otherwise 1. %ebp/%ebx/%esi/%edi are saved and restored; %mm0-%mm7 are
// used and emms is executed before the tail.
//
// Frame built below (%esp = frame base, 64-byte aligned):
//    4(%esp) rp     8(%esp) ap    12(%esp) bp    16(%esp) np
//   20(%esp) first 32-bit word loaded through n0
//   24(%esp) value of %esp to restore on exit
//   32(%esp) tp[0..num], temporary vector (num+1 words are written)
7.text
8.globl	_bn_mul_mont
9.private_extern	_bn_mul_mont
10.align	4
11_bn_mul_mont:
12L_bn_mul_mont_begin:
13	pushl	%ebp	// save the four registers popped again at L000just_leave
14	pushl	%ebx
15	pushl	%esi
16	pushl	%edi
17	xorl	%eax,%eax	// return value 0 for the early-exit path
18	movl	40(%esp),%edi	// edi = num
19	cmpl	$4,%edi
20	jl	L000just_leave	// num < 4 (signed): return 0 without doing any work
21	leal	20(%esp),%esi	// esi = address of the argument block (arg0 slot)
22	leal	24(%esp),%edx	// edx = address of the arg1 stack slot (not the pointer stored in it)
23	addl	$2,%edi	// num + 2 words: tp needs num+1, one more is reserved
24	negl	%edi
25	leal	-32(%esp,%edi,4),%ebp	// ebp = esp - 32 - 4*(num+2): candidate frame base
26	negl	%edi	// edi = num + 2 again
// Adjust ebp so that it equals edx modulo 2048 but differs from it in bit 11.
// NOTE(review): presumably a cache-aliasing heuristic - confirm against the
// generating Perl script.
27	movl	%ebp,%eax
28	subl	%edx,%eax
29	andl	$2047,%eax
30	subl	%eax,%ebp	// ebp now equals edx modulo 2048
31	xorl	%ebp,%edx
32	andl	$2048,%edx
33	xorl	$2048,%edx	// edx = 2048 if bit 11 of ebp and the slot address match, else 0
34	subl	%edx,%ebp	// lower ebp by that amount so bit 11 differs afterwards
35	andl	$-64,%ebp	// round the frame base down to a 64-byte boundary
// Move esp down to ebp in steps of at most 4096 bytes, reading one word at
// every step so each 4096-byte region is touched in order.
36	movl	%esp,%eax
37	subl	%ebp,%eax
38	andl	$-4096,%eax	// eax = (esp - ebp) rounded down to a multiple of 4096
39	movl	%esp,%edx	// edx = esp to restore on exit (stored in the frame below)
40	leal	(%ebp,%eax,1),%esp	// first step: less than 4096 bytes below the old esp
41	movl	(%esp),%eax	// touch the new top of stack
42	cmpl	%ebp,%esp
43	ja	L001page_walk	// still above the frame base: keep stepping
44	jmp	L002page_walk_done
45.align	4,0x90
46L001page_walk:
47	leal	-4096(%esp),%esp	// step down 4096 bytes ...
48	movl	(%esp),%eax	// ... and touch that location
49	cmpl	%ebp,%esp
50	ja	L001page_walk	// repeat until esp == ebp
51L002page_walk_done:
// Copy the arguments from the caller-side block (esi) into the new frame.
52	movl	(%esi),%eax	// eax = rp
53	movl	4(%esi),%ebx	// ebx = ap
54	movl	8(%esi),%ecx	// ecx = bp
55	movl	12(%esi),%ebp	// ebp = np
56	movl	16(%esi),%esi	// esi = n0 pointer
57	movl	(%esi),%esi	// esi = first 32-bit word at n0
58	movl	%eax,4(%esp)
59	movl	%ebx,8(%esp)
60	movl	%ecx,12(%esp)
61	movl	%ebp,16(%esp)
62	movl	%esi,20(%esp)
63	leal	-3(%edi),%ebx	// ebx = (num+2) - 3 = num - 1
64	movl	%edx,24(%esp)	// remember the esp to restore on exit
65	movl	$-1,%eax
66	movd	%eax,%mm7	// mm7 = 0x00000000FFFFFFFF: mask for the low 32 bits
67	movl	8(%esp),%esi	// esi = ap
68	movl	12(%esp),%edi	// edi = bp
69	movl	16(%esp),%ebp	// ebp = np
70	xorl	%edx,%edx	// i = 0 (index into bp)
71	xorl	%ecx,%ecx	// j = 0 (index into ap/np)
// First pass (i = 0): tp is produced from ap*bp[0] + np*m; there is no
// previous tp to add in.
72	movd	(%edi),%mm4	// mm4 = bp[0]
73	movd	(%esi),%mm5	// mm5 = ap[0]
74	movd	(%ebp),%mm3	// mm3 = np[0]
75	pmuludq	%mm4,%mm5	// mm5 = ap[0]*bp[0] (64-bit product)
76	movq	%mm5,%mm2	// mm2 = ap*bp accumulator
77	movq	%mm5,%mm0
78	pand	%mm7,%mm0	// mm0 = low 32 bits of the product
79	pmuludq	20(%esp),%mm5	// mm5 = product * n0 word; its low 32 bits (m) feed the np multiplies
80	pmuludq	%mm5,%mm3	// mm3 = np[0]*m
81	paddq	%mm0,%mm3	// add the low word of ap[0]*bp[0]
82	movd	4(%ebp),%mm1	// preload np[1]
83	movd	4(%esi),%mm0	// preload ap[1]
84	psrlq	$32,%mm2	// keep only the carry (high half) of each accumulator
85	psrlq	$32,%mm3
86	incl	%ecx	// j = 1
87.align	4,0x90
88L0031st:
89	pmuludq	%mm4,%mm0	// ap[j]*bp[0]
90	pmuludq	%mm5,%mm1	// np[j]*m
91	paddq	%mm0,%mm2
92	paddq	%mm1,%mm3
93	movq	%mm2,%mm0
94	pand	%mm7,%mm0	// low word of the ap*bp accumulator
95	movd	4(%ebp,%ecx,4),%mm1	// preload np[j+1]
96	paddq	%mm0,%mm3
97	movd	4(%esi,%ecx,4),%mm0	// preload ap[j+1]
98	psrlq	$32,%mm2
99	movd	%mm3,28(%esp,%ecx,4)	// tp[j-1] = low word of the sum
100	psrlq	$32,%mm3
101	leal	1(%ecx),%ecx	// j++
102	cmpl	%ebx,%ecx
103	jl	L0031st	// loop while j < num-1
// Last word of the first pass (j = num-1); no further preloads.
104	pmuludq	%mm4,%mm0
105	pmuludq	%mm5,%mm1
106	paddq	%mm0,%mm2
107	paddq	%mm1,%mm3
108	movq	%mm2,%mm0
109	pand	%mm7,%mm0
110	paddq	%mm0,%mm3
111	movd	%mm3,28(%esp,%ecx,4)	// tp[num-2]
112	psrlq	$32,%mm2
113	psrlq	$32,%mm3
114	paddq	%mm2,%mm3	// sum of the two remaining carries
115	movq	%mm3,32(%esp,%ebx,4)	// 64-bit store: tp[num-1] and tp[num]
116	incl	%edx	// i = 1
// Remaining passes (i = 1 .. num-1): same as above, with the previous tp
// contents added in.
117L004outer:
118	xorl	%ecx,%ecx	// j = 0
119	movd	(%edi,%edx,4),%mm4	// mm4 = bp[i]
120	movd	(%esi),%mm5	// mm5 = ap[0]
121	movd	32(%esp),%mm6	// mm6 = tp[0]
122	movd	(%ebp),%mm3	// mm3 = np[0]
123	pmuludq	%mm4,%mm5	// ap[0]*bp[i]
124	paddq	%mm6,%mm5	// + tp[0]
125	movq	%mm5,%mm0
126	movq	%mm5,%mm2
127	pand	%mm7,%mm0	// low word of ap[0]*bp[i] + tp[0]
128	pmuludq	20(%esp),%mm5	// low 32 bits of mm5 = m for this pass
129	pmuludq	%mm5,%mm3	// np[0]*m
130	paddq	%mm0,%mm3
131	movd	36(%esp),%mm6	// mm6 = tp[1]
132	movd	4(%ebp),%mm1	// preload np[1]
133	movd	4(%esi),%mm0	// preload ap[1]
134	psrlq	$32,%mm2
135	psrlq	$32,%mm3
136	paddq	%mm6,%mm2	// fold tp[1] into the ap*bp carry
137	incl	%ecx	// j = 1
138	decl	%ebx	// ebx = num - 2: countdown for the inner loop
139L005inner:
140	pmuludq	%mm4,%mm0	// ap[j]*bp[i]
141	pmuludq	%mm5,%mm1	// np[j]*m
142	paddq	%mm0,%mm2
143	paddq	%mm1,%mm3
144	movq	%mm2,%mm0
145	movd	36(%esp,%ecx,4),%mm6	// mm6 = tp[j+1]
146	pand	%mm7,%mm0
147	movd	4(%ebp,%ecx,4),%mm1	// preload np[j+1]
148	paddq	%mm0,%mm3
149	movd	4(%esi,%ecx,4),%mm0	// preload ap[j+1]
150	psrlq	$32,%mm2
151	movd	%mm3,28(%esp,%ecx,4)	// tp[j-1] = low word of the sum
152	psrlq	$32,%mm3
153	paddq	%mm6,%mm2	// fold tp[j+1] into the ap*bp carry
154	decl	%ebx	// sets ZF for the jnz below
155	leal	1(%ecx),%ecx	// j++ (leal leaves the flags from decl intact)
156	jnz	L005inner
157	movl	%ecx,%ebx	// ebx = num - 1 again
// Last word of this pass (j = num-1).
158	pmuludq	%mm4,%mm0
159	pmuludq	%mm5,%mm1
160	paddq	%mm0,%mm2
161	paddq	%mm1,%mm3
162	movq	%mm2,%mm0
163	pand	%mm7,%mm0
164	paddq	%mm0,%mm3
165	movd	%mm3,28(%esp,%ecx,4)	// tp[num-2]
166	psrlq	$32,%mm2
167	psrlq	$32,%mm3
168	movd	36(%esp,%ebx,4),%mm6	// mm6 = tp[num]
169	paddq	%mm2,%mm3
170	paddq	%mm6,%mm3	// both carries plus the old tp[num]
171	movq	%mm3,32(%esp,%ebx,4)	// 64-bit store: tp[num-1] and tp[num]
172	leal	1(%edx),%edx	// i++
173	cmpl	%ebx,%edx
174	jle	L004outer	// loop while i <= num-1
175	emms	// done with MMX registers
176	jmp	L006common_tail	// target is the label immediately below
177.align	4,0x90
// Tail: write tp - np to rp, then pick tp or tp - np per word without
// branching on the comparison result.
178L006common_tail:
179	movl	16(%esp),%ebp	// ebp = np
180	movl	4(%esp),%edi	// edi = rp
181	leal	32(%esp),%esi	// esi = &tp[0]
182	movl	(%esi),%eax	// eax = tp[0]
183	movl	%ebx,%ecx	// ecx = num - 1: loop counter
184	xorl	%edx,%edx	// j = 0; also clears CF for the first sbbl
185.align	4,0x90
186L007sub:
187	sbbl	(%ebp,%edx,4),%eax	// eax = tp[j] - np[j] - borrow
188	movl	%eax,(%edi,%edx,4)	// rp[j] = difference
189	decl	%ecx	// sets SF/OF for jge; decl does not modify CF (the borrow)
190	movl	4(%esi,%edx,4),%eax	// eax = tp[j+1]
191	leal	1(%edx),%edx	// j++ (flags untouched)
192	jge	L007sub	// loop until ecx drops below zero
193	sbbl	$0,%eax	// eax = tp[num] - final borrow
// NOTE(review): the select below assumes eax is now 0 or 0xFFFFFFFF, i.e.
// that tp[num] is 0 or 1 - confirm.
194	movl	$-1,%edx
195	xorl	%eax,%edx	// edx = ~eax
196	jmp	L008copy	// target is the label immediately below
197.align	4,0x90
198L008copy:
199	movl	32(%esp,%ebx,4),%esi	// esi = tp[j], j = ebx counting down from num-1
200	movl	(%edi,%ebx,4),%ebp	// ebp = rp[j] (tp - np)
201	movl	%ecx,32(%esp,%ebx,4)	// overwrite tp[j] with ecx (-1 after the sub loop)
202	andl	%eax,%esi	// keep tp[j] when eax is all ones
203	andl	%edx,%ebp	// keep rp[j] when eax is zero
204	orl	%esi,%ebp
205	movl	%ebp,(%edi,%ebx,4)	// rp[j] = selected word
206	decl	%ebx
207	jge	L008copy	// down to j = 0
208	movl	24(%esp),%esp	// drop the frame: restore the esp saved at entry
209	movl	$1,%eax	// return 1
210L000just_leave:
211	popl	%edi
212	popl	%esi
213	popl	%ebx
214	popl	%ebp
215	ret
// NUL-terminated ASCII identification string:
// "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
216.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
217.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
218.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
219.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
220.byte	111,114,103,62,0
221#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
222