xref: /aosp_15_r20/external/boringssl/src/gen/bcm/x86-mont-linux.S (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
7.text
// bn_mul_mont: word-by-word Montgomery multiplication for 32-bit x86, written
// in AT&T syntax and built around MMX registers and pmuludq.
//
// All six arguments are read from the stack. After the four pushes below,
// argument k lives at 20+4*(k-1)(%esp):
//   arg1  pointer to the result vector, written by the final subtract/copy
//   arg2  pointer to the first multiplicand (ap[] in the comments below);
//         every word is read once per outer iteration
//   arg3  pointer to the second multiplicand (bp[]); one word is read per
//         outer iteration
//   arg4  pointer to the modulus (np[])
//   arg5  pointer to one word that is loaded once and multiplied into the low
//         word of each row (n0)
//   arg6  number of 32-bit words in each vector (num)
// NOTE(review): the names rp/ap/bp/np/n0/num are assumed from the usual C
// prototype, which is not visible in this file -- confirm against the header.
//
// Returns %eax = 0 without doing any arithmetic when num < 4 (signed compare),
// otherwise %eax = 1.
// %ebp, %ebx, %esi and %edi are pushed on entry and popped on exit. %esp is
// moved to a private frame and reloaded from 24(%esp) before the pops.
// No CPU feature test is performed here before pmuludq is used.
// NOTE(review): presumably the caller guarantees SSE2 -- confirm.
//
// Frame layout once the new %esp is in place:
//    4(%esp) arg1     8(%esp) arg2    12(%esp) arg3    16(%esp) arg4
//   20(%esp) the word loaded through arg5
//   24(%esp) the %esp value from just after the pushes
//   32(%esp) tp[0..num], the running accumulator of num+1 words
8.globl	bn_mul_mont
9.hidden	bn_mul_mont
10.type	bn_mul_mont,@function
11.align	16
12bn_mul_mont:
13.L_bn_mul_mont_begin:
// Save the four registers that are restored before ret.
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
// Preload the failure return value 0, then leave early if arg6 (num) < 4.
// The compare is signed (jl).
18	xorl	%eax,%eax
19	movl	40(%esp),%edi
20	cmpl	$4,%edi
21	jl	.L000just_leave
// %esi = address of the arg1 stack slot (base of the argument block).
// %edx = address of the arg2 stack slot.
// NOTE(review): leal yields the address of the slot, not the pointer stored in
// it, so the placement arithmetic below is relative to the stack slot.
22	leal	20(%esp),%esi
23	leal	24(%esp),%edx
// %ebp = %esp - 32 - 4*(num+2): candidate base for the new frame.
// The second negl leaves %edi = num+2.
24	addl	$2,%edi
25	negl	%edi
26	leal	-32(%esp,%edi,4),%ebp
27	negl	%edi
// Lower %ebp by ((%ebp - %edx) mod 2048), making %ebp congruent to %edx
// modulo 2048.
28	movl	%ebp,%eax
29	subl	%edx,%eax
30	andl	$2047,%eax
31	subl	%eax,%ebp
// If bit 11 of %ebp equals bit 11 of %edx, lower %ebp by a further 2048 so the
// two addresses differ in that bit. %edx is consumed as scratch here.
32	xorl	%ebp,%edx
33	andl	$2048,%edx
34	xorl	$2048,%edx
35	subl	%edx,%ebp
// Round the frame base down to a 64-byte boundary.
36	andl	$-64,%ebp
// %eax = (%esp - %ebp) rounded down to a multiple of 4096.
// %edx keeps the current %esp so it can be stored in the frame later.
// %esp moves to %ebp + %eax and one word is read from that address.
37	movl	%esp,%eax
38	subl	%ebp,%eax
39	andl	$-4096,%eax
40	movl	%esp,%edx
41	leal	(%ebp,%eax,1),%esp
42	movl	(%esp),%eax
// Unsigned compare: keep walking while %esp is still above the target %ebp.
43	cmpl	%ebp,%esp
44	ja	.L001page_walk
45	jmp	.L002page_walk_done
46.align	16
47.L001page_walk:
// Step %esp down 4096 bytes at a time, reading one word at each step. Because
// %esp - %ebp is always a multiple of 4096 here, the loop exits with
// %esp == %ebp.
// NOTE(review): presumably this touches each stack page in order so guard
// pages are hit sequentially -- intent is not stated in this file.
48	leal	-4096(%esp),%esp
49	movl	(%esp),%eax
50	cmpl	%ebp,%esp
51	ja	.L001page_walk
52.L002page_walk_done:
// Copy the arguments out of the caller's argument block (%esi) into the new
// frame. arg5 is dereferenced once, so 20(%esp) holds the word it points to
// rather than the pointer.
53	movl	(%esi),%eax
54	movl	4(%esi),%ebx
55	movl	8(%esi),%ecx
56	movl	12(%esi),%ebp
57	movl	16(%esi),%esi
58	movl	(%esi),%esi
59	movl	%eax,4(%esp)
60	movl	%ebx,8(%esp)
61	movl	%ecx,12(%esp)
62	movl	%ebp,16(%esp)
63	movl	%esi,20(%esp)
// %ebx = (num+2) - 3 = num-1. 24(%esp) receives the saved pre-frame %esp.
64	leal	-3(%edi),%ebx
65	movl	%edx,24(%esp)
// %mm7 = 0x00000000FFFFFFFF, the mask used with pand to take the low 32 bits.
66	movl	$-1,%eax
67	movd	%eax,%mm7
// Register roles for the multiplication:
//   %esi = ap, %edi = bp, %ebp = np, %edx = outer index i, %ecx = inner index j
//   %mm4 = bp[i], %mm5 = per-row multiplier m (low 32 bits), %mm2 = running
//   carry of the ap*bp chain, %mm3 = running carry of the np*m chain.
68	movl	8(%esp),%esi
69	movl	12(%esp),%edi
70	movl	16(%esp),%ebp
71	xorl	%edx,%edx
72	xorl	%ecx,%ecx
// First row (i = 0): the accumulator is not read yet.
73	movd	(%edi),%mm4
74	movd	(%esi),%mm5
75	movd	(%ebp),%mm3
// %mm5 = ap[0]*bp[0]; %mm2 keeps the full product, %mm0 its low 32 bits.
76	pmuludq	%mm4,%mm5
77	movq	%mm5,%mm2
78	movq	%mm5,%mm0
79	pand	%mm7,%mm0
// %mm5 = lo32(ap[0]*bp[0]) * (word at 20(%esp)); only its low 32 bits (m) are
// used by the pmuludq instructions that follow.
// %mm3 = np[0]*m + lo32(ap[0]*bp[0]).
80	pmuludq	20(%esp),%mm5
81	pmuludq	%mm5,%mm3
82	paddq	%mm0,%mm3
// Preload np[1] and ap[1], reduce both chains to their carries, and set j = 1.
83	movd	4(%ebp),%mm1
84	movd	4(%esi),%mm0
85	psrlq	$32,%mm2
86	psrlq	$32,%mm3
87	incl	%ecx
88.align	16
89.L0031st:
// On entry %mm0 = ap[j] and %mm1 = np[j]. Each pass adds ap[j]*bp[0] into
// %mm2 and np[j]*m plus lo32(%mm2) into %mm3, stores lo32(%mm3) as tp[j-1]
// (28(%esp,%ecx,4)), keeps both carries, and preloads ap[j+1] and np[j+1].
// Runs while j < num-1 (signed compare against %ebx).
90	pmuludq	%mm4,%mm0
91	pmuludq	%mm5,%mm1
92	paddq	%mm0,%mm2
93	paddq	%mm1,%mm3
94	movq	%mm2,%mm0
95	pand	%mm7,%mm0
96	movd	4(%ebp,%ecx,4),%mm1
97	paddq	%mm0,%mm3
98	movd	4(%esi,%ecx,4),%mm0
99	psrlq	$32,%mm2
100	movd	%mm3,28(%esp,%ecx,4)
101	psrlq	$32,%mm3
102	leal	1(%ecx),%ecx
103	cmpl	%ebx,%ecx
104	jl	.L0031st
// Last word of the first row (j = num-1): same step without the preloads,
// storing tp[num-2].
105	pmuludq	%mm4,%mm0
106	pmuludq	%mm5,%mm1
107	paddq	%mm0,%mm2
108	paddq	%mm1,%mm3
109	movq	%mm2,%mm0
110	pand	%mm7,%mm0
111	paddq	%mm0,%mm3
112	movd	%mm3,28(%esp,%ecx,4)
// Add the two final carries and store the 64-bit sum as tp[num-1] and tp[num].
113	psrlq	$32,%mm2
114	psrlq	$32,%mm3
115	paddq	%mm2,%mm3
116	movq	%mm3,32(%esp,%ebx,4)
// i = 1.
117	incl	%edx
118.L004outer:
// Rows i = 1 .. num-1: same scheme as the first row, except that the previous
// accumulator words tp[] are added into the ap*bp chain.
119	xorl	%ecx,%ecx
120	movd	(%edi,%edx,4),%mm4
121	movd	(%esi),%mm5
122	movd	32(%esp),%mm6
123	movd	(%ebp),%mm3
// %mm5 = ap[0]*bp[i] + tp[0]; %mm0 = its low 32 bits; %mm2 = the full value.
124	pmuludq	%mm4,%mm5
125	paddq	%mm6,%mm5
126	movq	%mm5,%mm0
127	movq	%mm5,%mm2
128	pand	%mm7,%mm0
// m for this row = lo32(%mm5) * (word at 20(%esp)); %mm3 = np[0]*m + lo32.
129	pmuludq	20(%esp),%mm5
130	pmuludq	%mm5,%mm3
131	paddq	%mm0,%mm3
// Preload tp[1], np[1] and ap[1]; keep the carries; fold tp[1] into %mm2.
132	movd	36(%esp),%mm6
133	movd	4(%ebp),%mm1
134	movd	4(%esi),%mm0
135	psrlq	$32,%mm2
136	psrlq	$32,%mm3
137	paddq	%mm6,%mm2
// j = 1. %ebx becomes the inner-loop count, num-2.
138	incl	%ecx
139	decl	%ebx
140.L005inner:
// Same step as the first-row loop, plus: %mm6 = tp[j+1] (36(%esp,%ecx,4)) is
// loaded and added into the %mm2 carry for the next word. lo32(%mm3) is
// stored as tp[j-1].
141	pmuludq	%mm4,%mm0
142	pmuludq	%mm5,%mm1
143	paddq	%mm0,%mm2
144	paddq	%mm1,%mm3
145	movq	%mm2,%mm0
146	movd	36(%esp,%ecx,4),%mm6
147	pand	%mm7,%mm0
148	movd	4(%ebp,%ecx,4),%mm1
149	paddq	%mm0,%mm3
150	movd	4(%esi,%ecx,4),%mm0
151	psrlq	$32,%mm2
152	movd	%mm3,28(%esp,%ecx,4)
153	psrlq	$32,%mm3
154	paddq	%mm6,%mm2
// The j++ is done with leal so that the zero flag set by decl is still intact
// for jnz.
155	decl	%ebx
156	leal	1(%ecx),%ecx
157	jnz	.L005inner
// Here %ecx = num-1; copy it to %ebx to restore the value consumed by the
// countdown above.
158	movl	%ecx,%ebx
// Last word of the row: store tp[num-2].
159	pmuludq	%mm4,%mm0
160	pmuludq	%mm5,%mm1
161	paddq	%mm0,%mm2
162	paddq	%mm1,%mm3
163	movq	%mm2,%mm0
164	pand	%mm7,%mm0
165	paddq	%mm0,%mm3
166	movd	%mm3,28(%esp,%ecx,4)
// Sum both carries with the old tp[num] and store the 64-bit result as
// tp[num-1] and tp[num].
167	psrlq	$32,%mm2
168	psrlq	$32,%mm3
169	movd	36(%esp,%ebx,4),%mm6
170	paddq	%mm2,%mm3
171	paddq	%mm6,%mm3
172	movq	%mm3,32(%esp,%ebx,4)
// i++, and repeat while i <= num-1 (signed compare).
173	leal	1(%edx),%edx
174	cmpl	%ebx,%edx
175	jle	.L004outer
// Leave MMX state before the integer-only tail. The jmp targets the label
// that immediately follows the alignment padding.
176	emms
177	jmp	.L006common_tail
178.align	16
179.L006common_tail:
// Final reduction. %ebp = np, %edi = result pointer (arg1), %esi = &tp[0],
// %eax = tp[0], %ecx = num-1 as a countdown, %edx = index i = 0.
// The xorl also clears CF, which the sbbl chain below depends on.
180	movl	16(%esp),%ebp
181	movl	4(%esp),%edi
182	leal	32(%esp),%esi
183	movl	(%esi),%eax
184	movl	%ebx,%ecx
185	xorl	%edx,%edx
186.align	16
187.L007sub:
// result[i] = tp[i] - np[i] - borrow, for i = 0 .. num-1.
// The borrow lives in CF across iterations: decl, movl and leal leave CF
// unchanged. jge tests the flags produced by decl, which movl and leal also
// leave untouched. The loop exits with %ecx = -1 and %eax = tp[num].
188	sbbl	(%ebp,%edx,4),%eax
189	movl	%eax,(%edi,%edx,4)
190	decl	%ecx
191	movl	4(%esi,%edx,4),%eax
192	leal	1(%edx),%edx
193	jge	.L007sub
// %eax = tp[num] - final borrow; %edx = bitwise complement of %eax.
// NOTE(review): the select below relies on %eax being exactly 0 or
// 0xFFFFFFFF here. That holds if the accumulated value is below twice the
// modulus; it is not checked in this function.
194	sbbl	$0,%eax
195	movl	$-1,%edx
196	xorl	%eax,%edx
197	jmp	.L008copy
198.align	16
199.L008copy:
// Branch-free select, from index num-1 down to 0:
//   result[k] = (tp[k] & %eax) | (result[k] & %edx)
// so the difference stored by the subtract loop is kept when %eax is zero and
// replaced by tp[k] when %eax is all ones. Each tp[k] is overwritten with
// %ecx (-1 at this point) after it has been read.
200	movl	32(%esp,%ebx,4),%esi
201	movl	(%edi,%ebx,4),%ebp
202	movl	%ecx,32(%esp,%ebx,4)
203	andl	%eax,%esi
204	andl	%edx,%ebp
205	orl	%esi,%ebp
206	movl	%ebp,(%edi,%ebx,4)
207	decl	%ebx
208	jge	.L008copy
// Restore the %esp saved at 24(%esp) and return 1.
209	movl	24(%esp),%esp
210	movl	$1,%eax
211.L000just_leave:
// Shared exit: %eax is 0 on the early-out path and 1 after a full run.
212	popl	%edi
213	popl	%esi
214	popl	%ebx
215	popl	%ebp
216	ret
217.size	bn_mul_mont,.-.L_bn_mul_mont_begin
// NUL-terminated ASCII identification string, emitted as raw bytes directly
// after the function:
//   Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>
218.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
219.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
220.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
221.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
222.byte	111,114,103,62,0
223#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
224