xref: /aosp_15_r20/external/boringssl/src/gen/bcm/x86-mont-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol-prefix support: the include below is pulled in only when
; BORINGSSL_PREFIX is defined on the assembler command line.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Everything from here to the matching %else further down is assembled only
; when the NASM output format is win32.
7%ifidn __OUTPUT_FORMAT__, win32
; Section selection. Because the enclosing guard already requires win32, the
; "obj" branch and the final %else branch below cannot be taken in this file;
; only the win32 branch is reachable.
8%ifidn __OUTPUT_FORMAT__,obj
9section	code	use32 class=code align=64
10%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): defining the absolute symbol @feat.00 = 1 is presumably the
; NASM idiom for marking the object as safe-SEH aware -- confirm against the
; NASM win32 documentation.
11$@feat.00 equ 1
12section	.text	code align=64
13%else
14section	.text	code
15%endif
; -----------------------------------------------------------------------
; _bn_mul_mont
;
; 32-bit x86 Montgomery multiplication using 64-bit mm-register arithmetic
; (pmuludq / paddq). The byte string at the end of this block spells
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
;
; Arguments are read from the stack. After the four register pushes the
; first argument sits at [20+esp] and the sixth at [40+esp]:
;   arg1  pointer, written with the result words
;   arg2  pointer, read as a dword vector
;   arg3  pointer, read as a dword vector (one word per outer pass)
;   arg4  pointer, read as a dword vector; also subtracted in the tail
;   arg5  pointer, dereferenced once (first dword only)
;   arg6  word count
; NOTE(review): these presumably correspond to (rp, ap, bp, np, n0, num) of
; the C prototype -- confirm against the declaring header.
;
; Returns: eax = 0 without doing any work when arg6 < 4 (signed compare),
;          eax = 1 after a completed multiplication.
; Preserves ebp, ebx, esi, edi (pushed/popped). Uses mm0-mm7 and executes
; emms before the integer tail.
;
; Frame layout once esp has been moved (offsets from the new esp):
;   [4]  arg1      [8]  arg2      [12] arg3      [16] arg4
;   [20] dword loaded through arg5
;   [24] esp value from before the frame was carved out
;   [32...] scratch vector of dwords, indices 0..arg6 are accessed
; -----------------------------------------------------------------------
16global	_bn_mul_mont
17align	16
18_bn_mul_mont:
19L$_bn_mul_mont_begin:
20	push	ebp
21	push	ebx
22	push	esi
23	push	edi
24	xor	eax,eax	; eax = 0: return value for the early exit
25	mov	edi,DWORD [40+esp]	; edi = arg6 (word count)
26	cmp	edi,4
27	jl	NEAR L$000just_leave	; fewer than 4 words (signed): leave with eax = 0
28	lea	esi,[20+esp]	; esi = address of the arg1 slot (argument block)
29	lea	edx,[24+esp]	; edx = address of the arg2 slot
30	add	edi,2
31	neg	edi
32	lea	ebp,[edi*4+esp-32]	; ebp = esp - 32 - 4*(arg6+2): tentative frame base
33	neg	edi	; edi = arg6 + 2 again
; Adjust the tentative base relative to edx: first make ebp congruent to
; edx modulo 2048, then, if bit 11 of (edx ^ ebp) is clear, lower ebp by a
; further 2048 so the two differ in bit 11.
; NOTE(review): the purpose of this placement is not stated here; it looks
; like a cache-placement heuristic -- confirm against the generator script.
34	mov	eax,ebp
35	sub	eax,edx
36	and	eax,2047
37	sub	ebp,eax
38	xor	edx,ebp
39	and	edx,2048
40	xor	edx,2048	; edx = 2048 when bit 11 matched, else 0
41	sub	ebp,edx
42	and	ebp,-64	; round the frame base down to a 64-byte boundary
; Move esp down to ebp in 4096-byte steps, reading one dword at each step,
; so every intermediate step between the old and new esp is touched in
; descending order.
43	mov	eax,esp
44	sub	eax,ebp
45	and	eax,-4096	; eax = (esp - ebp) rounded down to a multiple of 4096
46	mov	edx,esp	; edx = esp before the move (stored at [24+esp] below)
47	lea	esp,[eax*1+ebp]
48	mov	eax,DWORD [esp]	; touch
49	cmp	esp,ebp
50	ja	NEAR L$001page_walk
51	jmp	NEAR L$002page_walk_done
52align	16
53L$001page_walk:
54	lea	esp,[esp-4096]
55	mov	eax,DWORD [esp]	; touch
56	cmp	esp,ebp
57	ja	NEAR L$001page_walk	; repeat while esp is still above the frame base
58L$002page_walk_done:
; Copy the arguments into the new frame (esi still points at the caller's
; argument block).
59	mov	eax,DWORD [esi]	; arg1
60	mov	ebx,DWORD [4+esi]	; arg2
61	mov	ecx,DWORD [8+esi]	; arg3
62	mov	ebp,DWORD [12+esi]	; arg4
63	mov	esi,DWORD [16+esi]	; arg5 (pointer)
64	mov	esi,DWORD [esi]	; esi = first dword behind arg5
65	mov	DWORD [4+esp],eax
66	mov	DWORD [8+esp],ebx
67	mov	DWORD [12+esp],ecx
68	mov	DWORD [16+esp],ebp
69	mov	DWORD [20+esp],esi
70	lea	ebx,[edi-3]	; ebx = arg6 - 1 (edi holds arg6 + 2)
71	mov	DWORD [24+esp],edx	; remember the pre-frame esp for the epilogue
72	mov	eax,-1
73	movd	mm7,eax	; mm7 = 0xFFFFFFFF, mask for the low dword of a qword
74	mov	esi,DWORD [8+esp]	; esi = arg2
75	mov	edi,DWORD [12+esp]	; edi = arg3
76	mov	ebp,DWORD [16+esp]	; ebp = arg4
77	xor	edx,edx	; edx = outer index (word of arg3 being processed)
78	xor	ecx,ecx	; ecx = inner index
; ---- first pass (arg3[0]); the scratch vector is not read yet ----
79	movd	mm4,DWORD [edi]	; mm4 = arg3[0]
80	movd	mm5,DWORD [esi]	; mm5 = arg2[0]
81	movd	mm3,DWORD [ebp]	; mm3 = arg4[0]
82	pmuludq	mm5,mm4	; mm5 = arg2[0] * arg3[0] (64-bit product)
83	movq	mm2,mm5	; mm2 = running sum/carry of the arg2*arg3 chain
84	movq	mm0,mm5
85	pand	mm0,mm7	; mm0 = low dword of that product
86	pmuludq	mm5,[20+esp]	; mm5 = low dword * value at [20+esp]; its low dword is this pass's multiplier
87	pmuludq	mm3,mm5	; mm3 = arg4[0] * multiplier
88	paddq	mm3,mm0	; add the low product dword
89	movd	mm1,DWORD [4+ebp]	; preload arg4[1]
90	movd	mm0,DWORD [4+esi]	; preload arg2[1]
91	psrlq	mm2,32	; keep only the carries of both chains
92	psrlq	mm3,32
93	inc	ecx	; ecx = 1
94align	16
95L$0031st:
96	pmuludq	mm0,mm4	; arg2[ecx] * arg3[0]
97	pmuludq	mm1,mm5	; arg4[ecx] * multiplier
98	paddq	mm2,mm0
99	paddq	mm3,mm1
100	movq	mm0,mm2
101	pand	mm0,mm7	; low dword of the arg2*arg3 chain
102	movd	mm1,DWORD [4+ecx*4+ebp]	; preload arg4[ecx+1]
103	paddq	mm3,mm0
104	movd	mm0,DWORD [4+ecx*4+esi]	; preload arg2[ecx+1]
105	psrlq	mm2,32
106	movd	DWORD [28+ecx*4+esp],mm3	; scratch[ecx-1] = low dword of mm3
107	psrlq	mm3,32
108	lea	ecx,[1+ecx]
109	cmp	ecx,ebx
110	jl	NEAR L$0031st	; loop while ecx < arg6 - 1 (signed)
; last word of the first pass (ecx == ebx == arg6 - 1)
111	pmuludq	mm0,mm4
112	pmuludq	mm1,mm5
113	paddq	mm2,mm0
114	paddq	mm3,mm1
115	movq	mm0,mm2
116	pand	mm0,mm7
117	paddq	mm3,mm0
118	movd	DWORD [28+ecx*4+esp],mm3	; scratch[arg6-2]
119	psrlq	mm2,32
120	psrlq	mm3,32
121	paddq	mm3,mm2	; combine the two carries
122	movq	[32+ebx*4+esp],mm3	; qword store: scratch[arg6-1] and scratch[arg6]
123	inc	edx	; edx = 1
; ---- remaining passes: same as above, but scratch[] is accumulated in ----
124L$004outer:
125	xor	ecx,ecx
126	movd	mm4,DWORD [edx*4+edi]	; mm4 = arg3[edx]
127	movd	mm5,DWORD [esi]	; mm5 = arg2[0]
128	movd	mm6,DWORD [32+esp]	; mm6 = scratch[0]
129	movd	mm3,DWORD [ebp]	; mm3 = arg4[0]
130	pmuludq	mm5,mm4
131	paddq	mm5,mm6	; mm5 = arg2[0]*arg3[edx] + scratch[0]
132	movq	mm0,mm5
133	movq	mm2,mm5
134	pand	mm0,mm7
135	pmuludq	mm5,[20+esp]	; low dword of mm5 = multiplier for this pass
136	pmuludq	mm3,mm5
137	paddq	mm3,mm0
138	movd	mm6,DWORD [36+esp]	; mm6 = scratch[1]
139	movd	mm1,DWORD [4+ebp]	; preload arg4[1]
140	movd	mm0,DWORD [4+esi]	; preload arg2[1]
141	psrlq	mm2,32
142	psrlq	mm3,32
143	paddq	mm2,mm6	; fold scratch[1] into the arg2*arg3 chain
144	inc	ecx	; ecx = 1
145	dec	ebx	; ebx doubles as the inner countdown: arg6 - 2
146L$005inner:
147	pmuludq	mm0,mm4
148	pmuludq	mm1,mm5
149	paddq	mm2,mm0
150	paddq	mm3,mm1
151	movq	mm0,mm2
152	movd	mm6,DWORD [36+ecx*4+esp]	; mm6 = scratch[ecx+1]
153	pand	mm0,mm7
154	movd	mm1,DWORD [4+ecx*4+ebp]	; preload arg4[ecx+1]
155	paddq	mm3,mm0
156	movd	mm0,DWORD [4+ecx*4+esi]	; preload arg2[ecx+1]
157	psrlq	mm2,32
158	movd	DWORD [28+ecx*4+esp],mm3	; scratch[ecx-1] = low dword of mm3
159	psrlq	mm3,32
160	paddq	mm2,mm6
161	dec	ebx	; sets ZF for the jnz below
162	lea	ecx,[1+ecx]	; lea leaves the flags from dec intact
163	jnz	NEAR L$005inner
164	mov	ebx,ecx	; restore ebx = arg6 - 1
165	pmuludq	mm0,mm4
166	pmuludq	mm1,mm5
167	paddq	mm2,mm0
168	paddq	mm3,mm1
169	movq	mm0,mm2
170	pand	mm0,mm7
171	paddq	mm3,mm0
172	movd	DWORD [28+ecx*4+esp],mm3	; scratch[arg6-2]
173	psrlq	mm2,32
174	psrlq	mm3,32
175	movd	mm6,DWORD [36+ebx*4+esp]	; mm6 = scratch[arg6] from the previous pass
176	paddq	mm3,mm2
177	paddq	mm3,mm6
178	movq	[32+ebx*4+esp],mm3	; qword store: scratch[arg6-1] and scratch[arg6]
179	lea	edx,[1+edx]
180	cmp	edx,ebx
181	jle	NEAR L$004outer	; loop while edx <= arg6 - 1 (signed)
182	emms	; leave MMX state before the integer tail
183	jmp	NEAR L$006common_tail	; target is the label right after the alignment padding
184align	16
; ---- tail: subtract arg4 from scratch into arg1, then select ----
185L$006common_tail:
186	mov	ebp,DWORD [16+esp]	; ebp = arg4
187	mov	edi,DWORD [4+esp]	; edi = arg1 (output)
188	lea	esi,[32+esp]	; esi = scratch vector
189	mov	eax,DWORD [esi]	; eax = scratch[0]
190	mov	ecx,ebx	; ecx = arg6 - 1, loop countdown
191	xor	edx,edx	; edx = 0; also clears CF for the first sbb
192align	16
193L$007sub:
194	sbb	eax,DWORD [edx*4+ebp]	; eax = scratch[edx] - arg4[edx] - borrow
195	mov	DWORD [edx*4+edi],eax	; arg1[edx] = difference word
196	dec	ecx	; dec does not modify CF, so the borrow survives
197	mov	eax,DWORD [4+edx*4+esi]	; eax = scratch[edx+1]
198	lea	edx,[1+edx]
199	jge	NEAR L$007sub	; uses the flags from dec; runs arg6 times, ecx ends at -1
200	sbb	eax,0	; eax = scratch[arg6] - final borrow
201	mov	edx,-1
202	xor	edx,eax	; edx = bitwise NOT of eax
203	jmp	NEAR L$008copy
204align	16
; Branch-free select per word: arg1[i] = (scratch[i] & eax) | (arg1[i] & edx).
; NOTE(review): this acts as a clean select only if eax is 0 or 0xFFFFFFFF
; here -- confirm the value range of scratch[arg6].
205L$008copy:
206	mov	esi,DWORD [32+ebx*4+esp]	; esi = scratch[ebx] (unsubtracted word)
207	mov	ebp,DWORD [ebx*4+edi]	; ebp = arg1[ebx] (subtracted word)
208	mov	DWORD [32+ebx*4+esp],ecx	; overwrite the scratch word with ecx (-1 after the sub loop)
209	and	esi,eax
210	and	ebp,edx
211	or	ebp,esi
212	mov	DWORD [ebx*4+edi],ebp
213	dec	ebx
214	jge	NEAR L$008copy	; ebx runs from arg6 - 1 down to 0
215	mov	esp,DWORD [24+esp]	; restore the pre-frame esp
216	mov	eax,1	; return 1
217L$000just_leave:
218	pop	edi
219	pop	esi
220	pop	ebx
221	pop	ebp
222	ret
; NUL-terminated identification string:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
223db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
224db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
225db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
226db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
227db	111,114,103,62,0
228%else
; Reached when the output format is not win32: the whole translation unit
; then consists of the single ret instruction below.
229; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
230ret
231%endif
232