xref: /aosp_15_r20/external/boringssl/src/gen/bcm/bn-586-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
7%ifidn __OUTPUT_FORMAT__, win32
8%ifidn __OUTPUT_FORMAT__,obj
9section	code	use32 class=code align=64
10%elifidn __OUTPUT_FORMAT__,win32
11$@feat.00 equ 1
12section	.text	code align=64
13%else
14section	.text	code
15%endif
; ---------------------------------------------------------------------------
; _bn_mul_add_words: rp[i] = rp[i] + ap[i] * w + carry, for i in [0, num)
;
; Arguments are read from the stack relative to esp; nothing is pushed:
;   [esp+4]   rp   32-bit words that are read, accumulated into, written back
;   [esp+8]   ap   32-bit words that are read and multiplied by w
;   [esp+12]  num  number of words to process
;   [esp+16]  w    32-bit multiplier
; Returns:   eax = carry word left after the last processed word
;            (0 when num is 0).
; Modifies:  eax, ecx, edx, mm0-mm7, flags; emms is executed before return.
;
; Register roles: eax = rp cursor, edx = ap cursor, ecx = words remaining,
; mm0 = w, mm1 = 64-bit running sum whose upper half is the pending carry.
;
; num == 0 is handled explicitly: without the guard the one-word loop would
; be entered with ecx = 0, "sub ecx,1" would wrap, and the loop would keep
; running over memory outside both arrays.
;
; The pop edi/esi/ebx/ebp + ret sequence that used to follow the final ret
; was unreachable (this routine pushes nothing) and has been dropped.
;
; NOTE: the header at the top of this file says it is generated; the same
; change should be mirrored in the generating Perl script.
; ---------------------------------------------------------------------------
global	_bn_mul_add_words
align	16
_bn_mul_add_words:
L$_bn_mul_add_words_begin:
	mov	eax,DWORD [4+esp]	; eax = rp
	mov	edx,DWORD [8+esp]	; edx = ap
	mov	ecx,DWORD [12+esp]	; ecx = num
	movd	mm0,DWORD [16+esp]	; mm0 = w
	pxor	mm1,mm1			; carry = 0
	test	ecx,ecx
	jz	NEAR L$002maw_sse2_exit	; num == 0: return carry 0, touch nothing
	jmp	NEAR L$000maw_sse2_entry
align	16
	; Eight words per iteration. Products for later words are started while
	; earlier sums are still being stored, so the exact order below matters.
L$001maw_sse2_unrolled:
	movd	mm3,DWORD [eax]
	paddq	mm1,mm3
	movd	mm2,DWORD [edx]
	pmuludq	mm2,mm0
	movd	mm4,DWORD [4+edx]
	pmuludq	mm4,mm0
	movd	mm6,DWORD [8+edx]
	pmuludq	mm6,mm0
	movd	mm7,DWORD [12+edx]
	pmuludq	mm7,mm0
	paddq	mm1,mm2
	movd	mm3,DWORD [4+eax]
	paddq	mm3,mm4
	movd	mm5,DWORD [8+eax]
	paddq	mm5,mm6
	movd	mm4,DWORD [12+eax]
	paddq	mm7,mm4
	movd	DWORD [eax],mm1		; store word 0
	movd	mm2,DWORD [16+edx]
	pmuludq	mm2,mm0
	psrlq	mm1,32			; keep only the carry
	movd	mm4,DWORD [20+edx]
	pmuludq	mm4,mm0
	paddq	mm1,mm3
	movd	mm6,DWORD [24+edx]
	pmuludq	mm6,mm0
	movd	DWORD [4+eax],mm1	; store word 1
	psrlq	mm1,32
	movd	mm3,DWORD [28+edx]
	add	edx,32			; ap += 8 words
	pmuludq	mm3,mm0
	paddq	mm1,mm5
	movd	mm5,DWORD [16+eax]
	paddq	mm2,mm5
	movd	DWORD [8+eax],mm1	; store word 2
	psrlq	mm1,32
	paddq	mm1,mm7
	movd	mm5,DWORD [20+eax]
	paddq	mm4,mm5
	movd	DWORD [12+eax],mm1	; store word 3
	psrlq	mm1,32
	paddq	mm1,mm2
	movd	mm5,DWORD [24+eax]
	paddq	mm6,mm5
	movd	DWORD [16+eax],mm1	; store word 4
	psrlq	mm1,32
	paddq	mm1,mm4
	movd	mm5,DWORD [28+eax]
	paddq	mm3,mm5
	movd	DWORD [20+eax],mm1	; store word 5
	psrlq	mm1,32
	paddq	mm1,mm6
	movd	DWORD [24+eax],mm1	; store word 6
	psrlq	mm1,32
	paddq	mm1,mm3
	movd	DWORD [28+eax],mm1	; store word 7
	lea	eax,[32+eax]		; rp += 8 words (lea leaves flags alone)
	psrlq	mm1,32
	sub	ecx,8
	jz	NEAR L$002maw_sse2_exit	; nothing left after this group of eight
L$000maw_sse2_entry:
	test	ecx,4294967288		; 0xFFFFFFF8: at least eight words left?
	jnz	NEAR L$001maw_sse2_unrolled
align	4
	; One word per iteration; ecx is in 1..7 whenever this loop is entered.
L$003maw_sse2_loop:
	movd	mm2,DWORD [edx]
	movd	mm3,DWORD [eax]
	pmuludq	mm2,mm0
	lea	edx,[4+edx]
	paddq	mm1,mm3
	paddq	mm1,mm2
	movd	DWORD [eax],mm1
	sub	ecx,1			; sets ZF for the jnz below
	psrlq	mm1,32			; MMX shift and lea do not modify EFLAGS
	lea	eax,[4+eax]
	jnz	NEAR L$003maw_sse2_loop
L$002maw_sse2_exit:
	movd	eax,mm1			; return the final carry
	emms
	ret
; ---------------------------------------------------------------------------
; _bn_mul_words: rp[i] = low32(ap[i] * w + carry), for i in [0, num)
;
; Arguments are read from the stack relative to esp; nothing is pushed:
;   [esp+4]   rp   destination words (written only)
;   [esp+8]   ap   source words
;   [esp+12]  num  number of words to process
;   [esp+16]  w    32-bit multiplier
; Returns:   eax = carry word left after the last processed word
;            (0 when num is 0).
; Modifies:  eax, ecx, edx, mm0-mm2, flags; emms is executed before return.
;
; num == 0 is handled explicitly: the loop is bottom-tested, so entering it
; with ecx = 0 would wrap the counter and run far past both arrays.
;
; The pop edi/esi/ebx/ebp + ret sequence that used to follow the ret was
; unreachable (this routine pushes nothing) and has been dropped.
;
; NOTE: the header at the top of this file says it is generated; the same
; change should be mirrored in the generating Perl script.
; ---------------------------------------------------------------------------
global	_bn_mul_words
align	16
_bn_mul_words:
L$_bn_mul_words_begin:
	mov	eax,DWORD [4+esp]	; eax = rp
	mov	edx,DWORD [8+esp]	; edx = ap
	mov	ecx,DWORD [12+esp]	; ecx = num
	movd	mm0,DWORD [16+esp]	; mm0 = w
	pxor	mm1,mm1			; carry = 0
	test	ecx,ecx
	jz	NEAR L$_bn_mul_words_done	; num == 0: return carry 0
align	16
L$004mw_sse2_loop:
	movd	mm2,DWORD [edx]
	pmuludq	mm2,mm0			; mm2 = ap[i] * w (64-bit)
	lea	edx,[4+edx]
	paddq	mm1,mm2			; add the incoming carry
	movd	DWORD [eax],mm1		; store the low 32 bits
	sub	ecx,1			; sets ZF for the jnz below
	psrlq	mm1,32			; upper half becomes the next carry
	lea	eax,[4+eax]		; lea and the MMX shift keep EFLAGS intact
	jnz	NEAR L$004mw_sse2_loop
L$_bn_mul_words_done:
	movd	eax,mm1			; return the final carry
	emms
	ret
; ---------------------------------------------------------------------------
; _bn_sqr_words: (rp[2i], rp[2i+1]) = ap[i] * ap[i], for i in [0, num)
;
; Arguments are read from the stack relative to esp; nothing is pushed:
;   [esp+4]   rp   destination; 8 bytes are stored per input word
;   [esp+8]   ap   source words
;   [esp+12]  num  number of input words
; Returns:   no value is produced; eax is left holding the advanced rp cursor.
; Modifies:  eax, ecx, edx, mm0, flags; emms is executed before return.
;
; num == 0 is handled explicitly: the loop is bottom-tested, so entering it
; with ecx = 0 would wrap the counter and run far past both arrays.
;
; The pop edi/esi/ebx/ebp + ret sequence that used to follow the ret was
; unreachable (this routine pushes nothing) and has been dropped.
;
; NOTE: the header at the top of this file says it is generated; the same
; change should be mirrored in the generating Perl script.
; ---------------------------------------------------------------------------
global	_bn_sqr_words
align	16
_bn_sqr_words:
L$_bn_sqr_words_begin:
	mov	eax,DWORD [4+esp]	; eax = rp
	mov	edx,DWORD [8+esp]	; edx = ap
	mov	ecx,DWORD [12+esp]	; ecx = num
	test	ecx,ecx
	jz	NEAR L$_bn_sqr_words_done	; num == 0: nothing to square
align	16
L$005sqr_sse2_loop:
	movd	mm0,DWORD [edx]
	pmuludq	mm0,mm0			; mm0 = ap[i]^2 (64-bit)
	lea	edx,[4+edx]
	movq	[eax],mm0		; store both halves of the square
	sub	ecx,1			; sets ZF for the jnz below
	lea	eax,[8+eax]		; lea keeps EFLAGS intact
	jnz	NEAR L$005sqr_sse2_loop
L$_bn_sqr_words_done:
	emms
	ret
; ---------------------------------------------------------------------------
; _bn_div_words: divide a 64-bit value by a 32-bit value.
;
; Arguments are read from the stack relative to esp; nothing is pushed:
;   [esp+4]   high 32 bits of the dividend
;   [esp+8]   low 32 bits of the dividend
;   [esp+12]  divisor
; Returns:   eax = quotient (edx holds the remainder when div completes).
; Modifies:  eax, ecx, edx, flags.
;
; No checks are made here: the div instruction raises a divide error when
; the divisor is zero or the quotient does not fit in 32 bits, so callers
; must supply a high word that is smaller than the divisor.
; ---------------------------------------------------------------------------
global	_bn_div_words
align	16
_bn_div_words:
L$_bn_div_words_begin:
	mov	ecx,DWORD [12+esp]	; ecx = divisor
	mov	eax,DWORD [8+esp]	; eax = dividend, low half
	mov	edx,DWORD [4+esp]	; edx = dividend, high half
	div	ecx			; edx:eax / ecx -> eax quotient, edx remainder
	ret
; ---------------------------------------------------------------------------
; _bn_add_words: rp[i] = ap[i] + bp[i] + carry, for i in [0, num)
;
; Arguments (offsets are those seen after the four pushes below):
;   [esp+20]  rp   destination words
;   [esp+24]  ap   first addend words
;   [esp+28]  bp   second addend words
;   [esp+32]  num  number of words
; Returns:   eax = carry out of the last processed word (0 when num is 0).
; Saves and restores ebp, ebx, esi, edi; modifies eax, ecx, edx, flags.
;
; Register roles: ebx = rp cursor, esi = ap cursor, edi = bp cursor,
; ebp = word counter, eax = carry between rounds, ecx/edx = scratch.
;
; Every round is the same eight-instruction pattern; the repeated copies are
; produced with %rep, stepping the byte offset by 4 each time.
; "mov eax,0" is used rather than xor because mov leaves CF intact for the
; adc that immediately follows.
; ---------------------------------------------------------------------------
global	_bn_add_words
align	16
_bn_add_words:
L$_bn_add_words_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax			; carry = 0
	mov	ebp,DWORD [32+esp]	; ebp = num
	mov	edi,DWORD [28+esp]	; edi = bp
	mov	esi,DWORD [24+esp]	; esi = ap
	mov	ebx,DWORD [20+esp]	; ebx = rp
	and	ebp,4294967288		; 0xFFFFFFF8: words handled eight at a time
	jz	NEAR L$006aw_finish
L$007aw_loop:
	; Rounds 0..7 of the unrolled body.
%assign bn_aw_off 0
%rep 8
	mov	ecx,DWORD [bn_aw_off+esi]
	mov	edx,DWORD [bn_aw_off+edi]
	add	ecx,eax			; a + carry-in
	mov	eax,0
	adc	eax,eax			; eax = carry of the first add
	add	ecx,edx			; + b
	adc	eax,0			; fold in carry of the second add
	mov	DWORD [bn_aw_off+ebx],ecx
%assign bn_aw_off bn_aw_off+4
%endrep
	add	ebx,32
	add	edi,32
	add	esi,32
	sub	ebp,8			; must stay last: its ZF drives the jnz
	jnz	NEAR L$007aw_loop
L$006aw_finish:
	mov	ebp,DWORD [32+esp]
	and	ebp,7			; 0..7 leftover words
	jz	NEAR L$008aw_end
	; Tail rounds 0..5: each one may be the last, so each tests the counter.
%assign bn_aw_off 0
%rep 6
	mov	ecx,DWORD [bn_aw_off+esi]
	mov	edx,DWORD [bn_aw_off+edi]
	add	ecx,eax
	mov	eax,0
	adc	eax,eax
	add	ecx,edx
	adc	eax,0
	dec	ebp			; sets ZF for the jz; the mov between keeps it
	mov	DWORD [bn_aw_off+ebx],ecx
	jz	NEAR L$008aw_end
%assign bn_aw_off bn_aw_off+4
%endrep
%undef bn_aw_off
	; Tail round 6: reached only with seven leftovers, so no counter test.
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [24+edi]
	add	ecx,eax
	mov	eax,0
	adc	eax,eax
	add	ecx,edx
	adc	eax,0
	mov	DWORD [24+ebx],ecx
L$008aw_end:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; ---------------------------------------------------------------------------
; _bn_sub_words: rp[i] = ap[i] - bp[i] - borrow, for i in [0, num)
;
; Arguments (offsets are those seen after the four pushes below):
;   [esp+20]  rp   destination words
;   [esp+24]  ap   minuend words
;   [esp+28]  bp   subtrahend words
;   [esp+32]  num  number of words
; Returns:   eax = borrow out of the last processed word (0 when num is 0).
; Saves and restores ebp, ebx, esi, edi; modifies eax, ecx, edx, flags.
;
; Register roles: ebx = rp cursor, esi = ap cursor, edi = bp cursor,
; ebp = word counter, eax = borrow between rounds, ecx/edx = scratch.
;
; Every round is the same eight-instruction pattern; the repeated copies are
; produced with %rep, stepping the byte offset by 4 each time.
; sub reports a borrow in CF, and "mov eax,0" (unlike xor) leaves CF intact,
; so the adc that follows collects that borrow into eax.
; ---------------------------------------------------------------------------
global	_bn_sub_words
align	16
_bn_sub_words:
L$_bn_sub_words_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax			; borrow = 0
	mov	ebp,DWORD [32+esp]	; ebp = num
	mov	edi,DWORD [28+esp]	; edi = bp
	mov	esi,DWORD [24+esp]	; esi = ap
	mov	ebx,DWORD [20+esp]	; ebx = rp
	and	ebp,4294967288		; 0xFFFFFFF8: words handled eight at a time
	jz	NEAR L$009aw_finish
L$010aw_loop:
	; Rounds 0..7 of the unrolled body.
%assign bn_sw_off 0
%rep 8
	mov	ecx,DWORD [bn_sw_off+esi]
	mov	edx,DWORD [bn_sw_off+edi]
	sub	ecx,eax			; a - borrow-in
	mov	eax,0
	adc	eax,eax			; eax = borrow of the first sub
	sub	ecx,edx			; - b
	adc	eax,0			; fold in borrow of the second sub
	mov	DWORD [bn_sw_off+ebx],ecx
%assign bn_sw_off bn_sw_off+4
%endrep
	add	ebx,32
	add	edi,32
	add	esi,32
	sub	ebp,8			; must stay last: its ZF drives the jnz
	jnz	NEAR L$010aw_loop
L$009aw_finish:
	mov	ebp,DWORD [32+esp]
	and	ebp,7			; 0..7 leftover words
	jz	NEAR L$011aw_end
	; Tail rounds 0..5: each one may be the last, so each tests the counter.
%assign bn_sw_off 0
%rep 6
	mov	ecx,DWORD [bn_sw_off+esi]
	mov	edx,DWORD [bn_sw_off+edi]
	sub	ecx,eax
	mov	eax,0
	adc	eax,eax
	sub	ecx,edx
	adc	eax,0
	dec	ebp			; sets ZF for the jz; the mov between keeps it
	mov	DWORD [bn_sw_off+ebx],ecx
	jz	NEAR L$011aw_end
%assign bn_sw_off bn_sw_off+4
%endrep
%undef bn_sw_off
	; Tail round 6: reached only with seven leftovers, so no counter test.
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [24+edi]
	sub	ecx,eax
	mov	eax,0
	adc	eax,eax
	sub	ecx,edx
	adc	eax,0
	mov	DWORD [24+ebx],ecx
L$011aw_end:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
533%else
534; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
535ret
536%endif
537