xref: /aosp_15_r20/external/boringssl/src/gen/bcm/co-586-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol prefixing: when BORINGSSL_PREFIX is defined, the include
; file named below is pulled in before any symbol is declared.
; NOTE(review): presumably that file renames the exported symbols; its
; contents are not visible here.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Everything from here down to the matching %else near the end of the file
; is assembled only when the output format is win32; any other output format
; gets just the lone ret in that %else branch.
7%ifidn __OUTPUT_FORMAT__, win32
; Section selection by output format.
; NOTE(review): inside the win32 guard above only the win32 arm below can
; match, so the obj arm and the final %else arm look unreachable here;
; confirm whether the generator simply emits them unconditionally.
8%ifidn __OUTPUT_FORMAT__,obj
9section	code	use32 class=code align=64
10%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): presumably the COFF @feat.00 feature marker (bit 0 set =
; object is safe-exception-handler aware); confirm against the NASM win32
; output format documentation.
11$@feat.00 equ 1
12section	.text	code align=64
13%else
14section	.text	code
15%endif
; ----------------------------------------------------------------------
; _bn_mul_comba8: 8x8-word comba (column-wise) multiplication, r = a * b.
;
; Arguments are read from the stack; all data words are 32 bits wide:
;   1st stack argument -> r, destination; words r[0]..r[15] are written
;   2nd stack argument -> a, source; words a[0]..a[7] are read
;   3rd stack argument -> b, source; words b[0]..b[7] are read
; NOTE(review): presumably matches a C prototype along the lines of
;   void bn_mul_comba8(uint32_t r[16], const uint32_t a[8],
;                      const uint32_t b[8]);
; confirm against the C declaration, which is not visible in this file.
;
; Register use:
;   esi = a, edi = b
;   eax, edx = mul operands and the 64-bit product edx:eax; eax is also
;         used as a scratch copy of r, reloaded from [20+esp] before each
;         column is stored
;   ebx, ecx, ebp = three-word column accumulator. The roles rotate with
;         the output word index k:
;           k mod 3 == 0: low = ebx, mid = ecx, high = ebp
;           k mod 3 == 1: low = ecx, mid = ebp, high = ebx
;           k mod 3 == 2: low = ebp, mid = ebx, high = ecx
;         For each word the high register is zeroed, every a[i]*b[k-i]
;         product is added in with add/adc/adc, the low register is stored
;         to r[k], and the other two carry over into the next column.
;
; The loads for the next product are interleaved between add and adc; this
; is safe because mov leaves the flags (and so the carry chain) untouched.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx, edx and the flags
; are clobbered. eax is left holding r on return.
; ----------------------------------------------------------------------
16global	_bn_mul_comba8
17align	16
18_bn_mul_comba8:
19L$_bn_mul_comba8_begin:
	; Prologue: save esi/edi/ebp/ebx. Each push moves esp down by 4, so
	; [12+esp] after one push is the 2nd argument (a) and [20+esp] after
	; two pushes is the 3rd argument (b).
20	push	esi
21	mov	esi,DWORD [12+esp]
22	push	edi
23	mov	edi,DWORD [20+esp]
24	push	ebp
25	push	ebx
	; With all four registers pushed, [20+esp] is the 1st argument (r).
	; Clear the low/mid accumulators and preload eax = a[0], edx = b[0].
26	xor	ebx,ebx
27	mov	eax,DWORD [esi]
28	xor	ecx,ecx
29	mov	edx,DWORD [edi]
30	; ################## Calculate word 0
31	xor	ebp,ebp
32	; mul a[0]*b[0]
33	mul	edx
	; edx:eax = a[0]*b[0]; fold it into ebp:ecx:ebx. The interleaved movs
	; fetch r and the operands of the next product.
34	add	ebx,eax
35	mov	eax,DWORD [20+esp]
36	adc	ecx,edx
37	mov	edx,DWORD [edi]
38	adc	ebp,0
39	mov	DWORD [eax],ebx
40	mov	eax,DWORD [4+esi]
41	; saved r[0]
42	; ################## Calculate word 1
43	xor	ebx,ebx
44	; mul a[1]*b[0]
45	mul	edx
46	add	ecx,eax
47	mov	eax,DWORD [esi]
48	adc	ebp,edx
49	mov	edx,DWORD [4+edi]
50	adc	ebx,0
51	; mul a[0]*b[1]
52	mul	edx
53	add	ecx,eax
54	mov	eax,DWORD [20+esp]
55	adc	ebp,edx
56	mov	edx,DWORD [edi]
57	adc	ebx,0
58	mov	DWORD [4+eax],ecx
59	mov	eax,DWORD [8+esi]
60	; saved r[1]
61	; ################## Calculate word 2
62	xor	ecx,ecx
63	; mul a[2]*b[0]
64	mul	edx
65	add	ebp,eax
66	mov	eax,DWORD [4+esi]
67	adc	ebx,edx
68	mov	edx,DWORD [4+edi]
69	adc	ecx,0
70	; mul a[1]*b[1]
71	mul	edx
72	add	ebp,eax
73	mov	eax,DWORD [esi]
74	adc	ebx,edx
75	mov	edx,DWORD [8+edi]
76	adc	ecx,0
77	; mul a[0]*b[2]
78	mul	edx
79	add	ebp,eax
80	mov	eax,DWORD [20+esp]
81	adc	ebx,edx
82	mov	edx,DWORD [edi]
83	adc	ecx,0
84	mov	DWORD [8+eax],ebp
85	mov	eax,DWORD [12+esi]
86	; saved r[2]
87	; ################## Calculate word 3
88	xor	ebp,ebp
89	; mul a[3]*b[0]
90	mul	edx
91	add	ebx,eax
92	mov	eax,DWORD [8+esi]
93	adc	ecx,edx
94	mov	edx,DWORD [4+edi]
95	adc	ebp,0
96	; mul a[2]*b[1]
97	mul	edx
98	add	ebx,eax
99	mov	eax,DWORD [4+esi]
100	adc	ecx,edx
101	mov	edx,DWORD [8+edi]
102	adc	ebp,0
103	; mul a[1]*b[2]
104	mul	edx
105	add	ebx,eax
106	mov	eax,DWORD [esi]
107	adc	ecx,edx
108	mov	edx,DWORD [12+edi]
109	adc	ebp,0
110	; mul a[0]*b[3]
111	mul	edx
112	add	ebx,eax
113	mov	eax,DWORD [20+esp]
114	adc	ecx,edx
115	mov	edx,DWORD [edi]
116	adc	ebp,0
117	mov	DWORD [12+eax],ebx
118	mov	eax,DWORD [16+esi]
119	; saved r[3]
120	; ################## Calculate word 4
121	xor	ebx,ebx
122	; mul a[4]*b[0]
123	mul	edx
124	add	ecx,eax
125	mov	eax,DWORD [12+esi]
126	adc	ebp,edx
127	mov	edx,DWORD [4+edi]
128	adc	ebx,0
129	; mul a[3]*b[1]
130	mul	edx
131	add	ecx,eax
132	mov	eax,DWORD [8+esi]
133	adc	ebp,edx
134	mov	edx,DWORD [8+edi]
135	adc	ebx,0
136	; mul a[2]*b[2]
137	mul	edx
138	add	ecx,eax
139	mov	eax,DWORD [4+esi]
140	adc	ebp,edx
141	mov	edx,DWORD [12+edi]
142	adc	ebx,0
143	; mul a[1]*b[3]
144	mul	edx
145	add	ecx,eax
146	mov	eax,DWORD [esi]
147	adc	ebp,edx
148	mov	edx,DWORD [16+edi]
149	adc	ebx,0
150	; mul a[0]*b[4]
151	mul	edx
152	add	ecx,eax
153	mov	eax,DWORD [20+esp]
154	adc	ebp,edx
155	mov	edx,DWORD [edi]
156	adc	ebx,0
157	mov	DWORD [16+eax],ecx
158	mov	eax,DWORD [20+esi]
159	; saved r[4]
160	; ################## Calculate word 5
161	xor	ecx,ecx
162	; mul a[5]*b[0]
163	mul	edx
164	add	ebp,eax
165	mov	eax,DWORD [16+esi]
166	adc	ebx,edx
167	mov	edx,DWORD [4+edi]
168	adc	ecx,0
169	; mul a[4]*b[1]
170	mul	edx
171	add	ebp,eax
172	mov	eax,DWORD [12+esi]
173	adc	ebx,edx
174	mov	edx,DWORD [8+edi]
175	adc	ecx,0
176	; mul a[3]*b[2]
177	mul	edx
178	add	ebp,eax
179	mov	eax,DWORD [8+esi]
180	adc	ebx,edx
181	mov	edx,DWORD [12+edi]
182	adc	ecx,0
183	; mul a[2]*b[3]
184	mul	edx
185	add	ebp,eax
186	mov	eax,DWORD [4+esi]
187	adc	ebx,edx
188	mov	edx,DWORD [16+edi]
189	adc	ecx,0
190	; mul a[1]*b[4]
191	mul	edx
192	add	ebp,eax
193	mov	eax,DWORD [esi]
194	adc	ebx,edx
195	mov	edx,DWORD [20+edi]
196	adc	ecx,0
197	; mul a[0]*b[5]
198	mul	edx
199	add	ebp,eax
200	mov	eax,DWORD [20+esp]
201	adc	ebx,edx
202	mov	edx,DWORD [edi]
203	adc	ecx,0
204	mov	DWORD [20+eax],ebp
205	mov	eax,DWORD [24+esi]
206	; saved r[5]
207	; ################## Calculate word 6
208	xor	ebp,ebp
209	; mul a[6]*b[0]
210	mul	edx
211	add	ebx,eax
212	mov	eax,DWORD [20+esi]
213	adc	ecx,edx
214	mov	edx,DWORD [4+edi]
215	adc	ebp,0
216	; mul a[5]*b[1]
217	mul	edx
218	add	ebx,eax
219	mov	eax,DWORD [16+esi]
220	adc	ecx,edx
221	mov	edx,DWORD [8+edi]
222	adc	ebp,0
223	; mul a[4]*b[2]
224	mul	edx
225	add	ebx,eax
226	mov	eax,DWORD [12+esi]
227	adc	ecx,edx
228	mov	edx,DWORD [12+edi]
229	adc	ebp,0
230	; mul a[3]*b[3]
231	mul	edx
232	add	ebx,eax
233	mov	eax,DWORD [8+esi]
234	adc	ecx,edx
235	mov	edx,DWORD [16+edi]
236	adc	ebp,0
237	; mul a[2]*b[4]
238	mul	edx
239	add	ebx,eax
240	mov	eax,DWORD [4+esi]
241	adc	ecx,edx
242	mov	edx,DWORD [20+edi]
243	adc	ebp,0
244	; mul a[1]*b[5]
245	mul	edx
246	add	ebx,eax
247	mov	eax,DWORD [esi]
248	adc	ecx,edx
249	mov	edx,DWORD [24+edi]
250	adc	ebp,0
251	; mul a[0]*b[6]
252	mul	edx
253	add	ebx,eax
254	mov	eax,DWORD [20+esp]
255	adc	ecx,edx
256	mov	edx,DWORD [edi]
257	adc	ebp,0
258	mov	DWORD [24+eax],ebx
259	mov	eax,DWORD [28+esi]
260	; saved r[6]
261	; ################## Calculate word 7
262	xor	ebx,ebx
263	; mul a[7]*b[0]
264	mul	edx
265	add	ecx,eax
266	mov	eax,DWORD [24+esi]
267	adc	ebp,edx
268	mov	edx,DWORD [4+edi]
269	adc	ebx,0
270	; mul a[6]*b[1]
271	mul	edx
272	add	ecx,eax
273	mov	eax,DWORD [20+esi]
274	adc	ebp,edx
275	mov	edx,DWORD [8+edi]
276	adc	ebx,0
277	; mul a[5]*b[2]
278	mul	edx
279	add	ecx,eax
280	mov	eax,DWORD [16+esi]
281	adc	ebp,edx
282	mov	edx,DWORD [12+edi]
283	adc	ebx,0
284	; mul a[4]*b[3]
285	mul	edx
286	add	ecx,eax
287	mov	eax,DWORD [12+esi]
288	adc	ebp,edx
289	mov	edx,DWORD [16+edi]
290	adc	ebx,0
291	; mul a[3]*b[4]
292	mul	edx
293	add	ecx,eax
294	mov	eax,DWORD [8+esi]
295	adc	ebp,edx
296	mov	edx,DWORD [20+edi]
297	adc	ebx,0
298	; mul a[2]*b[5]
299	mul	edx
300	add	ecx,eax
301	mov	eax,DWORD [4+esi]
302	adc	ebp,edx
303	mov	edx,DWORD [24+edi]
304	adc	ebx,0
305	; mul a[1]*b[6]
306	mul	edx
307	add	ecx,eax
308	mov	eax,DWORD [esi]
309	adc	ebp,edx
310	mov	edx,DWORD [28+edi]
311	adc	ebx,0
312	; mul a[0]*b[7]
313	mul	edx
314	add	ecx,eax
315	mov	eax,DWORD [20+esp]
316	adc	ebp,edx
317	mov	edx,DWORD [4+edi]
318	adc	ebx,0
319	mov	DWORD [28+eax],ecx
320	mov	eax,DWORD [28+esi]
321	; saved r[7]
322	; ################## Calculate word 8
	; From this column on, every column k starts at a[7]*b[k-7]; eax and
	; edx were preloaded accordingly at the end of the previous column.
323	xor	ecx,ecx
324	; mul a[7]*b[1]
325	mul	edx
326	add	ebp,eax
327	mov	eax,DWORD [24+esi]
328	adc	ebx,edx
329	mov	edx,DWORD [8+edi]
330	adc	ecx,0
331	; mul a[6]*b[2]
332	mul	edx
333	add	ebp,eax
334	mov	eax,DWORD [20+esi]
335	adc	ebx,edx
336	mov	edx,DWORD [12+edi]
337	adc	ecx,0
338	; mul a[5]*b[3]
339	mul	edx
340	add	ebp,eax
341	mov	eax,DWORD [16+esi]
342	adc	ebx,edx
343	mov	edx,DWORD [16+edi]
344	adc	ecx,0
345	; mul a[4]*b[4]
346	mul	edx
347	add	ebp,eax
348	mov	eax,DWORD [12+esi]
349	adc	ebx,edx
350	mov	edx,DWORD [20+edi]
351	adc	ecx,0
352	; mul a[3]*b[5]
353	mul	edx
354	add	ebp,eax
355	mov	eax,DWORD [8+esi]
356	adc	ebx,edx
357	mov	edx,DWORD [24+edi]
358	adc	ecx,0
359	; mul a[2]*b[6]
360	mul	edx
361	add	ebp,eax
362	mov	eax,DWORD [4+esi]
363	adc	ebx,edx
364	mov	edx,DWORD [28+edi]
365	adc	ecx,0
366	; mul a[1]*b[7]
367	mul	edx
368	add	ebp,eax
369	mov	eax,DWORD [20+esp]
370	adc	ebx,edx
371	mov	edx,DWORD [8+edi]
372	adc	ecx,0
373	mov	DWORD [32+eax],ebp
374	mov	eax,DWORD [28+esi]
375	; saved r[8]
376	; ################## Calculate word 9
377	xor	ebp,ebp
378	; mul a[7]*b[2]
379	mul	edx
380	add	ebx,eax
381	mov	eax,DWORD [24+esi]
382	adc	ecx,edx
383	mov	edx,DWORD [12+edi]
384	adc	ebp,0
385	; mul a[6]*b[3]
386	mul	edx
387	add	ebx,eax
388	mov	eax,DWORD [20+esi]
389	adc	ecx,edx
390	mov	edx,DWORD [16+edi]
391	adc	ebp,0
392	; mul a[5]*b[4]
393	mul	edx
394	add	ebx,eax
395	mov	eax,DWORD [16+esi]
396	adc	ecx,edx
397	mov	edx,DWORD [20+edi]
398	adc	ebp,0
399	; mul a[4]*b[5]
400	mul	edx
401	add	ebx,eax
402	mov	eax,DWORD [12+esi]
403	adc	ecx,edx
404	mov	edx,DWORD [24+edi]
405	adc	ebp,0
406	; mul a[3]*b[6]
407	mul	edx
408	add	ebx,eax
409	mov	eax,DWORD [8+esi]
410	adc	ecx,edx
411	mov	edx,DWORD [28+edi]
412	adc	ebp,0
413	; mul a[2]*b[7]
414	mul	edx
415	add	ebx,eax
416	mov	eax,DWORD [20+esp]
417	adc	ecx,edx
418	mov	edx,DWORD [12+edi]
419	adc	ebp,0
420	mov	DWORD [36+eax],ebx
421	mov	eax,DWORD [28+esi]
422	; saved r[9]
423	; ################## Calculate word 10
424	xor	ebx,ebx
425	; mul a[7]*b[3]
426	mul	edx
427	add	ecx,eax
428	mov	eax,DWORD [24+esi]
429	adc	ebp,edx
430	mov	edx,DWORD [16+edi]
431	adc	ebx,0
432	; mul a[6]*b[4]
433	mul	edx
434	add	ecx,eax
435	mov	eax,DWORD [20+esi]
436	adc	ebp,edx
437	mov	edx,DWORD [20+edi]
438	adc	ebx,0
439	; mul a[5]*b[5]
440	mul	edx
441	add	ecx,eax
442	mov	eax,DWORD [16+esi]
443	adc	ebp,edx
444	mov	edx,DWORD [24+edi]
445	adc	ebx,0
446	; mul a[4]*b[6]
447	mul	edx
448	add	ecx,eax
449	mov	eax,DWORD [12+esi]
450	adc	ebp,edx
451	mov	edx,DWORD [28+edi]
452	adc	ebx,0
453	; mul a[3]*b[7]
454	mul	edx
455	add	ecx,eax
456	mov	eax,DWORD [20+esp]
457	adc	ebp,edx
458	mov	edx,DWORD [16+edi]
459	adc	ebx,0
460	mov	DWORD [40+eax],ecx
461	mov	eax,DWORD [28+esi]
462	; saved r[10]
463	; ################## Calculate word 11
464	xor	ecx,ecx
465	; mul a[7]*b[4]
466	mul	edx
467	add	ebp,eax
468	mov	eax,DWORD [24+esi]
469	adc	ebx,edx
470	mov	edx,DWORD [20+edi]
471	adc	ecx,0
472	; mul a[6]*b[5]
473	mul	edx
474	add	ebp,eax
475	mov	eax,DWORD [20+esi]
476	adc	ebx,edx
477	mov	edx,DWORD [24+edi]
478	adc	ecx,0
479	; mul a[5]*b[6]
480	mul	edx
481	add	ebp,eax
482	mov	eax,DWORD [16+esi]
483	adc	ebx,edx
484	mov	edx,DWORD [28+edi]
485	adc	ecx,0
486	; mul a[4]*b[7]
487	mul	edx
488	add	ebp,eax
489	mov	eax,DWORD [20+esp]
490	adc	ebx,edx
491	mov	edx,DWORD [20+edi]
492	adc	ecx,0
493	mov	DWORD [44+eax],ebp
494	mov	eax,DWORD [28+esi]
495	; saved r[11]
496	; ################## Calculate word 12
497	xor	ebp,ebp
498	; mul a[7]*b[5]
499	mul	edx
500	add	ebx,eax
501	mov	eax,DWORD [24+esi]
502	adc	ecx,edx
503	mov	edx,DWORD [24+edi]
504	adc	ebp,0
505	; mul a[6]*b[6]
506	mul	edx
507	add	ebx,eax
508	mov	eax,DWORD [20+esi]
509	adc	ecx,edx
510	mov	edx,DWORD [28+edi]
511	adc	ebp,0
512	; mul a[5]*b[7]
513	mul	edx
514	add	ebx,eax
515	mov	eax,DWORD [20+esp]
516	adc	ecx,edx
517	mov	edx,DWORD [24+edi]
518	adc	ebp,0
519	mov	DWORD [48+eax],ebx
520	mov	eax,DWORD [28+esi]
521	; saved r[12]
522	; ################## Calculate word 13
523	xor	ebx,ebx
524	; mul a[7]*b[6]
525	mul	edx
526	add	ecx,eax
527	mov	eax,DWORD [24+esi]
528	adc	ebp,edx
529	mov	edx,DWORD [28+edi]
530	adc	ebx,0
531	; mul a[6]*b[7]
532	mul	edx
533	add	ecx,eax
534	mov	eax,DWORD [20+esp]
535	adc	ebp,edx
536	mov	edx,DWORD [28+edi]
537	adc	ebx,0
538	mov	DWORD [52+eax],ecx
539	mov	eax,DWORD [28+esi]
540	; saved r[13]
541	; ################## Calculate word 14
	; Last column: the low word becomes r[14] and the mid word r[15]. The
	; high word (ecx) is not stored; an 8x8-word product fits in 16 words,
	; so it is expected to be zero.
542	xor	ecx,ecx
543	; mul a[7]*b[7]
544	mul	edx
545	add	ebp,eax
546	mov	eax,DWORD [20+esp]
547	adc	ebx,edx
548	adc	ecx,0
549	mov	DWORD [56+eax],ebp
550	; saved r[14]
551	; save r[15]
552	mov	DWORD [60+eax],ebx
	; Epilogue: restore the saved registers in reverse push order.
553	pop	ebx
554	pop	ebp
555	pop	edi
556	pop	esi
557	ret
; ----------------------------------------------------------------------
; _bn_mul_comba4: 4x4-word comba (column-wise) multiplication, r = a * b.
;
; Arguments are read from the stack; all data words are 32 bits wide:
;   1st stack argument -> r, destination; words r[0]..r[7] are written
;   2nd stack argument -> a, source; words a[0]..a[3] are read
;   3rd stack argument -> b, source; words b[0]..b[3] are read
; NOTE(review): presumably matches a C prototype along the lines of
;   void bn_mul_comba4(uint32_t r[8], const uint32_t a[4],
;                      const uint32_t b[4]);
; confirm against the C declaration, which is not visible in this file.
;
; Register use:
;   esi = a, edi = b
;   eax, edx = mul operands and the 64-bit product edx:eax; eax is also
;         used as a scratch copy of r, reloaded from [20+esp] before each
;         column is stored
;   ebx, ecx, ebp = three-word column accumulator whose roles rotate with
;         the output word index k:
;           k mod 3 == 0: low = ebx, mid = ecx, high = ebp
;           k mod 3 == 1: low = ecx, mid = ebp, high = ebx
;           k mod 3 == 2: low = ebp, mid = ebx, high = ecx
;         The high register is zeroed at the start of each column and the
;         low register is stored to r[k] at its end.
;
; The loads interleaved between add and adc are safe because mov leaves
; the flags untouched.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx, edx and the flags
; are clobbered. eax is left holding r on return.
; ----------------------------------------------------------------------
558global	_bn_mul_comba4
559align	16
560_bn_mul_comba4:
561L$_bn_mul_comba4_begin:
	; Prologue: save esi/edi/ebp/ebx. [12+esp] after one push is the 2nd
	; argument (a); [20+esp] after two pushes is the 3rd argument (b).
562	push	esi
563	mov	esi,DWORD [12+esp]
564	push	edi
565	mov	edi,DWORD [20+esp]
566	push	ebp
567	push	ebx
	; With all four registers pushed, [20+esp] is the 1st argument (r).
	; Clear the low/mid accumulators and preload eax = a[0], edx = b[0].
568	xor	ebx,ebx
569	mov	eax,DWORD [esi]
570	xor	ecx,ecx
571	mov	edx,DWORD [edi]
572	; ################## Calculate word 0
573	xor	ebp,ebp
574	; mul a[0]*b[0]
575	mul	edx
576	add	ebx,eax
577	mov	eax,DWORD [20+esp]
578	adc	ecx,edx
579	mov	edx,DWORD [edi]
580	adc	ebp,0
581	mov	DWORD [eax],ebx
582	mov	eax,DWORD [4+esi]
583	; saved r[0]
584	; ################## Calculate word 1
585	xor	ebx,ebx
586	; mul a[1]*b[0]
587	mul	edx
588	add	ecx,eax
589	mov	eax,DWORD [esi]
590	adc	ebp,edx
591	mov	edx,DWORD [4+edi]
592	adc	ebx,0
593	; mul a[0]*b[1]
594	mul	edx
595	add	ecx,eax
596	mov	eax,DWORD [20+esp]
597	adc	ebp,edx
598	mov	edx,DWORD [edi]
599	adc	ebx,0
600	mov	DWORD [4+eax],ecx
601	mov	eax,DWORD [8+esi]
602	; saved r[1]
603	; ################## Calculate word 2
604	xor	ecx,ecx
605	; mul a[2]*b[0]
606	mul	edx
607	add	ebp,eax
608	mov	eax,DWORD [4+esi]
609	adc	ebx,edx
610	mov	edx,DWORD [4+edi]
611	adc	ecx,0
612	; mul a[1]*b[1]
613	mul	edx
614	add	ebp,eax
615	mov	eax,DWORD [esi]
616	adc	ebx,edx
617	mov	edx,DWORD [8+edi]
618	adc	ecx,0
619	; mul a[0]*b[2]
620	mul	edx
621	add	ebp,eax
622	mov	eax,DWORD [20+esp]
623	adc	ebx,edx
624	mov	edx,DWORD [edi]
625	adc	ecx,0
626	mov	DWORD [8+eax],ebp
627	mov	eax,DWORD [12+esi]
628	; saved r[2]
629	; ################## Calculate word 3
630	xor	ebp,ebp
631	; mul a[3]*b[0]
632	mul	edx
633	add	ebx,eax
634	mov	eax,DWORD [8+esi]
635	adc	ecx,edx
636	mov	edx,DWORD [4+edi]
637	adc	ebp,0
638	; mul a[2]*b[1]
639	mul	edx
640	add	ebx,eax
641	mov	eax,DWORD [4+esi]
642	adc	ecx,edx
643	mov	edx,DWORD [8+edi]
644	adc	ebp,0
645	; mul a[1]*b[2]
646	mul	edx
647	add	ebx,eax
648	mov	eax,DWORD [esi]
649	adc	ecx,edx
650	mov	edx,DWORD [12+edi]
651	adc	ebp,0
652	; mul a[0]*b[3]
653	mul	edx
654	add	ebx,eax
655	mov	eax,DWORD [20+esp]
656	adc	ecx,edx
657	mov	edx,DWORD [4+edi]
658	adc	ebp,0
659	mov	DWORD [12+eax],ebx
660	mov	eax,DWORD [12+esi]
661	; saved r[3]
662	; ################## Calculate word 4
	; From this column on, every column k starts at a[3]*b[k-3]; eax and
	; edx were preloaded accordingly at the end of the previous column.
663	xor	ebx,ebx
664	; mul a[3]*b[1]
665	mul	edx
666	add	ecx,eax
667	mov	eax,DWORD [8+esi]
668	adc	ebp,edx
669	mov	edx,DWORD [8+edi]
670	adc	ebx,0
671	; mul a[2]*b[2]
672	mul	edx
673	add	ecx,eax
674	mov	eax,DWORD [4+esi]
675	adc	ebp,edx
676	mov	edx,DWORD [12+edi]
677	adc	ebx,0
678	; mul a[1]*b[3]
679	mul	edx
680	add	ecx,eax
681	mov	eax,DWORD [20+esp]
682	adc	ebp,edx
683	mov	edx,DWORD [8+edi]
684	adc	ebx,0
685	mov	DWORD [16+eax],ecx
686	mov	eax,DWORD [12+esi]
687	; saved r[4]
688	; ################## Calculate word 5
689	xor	ecx,ecx
690	; mul a[3]*b[2]
691	mul	edx
692	add	ebp,eax
693	mov	eax,DWORD [8+esi]
694	adc	ebx,edx
695	mov	edx,DWORD [12+edi]
696	adc	ecx,0
697	; mul a[2]*b[3]
698	mul	edx
699	add	ebp,eax
700	mov	eax,DWORD [20+esp]
701	adc	ebx,edx
702	mov	edx,DWORD [12+edi]
703	adc	ecx,0
704	mov	DWORD [20+eax],ebp
705	mov	eax,DWORD [12+esi]
706	; saved r[5]
707	; ################## Calculate word 6
	; Last column: the low word becomes r[6] and the mid word r[7]. The
	; high word (ebp) is not stored; a 4x4-word product fits in 8 words,
	; so it is expected to be zero.
708	xor	ebp,ebp
709	; mul a[3]*b[3]
710	mul	edx
711	add	ebx,eax
712	mov	eax,DWORD [20+esp]
713	adc	ecx,edx
714	adc	ebp,0
715	mov	DWORD [24+eax],ebx
716	; saved r[6]
717	; save r[7]
718	mov	DWORD [28+eax],ecx
	; Epilogue: restore the saved registers in reverse push order.
719	pop	ebx
720	pop	ebp
721	pop	edi
722	pop	esi
723	ret
; ----------------------------------------------------------------------
; _bn_sqr_comba8: 8-word comba (column-wise) squaring, r = a * a.
;
; Arguments are read from the stack; all data words are 32 bits wide:
;   1st stack argument -> r, destination; words r[0]..r[15] are written
;   2nd stack argument -> a, source; words a[0]..a[7] are read
; NOTE(review): presumably matches a C prototype along the lines of
;   void bn_sqr_comba8(uint32_t r[16], const uint32_t a[8]);
; confirm against the C declaration, which is not visible in this file.
;
; Register use:
;   edi = r, esi = a
;   eax, edx = mul operands and the 64-bit product edx:eax
;   ebx, ecx, ebp = three-word column accumulator whose roles rotate with
;         the output word index k:
;           k mod 3 == 0: low = ebx, mid = ecx, high = ebp
;           k mod 3 == 1: low = ecx, mid = ebp, high = ebx
;           k mod 3 == 2: low = ebp, mid = ebx, high = ecx
;         The high register is zeroed at the start of each column and the
;         low register is stored to r[k] at its end.
;
; Each off-diagonal product a[i]*a[j] (i != j) is computed once and doubled
; in place before being accumulated; diagonal products a[i]*a[i] use
; "mul eax" and are accumulated once.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx, edx and the flags
; are clobbered.
; ----------------------------------------------------------------------
724global	_bn_sqr_comba8
725align	16
726_bn_sqr_comba8:
727L$_bn_sqr_comba8_begin:
	; Prologue: save esi/edi/ebp/ebx; afterwards [20+esp] is the 1st
	; argument (r) and [24+esp] the 2nd argument (a).
728	push	esi
729	push	edi
730	push	ebp
731	push	ebx
732	mov	edi,DWORD [20+esp]
733	mov	esi,DWORD [24+esp]
734	xor	ebx,ebx
735	xor	ecx,ecx
736	mov	eax,DWORD [esi]
737	; ############### Calculate word 0
738	xor	ebp,ebp
739	; sqr a[0]*a[0]
740	mul	eax
741	add	ebx,eax
742	adc	ecx,edx
743	mov	edx,DWORD [esi]
744	adc	ebp,0
745	mov	DWORD [edi],ebx
746	mov	eax,DWORD [4+esi]
747	; saved r[0]
748	; ############### Calculate word 1
749	xor	ebx,ebx
750	; sqr a[1]*a[0]
751	mul	edx
	; edx:eax = a[1]*a[0]. Off-diagonal products occur twice in the square,
	; so double the 64-bit product in place; the bit shifted out of edx is
	; caught in the high accumulator before the sum is added in.
752	add	eax,eax
753	adc	edx,edx
754	adc	ebx,0
755	add	ecx,eax
756	adc	ebp,edx
757	mov	eax,DWORD [8+esi]
758	adc	ebx,0
759	mov	DWORD [4+edi],ecx
760	mov	edx,DWORD [esi]
761	; saved r[1]
762	; ############### Calculate word 2
763	xor	ecx,ecx
764	; sqr a[2]*a[0]
765	mul	edx
766	add	eax,eax
767	adc	edx,edx
768	adc	ecx,0
769	add	ebp,eax
770	adc	ebx,edx
771	mov	eax,DWORD [4+esi]
772	adc	ecx,0
773	; sqr a[1]*a[1]
	; Diagonal term: accumulated once, no doubling.
774	mul	eax
775	add	ebp,eax
776	adc	ebx,edx
777	mov	edx,DWORD [esi]
778	adc	ecx,0
779	mov	DWORD [8+edi],ebp
780	mov	eax,DWORD [12+esi]
781	; saved r[2]
782	; ############### Calculate word 3
783	xor	ebp,ebp
784	; sqr a[3]*a[0]
785	mul	edx
786	add	eax,eax
787	adc	edx,edx
788	adc	ebp,0
789	add	ebx,eax
790	adc	ecx,edx
791	mov	eax,DWORD [8+esi]
792	adc	ebp,0
793	mov	edx,DWORD [4+esi]
794	; sqr a[2]*a[1]
795	mul	edx
796	add	eax,eax
797	adc	edx,edx
798	adc	ebp,0
799	add	ebx,eax
800	adc	ecx,edx
801	mov	eax,DWORD [16+esi]
802	adc	ebp,0
803	mov	DWORD [12+edi],ebx
804	mov	edx,DWORD [esi]
805	; saved r[3]
806	; ############### Calculate word 4
807	xor	ebx,ebx
808	; sqr a[4]*a[0]
809	mul	edx
810	add	eax,eax
811	adc	edx,edx
812	adc	ebx,0
813	add	ecx,eax
814	adc	ebp,edx
815	mov	eax,DWORD [12+esi]
816	adc	ebx,0
817	mov	edx,DWORD [4+esi]
818	; sqr a[3]*a[1]
819	mul	edx
820	add	eax,eax
821	adc	edx,edx
822	adc	ebx,0
823	add	ecx,eax
824	adc	ebp,edx
825	mov	eax,DWORD [8+esi]
826	adc	ebx,0
827	; sqr a[2]*a[2]
828	mul	eax
829	add	ecx,eax
830	adc	ebp,edx
831	mov	edx,DWORD [esi]
832	adc	ebx,0
833	mov	DWORD [16+edi],ecx
834	mov	eax,DWORD [20+esi]
835	; saved r[4]
836	; ############### Calculate word 5
837	xor	ecx,ecx
838	; sqr a[5]*a[0]
839	mul	edx
840	add	eax,eax
841	adc	edx,edx
842	adc	ecx,0
843	add	ebp,eax
844	adc	ebx,edx
845	mov	eax,DWORD [16+esi]
846	adc	ecx,0
847	mov	edx,DWORD [4+esi]
848	; sqr a[4]*a[1]
849	mul	edx
850	add	eax,eax
851	adc	edx,edx
852	adc	ecx,0
853	add	ebp,eax
854	adc	ebx,edx
855	mov	eax,DWORD [12+esi]
856	adc	ecx,0
857	mov	edx,DWORD [8+esi]
858	; sqr a[3]*a[2]
859	mul	edx
860	add	eax,eax
861	adc	edx,edx
862	adc	ecx,0
863	add	ebp,eax
864	adc	ebx,edx
865	mov	eax,DWORD [24+esi]
866	adc	ecx,0
867	mov	DWORD [20+edi],ebp
868	mov	edx,DWORD [esi]
869	; saved r[5]
870	; ############### Calculate word 6
871	xor	ebp,ebp
872	; sqr a[6]*a[0]
873	mul	edx
874	add	eax,eax
875	adc	edx,edx
876	adc	ebp,0
877	add	ebx,eax
878	adc	ecx,edx
879	mov	eax,DWORD [20+esi]
880	adc	ebp,0
881	mov	edx,DWORD [4+esi]
882	; sqr a[5]*a[1]
883	mul	edx
884	add	eax,eax
885	adc	edx,edx
886	adc	ebp,0
887	add	ebx,eax
888	adc	ecx,edx
889	mov	eax,DWORD [16+esi]
890	adc	ebp,0
891	mov	edx,DWORD [8+esi]
892	; sqr a[4]*a[2]
893	mul	edx
894	add	eax,eax
895	adc	edx,edx
896	adc	ebp,0
897	add	ebx,eax
898	adc	ecx,edx
899	mov	eax,DWORD [12+esi]
900	adc	ebp,0
901	; sqr a[3]*a[3]
902	mul	eax
903	add	ebx,eax
904	adc	ecx,edx
905	mov	edx,DWORD [esi]
906	adc	ebp,0
907	mov	DWORD [24+edi],ebx
908	mov	eax,DWORD [28+esi]
909	; saved r[6]
910	; ############### Calculate word 7
911	xor	ebx,ebx
912	; sqr a[7]*a[0]
913	mul	edx
914	add	eax,eax
915	adc	edx,edx
916	adc	ebx,0
917	add	ecx,eax
918	adc	ebp,edx
919	mov	eax,DWORD [24+esi]
920	adc	ebx,0
921	mov	edx,DWORD [4+esi]
922	; sqr a[6]*a[1]
923	mul	edx
924	add	eax,eax
925	adc	edx,edx
926	adc	ebx,0
927	add	ecx,eax
928	adc	ebp,edx
929	mov	eax,DWORD [20+esi]
930	adc	ebx,0
931	mov	edx,DWORD [8+esi]
932	; sqr a[5]*a[2]
933	mul	edx
934	add	eax,eax
935	adc	edx,edx
936	adc	ebx,0
937	add	ecx,eax
938	adc	ebp,edx
939	mov	eax,DWORD [16+esi]
940	adc	ebx,0
941	mov	edx,DWORD [12+esi]
942	; sqr a[4]*a[3]
943	mul	edx
944	add	eax,eax
945	adc	edx,edx
946	adc	ebx,0
947	add	ecx,eax
948	adc	ebp,edx
949	mov	eax,DWORD [28+esi]
950	adc	ebx,0
951	mov	DWORD [28+edi],ecx
952	mov	edx,DWORD [4+esi]
953	; saved r[7]
954	; ############### Calculate word 8
955	xor	ecx,ecx
956	; sqr a[7]*a[1]
957	mul	edx
958	add	eax,eax
959	adc	edx,edx
960	adc	ecx,0
961	add	ebp,eax
962	adc	ebx,edx
963	mov	eax,DWORD [24+esi]
964	adc	ecx,0
965	mov	edx,DWORD [8+esi]
966	; sqr a[6]*a[2]
967	mul	edx
968	add	eax,eax
969	adc	edx,edx
970	adc	ecx,0
971	add	ebp,eax
972	adc	ebx,edx
973	mov	eax,DWORD [20+esi]
974	adc	ecx,0
975	mov	edx,DWORD [12+esi]
976	; sqr a[5]*a[3]
977	mul	edx
978	add	eax,eax
979	adc	edx,edx
980	adc	ecx,0
981	add	ebp,eax
982	adc	ebx,edx
983	mov	eax,DWORD [16+esi]
984	adc	ecx,0
985	; sqr a[4]*a[4]
986	mul	eax
987	add	ebp,eax
988	adc	ebx,edx
989	mov	edx,DWORD [8+esi]
990	adc	ecx,0
991	mov	DWORD [32+edi],ebp
992	mov	eax,DWORD [28+esi]
993	; saved r[8]
994	; ############### Calculate word 9
995	xor	ebp,ebp
996	; sqr a[7]*a[2]
997	mul	edx
998	add	eax,eax
999	adc	edx,edx
1000	adc	ebp,0
1001	add	ebx,eax
1002	adc	ecx,edx
1003	mov	eax,DWORD [24+esi]
1004	adc	ebp,0
1005	mov	edx,DWORD [12+esi]
1006	; sqr a[6]*a[3]
1007	mul	edx
1008	add	eax,eax
1009	adc	edx,edx
1010	adc	ebp,0
1011	add	ebx,eax
1012	adc	ecx,edx
1013	mov	eax,DWORD [20+esi]
1014	adc	ebp,0
1015	mov	edx,DWORD [16+esi]
1016	; sqr a[5]*a[4]
1017	mul	edx
1018	add	eax,eax
1019	adc	edx,edx
1020	adc	ebp,0
1021	add	ebx,eax
1022	adc	ecx,edx
1023	mov	eax,DWORD [28+esi]
1024	adc	ebp,0
1025	mov	DWORD [36+edi],ebx
1026	mov	edx,DWORD [12+esi]
1027	; saved r[9]
1028	; ############### Calculate word 10
1029	xor	ebx,ebx
1030	; sqr a[7]*a[3]
1031	mul	edx
1032	add	eax,eax
1033	adc	edx,edx
1034	adc	ebx,0
1035	add	ecx,eax
1036	adc	ebp,edx
1037	mov	eax,DWORD [24+esi]
1038	adc	ebx,0
1039	mov	edx,DWORD [16+esi]
1040	; sqr a[6]*a[4]
1041	mul	edx
1042	add	eax,eax
1043	adc	edx,edx
1044	adc	ebx,0
1045	add	ecx,eax
1046	adc	ebp,edx
1047	mov	eax,DWORD [20+esi]
1048	adc	ebx,0
1049	; sqr a[5]*a[5]
1050	mul	eax
1051	add	ecx,eax
1052	adc	ebp,edx
1053	mov	edx,DWORD [16+esi]
1054	adc	ebx,0
1055	mov	DWORD [40+edi],ecx
1056	mov	eax,DWORD [28+esi]
1057	; saved r[10]
1058	; ############### Calculate word 11
1059	xor	ecx,ecx
1060	; sqr a[7]*a[4]
1061	mul	edx
1062	add	eax,eax
1063	adc	edx,edx
1064	adc	ecx,0
1065	add	ebp,eax
1066	adc	ebx,edx
1067	mov	eax,DWORD [24+esi]
1068	adc	ecx,0
1069	mov	edx,DWORD [20+esi]
1070	; sqr a[6]*a[5]
1071	mul	edx
1072	add	eax,eax
1073	adc	edx,edx
1074	adc	ecx,0
1075	add	ebp,eax
1076	adc	ebx,edx
1077	mov	eax,DWORD [28+esi]
1078	adc	ecx,0
1079	mov	DWORD [44+edi],ebp
1080	mov	edx,DWORD [20+esi]
1081	; saved r[11]
1082	; ############### Calculate word 12
1083	xor	ebp,ebp
1084	; sqr a[7]*a[5]
1085	mul	edx
1086	add	eax,eax
1087	adc	edx,edx
1088	adc	ebp,0
1089	add	ebx,eax
1090	adc	ecx,edx
1091	mov	eax,DWORD [24+esi]
1092	adc	ebp,0
1093	; sqr a[6]*a[6]
1094	mul	eax
1095	add	ebx,eax
1096	adc	ecx,edx
1097	mov	edx,DWORD [24+esi]
1098	adc	ebp,0
1099	mov	DWORD [48+edi],ebx
1100	mov	eax,DWORD [28+esi]
1101	; saved r[12]
1102	; ############### Calculate word 13
1103	xor	ebx,ebx
1104	; sqr a[7]*a[6]
1105	mul	edx
1106	add	eax,eax
1107	adc	edx,edx
1108	adc	ebx,0
1109	add	ecx,eax
1110	adc	ebp,edx
1111	mov	eax,DWORD [28+esi]
1112	adc	ebx,0
1113	mov	DWORD [52+edi],ecx
1114	; saved r[13]
1115	; ############### Calculate word 14
	; Last column: the low word becomes r[14] and the mid word r[15]. The
	; high word (ecx) is not stored; the square of an 8-word value fits in
	; 16 words, so it is expected to be zero.
1116	xor	ecx,ecx
1117	; sqr a[7]*a[7]
1118	mul	eax
1119	add	ebp,eax
1120	adc	ebx,edx
1121	adc	ecx,0
1122	mov	DWORD [56+edi],ebp
1123	; saved r[14]
1124	mov	DWORD [60+edi],ebx
	; Epilogue: restore the saved registers in reverse push order.
1125	pop	ebx
1126	pop	ebp
1127	pop	edi
1128	pop	esi
1129	ret
; ----------------------------------------------------------------------
; _bn_sqr_comba4: 4-word comba (column-wise) squaring, r = a * a.
;
; Arguments are read from the stack; all data words are 32 bits wide:
;   1st stack argument -> r, destination; words r[0]..r[7] are written
;   2nd stack argument -> a, source; words a[0]..a[3] are read
; NOTE(review): presumably matches a C prototype along the lines of
;   void bn_sqr_comba4(uint32_t r[8], const uint32_t a[4]);
; confirm against the C declaration, which is not visible in this file.
;
; Register use:
;   edi = r, esi = a
;   eax, edx = mul operands and the 64-bit product edx:eax
;   ebx, ecx, ebp = three-word column accumulator whose roles rotate with
;         the output word index k:
;           k mod 3 == 0: low = ebx, mid = ecx, high = ebp
;           k mod 3 == 1: low = ecx, mid = ebp, high = ebx
;           k mod 3 == 2: low = ebp, mid = ebx, high = ecx
;         The high register is zeroed at the start of each column and the
;         low register is stored to r[k] at its end.
;
; Each off-diagonal product a[i]*a[j] (i != j) is computed once and doubled
; in place before being accumulated; diagonal products a[i]*a[i] use
; "mul eax" and are accumulated once.
;
; esi, edi, ebp and ebx are saved and restored; eax, ecx, edx and the flags
; are clobbered.
; ----------------------------------------------------------------------
1130global	_bn_sqr_comba4
1131align	16
1132_bn_sqr_comba4:
1133L$_bn_sqr_comba4_begin:
	; Prologue: save esi/edi/ebp/ebx; afterwards [20+esp] is the 1st
	; argument (r) and [24+esp] the 2nd argument (a).
1134	push	esi
1135	push	edi
1136	push	ebp
1137	push	ebx
1138	mov	edi,DWORD [20+esp]
1139	mov	esi,DWORD [24+esp]
1140	xor	ebx,ebx
1141	xor	ecx,ecx
1142	mov	eax,DWORD [esi]
1143	; ############### Calculate word 0
1144	xor	ebp,ebp
1145	; sqr a[0]*a[0]
1146	mul	eax
1147	add	ebx,eax
1148	adc	ecx,edx
1149	mov	edx,DWORD [esi]
1150	adc	ebp,0
1151	mov	DWORD [edi],ebx
1152	mov	eax,DWORD [4+esi]
1153	; saved r[0]
1154	; ############### Calculate word 1
1155	xor	ebx,ebx
1156	; sqr a[1]*a[0]
1157	mul	edx
	; edx:eax = a[1]*a[0]. Off-diagonal products occur twice in the square,
	; so double the 64-bit product in place; the bit shifted out of edx is
	; caught in the high accumulator before the sum is added in.
1158	add	eax,eax
1159	adc	edx,edx
1160	adc	ebx,0
1161	add	ecx,eax
1162	adc	ebp,edx
1163	mov	eax,DWORD [8+esi]
1164	adc	ebx,0
1165	mov	DWORD [4+edi],ecx
1166	mov	edx,DWORD [esi]
1167	; saved r[1]
1168	; ############### Calculate word 2
1169	xor	ecx,ecx
1170	; sqr a[2]*a[0]
1171	mul	edx
1172	add	eax,eax
1173	adc	edx,edx
1174	adc	ecx,0
1175	add	ebp,eax
1176	adc	ebx,edx
1177	mov	eax,DWORD [4+esi]
1178	adc	ecx,0
1179	; sqr a[1]*a[1]
	; Diagonal term: accumulated once, no doubling.
1180	mul	eax
1181	add	ebp,eax
1182	adc	ebx,edx
1183	mov	edx,DWORD [esi]
1184	adc	ecx,0
1185	mov	DWORD [8+edi],ebp
1186	mov	eax,DWORD [12+esi]
1187	; saved r[2]
1188	; ############### Calculate word 3
1189	xor	ebp,ebp
1190	; sqr a[3]*a[0]
1191	mul	edx
1192	add	eax,eax
1193	adc	edx,edx
1194	adc	ebp,0
1195	add	ebx,eax
1196	adc	ecx,edx
1197	mov	eax,DWORD [8+esi]
1198	adc	ebp,0
1199	mov	edx,DWORD [4+esi]
1200	; sqr a[2]*a[1]
1201	mul	edx
1202	add	eax,eax
1203	adc	edx,edx
1204	adc	ebp,0
1205	add	ebx,eax
1206	adc	ecx,edx
1207	mov	eax,DWORD [12+esi]
1208	adc	ebp,0
1209	mov	DWORD [12+edi],ebx
1210	mov	edx,DWORD [4+esi]
1211	; saved r[3]
1212	; ############### Calculate word 4
1213	xor	ebx,ebx
1214	; sqr a[3]*a[1]
1215	mul	edx
1216	add	eax,eax
1217	adc	edx,edx
1218	adc	ebx,0
1219	add	ecx,eax
1220	adc	ebp,edx
1221	mov	eax,DWORD [8+esi]
1222	adc	ebx,0
1223	; sqr a[2]*a[2]
1224	mul	eax
1225	add	ecx,eax
1226	adc	ebp,edx
1227	mov	edx,DWORD [8+esi]
1228	adc	ebx,0
1229	mov	DWORD [16+edi],ecx
1230	mov	eax,DWORD [12+esi]
1231	; saved r[4]
1232	; ############### Calculate word 5
1233	xor	ecx,ecx
1234	; sqr a[3]*a[2]
1235	mul	edx
1236	add	eax,eax
1237	adc	edx,edx
1238	adc	ecx,0
1239	add	ebp,eax
1240	adc	ebx,edx
1241	mov	eax,DWORD [12+esi]
1242	adc	ecx,0
1243	mov	DWORD [20+edi],ebp
1244	; saved r[5]
1245	; ############### Calculate word 6
	; Last column: the low word becomes r[6] and the mid word r[7]. The
	; high word (ebp) is not stored; the square of a 4-word value fits in
	; 8 words, so it is expected to be zero.
1246	xor	ebp,ebp
1247	; sqr a[3]*a[3]
1248	mul	eax
1249	add	ebx,eax
1250	adc	ecx,edx
1251	adc	ebp,0
1252	mov	DWORD [24+edi],ebx
1253	; saved r[6]
1254	mov	DWORD [28+edi],ecx
	; Epilogue: restore the saved registers in reverse push order.
1255	pop	ebx
1256	pop	ebp
1257	pop	edi
1258	pop	esi
1259	ret
1260%else
; Taken for every output format other than win32: none of the routines
; above are assembled and the object contains only this single ret.
; NOTE(review): presumably the ret exists so the object is never empty;
; confirm against the NASM bug linked below.
1261; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
1262ret
1263%endif
1264