xref: /aosp_15_r20/external/boringssl/src/gen/bcm/x86_64-mont-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifidn __OUTPUT_FORMAT__, win64
5default	rel
6%define XMMWORD
7%define YMMWORD
8%define ZMMWORD
9%define _CET_ENDBR
10
11%ifdef BORINGSSL_PREFIX
12%include "boringssl_prefix_symbols_nasm.inc"
13%endif
14section	.text code align=64
15
16
; ---------------------------------------------------------------------------
; bn_mul_mont_nohw -- word-at-a-time Montgomery multiplication (see the
; ident string near the end of .text: "Montgomery Multiplication for x86_64").
;
; Entered with the Win64 convention; the prologue spills rdi/rsi to the
; caller's home slots and remaps the six arguments:
;     rcx, rdx, r8, r9, [rsp+40], [rsp+48]  ->  rdi, rsi, rdx, rcx, r8, r9
; Register roles after the remap, as used by the code below:
;     rdi = result vector (written by $L$sub / $L$copy)
;     rsi = first multiplicand vector
;     r12 = second multiplicand vector (copied from rdx), indexed by r14
;     rcx = vector multiplied by rbp and subtracted at the end (the modulus)
;     r8  = pointer to a single qword that is loaded once and multiplied into
;           rbp on every outer pass (presumably n0 = -1/n mod 2^64 -- confirm
;           against the C prototype)
;     r9  = number of 64-bit words (zero-extended from r9d)
;     r14 = outer index, r15 = inner index
; Stack scratch: tp[0..num] at rsp, caller's rsp saved at [rsp+8+num*8].
; Returns rax = 1.
; ---------------------------------------------------------------------------
17global	bn_mul_mont_nohw
18
19ALIGN	16
20bn_mul_mont_nohw:
21	mov	QWORD[8+rsp],rdi	;WIN64 prologue
22	mov	QWORD[16+rsp],rsi
23	mov	rax,rsp
24$L$SEH_begin_bn_mul_mont_nohw:
25	mov	rdi,rcx	; Win64 arg1 -> rdi
26	mov	rsi,rdx	; Win64 arg2 -> rsi
27	mov	rdx,r8	; Win64 arg3 -> rdx
28	mov	rcx,r9	; Win64 arg4 -> rcx
29	mov	r8,QWORD[40+rsp]	; Win64 arg5 (stack) -> r8
30	mov	r9,QWORD[48+rsp]	; Win64 arg6 (stack) -> r9
31
32
33
34_CET_ENDBR
35	mov	r9d,r9d	; zero-extend the 32-bit word count
36	mov	rax,rsp	; rax = rsp before the pushes; kept until it is stored below
37
38	push	rbx
39
40	push	rbp
41
42	push	r12
43
44	push	r13
45
46	push	r14
47
48	push	r15
49
50
51	neg	r9
52	mov	r11,rsp
53	lea	r10,[((-16))+r9*8+rsp]	; r10 = rsp - 8*(num+2)
54	neg	r9	; restore positive num
55	and	r10,-1024	; round the new frame base down to a 1 KiB boundary
56
57
58
59
60
61
62
63
64
; Move rsp down to r10 one 4 KiB page at a time, reading a qword from each
; page on the way (presumably a stack probe so guard pages are hit in
; order -- the reason is not stated in this file).
65	sub	r11,r10
66	and	r11,-4096
67	lea	rsp,[r11*1+r10]
68	mov	r11,QWORD[rsp]	; touch the page
69	cmp	rsp,r10
70	ja	NEAR $L$mul_page_walk
71	jmp	NEAR $L$mul_page_walk_done
72
73ALIGN	16
74$L$mul_page_walk:
75	lea	rsp,[((-4096))+rsp]
76	mov	r11,QWORD[rsp]	; touch the next lower page
77	cmp	rsp,r10
78	ja	NEAR $L$mul_page_walk
79$L$mul_page_walk_done:
80
81	mov	QWORD[8+r9*8+rsp],rax	; tp[num+1] = caller's rsp (read back in the epilogue and by mul_handler)
82
83$L$mul_body:
; ---- first pass: tp = (a * b[0] + m * n) >> 64, with m = low64(a[0]*b[0]) * [r8]
84	mov	r12,rdx	; r12 = second multiplicand vector (rdx is clobbered by mul)
85	mov	r8,QWORD[r8]	; r8 = the qword the fifth argument points to
86	mov	rbx,QWORD[r12]	; rbx = b[0]
87	mov	rax,QWORD[rsi]	; rax = a[0]
88
89	xor	r14,r14	; outer index i = 0
90	xor	r15,r15	; inner index j = 0
91
92	mov	rbp,r8
93	mul	rbx	; rdx:rax = a[0]*b[0]
94	mov	r10,rax
95	mov	rax,QWORD[rcx]	; rax = n[0]
96
97	imul	rbp,r10	; rbp = m (low 64 bits of the product)
98	mov	r11,rdx
99
100	mul	rbp	; rdx:rax = n[0]*m
101	add	r10,rax	; the sum is not stored; only its carry is propagated
102	mov	rax,QWORD[8+rsi]	; rax = a[1]
103	adc	rdx,0
104	mov	r13,rdx
105
106	lea	r15,[1+r15]	; j = 1
107	jmp	NEAR $L$1st_enter
108
109ALIGN	16
110$L$1st:
111	add	r13,rax
112	mov	rax,QWORD[r15*8+rsi]	; rax = a[j]
113	adc	rdx,0
114	add	r13,r11
115	mov	r11,r10
116	adc	rdx,0
117	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-2] = result word
118	mov	r13,rdx
119
120$L$1st_enter:
121	mul	rbx	; a[j]*b[0]
122	add	r11,rax
123	mov	rax,QWORD[r15*8+rcx]	; rax = n[j]
124	adc	rdx,0
125	lea	r15,[1+r15]	; j++
126	mov	r10,rdx
127
128	mul	rbp	; n[j]*m
129	cmp	r15,r9
130	jne	NEAR $L$1st
131
132	add	r13,rax
133	mov	rax,QWORD[rsi]	; preload a[0] for the next outer pass
134	adc	rdx,0
135	add	r13,r11
136	adc	rdx,0
137	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[num-2]
138	mov	r13,rdx
139	mov	r11,r10
140
141	xor	rdx,rdx
142	add	r13,r11
143	adc	rdx,0
144	mov	QWORD[((-8))+r9*8+rsp],r13	; tp[num-1]
145	mov	QWORD[r9*8+rsp],rdx	; tp[num] = top carry
146
147	lea	r14,[1+r14]	; i = 1
148	jmp	NEAR $L$outer
149ALIGN	16
; ---- outer passes: tp = (tp + a * b[i] + m * n) >> 64 for i = 1 .. num-1
150$L$outer:
151	mov	rbx,QWORD[r14*8+r12]	; rbx = b[i]
152	xor	r15,r15	; j = 0
153	mov	rbp,r8
154	mov	r10,QWORD[rsp]	; r10 = tp[0]
155	mul	rbx	; a[0]*b[i] (rax was preloaded with a[0])
156	add	r10,rax
157	mov	rax,QWORD[rcx]	; rax = n[0]
158	adc	rdx,0
159
160	imul	rbp,r10	; rbp = m for this pass
161	mov	r11,rdx
162
163	mul	rbp	; n[0]*m
164	add	r10,rax	; sum not stored; only the carry is kept
165	mov	rax,QWORD[8+rsi]	; rax = a[1]
166	adc	rdx,0
167	mov	r10,QWORD[8+rsp]	; r10 = tp[1]
168	mov	r13,rdx
169
170	lea	r15,[1+r15]	; j = 1
171	jmp	NEAR $L$inner_enter
172
173ALIGN	16
174$L$inner:
175	add	r13,rax
176	mov	rax,QWORD[r15*8+rsi]	; rax = a[j]
177	adc	rdx,0
178	add	r13,r10
179	mov	r10,QWORD[r15*8+rsp]	; r10 = tp[j]
180	adc	rdx,0
181	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-2] = result word
182	mov	r13,rdx
183
184$L$inner_enter:
185	mul	rbx	; a[j]*b[i]
186	add	r11,rax
187	mov	rax,QWORD[r15*8+rcx]	; rax = n[j]
188	adc	rdx,0
189	add	r10,r11
190	mov	r11,rdx
191	adc	r11,0
192	lea	r15,[1+r15]	; j++
193
194	mul	rbp	; n[j]*m
195	cmp	r15,r9
196	jne	NEAR $L$inner
197
198	add	r13,rax
199	mov	rax,QWORD[rsi]	; preload a[0] for the next outer pass
200	adc	rdx,0
201	add	r13,r10
202	mov	r10,QWORD[r15*8+rsp]	; r10 = tp[num] (previous top carry)
203	adc	rdx,0
204	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[num-2]
205	mov	r13,rdx
206
207	xor	rdx,rdx
208	add	r13,r11
209	adc	rdx,0
210	add	r13,r10
211	adc	rdx,0
212	mov	QWORD[((-8))+r9*8+rsp],r13	; tp[num-1]
213	mov	QWORD[r9*8+rsp],rdx	; tp[num] = top carry
214
215	lea	r14,[1+r14]	; i++
216	cmp	r14,r9
217	jb	NEAR $L$outer
218
; ---- rp = tp - n.  The xor clears CF, and lea/mov/dec inside the loop do
; not modify CF, so the borrow chains from one iteration to the next.
219	xor	r14,r14
220	mov	rax,QWORD[rsp]	; rax = tp[0]
221	mov	r15,r9	; loop counter = num
222
223ALIGN	16
224$L$sub:	sbb	rax,QWORD[r14*8+rcx]
225	mov	QWORD[r14*8+rdi],rax	; rp[i] = tp[i] - n[i] - borrow
226	mov	rax,QWORD[8+r14*8+rsp]	; rax = tp[i+1]
227	lea	r14,[1+r14]
228	dec	r15
229	jnz	NEAR $L$sub
230
; rax now holds tp[num]; after the sbb it is the select mask.  The copy loop
; below is branch-free: rp[i] = (tp[i] & rax) | (rp[i] & ~rax).
231	sbb	rax,0
232	mov	rbx,-1
233	xor	rbx,rax	; rbx = ~rax
234	xor	r14,r14
235	mov	r15,r9
236
237$L$copy:
238	mov	rcx,QWORD[r14*8+rdi]	; rcx = rp[i] (the difference)
239	mov	rdx,QWORD[r14*8+rsp]	; rdx = tp[i]
240	and	rcx,rbx
241	and	rdx,rax
242	mov	QWORD[r14*8+rsp],r9	; overwrite tp[i] on the stack with num
243	or	rdx,rcx
244	mov	QWORD[r14*8+rdi],rdx
245	lea	r14,[1+r14]
246	sub	r15,1
247	jnz	NEAR $L$copy
248
249	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = caller's rsp saved after the page walk
250
251	mov	rax,1	; return value
252	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue
253
254	mov	r14,QWORD[((-40))+rsi]
255
256	mov	r13,QWORD[((-32))+rsi]
257
258	mov	r12,QWORD[((-24))+rsi]
259
260	mov	rbp,QWORD[((-16))+rsi]
261
262	mov	rbx,QWORD[((-8))+rsi]
263
264	lea	rsp,[rsi]
265
266$L$mul_epilogue:
267	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
268	mov	rsi,QWORD[16+rsp]
269	ret
270
271$L$SEH_end_bn_mul_mont_nohw:
; ---------------------------------------------------------------------------
; bn_mul4x_mont -- Montgomery multiplication with the inner loops unrolled to
; four words per iteration.
;
; Win64 entry; arguments are remapped exactly as in bn_mul_mont_nohw:
;     rcx, rdx, r8, r9, [rsp+40], [rsp+48]  ->  rdi, rsi, rdx, rcx, r8, r9
; Register roles in the body:
;     rsi = first multiplicand, r12 = second multiplicand (from rdx),
;     rcx = modulus, r8 = qword loaded through the fifth argument,
;     r9 = word count, r14 = outer index, r15 = inner index,
;     rbx = b[i], rbp = per-pass multiplier m,
;     r10/r11 and r13/rdi = alternating carry/accumulator pairs.
; rdi is reused as an accumulator, so the result pointer is parked at
; [rsp+16+num*8] and reloaded before $L$sub4x.
; Stack scratch: tp[0..num] at rsp, caller's rsp at [rsp+8+num*8].
; The loops enter their 4-word body unconditionally after two words are
; handled up front and two are handled in the tail, so the word count is
; presumably required to be a multiple of 4 and at least 8 -- confirm
; against the caller.
; Returns rax = 1.
; ---------------------------------------------------------------------------
272global	bn_mul4x_mont
273
274ALIGN	16
275bn_mul4x_mont:
276	mov	QWORD[8+rsp],rdi	;WIN64 prologue
277	mov	QWORD[16+rsp],rsi
278	mov	rax,rsp
279$L$SEH_begin_bn_mul4x_mont:
280	mov	rdi,rcx
281	mov	rsi,rdx
282	mov	rdx,r8
283	mov	rcx,r9
284	mov	r8,QWORD[40+rsp]
285	mov	r9,QWORD[48+rsp]
286
287
288
289_CET_ENDBR
290	mov	r9d,r9d	; zero-extend the 32-bit word count
291	mov	rax,rsp	; rax = rsp before the pushes
292
293	push	rbx
294
295	push	rbp
296
297	push	r12
298
299	push	r13
300
301	push	r14
302
303	push	r15
304
305
306	neg	r9
307	mov	r11,rsp
308	lea	r10,[((-32))+r9*8+rsp]	; r10 = rsp - 8*(num+4)
309	neg	r9
310	and	r10,-1024	; 1 KiB aligned, so the movdqa accesses to tp below are aligned
311
; Lower rsp to r10 one 4 KiB page at a time, reading each page on the way.
312	sub	r11,r10
313	and	r11,-4096
314	lea	rsp,[r11*1+r10]
315	mov	r11,QWORD[rsp]
316	cmp	rsp,r10
317	ja	NEAR $L$mul4x_page_walk
318	jmp	NEAR $L$mul4x_page_walk_done
319
320$L$mul4x_page_walk:
321	lea	rsp,[((-4096))+rsp]
322	mov	r11,QWORD[rsp]
323	cmp	rsp,r10
324	ja	NEAR $L$mul4x_page_walk
325$L$mul4x_page_walk_done:
326
327	mov	QWORD[8+r9*8+rsp],rax	; save caller's rsp above tp
328
329$L$mul4x_body:
330	mov	QWORD[16+r9*8+rsp],rdi	; park the result pointer; rdi becomes scratch
331	mov	r12,rdx	; r12 = second multiplicand vector
332	mov	r8,QWORD[r8]	; r8 = qword the fifth argument points to
333	mov	rbx,QWORD[r12]	; rbx = b[0]
334	mov	rax,QWORD[rsi]	; rax = a[0]
335
336	xor	r14,r14	; i = 0
337	xor	r15,r15	; j = 0
338
339	mov	rbp,r8
340	mul	rbx	; a[0]*b[0]
341	mov	r10,rax
342	mov	rax,QWORD[rcx]	; n[0]
343
344	imul	rbp,r10	; rbp = m for the first pass
345	mov	r11,rdx
346
347	mul	rbp	; n[0]*m
348	add	r10,rax	; sum not stored; only the carry is kept
349	mov	rax,QWORD[8+rsi]	; a[1]
350	adc	rdx,0
351	mov	rdi,rdx
352
353	mul	rbx	; a[1]*b[0]
354	add	r11,rax
355	mov	rax,QWORD[8+rcx]	; n[1]
356	adc	rdx,0
357	mov	r10,rdx
358
359	mul	rbp	; n[1]*m
360	add	rdi,rax
361	mov	rax,QWORD[16+rsi]	; a[2]
362	adc	rdx,0
363	add	rdi,r11
364	lea	r15,[4+r15]	; j = 4
365	adc	rdx,0
366	mov	QWORD[rsp],rdi	; tp[0]
367	mov	r13,rdx
368	jmp	NEAR $L$1st4x
369ALIGN	16
; First-pass loop: each iteration consumes a[j-2..j+1] / n[j-2..j+1] and
; stores tp[j-3..j] (the last store happens after j has been advanced by 4).
370$L$1st4x:
371	mul	rbx	; a[j-2]*b[0]
372	add	r10,rax
373	mov	rax,QWORD[((-16))+r15*8+rcx]
374	adc	rdx,0
375	mov	r11,rdx
376
377	mul	rbp	; n[j-2]*m
378	add	r13,rax
379	mov	rax,QWORD[((-8))+r15*8+rsi]
380	adc	rdx,0
381	add	r13,r10
382	adc	rdx,0
383	mov	QWORD[((-24))+r15*8+rsp],r13	; tp[j-3]
384	mov	rdi,rdx
385
386	mul	rbx	; a[j-1]*b[0]
387	add	r11,rax
388	mov	rax,QWORD[((-8))+r15*8+rcx]
389	adc	rdx,0
390	mov	r10,rdx
391
392	mul	rbp	; n[j-1]*m
393	add	rdi,rax
394	mov	rax,QWORD[r15*8+rsi]
395	adc	rdx,0
396	add	rdi,r11
397	adc	rdx,0
398	mov	QWORD[((-16))+r15*8+rsp],rdi	; tp[j-2]
399	mov	r13,rdx
400
401	mul	rbx	; a[j]*b[0]
402	add	r10,rax
403	mov	rax,QWORD[r15*8+rcx]
404	adc	rdx,0
405	mov	r11,rdx
406
407	mul	rbp	; n[j]*m
408	add	r13,rax
409	mov	rax,QWORD[8+r15*8+rsi]
410	adc	rdx,0
411	add	r13,r10
412	adc	rdx,0
413	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[j-1]
414	mov	rdi,rdx
415
416	mul	rbx	; a[j+1]*b[0]
417	add	r11,rax
418	mov	rax,QWORD[8+r15*8+rcx]
419	adc	rdx,0
420	lea	r15,[4+r15]	; j += 4 (offsets below are relative to the new j)
421	mov	r10,rdx
422
423	mul	rbp	; n[old j+1]*m
424	add	rdi,rax
425	mov	rax,QWORD[((-16))+r15*8+rsi]
426	adc	rdx,0
427	add	rdi,r11
428	adc	rdx,0
429	mov	QWORD[((-32))+r15*8+rsp],rdi	; tp[old j]
430	mov	r13,rdx
431	cmp	r15,r9
432	jb	NEAR $L$1st4x
433
; First-pass tail: the last two words.
434	mul	rbx
435	add	r10,rax
436	mov	rax,QWORD[((-16))+r15*8+rcx]
437	adc	rdx,0
438	mov	r11,rdx
439
440	mul	rbp
441	add	r13,rax
442	mov	rax,QWORD[((-8))+r15*8+rsi]
443	adc	rdx,0
444	add	r13,r10
445	adc	rdx,0
446	mov	QWORD[((-24))+r15*8+rsp],r13
447	mov	rdi,rdx
448
449	mul	rbx
450	add	r11,rax
451	mov	rax,QWORD[((-8))+r15*8+rcx]
452	adc	rdx,0
453	mov	r10,rdx
454
455	mul	rbp
456	add	rdi,rax
457	mov	rax,QWORD[rsi]	; preload a[0] for the next outer pass
458	adc	rdx,0
459	add	rdi,r11
460	adc	rdx,0
461	mov	QWORD[((-16))+r15*8+rsp],rdi
462	mov	r13,rdx
463
464	xor	rdi,rdi
465	add	r13,r10
466	adc	rdi,0
467	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[num-1]
468	mov	QWORD[r15*8+rsp],rdi	; tp[num] = top carry
469
470	lea	r14,[1+r14]	; i = 1
471ALIGN	4
; Outer passes i = 1 .. num-1: same structure as the first pass, but each
; a[j]*b[i] partial sum additionally accumulates the existing tp word.
472$L$outer4x:
473	mov	rbx,QWORD[r14*8+r12]	; rbx = b[i]
474	xor	r15,r15	; j = 0
475	mov	r10,QWORD[rsp]	; tp[0]
476	mov	rbp,r8
477	mul	rbx	; a[0]*b[i]
478	add	r10,rax
479	mov	rax,QWORD[rcx]
480	adc	rdx,0
481
482	imul	rbp,r10	; rbp = m for this pass
483	mov	r11,rdx
484
485	mul	rbp	; n[0]*m
486	add	r10,rax	; sum not stored; only the carry is kept
487	mov	rax,QWORD[8+rsi]
488	adc	rdx,0
489	mov	rdi,rdx
490
491	mul	rbx	; a[1]*b[i]
492	add	r11,rax
493	mov	rax,QWORD[8+rcx]
494	adc	rdx,0
495	add	r11,QWORD[8+rsp]	; + tp[1]
496	adc	rdx,0
497	mov	r10,rdx
498
499	mul	rbp	; n[1]*m
500	add	rdi,rax
501	mov	rax,QWORD[16+rsi]
502	adc	rdx,0
503	add	rdi,r11
504	lea	r15,[4+r15]	; j = 4
505	adc	rdx,0
506	mov	QWORD[rsp],rdi	; tp[0]
507	mov	r13,rdx
508	jmp	NEAR $L$inner4x
509ALIGN	16
510$L$inner4x:
511	mul	rbx
512	add	r10,rax
513	mov	rax,QWORD[((-16))+r15*8+rcx]
514	adc	rdx,0
515	add	r10,QWORD[((-16))+r15*8+rsp]	; + tp[j-2]
516	adc	rdx,0
517	mov	r11,rdx
518
519	mul	rbp
520	add	r13,rax
521	mov	rax,QWORD[((-8))+r15*8+rsi]
522	adc	rdx,0
523	add	r13,r10
524	adc	rdx,0
525	mov	QWORD[((-24))+r15*8+rsp],r13	; tp[j-3]
526	mov	rdi,rdx
527
528	mul	rbx
529	add	r11,rax
530	mov	rax,QWORD[((-8))+r15*8+rcx]
531	adc	rdx,0
532	add	r11,QWORD[((-8))+r15*8+rsp]	; + tp[j-1]
533	adc	rdx,0
534	mov	r10,rdx
535
536	mul	rbp
537	add	rdi,rax
538	mov	rax,QWORD[r15*8+rsi]
539	adc	rdx,0
540	add	rdi,r11
541	adc	rdx,0
542	mov	QWORD[((-16))+r15*8+rsp],rdi	; tp[j-2]
543	mov	r13,rdx
544
545	mul	rbx
546	add	r10,rax
547	mov	rax,QWORD[r15*8+rcx]
548	adc	rdx,0
549	add	r10,QWORD[r15*8+rsp]	; + tp[j]
550	adc	rdx,0
551	mov	r11,rdx
552
553	mul	rbp
554	add	r13,rax
555	mov	rax,QWORD[8+r15*8+rsi]
556	adc	rdx,0
557	add	r13,r10
558	adc	rdx,0
559	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[j-1]
560	mov	rdi,rdx
561
562	mul	rbx
563	add	r11,rax
564	mov	rax,QWORD[8+r15*8+rcx]
565	adc	rdx,0
566	add	r11,QWORD[8+r15*8+rsp]	; + tp[j+1]
567	adc	rdx,0
568	lea	r15,[4+r15]	; j += 4
569	mov	r10,rdx
570
571	mul	rbp
572	add	rdi,rax
573	mov	rax,QWORD[((-16))+r15*8+rsi]
574	adc	rdx,0
575	add	rdi,r11
576	adc	rdx,0
577	mov	QWORD[((-32))+r15*8+rsp],rdi	; tp[old j]
578	mov	r13,rdx
579	cmp	r15,r9
580	jb	NEAR $L$inner4x
581
; Outer-pass tail: the last two words, then fold in the previous top carry.
582	mul	rbx
583	add	r10,rax
584	mov	rax,QWORD[((-16))+r15*8+rcx]
585	adc	rdx,0
586	add	r10,QWORD[((-16))+r15*8+rsp]
587	adc	rdx,0
588	mov	r11,rdx
589
590	mul	rbp
591	add	r13,rax
592	mov	rax,QWORD[((-8))+r15*8+rsi]
593	adc	rdx,0
594	add	r13,r10
595	adc	rdx,0
596	mov	QWORD[((-24))+r15*8+rsp],r13
597	mov	rdi,rdx
598
599	mul	rbx
600	add	r11,rax
601	mov	rax,QWORD[((-8))+r15*8+rcx]
602	adc	rdx,0
603	add	r11,QWORD[((-8))+r15*8+rsp]
604	adc	rdx,0
605	lea	r14,[1+r14]	; i++
606	mov	r10,rdx
607
608	mul	rbp
609	add	rdi,rax
610	mov	rax,QWORD[rsi]	; preload a[0] for the next outer pass
611	adc	rdx,0
612	add	rdi,r11
613	adc	rdx,0
614	mov	QWORD[((-16))+r15*8+rsp],rdi
615	mov	r13,rdx
616
617	xor	rdi,rdi
618	add	r13,r10
619	adc	rdi,0
620	add	r13,QWORD[r9*8+rsp]	; + tp[num] (previous top carry)
621	adc	rdi,0
622	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[num-1]
623	mov	QWORD[r15*8+rsp],rdi	; tp[num] = new top carry
624
625	cmp	r14,r9
626	jb	NEAR $L$outer4x
; ---- rp = tp - n, four words per iteration.  Only sub/sbb touch CF inside
; the chain (mov/lea/dec leave it alone), so the borrow runs straight through.
627	mov	rdi,QWORD[16+r9*8+rsp]	; reload the parked result pointer
628	lea	r15,[((-4))+r9]
629	mov	rax,QWORD[rsp]	; tp[0]
630	mov	rdx,QWORD[8+rsp]	; tp[1]
631	shr	r15,2	; r15 = (num-4)/4 loop iterations
632	lea	rsi,[rsp]	; rsi = tp
633	xor	r14,r14
634
635	sub	rax,QWORD[rcx]	; starts the borrow chain
636	mov	rbx,QWORD[16+rsi]
637	mov	rbp,QWORD[24+rsi]
638	sbb	rdx,QWORD[8+rcx]
639
640$L$sub4x:
641	mov	QWORD[r14*8+rdi],rax
642	mov	QWORD[8+r14*8+rdi],rdx
643	sbb	rbx,QWORD[16+r14*8+rcx]
644	mov	rax,QWORD[32+r14*8+rsi]
645	mov	rdx,QWORD[40+r14*8+rsi]
646	sbb	rbp,QWORD[24+r14*8+rcx]
647	mov	QWORD[16+r14*8+rdi],rbx
648	mov	QWORD[24+r14*8+rdi],rbp
649	sbb	rax,QWORD[32+r14*8+rcx]
650	mov	rbx,QWORD[48+r14*8+rsi]
651	mov	rbp,QWORD[56+r14*8+rsi]
652	sbb	rdx,QWORD[40+r14*8+rcx]
653	lea	r14,[4+r14]
654	dec	r15
655	jnz	NEAR $L$sub4x
656
657	mov	QWORD[r14*8+rdi],rax
658	mov	rax,QWORD[32+r14*8+rsi]	; rax = tp[num] (top carry word)
659	sbb	rbx,QWORD[16+r14*8+rcx]
660	mov	QWORD[8+r14*8+rdi],rdx
661	sbb	rbp,QWORD[24+r14*8+rcx]
662	mov	QWORD[16+r14*8+rdi],rbx
663
664	sbb	rax,0	; rax = select mask derived from top carry and final borrow
665	mov	QWORD[24+r14*8+rdi],rbp
; Branch-free select with SSE2: rp = (tp & mask) | (rp & ~mask), 32 bytes per
; iteration, while overwriting tp on the stack with zeros (xmm0).
666	pxor	xmm0,xmm0
667DB	102,72,15,110,224	; encodes: movq xmm4,rax
668	pcmpeqd	xmm5,xmm5	; xmm5 = all ones
669	pshufd	xmm4,xmm4,0	; broadcast the low dword of the mask
670	mov	r15,r9
671	pxor	xmm5,xmm4	; xmm5 = ~mask
672	shr	r15,2	; num/4 iterations
673	xor	eax,eax	; rax = byte offset into tp / rp
674
675	jmp	NEAR $L$copy4x
676ALIGN	16
677$L$copy4x:
678	movdqa	xmm1,XMMWORD[rax*1+rsp]
679	movdqu	xmm2,XMMWORD[rax*1+rdi]
680	pand	xmm1,xmm4
681	pand	xmm2,xmm5
682	movdqa	xmm3,XMMWORD[16+rax*1+rsp]
683	movdqa	XMMWORD[rax*1+rsp],xmm0	; wipe tp
684	por	xmm1,xmm2
685	movdqu	xmm2,XMMWORD[16+rax*1+rdi]
686	movdqu	XMMWORD[rax*1+rdi],xmm1
687	pand	xmm3,xmm4
688	pand	xmm2,xmm5
689	movdqa	XMMWORD[16+rax*1+rsp],xmm0	; wipe tp
690	por	xmm3,xmm2
691	movdqu	XMMWORD[16+rax*1+rdi],xmm3
692	lea	rax,[32+rax]
693	dec	r15
694	jnz	NEAR $L$copy4x
695	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = caller's rsp saved after the page walk
696
697	mov	rax,1	; return value
698	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue
699
700	mov	r14,QWORD[((-40))+rsi]
701
702	mov	r13,QWORD[((-32))+rsi]
703
704	mov	r12,QWORD[((-24))+rsi]
705
706	mov	rbp,QWORD[((-16))+rsi]
707
708	mov	rbx,QWORD[((-8))+rsi]
709
710	lea	rsp,[rsi]
711
712$L$mul4x_epilogue:
713	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
714	mov	rsi,QWORD[16+rsp]
715	ret
716
717$L$SEH_end_bn_mul4x_mont:
; ---------------------------------------------------------------------------
; bn_sqr8x_mont -- wrapper that sets up a stack frame, calls one of two
; external squaring kernels, then performs the final subtraction and a
; branch-free conditional copy into the result.
;
; Win64 entry; arguments are remapped
;     rcx, rdx, r8, r9, [rsp+40], [rsp+48]  ->  rdi, rsi, rdx, rcx, r8, r9
; Roles visible in this wrapper:
;     rdi = result pointer, rsi = input vector pointer,
;     rdx = flag only: non-zero selects bn_sqrx8x_internal, zero selects
;           bn_sqr8x_internal,
;     rcx = pointer later reloaded by the callee into rbp for the subtraction
;           (presumably the modulus -- confirm against the kernels),
;     r8  = pointer to a qword whose value is stored at [rsp+32],
;     r9  = word count.
; Frame: [rsp+32] = qword loaded via r8, [rsp+40] = caller's rsp.
; NOTE(review): both kernels are defined outside this file; the register and
; flag state consumed after each call (rax, rbp, rcx/r8 or r9/rdi) is an
; assumption about their contract -- verify against their source.
; Returns rax = 1.
; ---------------------------------------------------------------------------
718EXTERN	bn_sqrx8x_internal
719EXTERN	bn_sqr8x_internal
720
721global	bn_sqr8x_mont
722
723ALIGN	32
724bn_sqr8x_mont:
725	mov	QWORD[8+rsp],rdi	;WIN64 prologue
726	mov	QWORD[16+rsp],rsi
727	mov	rax,rsp
728$L$SEH_begin_bn_sqr8x_mont:
729	mov	rdi,rcx
730	mov	rsi,rdx
731	mov	rdx,r8
732	mov	rcx,r9
733	mov	r8,QWORD[40+rsp]
734	mov	r9,QWORD[48+rsp]
735
736
737
738_CET_ENDBR
739	mov	r9d,r9d	; zero-extend the 32-bit word count
740	mov	rax,rsp	; rax = rsp before the pushes
741
742	push	rbx
743
744	push	rbp
745
746	push	r12
747
748	push	r13
749
750	push	r14
751
752	push	r15
753
754$L$sqr8x_prologue:
755
756	mov	r10d,r9d
757	shl	r9d,3	; r9 = num*8 (bytes)
758	shl	r10,3+2	; r10 = num*32
759	neg	r9	; r9 = -(num*8)
760
761
762
763
764
765
; Choose the frame base rbp: 64 bytes plus 2*num*8 bytes below rsp, then
; shifted by an amount derived from the low 12 bits of its distance to rsi
; (presumably to control how the scratch area and the input alias within a
; 4 KiB page -- the rationale is not stated in this file).
766	lea	r11,[((-64))+r9*2+rsp]
767	mov	rbp,rsp
768	mov	r8,QWORD[r8]	; r8 = qword the fifth argument points to
769	sub	r11,rsi
770	and	r11,4095	; page offset of (candidate frame - input pointer)
771	cmp	r10,r11
772	jb	NEAR $L$sqr8x_sp_alt
773	sub	rbp,r11
774	lea	rbp,[((-64))+r9*2+rbp]
775	jmp	NEAR $L$sqr8x_sp_done
776
777ALIGN	32
778$L$sqr8x_sp_alt:
779	lea	r10,[((4096-64))+r9*2]
780	lea	rbp,[((-64))+r9*2+rbp]
781	sub	r11,r10
782	mov	r10,0	; mov (not xor) keeps the flags from the sub for cmovc
783	cmovc	r11,r10	; clamp the adjustment to 0 if the sub borrowed
784	sub	rbp,r11
785$L$sqr8x_sp_done:
786	and	rbp,-64	; 64-byte align the frame base
; Lower rsp to rbp one 4 KiB page at a time, reading each page on the way.
787	mov	r11,rsp
788	sub	r11,rbp
789	and	r11,-4096
790	lea	rsp,[rbp*1+r11]
791	mov	r10,QWORD[rsp]
792	cmp	rsp,rbp
793	ja	NEAR $L$sqr8x_page_walk
794	jmp	NEAR $L$sqr8x_page_walk_done
795
796ALIGN	16
797$L$sqr8x_page_walk:
798	lea	rsp,[((-4096))+rsp]
799	mov	r10,QWORD[rsp]
800	cmp	rsp,rbp
801	ja	NEAR $L$sqr8x_page_walk
802$L$sqr8x_page_walk_done:
803
804	mov	r10,r9	; r10 = -(num*8)
805	neg	r9	; r9 = num*8
806
807	mov	QWORD[32+rsp],r8	; frame slot: qword loaded via the fifth argument
808	mov	QWORD[40+rsp],rax	; frame slot: caller's rsp (used by epilogue and sqr_handler)
809
810$L$sqr8x_body:
811
812DB	102,72,15,110,209	; encodes: movq xmm2,rcx
813	pxor	xmm0,xmm0
814DB	102,72,15,110,207	; encodes: movq xmm1,rdi (result pointer, restored after the call)
815DB	102,73,15,110,218	; encodes: movq xmm3,r10
816	test	rdx,rdx	; third argument selects the kernel
817	jz	NEAR $L$sqr8x_nox
818
819	call	bn_sqrx8x_internal
820
821
822
823
; After bn_sqrx8x_internal: rcx and r8 are taken from the callee (assumed:
; rcx = negative byte count, r8 = end of the result half of the scratch).
824	lea	rbx,[rcx*1+r8]
825	mov	r9,rcx
826	mov	rdx,rcx
827DB	102,72,15,126,207	; encodes: movq rdi,xmm1 (restore result pointer)
828	sar	rcx,3+2	; rcx = count of 4-word groups (negative); CF = last bit shifted out
829	jmp	NEAR $L$sqr8x_sub
830
831ALIGN	32
832$L$sqr8x_nox:
833	call	bn_sqr8x_internal
834
835
836
837
; After bn_sqr8x_internal: r9 and rdi are taken from the callee (assumed:
; r9 = negative byte count, rdi = end of the result half of the scratch).
838	lea	rbx,[r9*1+rdi]
839	mov	rcx,r9
840	mov	rdx,r9
841DB	102,72,15,126,207	; encodes: movq rdi,xmm1 (restore result pointer)
842	sar	rcx,3+2	; rcx = count of 4-word groups (negative); CF = last bit shifted out
843	jmp	NEAR $L$sqr8x_sub
844
845ALIGN	32
; rp = [rbx] - [rbp], four words per iteration; rcx counts up to zero.  The
; first sbb consumes the CF left by the sar above (0 when the byte count is
; a multiple of 32); mov/lea/inc inside the loop do not modify CF.
846$L$sqr8x_sub:
847	mov	r12,QWORD[rbx]
848	mov	r13,QWORD[8+rbx]
849	mov	r14,QWORD[16+rbx]
850	mov	r15,QWORD[24+rbx]
851	lea	rbx,[32+rbx]
852	sbb	r12,QWORD[rbp]
853	sbb	r13,QWORD[8+rbp]
854	sbb	r14,QWORD[16+rbp]
855	sbb	r15,QWORD[24+rbp]
856	lea	rbp,[32+rbp]
857	mov	QWORD[rdi],r12
858	mov	QWORD[8+rdi],r13
859	mov	QWORD[16+rdi],r14
860	mov	QWORD[24+rdi],r15
861	lea	rdi,[32+rdi]
862	inc	rcx
863	jnz	NEAR $L$sqr8x_sub
864
865	sbb	rax,0	; rax (from the callee, assumed top carry) minus borrow -> select mask
866	lea	rbx,[r9*1+rbx]	; rewind rbx by the byte count (r9 is negative)
867	lea	rdi,[r9*1+rdi]	; rewind rdi likewise
868
869DB	102,72,15,110,200	; encodes: movq xmm1,rax
870	pxor	xmm0,xmm0
871	pshufd	xmm1,xmm1,0	; broadcast the low dword of the mask
872	mov	rsi,QWORD[40+rsp]	; rsi = caller's rsp
873
874	jmp	NEAR $L$sqr8x_cond_copy
875
876ALIGN	32
; Branch-free select, 32 bytes per iteration:
;     rp = ([rbx] & mask) | (rp & ~mask)
; and zero both the 32 bytes just read at rbx and the 32 bytes at rbx+rdx
; (rdx holds the same negative byte count as r9).
877$L$sqr8x_cond_copy:
878	movdqa	xmm2,XMMWORD[rbx]
879	movdqa	xmm3,XMMWORD[16+rbx]
880	lea	rbx,[32+rbx]
881	movdqu	xmm4,XMMWORD[rdi]
882	movdqu	xmm5,XMMWORD[16+rdi]
883	lea	rdi,[32+rdi]
884	movdqa	XMMWORD[(-32)+rbx],xmm0
885	movdqa	XMMWORD[(-16)+rbx],xmm0
886	movdqa	XMMWORD[(-32)+rdx*1+rbx],xmm0
887	movdqa	XMMWORD[(-16)+rdx*1+rbx],xmm0
888	pcmpeqd	xmm0,xmm1	; xmm0 (was zero) = per-dword (mask == 0), i.e. ~mask for an all-0/all-1 mask
889	pand	xmm2,xmm1
890	pand	xmm3,xmm1
891	pand	xmm4,xmm0
892	pand	xmm5,xmm0
893	pxor	xmm0,xmm0	; back to zero for the next iteration's wipes
894	por	xmm4,xmm2
895	por	xmm5,xmm3
896	movdqu	XMMWORD[(-32)+rdi],xmm4
897	movdqu	XMMWORD[(-16)+rdi],xmm5
898	add	r9,32	; r9 counts up from the negative byte count to zero
899	jnz	NEAR $L$sqr8x_cond_copy
900
901	mov	rax,1	; return value
902	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue
903
904	mov	r14,QWORD[((-40))+rsi]
905
906	mov	r13,QWORD[((-32))+rsi]
907
908	mov	r12,QWORD[((-24))+rsi]
909
910	mov	rbp,QWORD[((-16))+rsi]
911
912	mov	rbx,QWORD[((-8))+rsi]
913
914	lea	rsp,[rsi]
915
916$L$sqr8x_epilogue:
917	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
918	mov	rsi,QWORD[16+rsp]
919	ret
920
921$L$SEH_end_bn_sqr8x_mont:
; ---------------------------------------------------------------------------
; bn_mulx4x_mont -- Montgomery multiplication using mulx with two independent
; carry chains (adcx uses CF, adox uses OF), four words per inner iteration.
;
; Win64 entry; arguments are remapped
;     rcx, rdx, r8, r9, [rsp+40], [rsp+48]  ->  rdi, rsi, rdx, rcx, r8, r9
; Frame slots set up before $L$mulx4x_body:
;     [rsp+0]  = num*8 (byte length)
;     [rsp+8]  = cursor into the second multiplicand (next b[i] address)
;     [rsp+16] = end of the second multiplicand (rdx + num*8)
;     [rsp+24] = qword loaded via the fifth argument
;     [rsp+32] = result pointer
;     [rsp+40] = caller's rsp
;     [rsp+48] = num/4 - 1 (inner-loop trip count)
;     [rsp+64...] = tp scratch vector
; In the loops: rdx is the implicit mulx multiplicand and alternates between
; r9 (= b[i]) and r8 (= per-pass multiplier m); rbp is kept at zero.
; The loops process four words per step, so the word count is presumably
; required to be a multiple of 4 and at least 8 -- confirm against the caller.
; Returns rax = 1.
; ---------------------------------------------------------------------------
922global	bn_mulx4x_mont
923
924ALIGN	32
925bn_mulx4x_mont:
926	mov	QWORD[8+rsp],rdi	;WIN64 prologue
927	mov	QWORD[16+rsp],rsi
928	mov	rax,rsp
929$L$SEH_begin_bn_mulx4x_mont:
930	mov	rdi,rcx
931	mov	rsi,rdx
932	mov	rdx,r8
933	mov	rcx,r9
934	mov	r8,QWORD[40+rsp]
935	mov	r9,QWORD[48+rsp]
936
937
938
939_CET_ENDBR
940	mov	rax,rsp	; rax = rsp before the pushes
941
942	push	rbx
943
944	push	rbp
945
946	push	r12
947
948	push	r13
949
950	push	r14
951
952	push	r15
953
954$L$mulx4x_prologue:
955
956	shl	r9d,3	; r9 = num*8; the 32-bit op also zero-extends
957	xor	r10,r10
958	sub	r10,r9	; r10 = -(num*8)
959	mov	r8,QWORD[r8]	; r8 = qword the fifth argument points to
960	lea	rbp,[((-72))+r10*1+rsp]	; frame base = rsp - 72 - num*8
961	and	rbp,-128	; 128-byte aligned
; Lower rsp to rbp one 4 KiB page at a time, reading each page on the way.
962	mov	r11,rsp
963	sub	r11,rbp
964	and	r11,-4096
965	lea	rsp,[rbp*1+r11]
966	mov	r10,QWORD[rsp]
967	cmp	rsp,rbp
968	ja	NEAR $L$mulx4x_page_walk
969	jmp	NEAR $L$mulx4x_page_walk_done
970
971ALIGN	16
972$L$mulx4x_page_walk:
973	lea	rsp,[((-4096))+rsp]
974	mov	r10,QWORD[rsp]
975	cmp	rsp,rbp
976	ja	NEAR $L$mulx4x_page_walk
977$L$mulx4x_page_walk_done:
978
979	lea	r10,[r9*1+rdx]	; r10 = end of the second multiplicand
980
981
982
983
984
985
986
987
988
989
990
991
992	mov	QWORD[rsp],r9	; [rsp+0]  = num*8
993	shr	r9,5	; r9 = num/4
994	mov	QWORD[16+rsp],r10	; [rsp+16] = end of b
995	sub	r9,1
996	mov	QWORD[24+rsp],r8	; [rsp+24] = qword loaded via the fifth argument
997	mov	QWORD[32+rsp],rdi	; [rsp+32] = result pointer
998	mov	QWORD[40+rsp],rax	; [rsp+40] = caller's rsp
999
1000	mov	QWORD[48+rsp],r9	; [rsp+48] = num/4 - 1
1001	jmp	NEAR $L$mulx4x_body
1002
1003ALIGN	32
; ---- first pass (b[0]): first four words handled here, the rest in $L$mulx4x_1st
1004$L$mulx4x_body:
1005	lea	rdi,[8+rdx]	; rdi = address of b[1]
1006	mov	rdx,QWORD[rdx]	; rdx = b[0] (implicit mulx operand)
1007	lea	rbx,[((64+32))+rsp]	; rbx = tp + 32 bytes; stores below use negative offsets
1008	mov	r9,rdx	; keep b[0] for restoring rdx later
1009
1010	mulx	rax,r8,QWORD[rsi]	; rax:r8 = a[0]*b[0]
1011	mulx	r14,r11,QWORD[8+rsi]	; a[1]*b[0]
1012	add	r11,rax
1013	mov	QWORD[8+rsp],rdi	; save the b cursor
1014	mulx	r13,r12,QWORD[16+rsi]	; a[2]*b[0]
1015	adc	r12,r14
1016	adc	r13,0
1017
1018	mov	rdi,r8	; rdi = low word of a[0]*b[0]
1019	imul	r8,QWORD[24+rsp]	; r8 = m for this pass
1020	xor	rbp,rbp	; rbp = 0; also clears CF and OF for the adcx/adox chains
1021
1022	mulx	r14,rax,QWORD[24+rsi]	; a[3]*b[0]
1023	mov	rdx,r8	; switch the implicit operand to m
1024	lea	rsi,[32+rsi]
1025	adcx	r13,rax
1026	adcx	r14,rbp
1027
1028	mulx	r10,rax,QWORD[rcx]	; n[0]*m
1029	adcx	rdi,rax	; sum not stored; only the carry is kept
1030	adox	r10,r11
1031	mulx	r11,rax,QWORD[8+rcx]	; n[1]*m
1032	adcx	r10,rax
1033	adox	r11,r12
1034	DB	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	; encodes: mulx r12,rax,QWORD[16+rcx] (with a 32-bit displacement)
1035	mov	rdi,QWORD[48+rsp]	; rdi = inner-loop trip count
1036	mov	QWORD[((-32))+rbx],r10	; tp[0]
1037	adcx	r11,rax
1038	adox	r12,r13
1039	mulx	r15,rax,QWORD[24+rcx]	; n[3]*m
1040	mov	rdx,r9	; implicit operand back to b[0]
1041	mov	QWORD[((-24))+rbx],r11	; tp[1]
1042	adcx	r12,rax
1043	adox	r15,rbp
1044	lea	rcx,[32+rcx]
1045	mov	QWORD[((-16))+rbx],r12	; tp[2]
1046
1047	jmp	NEAR $L$mulx4x_1st
1048
1049ALIGN	32
1050$L$mulx4x_1st:
1051	adcx	r15,rbp
1052	mulx	rax,r10,QWORD[rsi]
1053	adcx	r10,r14
1054	mulx	r14,r11,QWORD[8+rsi]
1055	adcx	r11,rax
1056	mulx	rax,r12,QWORD[16+rsi]
1057	adcx	r12,r14
1058	mulx	r14,r13,QWORD[24+rsi]
1059	DB	0x67,0x67	; two 0x67 prefix bytes emitted in front of the next instruction (padding)
1060	mov	rdx,r8	; implicit operand = m
1061	adcx	r13,rax
1062	adcx	r14,rbp
1063	lea	rsi,[32+rsi]
1064	lea	rbx,[32+rbx]
1065
1066	adox	r10,r15
1067	mulx	r15,rax,QWORD[rcx]
1068	adcx	r10,rax
1069	adox	r11,r15
1070	mulx	r15,rax,QWORD[8+rcx]
1071	adcx	r11,rax
1072	adox	r12,r15
1073	mulx	r15,rax,QWORD[16+rcx]
1074	mov	QWORD[((-40))+rbx],r10
1075	adcx	r12,rax
1076	mov	QWORD[((-32))+rbx],r11
1077	adox	r13,r15
1078	mulx	r15,rax,QWORD[24+rcx]
1079	mov	rdx,r9	; implicit operand back to b[0]
1080	mov	QWORD[((-24))+rbx],r12
1081	adcx	r13,rax
1082	adox	r15,rbp
1083	lea	rcx,[32+rcx]
1084	mov	QWORD[((-16))+rbx],r13
1085
1086	dec	rdi	; dec leaves CF untouched, so the adcx chain survives the loop edge
1087	jnz	NEAR $L$mulx4x_1st
1088
1089	mov	rax,QWORD[rsp]	; rax = num*8
1090	mov	rdi,QWORD[8+rsp]	; rdi = b cursor
1091	adc	r15,rbp	; fold the pending CF into r15 (rbp = 0)
1092	add	r14,r15
1093	sbb	r15,r15	; r15 = 0 or -1: top carry as a mask
1094	mov	QWORD[((-8))+rbx],r14	; tp[num-1]
1095	jmp	NEAR $L$mulx4x_outer
1096
1097ALIGN	32
; ---- outer passes: one per remaining b[i], until the cursor reaches [rsp+16]
1098$L$mulx4x_outer:
1099	mov	rdx,QWORD[rdi]	; rdx = b[i]
1100	lea	rdi,[8+rdi]
1101	sub	rsi,rax	; rewind a by num*8 bytes
1102	mov	QWORD[rbx],r15	; tp[num] = top-carry mask from the previous pass
1103	lea	rbx,[((64+32))+rsp]
1104	sub	rcx,rax	; rewind n by num*8 bytes
1105
1106	mulx	r11,r8,QWORD[rsi]
1107	xor	ebp,ebp	; rbp = 0; clears CF and OF
1108	mov	r9,rdx	; keep b[i]
1109	mulx	r12,r14,QWORD[8+rsi]
1110	adox	r8,QWORD[((-32))+rbx]	; + tp[0]
1111	adcx	r11,r14
1112	mulx	r13,r15,QWORD[16+rsi]
1113	adox	r11,QWORD[((-24))+rbx]	; + tp[1]
1114	adcx	r12,r15
1115	adox	r12,QWORD[((-16))+rbx]	; + tp[2]
1116	adcx	r13,rbp
1117	adox	r13,rbp
1118
1119	mov	QWORD[8+rsp],rdi	; save the advanced b cursor
1120	mov	r15,r8
1121	imul	r8,QWORD[24+rsp]	; r8 = m for this pass
1122	xor	ebp,ebp	; clear CF and OF again
1123
1124	mulx	r14,rax,QWORD[24+rsi]
1125	mov	rdx,r8	; implicit operand = m
1126	adcx	r13,rax
1127	adox	r13,QWORD[((-8))+rbx]	; + tp[3]
1128	adcx	r14,rbp
1129	lea	rsi,[32+rsi]
1130	adox	r14,rbp
1131
1132	mulx	r10,rax,QWORD[rcx]
1133	adcx	r15,rax	; sum not stored; only the carry is kept
1134	adox	r10,r11
1135	mulx	r11,rax,QWORD[8+rcx]
1136	adcx	r10,rax
1137	adox	r11,r12
1138	mulx	r12,rax,QWORD[16+rcx]
1139	mov	QWORD[((-32))+rbx],r10	; tp[0]
1140	adcx	r11,rax
1141	adox	r12,r13
1142	mulx	r15,rax,QWORD[24+rcx]
1143	mov	rdx,r9	; implicit operand back to b[i]
1144	mov	QWORD[((-24))+rbx],r11	; tp[1]
1145	lea	rcx,[32+rcx]
1146	adcx	r12,rax
1147	adox	r15,rbp
1148	mov	rdi,QWORD[48+rsp]	; rdi = inner-loop trip count
1149	mov	QWORD[((-16))+rbx],r12	; tp[2]
1150
1151	jmp	NEAR $L$mulx4x_inner
1152
1153ALIGN	32
1154$L$mulx4x_inner:
1155	mulx	rax,r10,QWORD[rsi]
1156	adcx	r15,rbp
1157	adox	r10,r14
1158	mulx	r14,r11,QWORD[8+rsi]
1159	adcx	r10,QWORD[rbx]	; + existing tp words
1160	adox	r11,rax
1161	mulx	rax,r12,QWORD[16+rsi]
1162	adcx	r11,QWORD[8+rbx]
1163	adox	r12,r14
1164	mulx	r14,r13,QWORD[24+rsi]
1165	mov	rdx,r8	; implicit operand = m
1166	adcx	r12,QWORD[16+rbx]
1167	adox	r13,rax
1168	adcx	r13,QWORD[24+rbx]
1169	adox	r14,rbp
1170	lea	rsi,[32+rsi]
1171	lea	rbx,[32+rbx]
1172	adcx	r14,rbp
1173
1174	adox	r10,r15
1175	mulx	r15,rax,QWORD[rcx]
1176	adcx	r10,rax
1177	adox	r11,r15
1178	mulx	r15,rax,QWORD[8+rcx]
1179	adcx	r11,rax
1180	adox	r12,r15
1181	mulx	r15,rax,QWORD[16+rcx]
1182	mov	QWORD[((-40))+rbx],r10
1183	adcx	r12,rax
1184	adox	r13,r15
1185	mulx	r15,rax,QWORD[24+rcx]
1186	mov	rdx,r9	; implicit operand back to b[i]
1187	mov	QWORD[((-32))+rbx],r11
1188	mov	QWORD[((-24))+rbx],r12
1189	adcx	r13,rax
1190	adox	r15,rbp
1191	lea	rcx,[32+rcx]
1192	mov	QWORD[((-16))+rbx],r13
1193
1194	dec	rdi	; dec leaves CF untouched
1195	jnz	NEAR $L$mulx4x_inner
1196
1197	mov	rax,QWORD[rsp]	; rax = num*8
1198	mov	rdi,QWORD[8+rsp]	; rdi = b cursor
1199	adc	r15,rbp	; fold the pending CF into r15
1200	sub	rbp,QWORD[rbx]	; 0 - tp[num] mask: sets CF iff the previous top carry was set
1201	adc	r14,r15
1202	sbb	r15,r15	; r15 = new top-carry mask (0 or -1)
1203	mov	QWORD[((-8))+rbx],r14	; tp[num-1]
1204
1205	cmp	rdi,QWORD[16+rsp]	; all of b consumed?
1206	jne	NEAR $L$mulx4x_outer
1207
; ---- rp = tp - n, four words per iteration
1208	lea	rbx,[64+rsp]	; rbx = tp
1209	sub	rcx,rax	; rewind n
1210	neg	r15	; mask (0/-1) -> carry (0/1)
1211	mov	rdx,rax	; rdx = num*8, used as the byte counter in the copy loop
1212	shr	rax,3+2	; rax = num/4; CF = last bit shifted out (0 when num*8 is a multiple of 32)
1213	mov	rdi,QWORD[32+rsp]	; rdi = result pointer
1214	jmp	NEAR $L$mulx4x_sub
1215
1216ALIGN	32
1217$L$mulx4x_sub:
1218	mov	r11,QWORD[rbx]
1219	mov	r12,QWORD[8+rbx]
1220	mov	r13,QWORD[16+rbx]
1221	mov	r14,QWORD[24+rbx]
1222	lea	rbx,[32+rbx]
1223	sbb	r11,QWORD[rcx]
1224	sbb	r12,QWORD[8+rcx]
1225	sbb	r13,QWORD[16+rcx]
1226	sbb	r14,QWORD[24+rcx]
1227	lea	rcx,[32+rcx]
1228	mov	QWORD[rdi],r11
1229	mov	QWORD[8+rdi],r12
1230	mov	QWORD[16+rdi],r13
1231	mov	QWORD[24+rdi],r14
1232	lea	rdi,[32+rdi]
1233	dec	rax	; dec leaves CF untouched, so the borrow chains across iterations
1234	jnz	NEAR $L$mulx4x_sub
1235
1236	sbb	r15,0	; top carry minus final borrow -> select mask (0 or -1)
1237	lea	rbx,[64+rsp]	; rbx = tp again
1238	sub	rdi,rdx	; rewind the result pointer by num*8 bytes
1239
1240DB	102,73,15,110,207	; encodes: movq xmm1,r15
1241	pxor	xmm0,xmm0
1242	pshufd	xmm1,xmm1,0	; broadcast the low dword of the mask
1243	mov	rsi,QWORD[40+rsp]	; rsi = caller's rsp
1244
1245	jmp	NEAR $L$mulx4x_cond_copy
1246
1247ALIGN	32
; Branch-free select, 32 bytes per iteration:
;     rp = (tp & mask) | (rp & ~mask), zeroing tp behind the read.
1248$L$mulx4x_cond_copy:
1249	movdqa	xmm2,XMMWORD[rbx]
1250	movdqa	xmm3,XMMWORD[16+rbx]
1251	lea	rbx,[32+rbx]
1252	movdqu	xmm4,XMMWORD[rdi]
1253	movdqu	xmm5,XMMWORD[16+rdi]
1254	lea	rdi,[32+rdi]
1255	movdqa	XMMWORD[(-32)+rbx],xmm0	; wipe tp
1256	movdqa	XMMWORD[(-16)+rbx],xmm0
1257	pcmpeqd	xmm0,xmm1	; xmm0 (was zero) = per-dword (mask == 0), i.e. ~mask
1258	pand	xmm2,xmm1
1259	pand	xmm3,xmm1
1260	pand	xmm4,xmm0
1261	pand	xmm5,xmm0
1262	pxor	xmm0,xmm0	; back to zero for the next iteration's wipes
1263	por	xmm4,xmm2
1264	por	xmm5,xmm3
1265	movdqu	XMMWORD[(-32)+rdi],xmm4
1266	movdqu	XMMWORD[(-16)+rdi],xmm5
1267	sub	rdx,32
1268	jnz	NEAR $L$mulx4x_cond_copy
1269
1270	mov	QWORD[rbx],rdx	; rdx is 0 here: zero the word just past the copied tp range
1271
1272	mov	rax,1	; return value
1273	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue
1274
1275	mov	r14,QWORD[((-40))+rsi]
1276
1277	mov	r13,QWORD[((-32))+rsi]
1278
1279	mov	r12,QWORD[((-24))+rsi]
1280
1281	mov	rbp,QWORD[((-16))+rsi]
1282
1283	mov	rbx,QWORD[((-8))+rsi]
1284
1285	lea	rsp,[rsi]
1286
1287$L$mulx4x_epilogue:
1288	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1289	mov	rsi,QWORD[16+rsp]
1290	ret
1291
1292$L$SEH_end_bn_mulx4x_mont:
; NUL-terminated ASCII identification string embedded in .text; the bytes spell
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
1293	DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
1294	DB	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
1295	DB	54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
1296	DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
1297	DB	115,108,46,111,114,103,62,0
1298ALIGN	16
; ---------------------------------------------------------------------------
; mul_handler -- Win64 language-specific exception handler referenced by the
; .xdata entries for bn_mul_mont_nohw and bn_mul4x_mont.
;
; Inputs used here: r8 and r9 are pointers to two records (by the Win64
; handler convention r8 = CONTEXT* and r9 = DISPATCHER_CONTEXT*; the numeric
; offsets below follow those layouts, which are not defined in this file --
; confirm against the Windows SDK headers):
;     [r8+120] Rax   [r8+152] Rsp   [r8+192] R9   [r8+248] Rip
;     [r9+8]   ImageBase            [r9+56]  HandlerData
; HandlerData holds two image-relative addresses: {body label, epilogue label}.
;
; Logic: work out, from where Rip lies, which value is the function's
; original rsp, leave it in rax, and continue in the tails shared with
; sqr_handler ($L$common_pop_regs / $L$common_seh_tail).
; ---------------------------------------------------------------------------
1299EXTERN	__imp_RtlVirtualUnwind
1300
1301ALIGN	16
1302mul_handler:
1303	push	rsi
1304	push	rdi
1305	push	rbx
1306	push	rbp
1307	push	r12
1308	push	r13
1309	push	r14
1310	push	r15
1311	pushfq
1312	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call in the shared tail
1313
1314	mov	rax,QWORD[120+r8]	; rax = faulting context's Rax (holds the original rsp during the prologue)
1315	mov	rbx,QWORD[248+r8]	; rbx = faulting Rip
1316
1317	mov	rsi,QWORD[8+r9]	; rsi = image base
1318	mov	r11,QWORD[56+r9]	; r11 = HandlerData
1319
1320	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the body label
1321	lea	r10,[r10*1+rsi]
1322	cmp	rbx,r10
1323	jb	NEAR $L$common_seh_tail	; fault before the body: Rax is the original rsp; no saved-register reload (see review note in the header)
1324
1325	mov	rax,QWORD[152+r8]	; rax = faulting context's Rsp
1326
1327	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the epilogue label
1328	lea	r10,[r10*1+rsi]
1329	cmp	rbx,r10
1330	jae	NEAR $L$common_seh_tail	; fault at/after the epilogue label: rsp is already restored
1331
1332	mov	r10,QWORD[192+r8]	; r10 = faulting context's R9 (the word count inside the body)
1333	mov	rax,QWORD[8+r10*8+rax]	; rax = original rsp saved at [rsp+8+num*8] by the function
1334
1335	jmp	NEAR $L$common_pop_regs
1336
1337
1338
; ---------------------------------------------------------------------------
; sqr_handler -- Win64 language-specific exception handler referenced by the
; .xdata entries for bn_sqr8x_mont and bn_mulx4x_mont.  It also contains the
; two tails that mul_handler jumps into.
;
; r8 / r9 are used as in mul_handler (by convention CONTEXT* and
; DISPATCHER_CONTEXT*; numeric offsets follow those Windows layouts, which
; are not defined in this file -- confirm against the Windows SDK headers).
; HandlerData holds three image-relative addresses:
;     {prologue label (after the pushes), body label, epilogue label}.
;
;   Rip <  prologue            -> rax = context Rax, skip register reload
;   prologue <= Rip < body     -> rax = context Rax, reload pushed registers
;   body <= Rip < epilogue     -> rax = [context Rsp + 40] (saved original rsp)
;   Rip >= epilogue            -> rax = context Rsp, skip register reload
; ---------------------------------------------------------------------------
1339ALIGN	16
1340sqr_handler:
1341	push	rsi
1342	push	rdi
1343	push	rbx
1344	push	rbp
1345	push	r12
1346	push	r13
1347	push	r14
1348	push	r15
1349	pushfq
1350	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call below
1351
1352	mov	rax,QWORD[120+r8]	; rax = faulting context's Rax
1353	mov	rbx,QWORD[248+r8]	; rbx = faulting Rip
1354
1355	mov	rsi,QWORD[8+r9]	; rsi = image base
1356	mov	r11,QWORD[56+r9]	; r11 = HandlerData
1357
1358	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the prologue label
1359	lea	r10,[r10*1+rsi]
1360	cmp	rbx,r10
1361	jb	NEAR $L$common_seh_tail
1362
1363	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the body label
1364	lea	r10,[r10*1+rsi]
1365	cmp	rbx,r10
1366	jb	NEAR $L$common_pop_regs	; registers are pushed, rax still holds the original rsp
1367
1368	mov	rax,QWORD[152+r8]	; rax = faulting context's Rsp
1369
1370	mov	r10d,DWORD[8+r11]	; HandlerData[2]: RVA of the epilogue label
1371	lea	r10,[r10*1+rsi]
1372	cmp	rbx,r10
1373	jae	NEAR $L$common_seh_tail
1374
1375	mov	rax,QWORD[40+rax]	; rax = original rsp stored in the function's frame slot [rsp+40]
1376
; Shared tail 1: rax = the function's original rsp.  Read back the six
; registers its prologue pushed just below that address and write them into
; the context record (offsets 144/160/216..240 = Rbx/Rbp/R12..R15).
1377$L$common_pop_regs:
1378	mov	rbx,QWORD[((-8))+rax]
1379	mov	rbp,QWORD[((-16))+rax]
1380	mov	r12,QWORD[((-24))+rax]
1381	mov	r13,QWORD[((-32))+rax]
1382	mov	r14,QWORD[((-40))+rax]
1383	mov	r15,QWORD[((-48))+rax]
1384	mov	QWORD[144+r8],rbx
1385	mov	QWORD[160+r8],rbp
1386	mov	QWORD[216+r8],r12
1387	mov	QWORD[224+r8],r13
1388	mov	QWORD[232+r8],r14
1389	mov	QWORD[240+r8],r15
1390
; Shared tail 2: recover rdi/rsi from the home slots the function prologue
; wrote at [rsp+8]/[rsp+16], update Rsp/Rsi/Rdi in the context, copy the
; context into the dispatcher's context record, then call RtlVirtualUnwind.
1391$L$common_seh_tail:
1392	mov	rdi,QWORD[8+rax]
1393	mov	rsi,QWORD[16+rax]
1394	mov	QWORD[152+r8],rax	; context Rsp = original rsp
1395	mov	QWORD[168+r8],rsi	; context Rsi
1396	mov	QWORD[176+r8],rdi	; context Rdi
1397
1398	mov	rdi,QWORD[40+r9]	; destination: record pointed to by [r9+40]
1399	mov	rsi,r8	; source: the context passed to the handler
1400	mov	ecx,154	; 154 qwords to copy
1401	DD	0xa548f3fc	; bytes fc f3 48 a5: cld ; rep movsq
1402
; Arguments for RtlVirtualUnwind: four in rcx/rdx/r8/r9, four on the stack
; above the 32-byte shadow area.
1403	mov	rsi,r9
1404	xor	rcx,rcx	; arg1 = 0
1405	mov	rdx,QWORD[8+rsi]	; arg2 = [r9+8]  (image base)
1406	mov	r8,QWORD[rsi]	; arg3 = [r9+0]
1407	mov	r9,QWORD[16+rsi]	; arg4 = [r9+16]
1408	mov	r10,QWORD[40+rsi]
1409	lea	r11,[56+rsi]
1410	lea	r12,[24+rsi]
1411	mov	QWORD[32+rsp],r10	; arg5 = [r9+40] (context record)
1412	mov	QWORD[40+rsp],r11	; arg6 = &[r9+56]
1413	mov	QWORD[48+rsp],r12	; arg7 = &[r9+24]
1414	mov	QWORD[56+rsp],rcx	; arg8 = 0
1415	call	QWORD[__imp_RtlVirtualUnwind]
1416
1417	mov	eax,1	; handler return value 1
1418	add	rsp,64
1419	popfq
1420	pop	r15
1421	pop	r14
1422	pop	r13
1423	pop	r12
1424	pop	rbp
1425	pop	rbx
1426	pop	rdi
1427	pop	rsi
1428	ret
1429
1430
; .pdata: one three-dword record per function, each an image-relative
; address: {SEH begin label, SEH end label, matching $L$SEH_info_* record
; in .xdata}.
1431section	.pdata rdata align=4
1432ALIGN	4
1433	DD	$L$SEH_begin_bn_mul_mont_nohw wrt ..imagebase
1434	DD	$L$SEH_end_bn_mul_mont_nohw wrt ..imagebase
1435	DD	$L$SEH_info_bn_mul_mont_nohw wrt ..imagebase
1436
1437	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase
1438	DD	$L$SEH_end_bn_mul4x_mont wrt ..imagebase
1439	DD	$L$SEH_info_bn_mul4x_mont wrt ..imagebase
1440
1441	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
1442	DD	$L$SEH_end_bn_sqr8x_mont wrt ..imagebase
1443	DD	$L$SEH_info_bn_sqr8x_mont wrt ..imagebase
1444	DD	$L$SEH_begin_bn_mulx4x_mont wrt ..imagebase
1445	DD	$L$SEH_end_bn_mulx4x_mont wrt ..imagebase
1446	DD	$L$SEH_info_bn_mulx4x_mont wrt ..imagebase
; .xdata: one record per function.  Each starts with the four bytes 9,0,0,0
; (in the Win64 UNWIND_INFO layout this is version 1 with the exception-
; handler flag and no unwind codes -- confirm against the Windows
; documentation), followed by the handler's image-relative address and the
; label table that handler reads through HandlerData:
;   mul_handler records carry two labels   {body, epilogue};
;   sqr_handler records carry three labels {prologue, body, epilogue}.
1447section	.xdata rdata align=8
1448ALIGN	8
1449$L$SEH_info_bn_mul_mont_nohw:
1450	DB	9,0,0,0
1451	DD	mul_handler wrt ..imagebase
1452	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
1453$L$SEH_info_bn_mul4x_mont:
1454	DB	9,0,0,0
1455	DD	mul_handler wrt ..imagebase
1456	DD	$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
1457$L$SEH_info_bn_sqr8x_mont:
1458	DB	9,0,0,0
1459	DD	sqr_handler wrt ..imagebase
1460	DD	$L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
1461ALIGN	8
1462$L$SEH_info_bn_mulx4x_mont:
1463	DB	9,0,0,0
1464	DD	sqr_handler wrt ..imagebase
1465	DD	$L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
1466ALIGN	8
1467%else
1468; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
1469ret
1470%endif
1471