xref: /aosp_15_r20/external/boringssl/src/gen/bcm/p256_beeu-x86_64-asm-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Everything from here down to the matching %else is assembled only when NASM
; is producing win64 output.
4%ifidn __OUTPUT_FORMAT__, win64
; Make memory operands RIP-relative by default.
5default	rel
; The size keywords used in the body (e.g. XMMWORD[rsi]) are defined as empty,
; so they expand to plain bracketed memory operands.
6%define XMMWORD
7%define YMMWORD
8%define ZMMWORD
; _CET_ENDBR is defined as empty: the marker placed at function entry expands
; to nothing in this build.
9%define _CET_ENDBR
10
; Optional symbol-prefixing include, pulled in only when BORINGSSL_PREFIX is
; defined. The include's contents are not visible in this file.
11%ifdef BORINGSSL_PREFIX
12%include "boringssl_prefix_symbols_nasm.inc"
13%endif
; Code goes into a 64-byte-aligned .text section.
14section	.text code align=64
15
16
17
18
;-----------------------------------------------------------------------------
; beeu_mod_inverse_vartime
;
; ABI:   Microsoft x64 on entry. The prologue moves the incoming argument
;        registers into the System V positions used by the body.
; In:    rcx (-> rdi) = pointer to four qwords that receive the result
;        rdx (-> rsi) = pointer to four qwords, copied to stack value "B"
;        r8  (-> rdx) = pointer to four qwords, copied to stack value "A" and
;                       also used as the modulus added to X/Y and subtracted
;                       in the final reduction
; Out:   rax = 1 and four result qwords stored through the first pointer, or
;        rax = 0 (nothing stored) when A does not end up equal to 1.
; Saves: rbp, rbx, r12-r15 (push/pop); rdi, rsi (spilled to the caller's
;        shadow space and reloaded before ret).
; Uses:  xmm0, xmm1 (VEX-encoded vmovdqu loads/stores), flags, rax, rcx,
;        r8-r11.
;
; Working state (names follow the labels XB / YA / B_bigger_than_A):
;        X = rdi:r11:r10:r9:r8      five limbs, starts at 1
;        Y = rbp:r15:r14:r13:r12    five limbs, starts at 0
;        A = [16+rsp .. 47+rsp]     four limbs
;        B = [48+rsp .. 79+rsp]     four limbs
;        [rsp]                      saved result pointer
;
; Branches and loop trip counts depend on the operand values.
; NOTE(review): the symbol name and structure suggest a binary extended
; Euclidean modular inverse (out = a^-1 mod n) - confirm against the
; generating Perl script.
;-----------------------------------------------------------------------------
19global	beeu_mod_inverse_vartime
20ALIGN	32
21beeu_mod_inverse_vartime:
; rdi and rsi are callee-saved on Win64: park them in the caller-provided
; shadow space just above the return address.
22	mov	QWORD[8+rsp],rdi	;WIN64 prologue
23	mov	QWORD[16+rsp],rsi
; rax is not read again before it is overwritten further down.
24	mov	rax,rsp
25$L$SEH_begin_beeu_mod_inverse_vartime:
; Remap Win64 argument registers (rcx, rdx, r8, r9, stack) to the System V
; order (rdi, rsi, rdx, rcx, r8, r9). Only rdi, rsi and rdx are consumed as
; arguments below; rcx, r8 and r9 are overwritten before being read.
26	mov	rdi,rcx
27	mov	rsi,rdx
28	mov	rdx,r8
29	mov	rcx,r9
30	mov	r8,QWORD[40+rsp]
31	mov	r9,QWORD[48+rsp]
32
33
34
35_CET_ENDBR
; Save the registers the body clobbers. Popped in reverse order at
; $L$beeu_finish.
36	push	rbp
37
38	push	r12
39
40	push	r13
41
42	push	r14
43
44	push	r15
45
46	push	rbx
47
48	push	rsi
49
50
; 80 bytes of locals: result pointer at [rsp], A at [16+rsp], B at [48+rsp].
51	sub	rsp,80
52
; Keep the result pointer; rdi is reused as the top limb of X.
53	mov	QWORD[rsp],rdi
54
55
; X = 1 (r8 low limb, rdi top limb).
56	mov	r8,1
57	xor	r9,r9
58	xor	r10,r10
59	xor	r11,r11
60	xor	rdi,rdi
61
; Y = 0 (r12 low limb, rbp top limb).
62	xor	r12,r12
63	xor	r13,r13
64	xor	r14,r14
65	xor	r15,r15
66	xor	rbp,rbp
67
68
; B = the 32 bytes at [rsi]. rsi is free for scratch use after this copy.
69	vmovdqu	xmm0,XMMWORD[rsi]
70	vmovdqu	xmm1,XMMWORD[16+rsi]
71	vmovdqu	XMMWORD[48+rsp],xmm0
72	vmovdqu	XMMWORD[64+rsp],xmm1
73
; A = the 32 bytes at [rdx]. rdx keeps pointing at the caller's copy, which
; serves as the modulus for the rest of the routine.
74	vmovdqu	xmm0,XMMWORD[rdx]
75	vmovdqu	xmm1,XMMWORD[16+rdx]
76	vmovdqu	XMMWORD[16+rsp],xmm0
77	vmovdqu	XMMWORD[32+rsp],xmm1
78
; ---- Main loop: runs until B == 0 ------------------------------------------
79$L$beeu_loop:
; OR the four limbs of B together; ZF from the last OR tells whether B is zero.
80	xor	rbx,rbx
81	or	rbx,QWORD[48+rsp]
82	or	rbx,QWORD[56+rsp]
83	or	rbx,QWORD[64+rsp]
84	or	rbx,QWORD[72+rsp]
85	jz	NEAR $L$beeu_loop_end
86
87
88
89
90
91
92
93
94
95
; rcx is a one-hot mask that walks up the low limb of B looking for its lowest
; set bit. X is halved modulo the modulus once for every zero bit passed.
96	mov	rcx,1
97
98
99$L$beeu_shift_loop_XB:
100	mov	rbx,rcx
101	and	rbx,QWORD[48+rsp]
102	jnz	NEAR $L$beeu_shift_loop_end_XB
103
104
; If X is odd, add the modulus first so the right shift below drops a zero
; bit. The carry out of the four-limb add lands in the fifth limb (rdi).
105	mov	rbx,1
106	and	rbx,r8
107	jz	NEAR $L$shift1_0
108	add	r8,QWORD[rdx]
109	adc	r9,QWORD[8+rdx]
110	adc	r10,QWORD[16+rdx]
111	adc	r11,QWORD[24+rdx]
112	adc	rdi,0
113
; X >>= 1 across all five limbs.
114$L$shift1_0:
115	shrd	r8,r9,1
116	shrd	r9,r10,1
117	shrd	r10,r11,1
118	shrd	r11,rdi,1
119	shr	rdi,1
120
; Advance the mask to the next bit.
121	shl	rcx,1
122
123
124
125
126
; Stop scanning once the mask reaches bit 27 (0x8000000 == 1 << 27), so a
; single pass performs at most 27 halvings and the shift count stays small.
127	cmp	rcx,0x8000000
128	jne	NEAR $L$beeu_shift_loop_XB
129
130$L$beeu_shift_loop_end_XB:
; Convert the one-hot mask to a bit index, i.e. the number of halvings applied
; to X. rcx is never zero here, so bsf is well defined.
131	bsf	rcx,rcx
132	test	rcx,rcx
133	jz	NEAR $L$beeu_no_shift_XB
134
135
136
; B >>= cl. The upper three limbs are loaded first so each shrd pulls in the
; pre-shift value of the limb above it.
137	mov	rax,QWORD[((8+48))+rsp]
138	mov	rbx,QWORD[((16+48))+rsp]
139	mov	rsi,QWORD[((24+48))+rsp]
140
141	shrd	QWORD[((0+48))+rsp],rax,cl
142	shrd	QWORD[((8+48))+rsp],rbx,cl
143	shrd	QWORD[((16+48))+rsp],rsi,cl
144
145	shr	rsi,cl
146	mov	QWORD[((24+48))+rsp],rsi
147
148
149$L$beeu_no_shift_XB:
150
; Same procedure for the (Y, A) pair: scan the low limb of A for its lowest
; set bit while halving Y modulo the modulus.
151	mov	rcx,1
152
153
154$L$beeu_shift_loop_YA:
155	mov	rbx,rcx
156	and	rbx,QWORD[16+rsp]
157	jnz	NEAR $L$beeu_shift_loop_end_YA
158
159
; If Y is odd, add the modulus (carry into rbp) before halving.
160	mov	rbx,1
161	and	rbx,r12
162	jz	NEAR $L$shift1_1
163	add	r12,QWORD[rdx]
164	adc	r13,QWORD[8+rdx]
165	adc	r14,QWORD[16+rdx]
166	adc	r15,QWORD[24+rdx]
167	adc	rbp,0
168
; Y >>= 1 across all five limbs.
169$L$shift1_1:
170	shrd	r12,r13,1
171	shrd	r13,r14,1
172	shrd	r14,r15,1
173	shrd	r15,rbp,1
174	shr	rbp,1
175
176	shl	rcx,1
177
178
179
180
181
; Same cap as the XB loop: stop when the mask reaches bit 27.
182	cmp	rcx,0x8000000
183	jne	NEAR $L$beeu_shift_loop_YA
184
185$L$beeu_shift_loop_end_YA:
; rcx = number of halvings applied to Y; skip the shift of A when it is zero.
186	bsf	rcx,rcx
187	test	rcx,rcx
188	jz	NEAR $L$beeu_no_shift_YA
189
190
191
; A >>= cl, using the same limb-by-limb shrd pattern as for B.
192	mov	rax,QWORD[((8+16))+rsp]
193	mov	rbx,QWORD[((16+16))+rsp]
194	mov	rsi,QWORD[((24+16))+rsp]
195
196	shrd	QWORD[((0+16))+rsp],rax,cl
197	shrd	QWORD[((8+16))+rsp],rbx,cl
198	shrd	QWORD[((16+16))+rsp],rsi,cl
199
200	shr	rsi,cl
201	mov	QWORD[((24+16))+rsp],rsi
202
203
204$L$beeu_no_shift_YA:
205
; Tentatively compute B - A in rcx:rsi:rbx:rax. No borrow means B >= A (the
; branch is also taken when the two are equal).
206	mov	rax,QWORD[48+rsp]
207	mov	rbx,QWORD[56+rsp]
208	mov	rsi,QWORD[64+rsp]
209	mov	rcx,QWORD[72+rsp]
210	sub	rax,QWORD[16+rsp]
211	sbb	rbx,QWORD[24+rsp]
212	sbb	rsi,QWORD[32+rsp]
213	sbb	rcx,QWORD[40+rsp]
214	jnc	NEAR $L$beeu_B_bigger_than_A
215
216
; B < A: discard the tentative difference and set A = A - B.
217	mov	rax,QWORD[16+rsp]
218	mov	rbx,QWORD[24+rsp]
219	mov	rsi,QWORD[32+rsp]
220	mov	rcx,QWORD[40+rsp]
221	sub	rax,QWORD[48+rsp]
222	sbb	rbx,QWORD[56+rsp]
223	sbb	rsi,QWORD[64+rsp]
224	sbb	rcx,QWORD[72+rsp]
225	mov	QWORD[16+rsp],rax
226	mov	QWORD[24+rsp],rbx
227	mov	QWORD[32+rsp],rsi
228	mov	QWORD[40+rsp],rcx
229
230
; ... and Y = Y + X (five limbs).
231	add	r12,r8
232	adc	r13,r9
233	adc	r14,r10
234	adc	r15,r11
235	adc	rbp,rdi
236	jmp	NEAR $L$beeu_loop
237
238$L$beeu_B_bigger_than_A:
239
; B >= A: commit the difference already held in registers, B = B - A.
240	mov	QWORD[48+rsp],rax
241	mov	QWORD[56+rsp],rbx
242	mov	QWORD[64+rsp],rsi
243	mov	QWORD[72+rsp],rcx
244
245
; ... and X = X + Y (five limbs).
246	add	r8,r12
247	adc	r9,r13
248	adc	r10,r14
249	adc	r11,r15
250	adc	rdi,rbp
251
252	jmp	NEAR $L$beeu_loop
253
; ---- B == 0: check the result and reduce Y ---------------------------------
254$L$beeu_loop_end:
255
256
257
258
; Succeed only if A == 1: (A[0] - 1) OR-ed with the three upper limbs must be
; zero. A[0] == 0 wraps to all ones and is therefore rejected as well.
259	mov	rbx,QWORD[16+rsp]
260	sub	rbx,1
261	or	rbx,QWORD[24+rsp]
262	or	rbx,QWORD[32+rsp]
263	or	rbx,QWORD[40+rsp]
264
265	jnz	NEAR $L$beeu_err
266
267
268
269
; Load the modulus into r11:r10:r9:r8. X is no longer needed at this point.
; rdi is cleared here and is not read again before being reloaded at
; $L$beeu_save.
270	mov	r8,QWORD[rdx]
271	mov	r9,QWORD[8+rdx]
272	mov	r10,QWORD[16+rdx]
273	mov	r11,QWORD[24+rdx]
274	xor	rdi,rdi
275
; Repeatedly subtract the modulus from the five-limb Y until the subtraction
; borrows, then undo that last subtraction on the low four limbs.
276$L$beeu_reduction_loop:
; Snapshot Y so the last subtraction can be undone. This reuses the stack
; slots of A and the low limb of B, which are no longer needed. The rbp copy
; at [48+rsp] is stored but never read back in the visible code.
277	mov	QWORD[16+rsp],r12
278	mov	QWORD[24+rsp],r13
279	mov	QWORD[32+rsp],r14
280	mov	QWORD[40+rsp],r15
281	mov	QWORD[48+rsp],rbp
282
283
; Y -= modulus; CF after the final sbb reports a borrow out of the top limb.
284	sub	r12,r8
285	sbb	r13,r9
286	sbb	r14,r10
287	sbb	r15,r11
288	sbb	rbp,0
289
290
; On borrow restore the low four limbs from the snapshot. cmovc leaves the
; flags untouched, so jnc still tests the borrow from the sbb above.
291	cmovc	r12,QWORD[16+rsp]
292	cmovc	r13,QWORD[24+rsp]
293	cmovc	r14,QWORD[32+rsp]
294	cmovc	r15,QWORD[40+rsp]
295	jnc	NEAR $L$beeu_reduction_loop
296
297
; Result = modulus - Y (four limbs), left in r11:r10:r9:r8.
298	sub	r8,r12
299	sbb	r9,r13
300	sbb	r10,r14
301	sbb	r11,r15
302
303$L$beeu_save:
304
; Reload the result pointer saved in the prologue and store the four limbs.
305	mov	rdi,QWORD[rsp]
306
307	mov	QWORD[rdi],r8
308	mov	QWORD[8+rdi],r9
309	mov	QWORD[16+rdi],r10
310	mov	QWORD[24+rdi],r11
311
312
; Return 1 (success).
313	mov	rax,1
314	jmp	NEAR $L$beeu_finish
315
316$L$beeu_err:
317
; Return 0 (A != 1); the output buffer is left untouched.
318	xor	rax,rax
319
320$L$beeu_finish:
; Release the locals and restore registers in the reverse of the push order.
321	add	rsp,80
322
323	pop	rsi
324
325	pop	rbx
326
327	pop	r15
328
329	pop	r14
330
331	pop	r13
332
333	pop	r12
334
335	pop	rbp
336
; rsp is back at its entry value: reload the caller's rdi and rsi from the
; shadow-space slots written at the top of the function.
337	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
338	mov	rsi,QWORD[16+rsp]
339	ret
340
341
342$L$SEH_end_beeu_mod_inverse_vartime:
; For any output format other than win64 the whole body above is skipped and
; this lone `ret` is the only instruction emitted.
343%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
345ret
346%endif
347