xref: /aosp_15_r20/external/boringssl/src/gen/bcm/ghash-x86-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol prefixing: the include is only pulled in when
; BORINGSSL_PREFIX is defined (presumably it renames the exported symbols;
; confirm against boringssl_prefix_symbols_nasm.inc).
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Everything from here to the matching %else is assembled only when NASM's
; output format is win32.
7%ifidn __OUTPUT_FORMAT__, win32
; Section selection by output format. Inside the win32 guard above, the `obj`
; test cannot match, so the win32 branch is the one that is taken.
8%ifidn __OUTPUT_FORMAT__,obj
9section	code	use32 class=code align=64
10%elifidn __OUTPUT_FORMAT__,win32
; Defines the absolute symbol @feat.00 with bit 0 set. NOTE(review): this is
; the marker NASM documents for flagging a win32 object as safe-SEH
; compatible; confirm against the NASM manual.
11$@feat.00 equ 1
12section	.text	code align=64
13%else
14section	.text	code
15%endif
;-----------------------------------------------------------------------
; _gcm_init_clmul
; 32-bit x86, arguments read from the stack.
;   [4+esp] -> edx : destination; 48 bytes are written at [edx]..[47+edx]
;   [8+esp] -> eax : source; 16 bytes are read from [eax]
; Writes:  eax, ecx, edx, xmm0-xmm5 (nothing is saved or restored)
;
; Output layout produced below:
;   [edx]    : the input with its 64-bit halves swapped, shifted left by
;              one bit across all 128 bits, then XORed with the constant at
;              L$bswap+16 when the top bit of the swapped value was set
;   [16+edx] : carry-less product of that value with itself, reduced to
;              128 bits
;   [32+edx] : low qword = lo^hi of the first value,
;              high qword = lo^hi of the second value
; PCLMULQDQ / PALIGNR are emitted as raw `db` bytes; the decoded instruction
; is given next to each one.
;-----------------------------------------------------------------------
16global	_gcm_init_clmul
17align	16
18_gcm_init_clmul:
19L$_gcm_init_clmul_begin:
20	mov	edx,DWORD [4+esp]
21	mov	eax,DWORD [8+esp]
; Position-independent address of the constant pool: call pushes the address
; of L$000pic, pop retrieves it, lea adds the link-time distance to L$bswap.
22	call	L$000pic
23L$000pic:
24	pop	ecx
25	lea	ecx,[(L$bswap-L$000pic)+ecx]
26	movdqu	xmm2,[eax]
27	pshufd	xmm2,xmm2,78	; swap the two 64-bit halves
28	pshufd	xmm4,xmm2,255	; broadcast the top dword to all four lanes
; 128-bit left shift by one: shift each qword, then carry bit 63 of the low
; qword into bit 0 of the high qword.
29	movdqa	xmm3,xmm2
30	psllq	xmm2,1
31	pxor	xmm5,xmm5
32	psrlq	xmm3,63
33	pcmpgtd	xmm5,xmm4	; all-ones if the top bit (before the shift) was set, else zero
34	pslldq	xmm3,8
35	por	xmm2,xmm3
36	pand	xmm5,[16+ecx]	; keep the constant at L$bswap+16 only when the mask is set
37	pxor	xmm2,xmm5
; Carry-less multiply xmm0 * xmm2 (both hold the same value here) using three
; 64x64 multiplications (Karatsuba form).
38	movdqa	xmm0,xmm2
39	movdqa	xmm1,xmm0
40	pshufd	xmm3,xmm0,78
41	pshufd	xmm4,xmm2,78
42	pxor	xmm3,xmm0	; xmm3 = lo^hi of xmm0 (in both qwords)
43	pxor	xmm4,xmm2	; xmm4 = lo^hi of xmm2 (in both qwords)
44db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00  (lo * lo)
45db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11  (hi * hi)
46db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00  ((lo^hi) * (lo^hi))
47	xorps	xmm3,xmm0
48	xorps	xmm3,xmm1	; xmm3 = middle term
49	movdqa	xmm4,xmm3
50	psrldq	xmm3,8
51	pslldq	xmm4,8
52	pxor	xmm1,xmm3
53	pxor	xmm0,xmm4	; xmm1:xmm0 = 256-bit product
; Reduction, first phase: per-qword x<<57 ^ x<<62 ^ x<<63 built from the
; 5/1/57 shift chain, then split across the qword boundary.
54	movdqa	xmm4,xmm0
55	movdqa	xmm3,xmm0
56	psllq	xmm0,5
57	pxor	xmm3,xmm0
58	psllq	xmm0,1
59	pxor	xmm0,xmm3
60	psllq	xmm0,57
61	movdqa	xmm3,xmm0
62	pslldq	xmm0,8
63	psrldq	xmm3,8
64	pxor	xmm0,xmm4
65	pxor	xmm1,xmm3
; Reduction, second phase: x ^ x>>1 ^ x>>2 ^ x>>7 per qword, folded with the
; high half in xmm1. The 128-bit result is left in xmm0.
66	movdqa	xmm4,xmm0
67	psrlq	xmm0,1
68	pxor	xmm1,xmm4
69	pxor	xmm4,xmm0
70	psrlq	xmm0,5
71	pxor	xmm0,xmm4
72	psrlq	xmm0,1
73	pxor	xmm0,xmm1
; Store both values plus their lo^hi halves packed into one 16-byte slot.
74	pshufd	xmm3,xmm2,78
75	pshufd	xmm4,xmm0,78
76	pxor	xmm3,xmm2	; lo^hi of the first value
77	movdqu	[edx],xmm2
78	pxor	xmm4,xmm0	; lo^hi of the squared value
79	movdqu	[16+edx],xmm0
80db	102,15,58,15,227,8	; palignr xmm4,xmm3,8  (low qword from xmm3, high qword from xmm4)
81	movdqu	[32+edx],xmm4
82	ret
;-----------------------------------------------------------------------
; _gcm_gmult_clmul
; 32-bit x86, arguments read from the stack.
;   [4+esp] -> eax : 16-byte value, read from [eax] and overwritten in place
;   [8+esp] -> edx : table; 16 bytes are read at [edx] and at [32+edx]
;                    (the layout stored by _gcm_init_clmul in this file)
; Writes:  eax, ecx, edx, xmm0-xmm5 (nothing is saved or restored)
;
; Byte-reverses the value, carry-less multiplies it by the 128-bit value at
; [edx], reduces the 256-bit product to 128 bits, byte-reverses again and
; stores the result back to [eax].
; PSHUFB / PCLMULQDQ are emitted as raw `db` bytes; the decoded instruction
; is given next to each one.
;-----------------------------------------------------------------------
83global	_gcm_gmult_clmul
84align	16
85_gcm_gmult_clmul:
86L$_gcm_gmult_clmul_begin:
87	mov	eax,DWORD [4+esp]
88	mov	edx,DWORD [8+esp]
; Position-independent address of L$bswap (call/pop yields the address of
; L$001pic; lea adds the link-time distance).
89	call	L$001pic
90L$001pic:
91	pop	ecx
92	lea	ecx,[(L$bswap-L$001pic)+ecx]
93	movdqu	xmm0,[eax]
94	movdqa	xmm5,[ecx]	; xmm5 = byte-reversal mask at L$bswap
95	movups	xmm2,[edx]	; xmm2 = multiplier
96db	102,15,56,0,197	; pshufb xmm0,xmm5  (reverse the 16 bytes of the value)
97	movups	xmm4,[32+edx]	; low qword is used below as lo^hi of the multiplier
; Carry-less multiply xmm0 * xmm2 using three 64x64 multiplications.
98	movdqa	xmm1,xmm0
99	pshufd	xmm3,xmm0,78
100	pxor	xmm3,xmm0	; xmm3 = lo^hi of the value
101db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00  (lo * lo)
102db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11  (hi * hi)
103db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00  (low qword of xmm3 * low qword of xmm4)
104	xorps	xmm3,xmm0
105	xorps	xmm3,xmm1	; xmm3 = middle term
106	movdqa	xmm4,xmm3
107	psrldq	xmm3,8
108	pslldq	xmm4,8
109	pxor	xmm1,xmm3
110	pxor	xmm0,xmm4	; xmm1:xmm0 = 256-bit product
; Reduction, first phase: per-qword x<<57 ^ x<<62 ^ x<<63.
111	movdqa	xmm4,xmm0
112	movdqa	xmm3,xmm0
113	psllq	xmm0,5
114	pxor	xmm3,xmm0
115	psllq	xmm0,1
116	pxor	xmm0,xmm3
117	psllq	xmm0,57
118	movdqa	xmm3,xmm0
119	pslldq	xmm0,8
120	psrldq	xmm3,8
121	pxor	xmm0,xmm4
122	pxor	xmm1,xmm3
; Reduction, second phase: x ^ x>>1 ^ x>>2 ^ x>>7 per qword, folded with the
; high half in xmm1. The 128-bit result is left in xmm0.
123	movdqa	xmm4,xmm0
124	psrlq	xmm0,1
125	pxor	xmm1,xmm4
126	pxor	xmm4,xmm0
127	psrlq	xmm0,5
128	pxor	xmm0,xmm4
129	psrlq	xmm0,1
130	pxor	xmm0,xmm1
131db	102,15,56,0,197	; pshufb xmm0,xmm5  (back to the original byte order)
132	movdqu	[eax],xmm0
133	ret
;-----------------------------------------------------------------------
; _gcm_ghash_clmul
; 32-bit x86, arguments read from the stack (offsets below are after the
; four pushes).
;   [20+esp] -> eax : 16-byte accumulator, read from [eax] and overwritten
;   [24+esp] -> edx : table; 16-byte entries are read at [edx], [16+edx] and
;                     [32+edx] (the layout stored by _gcm_init_clmul in this
;                     file)
;   [28+esp] -> esi : input data
;   [32+esp] -> ebx : input length in bytes
; Saves/restores ebp, ebx, esi, edi (ebp and edi are not otherwise used).
; Writes: eax, ecx, edx, ebx, esi, xmm0-xmm7, flags.
;
; Flow: a length of exactly 16 goes straight to the single-block tail.
; Otherwise input is consumed 32 bytes at a time (the first pair before the
; loop, further pairs inside L$005mod_loop), and one trailing 16-byte block,
; if any, is handled by L$003odd_tail.
; NOTE(review): the length is presumably a non-zero multiple of 16; a length
; of 0 is not special-cased here. Confirm against the callers.
; PSHUFB / PCLMULQDQ are emitted as raw `db` bytes; the decoded instruction
; is given next to each one.
;-----------------------------------------------------------------------
134global	_gcm_ghash_clmul
135align	16
136_gcm_ghash_clmul:
137L$_gcm_ghash_clmul_begin:
138	push	ebp
139	push	ebx
140	push	esi
141	push	edi
142	mov	eax,DWORD [20+esp]
143	mov	edx,DWORD [24+esp]
144	mov	esi,DWORD [28+esp]
145	mov	ebx,DWORD [32+esp]
; Position-independent address of L$bswap (call/pop yields the address of
; L$002pic; lea adds the link-time distance).
146	call	L$002pic
147L$002pic:
148	pop	ecx
149	lea	ecx,[(L$bswap-L$002pic)+ecx]
150	movdqu	xmm0,[eax]	; xmm0 = accumulator
151	movdqa	xmm5,[ecx]	; xmm5 = byte-reversal mask at L$bswap
152	movdqu	xmm2,[edx]	; xmm2 = table entry at [edx]
153db	102,15,56,0,197	; pshufb xmm0,xmm5
154	sub	ebx,16
155	jz	NEAR L$003odd_tail	; exactly one block of input
; First pair of blocks: byte-reverse both, XOR the first into the
; accumulator, and start multiplying the second by the entry at [edx].
156	movdqu	xmm3,[esi]
157	movdqu	xmm6,[16+esi]
158db	102,15,56,0,221	; pshufb xmm3,xmm5
159db	102,15,56,0,245	; pshufb xmm6,xmm5
160	movdqu	xmm5,[32+edx]	; xmm5 now holds the packed lo^hi entry (mask is reloaded later)
161	pxor	xmm0,xmm3
162	pshufd	xmm3,xmm6,78
163	movdqa	xmm7,xmm6
164	pxor	xmm3,xmm6	; xmm3 = lo^hi of the second block
165	lea	esi,[32+esi]
166db	102,15,58,68,242,0	; pclmulqdq xmm6,xmm2,0x00  (block lo * [edx] lo)
167db	102,15,58,68,250,17	; pclmulqdq xmm7,xmm2,0x11  (block hi * [edx] hi)
168db	102,15,58,68,221,0	; pclmulqdq xmm3,xmm5,0x00  (low qword of xmm3 * low qword of [32+edx])
169	movups	xmm2,[16+edx]	; xmm2 = table entry at [16+edx] for the accumulator multiply
170	nop
171	sub	ebx,32
172	jbe	NEAR L$004even_tail	; no further full pair of blocks remains
173	jmp	NEAR L$005mod_loop
174align	32
; Main loop, software-pipelined: multiplies the accumulator by the entry at
; [16+edx], merges the pending products of the previous second block
; (xmm6/xmm7/xmm3), reduces, and meanwhile loads the next pair and starts the
; products of its second block.
175L$005mod_loop:
176	pshufd	xmm4,xmm0,78
177	movdqa	xmm1,xmm0
178	pxor	xmm4,xmm0	; xmm4 = lo^hi of the accumulator
179	nop
180db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00  (acc lo * [16+edx] lo)
181db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11  (acc hi * [16+edx] hi)
182db	102,15,58,68,229,16	; pclmulqdq xmm4,xmm5,0x10  (low qword of xmm4 * high qword of [32+edx])
183	movups	xmm2,[edx]
184	xorps	xmm0,xmm6	; merge pending low product
185	movdqa	xmm5,[ecx]	; reload the byte-reversal mask
186	xorps	xmm1,xmm7	; merge pending high product
187	movdqu	xmm7,[esi]	; next pair, first block
188	pxor	xmm3,xmm0
189	movdqu	xmm6,[16+esi]	; next pair, second block
190	pxor	xmm3,xmm1
191db	102,15,56,0,253	; pshufb xmm7,xmm5
192	pxor	xmm4,xmm3	; xmm4 = combined middle term
193	movdqa	xmm3,xmm4
194	psrldq	xmm4,8
195	pslldq	xmm3,8
196	pxor	xmm1,xmm4
197	pxor	xmm0,xmm3	; xmm1:xmm0 = 256-bit sum of products
198db	102,15,56,0,245	; pshufb xmm6,xmm5
199	pxor	xmm1,xmm7	; next first block rides along in xmm1, which is XORed into xmm0 at the end of the reduction
200	movdqa	xmm7,xmm6
; Reduction, first phase (x<<57 ^ x<<62 ^ x<<63 per qword), interleaved with
; the next block's multiplications.
201	movdqa	xmm4,xmm0
202	movdqa	xmm3,xmm0
203	psllq	xmm0,5
204	pxor	xmm3,xmm0
205	psllq	xmm0,1
206	pxor	xmm0,xmm3
207db	102,15,58,68,242,0	; pclmulqdq xmm6,xmm2,0x00  (next block lo * [edx] lo)
208	movups	xmm5,[32+edx]
209	psllq	xmm0,57
210	movdqa	xmm3,xmm0
211	pslldq	xmm0,8
212	psrldq	xmm3,8
213	pxor	xmm0,xmm4
214	pxor	xmm1,xmm3
; Reduction, second phase (x ^ x>>1 ^ x>>2 ^ x>>7 per qword, folded with
; xmm1), still interleaved.
215	pshufd	xmm3,xmm7,78
216	movdqa	xmm4,xmm0
217	psrlq	xmm0,1
218	pxor	xmm3,xmm7	; xmm3 = lo^hi of the next second block
219	pxor	xmm1,xmm4
220db	102,15,58,68,250,17	; pclmulqdq xmm7,xmm2,0x11  (next block hi * [edx] hi)
221	movups	xmm2,[16+edx]
222	pxor	xmm4,xmm0
223	psrlq	xmm0,5
224	pxor	xmm0,xmm4
225	psrlq	xmm0,1
226	pxor	xmm0,xmm1
227db	102,15,58,68,221,0	; pclmulqdq xmm3,xmm5,0x00  (low qword of xmm3 * low qword of [32+edx])
228	lea	esi,[32+esi]
229	sub	ebx,32
230	ja	NEAR L$005mod_loop	; loop while more than 32 bytes were left before this subtraction
; Finish the pair that is still in flight: same multiply/merge/reduce as the
; loop body, without loading any further input.
231L$004even_tail:
232	pshufd	xmm4,xmm0,78
233	movdqa	xmm1,xmm0
234	pxor	xmm4,xmm0
235db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
236db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
237db	102,15,58,68,229,16	; pclmulqdq xmm4,xmm5,0x10
238	movdqa	xmm5,[ecx]	; reload the byte-reversal mask
239	xorps	xmm0,xmm6
240	xorps	xmm1,xmm7
241	pxor	xmm3,xmm0
242	pxor	xmm3,xmm1
243	pxor	xmm4,xmm3
244	movdqa	xmm3,xmm4
245	psrldq	xmm4,8
246	pslldq	xmm3,8
247	pxor	xmm1,xmm4
248	pxor	xmm0,xmm3
249	movdqa	xmm4,xmm0
250	movdqa	xmm3,xmm0
251	psllq	xmm0,5
252	pxor	xmm3,xmm0
253	psllq	xmm0,1
254	pxor	xmm0,xmm3
255	psllq	xmm0,57
256	movdqa	xmm3,xmm0
257	pslldq	xmm0,8
258	psrldq	xmm3,8
259	pxor	xmm0,xmm4
260	pxor	xmm1,xmm3
261	movdqa	xmm4,xmm0
262	psrlq	xmm0,1
263	pxor	xmm1,xmm4
264	pxor	xmm4,xmm0
265	psrlq	xmm0,5
266	pxor	xmm0,xmm4
267	psrlq	xmm0,1
268	pxor	xmm0,xmm1
; ebx == 0 here means exactly one 16-byte block is left; any other value
; means the input is exhausted.
269	test	ebx,ebx
270	jnz	NEAR L$006done
271	movups	xmm2,[edx]	; the single-block tail multiplies by the entry at [edx]
; Single block: XOR it into the accumulator, multiply by xmm2, reduce.
272L$003odd_tail:
273	movdqu	xmm3,[esi]
274db	102,15,56,0,221	; pshufb xmm3,xmm5
275	pxor	xmm0,xmm3
276	movdqa	xmm1,xmm0
277	pshufd	xmm3,xmm0,78
278	pshufd	xmm4,xmm2,78
279	pxor	xmm3,xmm0	; xmm3 = lo^hi of the accumulator
280	pxor	xmm4,xmm2	; xmm4 = lo^hi of the multiplier
281db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00  (lo * lo)
282db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11  (hi * hi)
283db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00  ((lo^hi) * (lo^hi))
284	xorps	xmm3,xmm0
285	xorps	xmm3,xmm1
286	movdqa	xmm4,xmm3
287	psrldq	xmm3,8
288	pslldq	xmm4,8
289	pxor	xmm1,xmm3
290	pxor	xmm0,xmm4
291	movdqa	xmm4,xmm0
292	movdqa	xmm3,xmm0
293	psllq	xmm0,5
294	pxor	xmm3,xmm0
295	psllq	xmm0,1
296	pxor	xmm0,xmm3
297	psllq	xmm0,57
298	movdqa	xmm3,xmm0
299	pslldq	xmm0,8
300	psrldq	xmm3,8
301	pxor	xmm0,xmm4
302	pxor	xmm1,xmm3
303	movdqa	xmm4,xmm0
304	psrlq	xmm0,1
305	pxor	xmm1,xmm4
306	pxor	xmm4,xmm0
307	psrlq	xmm0,5
308	pxor	xmm0,xmm4
309	psrlq	xmm0,1
310	pxor	xmm0,xmm1
; Restore the original byte order, write the accumulator back, and pop the
; saved registers in reverse push order.
311L$006done:
312db	102,15,56,0,197	; pshufb xmm0,xmm5
313	movdqu	[eax],xmm0
314	pop	edi
315	pop	esi
316	pop	ebx
317	pop	ebp
318	ret
; Constant pool. The functions above locate it position-independently
; (call/pop/lea) and read it with movdqa/pand memory operands, so it is kept
; aligned; 64-byte alignment is requested here.
319align	64
320L$bswap:
; +0: PSHUFB control bytes 15..0, i.e. reverse the 16 bytes of a register.
321db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; +16: 128-bit constant with lowest byte 1 and highest byte 194 (0xC2); it is
; ANDed with a mask and XORed into the value in _gcm_init_clmul.
322db	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
; NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
323db	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
324db	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
325db	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
326db	0
; Any output format other than win32: none of the code or data above is
; assembled, only the single instruction below.
327%else
328; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
; NOTE(review): the lone `ret` presumably exists so the assembled output is
; not empty (see the bug linked above); confirm before removing it.
329ret
330%endif
331