xref: /aosp_15_r20/external/boringssl/src/gen/bcm/ghash-ssse3-x86-win.asm (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Everything from here to the matching %else near the end of the file is
; assembled only when NASM's output format is win32.
7%ifidn __OUTPUT_FORMAT__, win32
; Section selection. Because of the win32-only guard directly above, only the
; %elifidn ...,win32 branch below can actually be taken here; the obj and
; %else branches are unreachable under that guard.
8%ifidn __OUTPUT_FORMAT__,obj
9section	code	use32 class=code align=64
10%elifidn __OUTPUT_FORMAT__,win32
; Per the NASM manual, defining the absolute symbol @feat.00 with value 1
; marks the object file as safe-exception-handler (SAFESEH) aware.
11$@feat.00 equ 1
12section	.text	code align=64
13%else
14section	.text	code
15%endif
; ---------------------------------------------------------------------------
; _gcm_gmult_ssse3 -- 32-bit x86, arguments on the stack, caller cleans up
;                     (plain ret).
;
; In:    [esp+4] -> 16-byte block; read with movdqu and overwritten in place
;                   with the result (no alignment requirement).
;        [esp+8] -> table read as 16 rows of 16 bytes (5 + 5 + 6 rows, 256
;                   bytes in total) with movdqa, so it must be 16-byte aligned.
; Out:   the 16-byte block at [esp+4] holds the result.
; Saved: ebp, ebx, esi, edi (pushed on entry, popped on exit).
; Clobb: eax, flags, xmm0-xmm7. xmm0-xmm6 are zeroed before returning;
;        xmm7 still holds the byte-reversal mask.
;
; NOTE(review): going by the name, this is presumably the GHASH
; "multiply the state by H" step, with the table precomputed from H --
; confirm against the C prototype.
;
; The db lines are hand-encoded SSSE3 instructions; the decoded instruction
; is given beside each one.
; ---------------------------------------------------------------------------
16global	_gcm_gmult_ssse3
17align	16
18_gcm_gmult_ssse3:
19L$_gcm_gmult_ssse3_begin:
; Preserve ebp/ebx/esi/edi for the caller. (ebp and ebx are not otherwise
; used in this function.)
20	push	ebp
21	push	ebx
22	push	esi
23	push	edi
; Four pushes plus the return address put the first argument at esp+20.
24	mov	edi,DWORD [20+esp]	; edi = arg 1: block pointer
25	mov	esi,DWORD [24+esp]	; esi = arg 2: table pointer
26	movdqu	xmm0,[edi]	; xmm0 = input block
; call/pop leaves the address of L$000pic_point in eax so the constants can
; be addressed relative to it (position-independent).
27	call	L$000pic_point
28L$000pic_point:
29	pop	eax
30	movdqa	xmm7,[(L$reverse_bytes-L$000pic_point)+eax]	; xmm7 = byte-reversal shuffle mask
31	movdqa	xmm2,[(L$low4_mask-L$000pic_point)+eax]	; xmm2 = 0x0f in every byte
32db	102,15,56,0,199	; pshufb xmm0,xmm7 -- reverse the byte order of the block
; Split every byte into nibbles:
;   xmm1 = high nibble of each byte, moved down to bits 0-3
;   xmm0 = low nibble of each byte
33	movdqa	xmm1,xmm2
34	pandn	xmm1,xmm0	; xmm1 = xmm0 AND NOT mask
35	psrld	xmm1,4
36	pand	xmm0,xmm2
; xmm2 = accumulator, xmm3 = carry register that receives what is shifted
; out of the bottom of xmm2. eax is now reused as the row counter.
37	pxor	xmm2,xmm2
38	pxor	xmm3,xmm3
39	mov	eax,5	; pass 1: 5 table rows
; One iteration per 16-byte table row.
40L$001loop_row_1:
41	movdqa	xmm4,[esi]	; xmm4 = current row
42	lea	esi,[16+esi]	; advance to the next row
; Shift the 256-bit value xmm2:xmm3 right by one byte.
43	movdqa	xmm6,xmm2
44db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
45	movdqa	xmm3,xmm6
46	psrldq	xmm2,1
; Use the nibbles as shuffle indices into the row: 16 parallel 4-bit lookups
; for the low nibbles (xmm4) and for the high nibbles (xmm5).
47	movdqa	xmm5,xmm4
48db	102,15,56,0,224	; pshufb xmm4,xmm0
49db	102,15,56,0,233	; pshufb xmm5,xmm1
50	pxor	xmm2,xmm5	; high-nibble lookup is added unshifted
; The low-nibble lookup is added shifted right by 4 bits across all 128 bits.
; psrlq only shifts within each qword, so the 4 bits leaving each qword are
; routed by hand: low qword -> top of the carry register, high qword -> top
; of the low qword.
51	movdqa	xmm5,xmm4
52	psllq	xmm5,60	; isolate the low 4 bits of each qword at its top
53	movdqa	xmm6,xmm5
54	pslldq	xmm6,8
55	pxor	xmm3,xmm6	; bits leaving the low qword -> carry
56	psrldq	xmm5,8
57	pxor	xmm2,xmm5	; bits leaving the high qword -> low qword
58	psrlq	xmm4,4
59	pxor	xmm2,xmm4
60	sub	eax,1
61	jnz	NEAR L$001loop_row_1
; Fold the carry register into the accumulator at per-qword right shifts of
; 0, 1, 2 and 7 bits, then clear it for the next pass. (Presumably the
; reduction by the GHASH polynomial 1 + x + x^2 + x^7 -- confirm.)
62	pxor	xmm2,xmm3
63	psrlq	xmm3,1
64	pxor	xmm2,xmm3
65	psrlq	xmm3,1
66	pxor	xmm2,xmm3
67	psrlq	xmm3,5
68	pxor	xmm2,xmm3
69	pxor	xmm3,xmm3
70	mov	eax,5	; pass 2: next 5 table rows
; Same row step as in pass 1.
71L$002loop_row_2:
72	movdqa	xmm4,[esi]
73	lea	esi,[16+esi]
74	movdqa	xmm6,xmm2
75db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
76	movdqa	xmm3,xmm6
77	psrldq	xmm2,1
78	movdqa	xmm5,xmm4
79db	102,15,56,0,224	; pshufb xmm4,xmm0
80db	102,15,56,0,233	; pshufb xmm5,xmm1
81	pxor	xmm2,xmm5
82	movdqa	xmm5,xmm4
83	psllq	xmm5,60
84	movdqa	xmm6,xmm5
85	pslldq	xmm6,8
86	pxor	xmm3,xmm6
87	psrldq	xmm5,8
88	pxor	xmm2,xmm5
89	psrlq	xmm4,4
90	pxor	xmm2,xmm4
91	sub	eax,1
92	jnz	NEAR L$002loop_row_2
; Fold the carry register into the accumulator (shifts 0, 1, 2, 7) and clear it.
93	pxor	xmm2,xmm3
94	psrlq	xmm3,1
95	pxor	xmm2,xmm3
96	psrlq	xmm3,1
97	pxor	xmm2,xmm3
98	psrlq	xmm3,5
99	pxor	xmm2,xmm3
100	pxor	xmm3,xmm3
101	mov	eax,6	; pass 3: last 6 table rows (5 + 5 + 6 = 16)
; Same row step as in pass 1.
102L$003loop_row_3:
103	movdqa	xmm4,[esi]
104	lea	esi,[16+esi]
105	movdqa	xmm6,xmm2
106db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
107	movdqa	xmm3,xmm6
108	psrldq	xmm2,1
109	movdqa	xmm5,xmm4
110db	102,15,56,0,224	; pshufb xmm4,xmm0
111db	102,15,56,0,233	; pshufb xmm5,xmm1
112	pxor	xmm2,xmm5
113	movdqa	xmm5,xmm4
114	psllq	xmm5,60
115	movdqa	xmm6,xmm5
116	pslldq	xmm6,8
117	pxor	xmm3,xmm6
118	psrldq	xmm5,8
119	pxor	xmm2,xmm5
120	psrlq	xmm4,4
121	pxor	xmm2,xmm4
122	sub	eax,1
123	jnz	NEAR L$003loop_row_3
; Final fold of the carry register into the accumulator (shifts 0, 1, 2, 7).
124	pxor	xmm2,xmm3
125	psrlq	xmm3,1
126	pxor	xmm2,xmm3
127	psrlq	xmm3,1
128	pxor	xmm2,xmm3
129	psrlq	xmm3,5
130	pxor	xmm2,xmm3
131	pxor	xmm3,xmm3
132db	102,15,56,0,215	; pshufb xmm2,xmm7 -- undo the byte reversal done on entry
133	movdqu	[edi],xmm2	; store the result over the input block
; Zero the working vector registers before returning (presumably so that no
; input-dependent values are left behind -- confirm). xmm7 is not cleared.
134	pxor	xmm0,xmm0
135	pxor	xmm1,xmm1
136	pxor	xmm2,xmm2
137	pxor	xmm3,xmm3
138	pxor	xmm4,xmm4
139	pxor	xmm5,xmm5
140	pxor	xmm6,xmm6
; Restore the saved registers in reverse push order.
141	pop	edi
142	pop	esi
143	pop	ebx
144	pop	ebp
145	ret
; ---------------------------------------------------------------------------
; _gcm_ghash_ssse3 -- 32-bit x86, arguments on the stack, caller cleans up
;                     (plain ret).
;
; In:    [esp+4]  -> 16-byte state block; read with movdqu on entry and
;                    overwritten with the result on exit.
;        [esp+8]  -> table read as 16 rows of 16 bytes (256 bytes) with
;                    movdqa, so it must be 16-byte aligned.
;        [esp+12] -> input data, consumed 16 bytes at a time with movdqu.
;        [esp+16] =  input length in bytes; rounded down to a multiple of 16.
; Out:   the 16-byte block at [esp+4] holds the updated state.
; Saved: ebp, ebx, esi, edi (pushed on entry, popped on exit).
; Clobb: eax, ecx, edx, flags, xmm0-xmm7. xmm0-xmm6 are zeroed before
;        returning; xmm7 still holds the byte-reversal mask.
;
; For each 16-byte input block: state ^= block, then the same 16-row table
; step sequence (5 + 5 + 6 rows) as used in _gcm_gmult_ssse3 is applied.
;
; NOTE(review): the remaining-length counter is only tested at the bottom of
; the block loop. If the length rounds down to 0, one block is still read and
; processed and the counter then wraps below zero instead of terminating.
; Callers presumably must pass a non-zero multiple of 16 -- confirm against
; the C-side contract.
;
; The db lines are hand-encoded SSSE3 instructions; the decoded instruction
; is given beside each one.
; ---------------------------------------------------------------------------
146global	_gcm_ghash_ssse3
147align	16
148_gcm_ghash_ssse3:
149L$_gcm_ghash_ssse3_begin:
; Preserve ebp/ebx/esi/edi for the caller.
150	push	ebp
151	push	ebx
152	push	esi
153	push	edi
; Four pushes plus the return address put the first argument at esp+20.
154	mov	edi,DWORD [20+esp]	; edi = arg 1: state block pointer
155	mov	esi,DWORD [24+esp]	; esi = arg 2: table pointer
156	mov	edx,DWORD [28+esp]	; edx = arg 3: input pointer
157	mov	ecx,DWORD [32+esp]	; ecx = arg 4: input length in bytes
158	movdqu	xmm0,[edi]	; xmm0 = current state
; call/pop leaves the address of L$004pic_point in ebx so the constants can
; be addressed relative to it (position-independent). ebx is used here rather
; than eax because eax serves as the row counter inside the block loop.
159	call	L$004pic_point
160L$004pic_point:
161	pop	ebx
162	movdqa	xmm7,[(L$reverse_bytes-L$004pic_point)+ebx]	; xmm7 = byte-reversal shuffle mask
163	and	ecx,-16	; only whole 16-byte blocks are processed
164db	102,15,56,0,199	; pshufb xmm0,xmm7 -- reverse the byte order of the state
165	pxor	xmm3,xmm3	; xmm3 = carry register, starts at zero
; ---- one iteration per 16-byte input block ----
166L$005loop_ghash:
; The nibble mask is reloaded every iteration because xmm2 is reused as the
; accumulator further down.
167	movdqa	xmm2,[(L$low4_mask-L$004pic_point)+ebx]
168	movdqu	xmm1,[edx]	; xmm1 = next input block
169db	102,15,56,0,207	; pshufb xmm1,xmm7 -- reverse its byte order
170	pxor	xmm0,xmm1	; state ^= block
; Split every byte into nibbles:
;   xmm1 = high nibble of each byte, moved down to bits 0-3
;   xmm0 = low nibble of each byte
171	movdqa	xmm1,xmm2
172	pandn	xmm1,xmm0	; xmm1 = xmm0 AND NOT mask
173	psrld	xmm1,4
174	pand	xmm0,xmm2
175	pxor	xmm2,xmm2	; xmm2 = accumulator
176	mov	eax,5	; pass 1: 5 table rows
; One iteration per 16-byte table row.
177L$006loop_row_4:
178	movdqa	xmm4,[esi]	; xmm4 = current row
179	lea	esi,[16+esi]	; advance to the next row
; Shift the 256-bit value xmm2:xmm3 right by one byte.
180	movdqa	xmm6,xmm2
181db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
182	movdqa	xmm3,xmm6
183	psrldq	xmm2,1
; Use the nibbles as shuffle indices into the row: 16 parallel 4-bit lookups
; for the low nibbles (xmm4) and for the high nibbles (xmm5).
184	movdqa	xmm5,xmm4
185db	102,15,56,0,224	; pshufb xmm4,xmm0
186db	102,15,56,0,233	; pshufb xmm5,xmm1
187	pxor	xmm2,xmm5	; high-nibble lookup is added unshifted
; The low-nibble lookup is added shifted right by 4 bits across all 128 bits.
; psrlq only shifts within each qword, so the 4 bits leaving each qword are
; routed by hand: low qword -> top of the carry register, high qword -> top
; of the low qword.
188	movdqa	xmm5,xmm4
189	psllq	xmm5,60	; isolate the low 4 bits of each qword at its top
190	movdqa	xmm6,xmm5
191	pslldq	xmm6,8
192	pxor	xmm3,xmm6	; bits leaving the low qword -> carry
193	psrldq	xmm5,8
194	pxor	xmm2,xmm5	; bits leaving the high qword -> low qword
195	psrlq	xmm4,4
196	pxor	xmm2,xmm4
197	sub	eax,1
198	jnz	NEAR L$006loop_row_4
; Fold the carry register into the accumulator at per-qword right shifts of
; 0, 1, 2 and 7 bits, then clear it for the next pass. (Presumably the
; reduction by the GHASH polynomial 1 + x + x^2 + x^7 -- confirm.)
199	pxor	xmm2,xmm3
200	psrlq	xmm3,1
201	pxor	xmm2,xmm3
202	psrlq	xmm3,1
203	pxor	xmm2,xmm3
204	psrlq	xmm3,5
205	pxor	xmm2,xmm3
206	pxor	xmm3,xmm3
207	mov	eax,5	; pass 2: next 5 table rows
; Same row step as in pass 1.
208L$007loop_row_5:
209	movdqa	xmm4,[esi]
210	lea	esi,[16+esi]
211	movdqa	xmm6,xmm2
212db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
213	movdqa	xmm3,xmm6
214	psrldq	xmm2,1
215	movdqa	xmm5,xmm4
216db	102,15,56,0,224	; pshufb xmm4,xmm0
217db	102,15,56,0,233	; pshufb xmm5,xmm1
218	pxor	xmm2,xmm5
219	movdqa	xmm5,xmm4
220	psllq	xmm5,60
221	movdqa	xmm6,xmm5
222	pslldq	xmm6,8
223	pxor	xmm3,xmm6
224	psrldq	xmm5,8
225	pxor	xmm2,xmm5
226	psrlq	xmm4,4
227	pxor	xmm2,xmm4
228	sub	eax,1
229	jnz	NEAR L$007loop_row_5
; Fold the carry register into the accumulator (shifts 0, 1, 2, 7) and clear it.
230	pxor	xmm2,xmm3
231	psrlq	xmm3,1
232	pxor	xmm2,xmm3
233	psrlq	xmm3,1
234	pxor	xmm2,xmm3
235	psrlq	xmm3,5
236	pxor	xmm2,xmm3
237	pxor	xmm3,xmm3
238	mov	eax,6	; pass 3: last 6 table rows (5 + 5 + 6 = 16)
; Same row step as in pass 1.
239L$008loop_row_6:
240	movdqa	xmm4,[esi]
241	lea	esi,[16+esi]
242	movdqa	xmm6,xmm2
243db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
244	movdqa	xmm3,xmm6
245	psrldq	xmm2,1
246	movdqa	xmm5,xmm4
247db	102,15,56,0,224	; pshufb xmm4,xmm0
248db	102,15,56,0,233	; pshufb xmm5,xmm1
249	pxor	xmm2,xmm5
250	movdqa	xmm5,xmm4
251	psllq	xmm5,60
252	movdqa	xmm6,xmm5
253	pslldq	xmm6,8
254	pxor	xmm3,xmm6
255	psrldq	xmm5,8
256	pxor	xmm2,xmm5
257	psrlq	xmm4,4
258	pxor	xmm2,xmm4
259	sub	eax,1
260	jnz	NEAR L$008loop_row_6
; Final fold of the carry register for this block (shifts 0, 1, 2, 7); the
; carry register is left zeroed for the next block.
261	pxor	xmm2,xmm3
262	psrlq	xmm3,1
263	pxor	xmm2,xmm3
264	psrlq	xmm3,1
265	pxor	xmm2,xmm3
266	psrlq	xmm3,5
267	pxor	xmm2,xmm3
268	pxor	xmm3,xmm3
269	movdqa	xmm0,xmm2	; accumulator becomes the new state
270	lea	esi,[esi-256]	; rewind the table pointer (16 rows * 16 bytes)
271	lea	edx,[16+edx]	; advance to the next input block
272	sub	ecx,16	; 16 fewer bytes remaining
273	jnz	NEAR L$005loop_ghash
; ---- all blocks consumed ----
274db	102,15,56,0,199	; pshufb xmm0,xmm7 -- undo the byte reversal done on entry
275	movdqu	[edi],xmm0	; write the updated state back
; Zero the working vector registers before returning (presumably so that no
; input-dependent values are left behind -- confirm). xmm7 is not cleared.
276	pxor	xmm0,xmm0
277	pxor	xmm1,xmm1
278	pxor	xmm2,xmm2
279	pxor	xmm3,xmm3
280	pxor	xmm4,xmm4
281	pxor	xmm5,xmm5
282	pxor	xmm6,xmm6
; Restore the saved registers in reverse push order.
283	pop	edi
284	pop	esi
285	pop	ebx
286	pop	ebp
287	ret
; Constant tables. They are emitted in the same section as the code and are
; addressed relative to the call/pop "pic_point" labels in the functions
; above. Both are loaded with movdqa, hence the 16-byte alignment.
288align	16
; pshufb control mask: output byte i takes input byte 15-i, i.e. it reverses
; the order of the 16 bytes in a register.
289L$reverse_bytes:
290db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
291align	16
; 252645135 = 0x0f0f0f0f: 0x0f in every byte, selecting the low nibble of
; each byte when ANDed with a register.
292L$low4_mask:
293dd	252645135,252645135,252645135,252645135
; Taken when the output format is not win32: none of the code above is
; assembled, and a lone ret is emitted in its place.
294%else
295; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
296ret
; Closes the win32-only conditional opened near the top of the file.
297%endif
298