xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/gen/bcm/ghash-ssse3-x86-apple.S (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
7.text
// _gcm_gmult_ssse3 -- 32-bit x86 routine, arguments on the stack (AT&T syntax).
//   arg1 (20(%esp) after the four pushes) -> %edi: pointer to a 16-byte block
//        that is loaded with movdqu on entry and overwritten with the result.
//   arg2 (24(%esp) after the four pushes) -> %esi: pointer to a table that is
//        read as 16 consecutive 16-byte rows (5 + 5 + 6) with movdqa, so it
//        must be 16-byte aligned and at least 256 bytes long.
// NOTE(review): presumably the GHASH "multiply the state by H" step, i.e.
//   gcm_gmult_ssse3(Xi, Htable) -- confirm against the C prototype.
// The table is always read sequentially; input-dependent selection happens
// only inside pshufb, never through a memory address.
// Register roles:
//   xmm0 / xmm1   low / high nibble of every input byte (pshufb indices)
//   xmm2 : xmm3   accumulator; xmm3 receives what is shifted out of xmm2
//   xmm4 .. xmm6  scratch
//   xmm7          Lreverse_bytes shuffle mask
// The .byte sequences are hand-encoded SSSE3 instructions; the decoded form is
// given in the comment above each one.
8.globl	_gcm_gmult_ssse3
9.private_extern	_gcm_gmult_ssse3
10.align	4
11_gcm_gmult_ssse3:
12L_gcm_gmult_ssse3_begin:
// Save the callee-saved integer registers; the arguments then start at 20(%esp).
13	pushl	%ebp
14	pushl	%ebx
15	pushl	%esi
16	pushl	%edi
17	movl	20(%esp),%edi
18	movl	24(%esp),%esi
19	movdqu	(%edi),%xmm0
// call/pop pair: %eax = run-time address of L000pic_point, used as the base
// for position-independent addressing of the two constants below.
20	call	L000pic_point
21L000pic_point:
22	popl	%eax
23	movdqa	Lreverse_bytes-L000pic_point(%eax),%xmm7
24	movdqa	Llow4_mask-L000pic_point(%eax),%xmm2
// pshufb %xmm7,%xmm0 -- reverse the byte order of the input block.
25.byte	102,15,56,0,199
// Split every byte into nibbles: xmm1 = high nibbles moved down to bits 0..3,
// xmm0 = low nibbles (xmm2 still holds the 0x0f mask here).
26	movdqa	%xmm2,%xmm1
27	pandn	%xmm0,%xmm1
28	psrld	$4,%xmm1
29	pand	%xmm2,%xmm0
// Clear the accumulator pair xmm2:xmm3.
30	pxor	%xmm2,%xmm2
31	pxor	%xmm3,%xmm3
// First group: 5 table rows.
32	movl	$5,%eax
33L001loop_row_1:
// Load the next 16-byte row and advance the row pointer.
34	movdqa	(%esi),%xmm4
35	leal	16(%esi),%esi
// Shift the pair xmm2:xmm3 right by one byte: the low byte of xmm2 enters the
// top of xmm3.
36	movdqa	%xmm2,%xmm6
// palignr $1,%xmm3,%xmm6 -- xmm6 = (xmm6:xmm3) >> 8 bits, low 128 bits kept.
37.byte	102,15,58,15,243,1
38	movdqa	%xmm6,%xmm3
39	psrldq	$1,%xmm2
// Use the row as a 16-entry byte table: xmm4 = row[low nibbles],
// xmm5 = row[high nibbles].
40	movdqa	%xmm4,%xmm5
// pshufb %xmm0,%xmm4
41.byte	102,15,56,0,224
// pshufb %xmm1,%xmm5
42.byte	102,15,56,0,233
// The high-nibble lookup is XORed into the accumulator unshifted.
43	pxor	%xmm5,%xmm2
// The low-nibble lookup is XORed in shifted right by 4 bits as one 128-bit
// value: psllq $60 isolates the 4 bits leaving each qword; those from the low
// qword go to the top of xmm3, those from the high qword to the top of the
// low qword of xmm2, and psrlq $4 supplies the in-lane part.
44	movdqa	%xmm4,%xmm5
45	psllq	$60,%xmm5
46	movdqa	%xmm5,%xmm6
47	pslldq	$8,%xmm6
48	pxor	%xmm6,%xmm3
49	psrldq	$8,%xmm5
50	pxor	%xmm5,%xmm2
51	psrlq	$4,%xmm4
52	pxor	%xmm4,%xmm2
53	subl	$1,%eax
54	jnz	L001loop_row_1
// Fold xmm3 into xmm2, per 64-bit lane:
//   xmm2 ^= xmm3 ^ (xmm3 >> 1) ^ (xmm3 >> 2) ^ (xmm3 >> 7); then xmm3 = 0.
// NOTE(review): the shift amounts presumably implement reduction by the GHASH
// polynomial -- confirm against the generator script.
55	pxor	%xmm3,%xmm2
56	psrlq	$1,%xmm3
57	pxor	%xmm3,%xmm2
58	psrlq	$1,%xmm3
59	pxor	%xmm3,%xmm2
60	psrlq	$5,%xmm3
61	pxor	%xmm3,%xmm2
62	pxor	%xmm3,%xmm3
// Second group: 5 more rows, same loop body and fold as the first group.
63	movl	$5,%eax
64L002loop_row_2:
65	movdqa	(%esi),%xmm4
66	leal	16(%esi),%esi
67	movdqa	%xmm2,%xmm6
// palignr $1,%xmm3,%xmm6
68.byte	102,15,58,15,243,1
69	movdqa	%xmm6,%xmm3
70	psrldq	$1,%xmm2
71	movdqa	%xmm4,%xmm5
// pshufb %xmm0,%xmm4
72.byte	102,15,56,0,224
// pshufb %xmm1,%xmm5
73.byte	102,15,56,0,233
74	pxor	%xmm5,%xmm2
75	movdqa	%xmm4,%xmm5
76	psllq	$60,%xmm5
77	movdqa	%xmm5,%xmm6
78	pslldq	$8,%xmm6
79	pxor	%xmm6,%xmm3
80	psrldq	$8,%xmm5
81	pxor	%xmm5,%xmm2
82	psrlq	$4,%xmm4
83	pxor	%xmm4,%xmm2
84	subl	$1,%eax
85	jnz	L002loop_row_2
// Fold xmm3 into xmm2 and clear xmm3 (same sequence as after the first group).
86	pxor	%xmm3,%xmm2
87	psrlq	$1,%xmm3
88	pxor	%xmm3,%xmm2
89	psrlq	$1,%xmm3
90	pxor	%xmm3,%xmm2
91	psrlq	$5,%xmm3
92	pxor	%xmm3,%xmm2
93	pxor	%xmm3,%xmm3
// Third group: the remaining 6 rows (5 + 5 + 6 = 16 rows in total).
94	movl	$6,%eax
95L003loop_row_3:
96	movdqa	(%esi),%xmm4
97	leal	16(%esi),%esi
98	movdqa	%xmm2,%xmm6
// palignr $1,%xmm3,%xmm6
99.byte	102,15,58,15,243,1
100	movdqa	%xmm6,%xmm3
101	psrldq	$1,%xmm2
102	movdqa	%xmm4,%xmm5
// pshufb %xmm0,%xmm4
103.byte	102,15,56,0,224
// pshufb %xmm1,%xmm5
104.byte	102,15,56,0,233
105	pxor	%xmm5,%xmm2
106	movdqa	%xmm4,%xmm5
107	psllq	$60,%xmm5
108	movdqa	%xmm5,%xmm6
109	pslldq	$8,%xmm6
110	pxor	%xmm6,%xmm3
111	psrldq	$8,%xmm5
112	pxor	%xmm5,%xmm2
113	psrlq	$4,%xmm4
114	pxor	%xmm4,%xmm2
115	subl	$1,%eax
116	jnz	L003loop_row_3
// Final fold of xmm3 into xmm2; xmm2 now holds the result in reversed byte order.
117	pxor	%xmm3,%xmm2
118	psrlq	$1,%xmm3
119	pxor	%xmm3,%xmm2
120	psrlq	$1,%xmm3
121	pxor	%xmm3,%xmm2
122	psrlq	$5,%xmm3
123	pxor	%xmm3,%xmm2
124	pxor	%xmm3,%xmm3
// pshufb %xmm7,%xmm2 -- restore the original byte order, then store the result
// over the block that arg1 points to.
125.byte	102,15,56,0,215
126	movdqu	%xmm2,(%edi)
// Zero xmm0..xmm6 before returning.
// NOTE(review): presumably so that no input- or table-derived values remain in
// registers; xmm7 only holds the constant Lreverse_bytes mask and is left alone.
127	pxor	%xmm0,%xmm0
128	pxor	%xmm1,%xmm1
129	pxor	%xmm2,%xmm2
130	pxor	%xmm3,%xmm3
131	pxor	%xmm4,%xmm4
132	pxor	%xmm5,%xmm5
133	pxor	%xmm6,%xmm6
// Restore the saved registers in reverse push order.
134	popl	%edi
135	popl	%esi
136	popl	%ebx
137	popl	%ebp
138	ret
// _gcm_ghash_ssse3 -- 32-bit x86 routine, arguments on the stack (AT&T syntax).
// Offsets below are relative to %esp after the four pushes.
//   arg1 (20(%esp)) -> %edi: pointer to a 16-byte block, loaded with movdqu on
//        entry and overwritten with the result on exit.
//   arg2 (24(%esp)) -> %esi: pointer to a table read as 16 consecutive 16-byte
//        rows with movdqa (16-byte aligned, 256 bytes); rewound by 256 after
//        every input block.
//   arg3 (28(%esp)) -> %edx: input data, read 16 bytes at a time with movdqu.
//   arg4 (32(%esp)) -> %ecx: byte count, rounded down to a multiple of 16.
// For each 16-byte input block: byte-reverse it, XOR it into the running
// value in xmm0, then run the 16-row table sequence (5 + 5 + 6 rows with a
// fold after each group) to produce the new running value.
// NOTE(review): presumably gcm_ghash_ssse3(Xi, Htable, in, len) -- confirm
//   against the C prototype.
// NOTE(review): the outer loop is bottom-tested, so it always runs at least
//   once. If arg4 is below 16, %ecx is 0 after the andl and `subl $16` wraps
//   instead of reaching zero. Callers presumably guarantee a non-zero multiple
//   of 16 -- verify at the call sites.
// Register roles:
//   xmm0 / xmm1   running value on loop entry; low / high nibbles after the split
//   xmm2 : xmm3   accumulator; xmm3 receives what is shifted out of xmm2
//   xmm4 .. xmm6  scratch
//   xmm7          Lreverse_bytes shuffle mask
//   %ebx          PIC base (address of L004pic_point)
// The .byte sequences are hand-encoded SSSE3 instructions; the decoded form is
// given in the comment above each one.
139.globl	_gcm_ghash_ssse3
140.private_extern	_gcm_ghash_ssse3
141.align	4
142_gcm_ghash_ssse3:
143L_gcm_ghash_ssse3_begin:
// Save the callee-saved integer registers; the arguments then start at 20(%esp).
144	pushl	%ebp
145	pushl	%ebx
146	pushl	%esi
147	pushl	%edi
148	movl	20(%esp),%edi
149	movl	24(%esp),%esi
150	movl	28(%esp),%edx
151	movl	32(%esp),%ecx
152	movdqu	(%edi),%xmm0
// call/pop pair: %ebx = run-time address of L004pic_point. It stays live for
// the whole function because Llow4_mask is reloaded on every outer iteration.
153	call	L004pic_point
154L004pic_point:
155	popl	%ebx
156	movdqa	Lreverse_bytes-L004pic_point(%ebx),%xmm7
// Round the byte count down to a whole number of 16-byte blocks.
157	andl	$-16,%ecx
// pshufb %xmm7,%xmm0 -- reverse the byte order of the running value.
158.byte	102,15,56,0,199
// xmm3 starts at zero and is cleared again by every fold, so it is zero at the
// top of each outer iteration.
159	pxor	%xmm3,%xmm3
160L005loop_ghash:
// xmm2 is reused as the accumulator below, so the nibble mask is reloaded here.
161	movdqa	Llow4_mask-L004pic_point(%ebx),%xmm2
// Load the next input block, byte-reverse it and XOR it into the running value.
162	movdqu	(%edx),%xmm1
// pshufb %xmm7,%xmm1
163.byte	102,15,56,0,207
164	pxor	%xmm1,%xmm0
// Split every byte into nibbles: xmm1 = high nibbles moved down to bits 0..3,
// xmm0 = low nibbles.
165	movdqa	%xmm2,%xmm1
166	pandn	%xmm0,%xmm1
167	psrld	$4,%xmm1
168	pand	%xmm2,%xmm0
// Clear the accumulator.
169	pxor	%xmm2,%xmm2
// First group: 5 table rows.
170	movl	$5,%eax
171L006loop_row_4:
// Load the next 16-byte row and advance the row pointer.
172	movdqa	(%esi),%xmm4
173	leal	16(%esi),%esi
// Shift the pair xmm2:xmm3 right by one byte: the low byte of xmm2 enters the
// top of xmm3.
174	movdqa	%xmm2,%xmm6
// palignr $1,%xmm3,%xmm6 -- xmm6 = (xmm6:xmm3) >> 8 bits, low 128 bits kept.
175.byte	102,15,58,15,243,1
176	movdqa	%xmm6,%xmm3
177	psrldq	$1,%xmm2
// Use the row as a 16-entry byte table: xmm4 = row[low nibbles],
// xmm5 = row[high nibbles].
178	movdqa	%xmm4,%xmm5
// pshufb %xmm0,%xmm4
179.byte	102,15,56,0,224
// pshufb %xmm1,%xmm5
180.byte	102,15,56,0,233
// The high-nibble lookup is XORed into the accumulator unshifted.
181	pxor	%xmm5,%xmm2
// The low-nibble lookup is XORed in shifted right by 4 bits as one 128-bit
// value: psllq $60 isolates the 4 bits leaving each qword; those from the low
// qword go to the top of xmm3, those from the high qword to the top of the
// low qword of xmm2, and psrlq $4 supplies the in-lane part.
182	movdqa	%xmm4,%xmm5
183	psllq	$60,%xmm5
184	movdqa	%xmm5,%xmm6
185	pslldq	$8,%xmm6
186	pxor	%xmm6,%xmm3
187	psrldq	$8,%xmm5
188	pxor	%xmm5,%xmm2
189	psrlq	$4,%xmm4
190	pxor	%xmm4,%xmm2
191	subl	$1,%eax
192	jnz	L006loop_row_4
// Fold xmm3 into xmm2, per 64-bit lane:
//   xmm2 ^= xmm3 ^ (xmm3 >> 1) ^ (xmm3 >> 2) ^ (xmm3 >> 7); then xmm3 = 0.
// NOTE(review): the shift amounts presumably implement reduction by the GHASH
// polynomial -- confirm against the generator script.
193	pxor	%xmm3,%xmm2
194	psrlq	$1,%xmm3
195	pxor	%xmm3,%xmm2
196	psrlq	$1,%xmm3
197	pxor	%xmm3,%xmm2
198	psrlq	$5,%xmm3
199	pxor	%xmm3,%xmm2
200	pxor	%xmm3,%xmm3
// Second group: 5 more rows, same loop body and fold as the first group.
201	movl	$5,%eax
202L007loop_row_5:
203	movdqa	(%esi),%xmm4
204	leal	16(%esi),%esi
205	movdqa	%xmm2,%xmm6
// palignr $1,%xmm3,%xmm6
206.byte	102,15,58,15,243,1
207	movdqa	%xmm6,%xmm3
208	psrldq	$1,%xmm2
209	movdqa	%xmm4,%xmm5
// pshufb %xmm0,%xmm4
210.byte	102,15,56,0,224
// pshufb %xmm1,%xmm5
211.byte	102,15,56,0,233
212	pxor	%xmm5,%xmm2
213	movdqa	%xmm4,%xmm5
214	psllq	$60,%xmm5
215	movdqa	%xmm5,%xmm6
216	pslldq	$8,%xmm6
217	pxor	%xmm6,%xmm3
218	psrldq	$8,%xmm5
219	pxor	%xmm5,%xmm2
220	psrlq	$4,%xmm4
221	pxor	%xmm4,%xmm2
222	subl	$1,%eax
223	jnz	L007loop_row_5
// Fold xmm3 into xmm2 and clear xmm3 (same sequence as after the first group).
224	pxor	%xmm3,%xmm2
225	psrlq	$1,%xmm3
226	pxor	%xmm3,%xmm2
227	psrlq	$1,%xmm3
228	pxor	%xmm3,%xmm2
229	psrlq	$5,%xmm3
230	pxor	%xmm3,%xmm2
231	pxor	%xmm3,%xmm3
// Third group: the remaining 6 rows (5 + 5 + 6 = 16 rows in total).
232	movl	$6,%eax
233L008loop_row_6:
234	movdqa	(%esi),%xmm4
235	leal	16(%esi),%esi
236	movdqa	%xmm2,%xmm6
// palignr $1,%xmm3,%xmm6
237.byte	102,15,58,15,243,1
238	movdqa	%xmm6,%xmm3
239	psrldq	$1,%xmm2
240	movdqa	%xmm4,%xmm5
// pshufb %xmm0,%xmm4
241.byte	102,15,56,0,224
// pshufb %xmm1,%xmm5
242.byte	102,15,56,0,233
243	pxor	%xmm5,%xmm2
244	movdqa	%xmm4,%xmm5
245	psllq	$60,%xmm5
246	movdqa	%xmm5,%xmm6
247	pslldq	$8,%xmm6
248	pxor	%xmm6,%xmm3
249	psrldq	$8,%xmm5
250	pxor	%xmm5,%xmm2
251	psrlq	$4,%xmm4
252	pxor	%xmm4,%xmm2
253	subl	$1,%eax
254	jnz	L008loop_row_6
// Final fold for this input block.
255	pxor	%xmm3,%xmm2
256	psrlq	$1,%xmm3
257	pxor	%xmm3,%xmm2
258	psrlq	$1,%xmm3
259	pxor	%xmm3,%xmm2
260	psrlq	$5,%xmm3
261	pxor	%xmm3,%xmm2
262	pxor	%xmm3,%xmm3
// The accumulator becomes the running value for the next block.
263	movdqa	%xmm2,%xmm0
// Rewind the table pointer (16 rows * 16 bytes), step to the next input block
// and loop while bytes remain.
264	leal	-256(%esi),%esi
265	leal	16(%edx),%edx
266	subl	$16,%ecx
267	jnz	L005loop_ghash
// pshufb %xmm7,%xmm0 -- restore the original byte order, then store the result
// over the block that arg1 points to.
268.byte	102,15,56,0,199
269	movdqu	%xmm0,(%edi)
// Zero xmm0..xmm6 before returning.
// NOTE(review): presumably so that no input- or table-derived values remain in
// registers; xmm7 only holds the constant Lreverse_bytes mask and is left alone.
270	pxor	%xmm0,%xmm0
271	pxor	%xmm1,%xmm1
272	pxor	%xmm2,%xmm2
273	pxor	%xmm3,%xmm3
274	pxor	%xmm4,%xmm4
275	pxor	%xmm5,%xmm5
276	pxor	%xmm6,%xmm6
// Restore the saved registers in reverse push order.
277	popl	%edi
278	popl	%esi
279	popl	%ebx
280	popl	%ebp
281	ret
// Constants loaded with movdqa by the two routines above, so each must sit on
// a 16-byte boundary. NOTE(review): `.align 4` is taken to be a power-of-two
// alignment (2^4 = 16 bytes) with 0x90 as the fill byte, as on Mach-O
// assemblers -- confirm for the toolchain in use.
//
// Lreverse_bytes: pshufb control mask whose byte i selects source byte 15 - i,
// i.e. it reverses the byte order of a 16-byte value.
282.align	4,0x90
283Lreverse_bytes:
284.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
// Llow4_mask: 252645135 = 0x0f0f0f0f in each dword, i.e. 0x0f in every byte;
// used with pand/pandn to separate the low and high nibble of each byte.
285.align	4,0x90
286Llow4_mask:
287.long	252645135,252645135,252645135,252645135
288#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
289