// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text
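//
//  _vpaes_encrypt_core
//
//  AES-encrypt the block in %xmm0 using the key schedule at (%rdx); the
//  round count is read from 240(%rdx). Expects the constants preloaded
//  into %xmm9-%xmm15 by _vpaes_preheat. Output is left in %xmm0; clobbers
//  %xmm1-%xmm5, %rax and %r9-%r11.
//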
.p2align	4
_vpaes_encrypt_core:

	movq	%rdx,%r9
	movq	$16,%r11
	movl	240(%rdx),%eax
	movdqa	%xmm9,%xmm1
	movdqa	L$k_ipt(%rip),%xmm2
	pandn	%xmm0,%xmm1
	movdqu	(%r9),%xmm5
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
.byte	102,15,56,0,208
	movdqa	L$k_ipt+16(%rip),%xmm0
.byte	102,15,56,0,193
	pxor	%xmm5,%xmm2
	addq	$16,%r9
	pxor	%xmm2,%xmm0
	leaq	L$k_mc_backward(%rip),%r10
	jmp	L$enc_entry

.p2align	4
L$enc_loop:

	movdqa	%xmm13,%xmm4
	movdqa	%xmm12,%xmm0
.byte	102,15,56,0,226
.byte	102,15,56,0,195
	pxor	%xmm5,%xmm4
	movdqa	%xmm15,%xmm5
	pxor	%xmm4,%xmm0
	movdqa	-64(%r11,%r10,1),%xmm1
.byte	102,15,56,0,234
	movdqa	(%r11,%r10,1),%xmm4
	movdqa	%xmm14,%xmm2
.byte	102,15,56,0,211
	movdqa	%xmm0,%xmm3
	pxor	%xmm5,%xmm2
.byte	102,15,56,0,193
	addq	$16,%r9
	pxor	%xmm2,%xmm0
.byte	102,15,56,0,220
	addq	$16,%r11
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,193
	andq	$0x30,%r11
	subq	$1,%rax
	pxor	%xmm3,%xmm0

L$enc_entry:

	movdqa	%xmm9,%xmm1
	movdqa	%xmm11,%xmm5
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
.byte	102,15,56,0,232
	movdqa	%xmm10,%xmm3
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,217
	movdqa	%xmm10,%xmm4
	pxor	%xmm5,%xmm3
.byte	102,15,56,0,224
	movdqa	%xmm10,%xmm2
	pxor	%xmm5,%xmm4
.byte	102,15,56,0,211
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2
.byte	102,15,56,0,220
	movdqu	(%r9),%xmm5
	pxor	%xmm1,%xmm3
	jnz	L$enc_loop


	movdqa	-96(%r10),%xmm4
	movdqa	-80(%r10),%xmm0
.byte	102,15,56,0,226
	pxor	%xmm5,%xmm4
.byte	102,15,56,0,195
	movdqa	64(%r11,%r10,1),%xmm1
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,193
	ret

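//
//  _vpaes_encrypt_core_2x
//
//  Two-block variant of _vpaes_encrypt_core: encrypts %xmm0 and %xmm6 in
//  parallel with the key schedule at (%rdx), leaving the results in %xmm0
//  and %xmm6. The second lane lives in the high xmm registers, so the
//  S-box constants are reloaded from memory (L$k_sb1, L$k_sb2,
//  L$k_inv+16) rather than taken from the preheated registers, and
//  %xmm7-%xmm8 and %xmm11-%xmm13 are used as scratch.
//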
.p2align	4
_vpaes_encrypt_core_2x:

	movq	%rdx,%r9
	movq	$16,%r11
	movl	240(%rdx),%eax
	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7
	movdqa	L$k_ipt(%rip),%xmm2
	movdqa	%xmm2,%xmm8
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	movdqu	(%r9),%xmm5

	psrld	$4,%xmm1
	psrld	$4,%xmm7
	pand	%xmm9,%xmm0
	pand	%xmm9,%xmm6
.byte	102,15,56,0,208
.byte	102,68,15,56,0,198
	movdqa	L$k_ipt+16(%rip),%xmm0
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,193
.byte	102,15,56,0,247
	pxor	%xmm5,%xmm2
	pxor	%xmm5,%xmm8
	addq	$16,%r9
	pxor	%xmm2,%xmm0
	pxor	%xmm8,%xmm6
	leaq	L$k_mc_backward(%rip),%r10
	jmp	L$enc2x_entry

.p2align	4
L$enc2x_loop:

	movdqa	L$k_sb1(%rip),%xmm4
	movdqa	L$k_sb1+16(%rip),%xmm0
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226
.byte	102,69,15,56,0,224
.byte	102,15,56,0,195
.byte	102,65,15,56,0,243
	pxor	%xmm5,%xmm4
	pxor	%xmm5,%xmm12
	movdqa	L$k_sb2(%rip),%xmm5
	movdqa	%xmm5,%xmm13
	pxor	%xmm4,%xmm0
	pxor	%xmm12,%xmm6
	movdqa	-64(%r11,%r10,1),%xmm1

.byte	102,15,56,0,234
.byte	102,69,15,56,0,232
	movdqa	(%r11,%r10,1),%xmm4

	movdqa	L$k_sb2+16(%rip),%xmm2
	movdqa	%xmm2,%xmm8
.byte	102,15,56,0,211
.byte	102,69,15,56,0,195
	movdqa	%xmm0,%xmm3
	movdqa	%xmm6,%xmm11
	pxor	%xmm5,%xmm2
	pxor	%xmm13,%xmm8
.byte	102,15,56,0,193
.byte	102,15,56,0,241
	addq	$16,%r9
	pxor	%xmm2,%xmm0
	pxor	%xmm8,%xmm6
.byte	102,15,56,0,220
.byte	102,68,15,56,0,220
	addq	$16,%r11
	pxor	%xmm0,%xmm3
	pxor	%xmm6,%xmm11
.byte	102,15,56,0,193
.byte	102,15,56,0,241
	andq	$0x30,%r11
	subq	$1,%rax
	pxor	%xmm3,%xmm0
	pxor	%xmm11,%xmm6

L$enc2x_entry:

	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7
	movdqa	L$k_inv+16(%rip),%xmm5
	movdqa	%xmm5,%xmm13
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	psrld	$4,%xmm1
	psrld	$4,%xmm7
	pand	%xmm9,%xmm0
	pand	%xmm9,%xmm6
.byte	102,15,56,0,232
.byte	102,68,15,56,0,238
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm1,%xmm0
	pxor	%xmm7,%xmm6
.byte	102,15,56,0,217
.byte	102,68,15,56,0,223
	movdqa	%xmm10,%xmm4
	movdqa	%xmm10,%xmm12
	pxor	%xmm5,%xmm3
	pxor	%xmm13,%xmm11
.byte	102,15,56,0,224
.byte	102,68,15,56,0,230
	movdqa	%xmm10,%xmm2
	movdqa	%xmm10,%xmm8
	pxor	%xmm5,%xmm4
	pxor	%xmm13,%xmm12
.byte	102,15,56,0,211
.byte	102,69,15,56,0,195
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm0,%xmm2
	pxor	%xmm6,%xmm8
.byte	102,15,56,0,220
.byte	102,69,15,56,0,220
	movdqu	(%r9),%xmm5

	pxor	%xmm1,%xmm3
	pxor	%xmm7,%xmm11
	jnz	L$enc2x_loop


	movdqa	-96(%r10),%xmm4
	movdqa	-80(%r10),%xmm0
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226
.byte	102,69,15,56,0,224
	pxor	%xmm5,%xmm4
	pxor	%xmm5,%xmm12
.byte	102,15,56,0,195
.byte	102,65,15,56,0,243
	movdqa	64(%r11,%r10,1),%xmm1

	pxor	%xmm4,%xmm0
	pxor	%xmm12,%xmm6
.byte	102,15,56,0,193
.byte	102,15,56,0,241
	ret

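//
//  _vpaes_schedule_core
//
//  Expands the user key at (%rdi) into the round-key schedule at (%rdx).
//  %esi is the key size in bits; only the 128- and 256-bit paths exist in
//  this file (there is no 192-bit schedule here). %r8, set up by
//  _vpaes_set_encrypt_key, indexes the L$k_sr rotation applied by the
//  mangle steps.
//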
.p2align	4
_vpaes_schedule_core:

	call	_vpaes_preheat
	movdqa	L$k_rcon(%rip),%xmm8
	movdqu	(%rdi),%xmm0

	movdqa	%xmm0,%xmm3
	leaq	L$k_ipt(%rip),%r11
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm7

	leaq	L$k_sr(%rip),%r10

	movdqu	%xmm0,(%rdx)

L$schedule_go:
	cmpl	$192,%esi
	ja	L$schedule_256

L$schedule_128:
	movl	$10,%esi

L$oop_schedule_128:
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle
	jmp	L$oop_schedule_128

.p2align	4
L$schedule_256:
	movdqu	16(%rdi),%xmm0
	call	_vpaes_schedule_transform
	movl	$7,%esi

L$oop_schedule_256:
	call	_vpaes_schedule_mangle
	movdqa	%xmm0,%xmm6

	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle

	pshufd	$0xFF,%xmm0,%xmm0
	movdqa	%xmm7,%xmm5
	movdqa	%xmm6,%xmm7
	call	_vpaes_schedule_low_round
	movdqa	%xmm5,%xmm7

	jmp	L$oop_schedule_256

.p2align	4
L$schedule_mangle_last:

	leaq	L$k_deskew(%rip),%r11

	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,193
	leaq	L$k_opt(%rip),%r11
	addq	$32,%rdx

L$schedule_mangle_last_dec:
	addq	$-16,%rdx
	pxor	L$k_s63(%rip),%xmm0
	call	_vpaes_schedule_transform
	movdqu	%xmm0,(%rdx)


	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	ret

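//
//  _vpaes_schedule_round
//
//  Runs one main round of the key schedule on %xmm0 (current subkey) and
//  %xmm7 (previous subkey): the round constant in %xmm8 is folded into
//  %xmm7 and the high word of %xmm0 is rotated, then execution falls
//  through into _vpaes_schedule_low_round, which performs the sliding XOR
//  of %xmm7 and the S-box substitution of %xmm0. The low-round entry is
//  also called directly by the 256-bit schedule. The new subkey is
//  returned in both %xmm0 and %xmm7.
//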
.p2align	4
_vpaes_schedule_round:

	pxor	%xmm1,%xmm1
.byte	102,65,15,58,15,200,15
.byte	102,69,15,58,15,192,15
	pxor	%xmm1,%xmm7

	pshufd	$0xFF,%xmm0,%xmm0
.byte	102,15,58,15,192,1

_vpaes_schedule_low_round:

	movdqa	%xmm7,%xmm1
	pslldq	$4,%xmm7
	pxor	%xmm1,%xmm7
	movdqa	%xmm7,%xmm1
	pslldq	$8,%xmm7
	pxor	%xmm1,%xmm7
	pxor	L$k_s63(%rip),%xmm7

	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
	movdqa	%xmm11,%xmm2
.byte	102,15,56,0,208
	pxor	%xmm1,%xmm0
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,217
	pxor	%xmm2,%xmm3
	movdqa	%xmm10,%xmm4
.byte	102,15,56,0,224
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211
	pxor	%xmm0,%xmm2
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,220
	pxor	%xmm1,%xmm3
	movdqa	%xmm13,%xmm4
.byte	102,15,56,0,226
	movdqa	%xmm12,%xmm0
.byte	102,15,56,0,195
	pxor	%xmm4,%xmm0

	pxor	%xmm7,%xmm0
	movdqa	%xmm0,%xmm7
	ret

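//
//  _vpaes_schedule_transform
//
//  Applies the linear transform described by the table pair at (%r11) and
//  16(%r11) to the low and high nibbles of %xmm0. Requires
//  %xmm9 = L$k_s0F; output in %xmm0, clobbers %xmm1 and %xmm2.
//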
.p2align	4
_vpaes_schedule_transform:

	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
	movdqa	(%r11),%xmm2
.byte	102,15,56,0,208
	movdqa	16(%r11),%xmm0
.byte	102,15,56,0,193
	pxor	%xmm2,%xmm0
	ret

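//
//  _vpaes_schedule_mangle
//
//  Converts the subkey in %xmm0 into the form the encryption core expects
//  and writes it to the next 16-byte slot of the schedule: XOR with
//  L$k_s63, three diffusing L$k_mc_forward shuffles, then a rotation by
//  the L$k_sr entry selected by %r8 before the store at (%rdx). %xmm0
//  itself is preserved for the next round.
//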
.p2align	4
_vpaes_schedule_mangle:

	movdqa	%xmm0,%xmm4
	movdqa	L$k_mc_forward(%rip),%xmm5

	addq	$16,%rdx
	pxor	L$k_s63(%rip),%xmm4
.byte	102,15,56,0,229
	movdqa	%xmm4,%xmm3
.byte	102,15,56,0,229
	pxor	%xmm4,%xmm3
.byte	102,15,56,0,229
	pxor	%xmm4,%xmm3

L$schedule_mangle_both:
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,217
	addq	$-16,%r8
	andq	$0x30,%r8
	movdqu	%xmm3,(%rdx)
	ret

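//
//  _vpaes_set_encrypt_key
//
//  Public entry point: %rdi = user key, %esi = key size in bits,
//  %rdx = key schedule to fill. Stores %esi/32 + 5 at 240(%rdx) (the loop
//  count used by the encrypt core), then expands the key via
//  _vpaes_schedule_core. Always returns 0 in %eax.
//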
.globl	_vpaes_set_encrypt_key
.private_extern _vpaes_set_encrypt_key

.p2align	4
_vpaes_set_encrypt_key:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST

	movb	$1,_BORINGSSL_function_hit+5(%rip)
#endif

	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax
	movl	%eax,240(%rdx)

	movl	$0,%ecx
	movl	$0x30,%r8d
	call	_vpaes_schedule_core
	xorl	%eax,%eax
	ret

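//
//  _vpaes_encrypt
//
//  Encrypts one 16-byte block: %rdi = input, %rsi = output, %rdx = key
//  schedule. Loads the table constants with _vpaes_preheat and runs
//  _vpaes_encrypt_core.
//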
.globl	_vpaes_encrypt
.private_extern _vpaes_encrypt

.p2align	4
_vpaes_encrypt:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST

	movb	$1,_BORINGSSL_function_hit+4(%rip)
#endif
	movdqu	(%rdi),%xmm0
	call	_vpaes_preheat
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)
	ret

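//
//  _vpaes_ctr32_encrypt_blocks
//
//  CTR mode with a 32-bit big-endian counter: %rdi = input, %rsi = output,
//  %rdx = number of 16-byte blocks, %rcx = key schedule, %r8 = initial
//  counter block. An odd leading block goes through _vpaes_encrypt_core;
//  the remainder is processed two blocks at a time with
//  _vpaes_encrypt_core_2x. L$rev_ctr byte-swaps the counter word so it
//  can be incremented with paddd.
//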
.globl	_vpaes_ctr32_encrypt_blocks
.private_extern _vpaes_ctr32_encrypt_blocks

.p2align	4
_vpaes_ctr32_encrypt_blocks:

_CET_ENDBR

	xchgq	%rcx,%rdx
	testq	%rcx,%rcx
	jz	L$ctr32_abort
	movdqu	(%r8),%xmm0
	movdqa	L$ctr_add_one(%rip),%xmm8
	subq	%rdi,%rsi
	call	_vpaes_preheat
	movdqa	%xmm0,%xmm6
	pshufb	L$rev_ctr(%rip),%xmm6

	testq	$1,%rcx
	jz	L$ctr32_prep_loop

	movdqu	(%rdi),%xmm7
	call	_vpaes_encrypt_core
	pxor	%xmm7,%xmm0
	paddd	%xmm8,%xmm6
	movdqu	%xmm0,(%rsi,%rdi,1)
	subq	$1,%rcx
	leaq	16(%rdi),%rdi
	jz	L$ctr32_done

L$ctr32_prep_loop:

	movdqa	%xmm6,%xmm14
	movdqa	%xmm6,%xmm15
	paddd	%xmm8,%xmm15

L$ctr32_loop:
	movdqa	L$rev_ctr(%rip),%xmm1
	movdqa	%xmm14,%xmm0
	movdqa	%xmm15,%xmm6
.byte	102,15,56,0,193
.byte	102,15,56,0,241
	call	_vpaes_encrypt_core_2x
	movdqu	(%rdi),%xmm1
	movdqu	16(%rdi),%xmm2
	movdqa	L$ctr_add_two(%rip),%xmm3
	pxor	%xmm1,%xmm0
	pxor	%xmm2,%xmm6
	paddd	%xmm3,%xmm14
	paddd	%xmm3,%xmm15
	movdqu	%xmm0,(%rsi,%rdi,1)
	movdqu	%xmm6,16(%rsi,%rdi,1)
	subq	$2,%rcx
	leaq	32(%rdi),%rdi
	jnz	L$ctr32_loop

L$ctr32_done:
L$ctr32_abort:
	ret

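//
//  _vpaes_preheat
//
//  Points %r10 at L$k_s0F and preloads the constants used by the single-
//  block encrypt core and the key schedule: %xmm9 = L$k_s0F,
//  %xmm10 = L$k_inv, %xmm11 = L$k_inv+16, %xmm13/%xmm12 = L$k_sb1,
//  %xmm15/%xmm14 = L$k_sb2.
//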
.p2align	4
_vpaes_preheat:

	leaq	L$k_s0F(%rip),%r10
	movdqa	-32(%r10),%xmm10
	movdqa	-16(%r10),%xmm11
	movdqa	0(%r10),%xmm9
	movdqa	48(%r10),%xmm13
	movdqa	64(%r10),%xmm12
	movdqa	80(%r10),%xmm15
	movdqa	96(%r10),%xmm14
	ret

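//
//  Constant tables for the vector-permutation AES implementation: the
//  nibble-inverse tables (L$k_inv), the 0x0F mask, the input/output basis
//  transforms, the split S-box tables, the MixColumns and ShiftRows
//  permutations, the key-schedule constants, and the counter-mode helpers
//  (L$rev_ctr, L$ctr_add_one/two) used by _vpaes_ctr32_encrypt_blocks.
//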
.section	__DATA,__const
.p2align	6
_vpaes_consts:
L$k_inv:
.quad	0x0E05060F0D080180, 0x040703090A0B0C02
.quad	0x01040A060F0B0780, 0x030D0E0C02050809

L$k_s0F:
.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

L$k_ipt:
.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

L$k_sb1:
.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
L$k_sb2:
.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
L$k_sbo:
.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

L$k_mc_forward:
.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
.quad	0x080B0A0904070605, 0x000302010C0F0E0D
.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
.quad	0x000302010C0F0E0D, 0x080B0A0904070605

L$k_mc_backward:
.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
.quad	0x020100030E0D0C0F, 0x0A09080B06050407
.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
.quad	0x0A09080B06050407, 0x020100030E0D0C0F

L$k_sr:
.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad	0x030E09040F0A0500, 0x0B06010C07020D08
.quad	0x0F060D040B020900, 0x070E050C030A0108
.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

L$k_rcon:
.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

L$k_s63:
.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

L$k_opt:
.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

L$k_deskew:
.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

L$rev_ctr:
.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908

L$ctr_add_one:
.quad	0x0000000000000000, 0x0000000100000000
L$ctr_add_two:
.quad	0x0000000000000000, 0x0000000200000000

.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.p2align	6

.text
#endif