xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/gen/bcm/vpaes-x86_64-apple.S (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
7.text
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
//
// _vpaes_encrypt_core
//
// AES-encrypt one block using the vector-permute (SSSE3 pshufb) technique.
// In:    %xmm0       = plaintext block
//        %rdx        = key schedule; 240(%rdx) = round count
//        %xmm9-%xmm15 = shared constants loaded by _vpaes_preheat
// Out:   %xmm0       = ciphertext block
// Clobbers: %xmm1-%xmm5, %rax, %r9, %r10, %r11; %xmm6-%xmm8 untouched
// NOTE(review): contract inferred from this code and _vpaes_preheat — the
// generating Perl script is authoritative.
//
.p2align	4
_vpaes_encrypt_core:

	movq	%rdx,%r9		// %r9  = key schedule cursor
	movq	$16,%r11		// %r11 = mc_forward/backward rotation (0x10,0x20,0x30,0x00,...)
	movl	240(%rdx),%eax		// %eax = rounds remaining
	movdqa	%xmm9,%xmm1
	movdqa	L$k_ipt(%rip),%xmm2	// input transform table (lo)
	pandn	%xmm0,%xmm1
	movdqu	(%r9),%xmm5		// round-0 key
	psrld	$4,%xmm1		// %xmm1 = high nibbles
	pand	%xmm9,%xmm0		// %xmm0 = low nibbles
.byte	102,15,56,0,208			// pshufb %xmm0,%xmm2
	movdqa	L$k_ipt+16(%rip),%xmm0	// input transform table (hi)
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0
	pxor	%xmm5,%xmm2		// fold in round-0 key
	addq	$16,%r9
	pxor	%xmm2,%xmm0		// %xmm0 = transformed state
	leaq	L$k_mc_backward(%rip),%r10
	jmp	L$enc_entry

.p2align	4
L$enc_loop:
	// Middle round: combine sbox halves (sb1/sb2) with MixColumns rotations.
	movdqa	%xmm13,%xmm4		// sb1u
	movdqa	%xmm12,%xmm0		// sb1t
.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
.byte	102,15,56,0,195			// pshufb %xmm3,%xmm0
	pxor	%xmm5,%xmm4		// sb1u + round key
	movdqa	%xmm15,%xmm5		// sb2u
	pxor	%xmm4,%xmm0		// A = sbox output + key
	movdqa	-64(%r11,%r10,1),%xmm1	// L$k_mc_forward[round mod 4]
.byte	102,15,56,0,234			// pshufb %xmm2,%xmm5
	movdqa	(%r11,%r10,1),%xmm4	// L$k_mc_backward[round mod 4]
	movdqa	%xmm14,%xmm2		// sb2t
.byte	102,15,56,0,211			// pshufb %xmm3,%xmm2
	movdqa	%xmm0,%xmm3
	pxor	%xmm5,%xmm2		// 2A
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0 (rotate A forward)
	addq	$16,%r9			// next round key
	pxor	%xmm2,%xmm0		// 2A+B
.byte	102,15,56,0,220			// pshufb %xmm4,%xmm3 (rotate A backward)
	addq	$16,%r11		// advance mc-table index...
	pxor	%xmm0,%xmm3		// 2A+B+D
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0
	andq	$0x30,%r11		// ...mod 4 rows
	subq	$1,%rax			// decrement round count (sets ZF for jnz below)
	pxor	%xmm3,%xmm0		// 0 = 2A+3B+C+D

L$enc_entry:
	// Top of round: split state into nibbles and run the GF(2^4)-based
	// inversion that implements the AES sbox via pshufb lookups.
	movdqa	%xmm9,%xmm1
	movdqa	%xmm11,%xmm5
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1		// i = high nibbles
	pand	%xmm9,%xmm0		// k = low nibbles
.byte	102,15,56,0,232			// pshufb %xmm0,%xmm5 = a/k
	movdqa	%xmm10,%xmm3
	pxor	%xmm1,%xmm0		// j = i ^ k
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3 = 1/i
	movdqa	%xmm10,%xmm4
	pxor	%xmm5,%xmm3		// iak = 1/i + a/k
.byte	102,15,56,0,224			// pshufb %xmm0,%xmm4 = 1/j
	movdqa	%xmm10,%xmm2
	pxor	%xmm5,%xmm4		// jak = 1/j + a/k
.byte	102,15,56,0,211			// pshufb %xmm3,%xmm2 = 1/iak
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2		// io
.byte	102,15,56,0,220			// pshufb %xmm4,%xmm3 = 1/jak
	movdqu	(%r9),%xmm5		// next round key (pshufb/mov preserve flags)
	pxor	%xmm1,%xmm3		// jo
	jnz	L$enc_loop		// flags still from "subq $1,%rax"

	// Last round: sbo tables instead of sb1/sb2, then ShiftRows via L$k_sr.
	movdqa	-96(%r10),%xmm4		// sbou (L$k_sbo relative to L$k_mc_backward)
	movdqa	-80(%r10),%xmm0		// sbot
.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
	pxor	%xmm5,%xmm4		// + last round key
.byte	102,15,56,0,195			// pshufb %xmm3,%xmm0
	movdqa	64(%r11,%r10,1),%xmm1	// L$k_sr[rounds mod 4]
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0 (final ShiftRows)
	ret
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//
// _vpaes_encrypt_core_2x
//
// Two-block interleaved variant of _vpaes_encrypt_core (used by the CTR32
// path to hide pshufb latency). Encrypts %xmm0 and %xmm6 in parallel with
// the same key schedule.
// In:    %xmm0, %xmm6 = the two plaintext blocks
//        %rdx         = key schedule; 240(%rdx) = round count
//        %xmm9, %xmm10 = constants from _vpaes_preheat (s0F mask, k_inv)
// Out:   %xmm0, %xmm6 = the two ciphertext blocks
// Clobbers: %xmm1-%xmm5, %xmm7, %xmm8, %xmm11-%xmm13, %rax, %r9-%r11.
// Unlike the 1x core, the sb1/sb2 tables are reloaded from memory each
// round because the high xmm registers double as the second lane's state.
//
.p2align	4
_vpaes_encrypt_core_2x:

	movq	%rdx,%r9		// key schedule cursor
	movq	$16,%r11		// mc-table rotation state
	movl	240(%rdx),%eax		// rounds remaining
	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7		// second-lane copy of the nibble mask work reg
	movdqa	L$k_ipt(%rip),%xmm2
	movdqa	%xmm2,%xmm8
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	movdqu	(%r9),%xmm5		// round-0 key (shared by both lanes)

	psrld	$4,%xmm1
	psrld	$4,%xmm7
	pand	%xmm9,%xmm0
	pand	%xmm9,%xmm6
.byte	102,15,56,0,208			// pshufb %xmm0,%xmm2   (lane 1, ipt lo)
.byte	102,68,15,56,0,198		// pshufb %xmm6,%xmm8   (lane 2, ipt lo)
	movdqa	L$k_ipt+16(%rip),%xmm0
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0   (lane 1, ipt hi)
.byte	102,15,56,0,247			// pshufb %xmm7,%xmm6   (lane 2, ipt hi)
	pxor	%xmm5,%xmm2		// fold in round-0 key, both lanes
	pxor	%xmm5,%xmm8
	addq	$16,%r9
	pxor	%xmm2,%xmm0
	pxor	%xmm8,%xmm6
	leaq	L$k_mc_backward(%rip),%r10
	jmp	L$enc2x_entry

.p2align	4
L$enc2x_loop:
	// Middle round, both lanes: sb1/sb2 lookups + MixColumns rotations.
	movdqa	L$k_sb1(%rip),%xmm4	// sb1u (reloaded; regs are busy)
	movdqa	L$k_sb1+16(%rip),%xmm0	// sb1t
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
.byte	102,69,15,56,0,224		// pshufb %xmm8,%xmm12
.byte	102,15,56,0,195			// pshufb %xmm3,%xmm0
.byte	102,65,15,56,0,243		// pshufb %xmm11,%xmm6
	pxor	%xmm5,%xmm4		// + round key
	pxor	%xmm5,%xmm12
	movdqa	L$k_sb2(%rip),%xmm5	// sb2u
	movdqa	%xmm5,%xmm13
	pxor	%xmm4,%xmm0		// A (lane 1)
	pxor	%xmm12,%xmm6		// A (lane 2)
	movdqa	-64(%r11,%r10,1),%xmm1	// L$k_mc_forward[round mod 4]

.byte	102,15,56,0,234			// pshufb %xmm2,%xmm5
.byte	102,69,15,56,0,232		// pshufb %xmm8,%xmm13
	movdqa	(%r11,%r10,1),%xmm4	// L$k_mc_backward[round mod 4]

	movdqa	L$k_sb2+16(%rip),%xmm2	// sb2t
	movdqa	%xmm2,%xmm8
.byte	102,15,56,0,211			// pshufb %xmm3,%xmm2
.byte	102,69,15,56,0,195		// pshufb %xmm11,%xmm8
	movdqa	%xmm0,%xmm3
	movdqa	%xmm6,%xmm11
	pxor	%xmm5,%xmm2		// 2A
	pxor	%xmm13,%xmm8
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0 (rotate A fwd, lane 1)
.byte	102,15,56,0,241			// pshufb %xmm1,%xmm6 (rotate A fwd, lane 2)
	addq	$16,%r9			// next round key
	pxor	%xmm2,%xmm0		// 2A+B
	pxor	%xmm8,%xmm6
.byte	102,15,56,0,220			// pshufb %xmm4,%xmm3 (rotate A back, lane 1)
.byte	102,68,15,56,0,220		// pshufb %xmm4,%xmm11 (lane 2)
	addq	$16,%r11		// advance mc tables...
	pxor	%xmm0,%xmm3		// 2A+B+D
	pxor	%xmm6,%xmm11
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0
.byte	102,15,56,0,241			// pshufb %xmm1,%xmm6
	andq	$0x30,%r11		// ...mod 4 rows
	subq	$1,%rax			// decrement round count (ZF for jnz below)
	pxor	%xmm3,%xmm0		// 2A+3B+C+D (lane 1)
	pxor	%xmm11,%xmm6		// 2A+3B+C+D (lane 2)

L$enc2x_entry:
	// Top of round, both lanes: nibble split + GF(2^4) inversion (sbox).
	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7
	movdqa	L$k_inv+16(%rip),%xmm5	// "a/k" table (xmm11 is busy here)
	movdqa	%xmm5,%xmm13
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	psrld	$4,%xmm1		// i (lane 1)
	psrld	$4,%xmm7		// i (lane 2)
	pand	%xmm9,%xmm0		// k (lane 1)
	pand	%xmm9,%xmm6		// k (lane 2)
.byte	102,15,56,0,232			// pshufb %xmm0,%xmm5  = a/k
.byte	102,68,15,56,0,238		// pshufb %xmm6,%xmm13
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm1,%xmm0		// j = i ^ k
	pxor	%xmm7,%xmm6
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3  = 1/i
.byte	102,68,15,56,0,223		// pshufb %xmm7,%xmm11
	movdqa	%xmm10,%xmm4
	movdqa	%xmm10,%xmm12
	pxor	%xmm5,%xmm3		// iak
	pxor	%xmm13,%xmm11
.byte	102,15,56,0,224			// pshufb %xmm0,%xmm4  = 1/j
.byte	102,68,15,56,0,230		// pshufb %xmm6,%xmm12
	movdqa	%xmm10,%xmm2
	movdqa	%xmm10,%xmm8
	pxor	%xmm5,%xmm4		// jak
	pxor	%xmm13,%xmm12
.byte	102,15,56,0,211			// pshufb %xmm3,%xmm2  = 1/iak
.byte	102,69,15,56,0,195		// pshufb %xmm11,%xmm8
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm0,%xmm2		// io
	pxor	%xmm6,%xmm8
.byte	102,15,56,0,220			// pshufb %xmm4,%xmm3  = 1/jak
.byte	102,69,15,56,0,220		// pshufb %xmm12,%xmm11
	movdqu	(%r9),%xmm5		// next round key (does not touch flags)

	pxor	%xmm1,%xmm3		// jo
	pxor	%xmm7,%xmm11
	jnz	L$enc2x_loop		// flags from "subq $1,%rax"

	// Last round, both lanes: sbo tables + final ShiftRows via L$k_sr.
	movdqa	-96(%r10),%xmm4		// sbou
	movdqa	-80(%r10),%xmm0		// sbot
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
.byte	102,69,15,56,0,224		// pshufb %xmm8,%xmm12
	pxor	%xmm5,%xmm4		// + last round key
	pxor	%xmm5,%xmm12
.byte	102,15,56,0,195			// pshufb %xmm3,%xmm0
.byte	102,65,15,56,0,243		// pshufb %xmm11,%xmm6
	movdqa	64(%r11,%r10,1),%xmm1	// L$k_sr[rounds mod 4]

	pxor	%xmm4,%xmm0
	pxor	%xmm12,%xmm6
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0 (ShiftRows, lane 1)
.byte	102,15,56,0,241			// pshufb %xmm1,%xmm6 (ShiftRows, lane 2)
	ret
283
284
285
286
287
288
289
290
291
//
// _vpaes_decrypt_core
//
// AES-decrypt one block via vector permutes.
// In:    %xmm0 = ciphertext block
//        %rdx  = (decryption) key schedule; 240(%rdx) = round count
//        %xmm9-%xmm11 = constants from _vpaes_preheat
// Out:   %xmm0 = plaintext block
// Clobbers: %xmm1-%xmm5, %rax, %r9, %r10, %r11.
// %r11 is precomputed so that -352(%r11) lands on the L$k_sr entry matching
// the round count (ShiftRows alignment differs per key size).
//
.p2align	4
_vpaes_decrypt_core:

	movq	%rdx,%r9		// key schedule cursor
	movl	240(%rdx),%eax		// rounds remaining
	movdqa	%xmm9,%xmm1
	movdqa	L$k_dipt(%rip),%xmm2	// decryption input transform (lo)
	pandn	%xmm0,%xmm1
	movq	%rax,%r11
	psrld	$4,%xmm1		// high nibbles
	movdqu	(%r9),%xmm5		// round-0 key
	shlq	$4,%r11			// r11 = rounds*16
	pand	%xmm9,%xmm0		// low nibbles
.byte	102,15,56,0,208			// pshufb %xmm0,%xmm2
	movdqa	L$k_dipt+16(%rip),%xmm0	// decryption input transform (hi)
	xorq	$0x30,%r11
	leaq	L$k_dsbd(%rip),%r10	// base for the dsb* table group
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0
	andq	$0x30,%r11		// r11 = (rounds*16 ^ 0x30) & 0x30
	pxor	%xmm5,%xmm2		// fold in round-0 key
	movdqa	L$k_mc_forward+48(%rip),%xmm5	// fixed MixColumns rotation
	pxor	%xmm2,%xmm0
	addq	$16,%r9
	addq	%r10,%r11		// r11 = table-relative L$k_sr selector
	jmp	L$dec_entry

.p2align	4
L$dec_loop:
	// Middle round: accumulate the four inverse-MixColumns sbox tables
	// (dsb9, dsbd, dsbb, dsbe), rotating the accumulator between each.
	movdqa	-32(%r10),%xmm4		// dsb9u
	movdqa	-16(%r10),%xmm1		// dsb9t
.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
.byte	102,15,56,0,203			// pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	0(%r10),%xmm4		// dsbdu
	pxor	%xmm1,%xmm0
	movdqa	16(%r10),%xmm1		// dsbdt

.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
.byte	102,15,56,0,197			// pshufb %xmm5,%xmm0 (rotate accumulator)
.byte	102,15,56,0,203			// pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	32(%r10),%xmm4		// dsbbu
	pxor	%xmm1,%xmm0
	movdqa	48(%r10),%xmm1		// dsbbt

.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
.byte	102,15,56,0,197			// pshufb %xmm5,%xmm0
.byte	102,15,56,0,203			// pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	64(%r10),%xmm4		// dsbeu
	pxor	%xmm1,%xmm0
	movdqa	80(%r10),%xmm1		// dsbet

.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
.byte	102,15,56,0,197			// pshufb %xmm5,%xmm0
.byte	102,15,56,0,203			// pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	addq	$16,%r9			// next round key
.byte	102,15,58,15,237,12		// palignr $12,%xmm5,%xmm5 (advance rotation)
	pxor	%xmm1,%xmm0
	subq	$1,%rax			// decrement rounds (ZF consumed at jnz below)

L$dec_entry:
	// Top of round: nibble split + GF(2^4) inversion (inverse sbox front end).
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	movdqa	%xmm11,%xmm2
	psrld	$4,%xmm1		// i
	pand	%xmm9,%xmm0		// k
.byte	102,15,56,0,208			// pshufb %xmm0,%xmm2 = a/k
	movdqa	%xmm10,%xmm3
	pxor	%xmm1,%xmm0		// j
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3 = 1/i
	movdqa	%xmm10,%xmm4
	pxor	%xmm2,%xmm3		// iak
.byte	102,15,56,0,224			// pshufb %xmm0,%xmm4 = 1/j
	pxor	%xmm2,%xmm4		// jak
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211			// pshufb %xmm3,%xmm2 = 1/iak
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2		// io
.byte	102,15,56,0,220			// pshufb %xmm4,%xmm3 = 1/jak
	movdqu	(%r9),%xmm0		// next round key (flags preserved)
	pxor	%xmm1,%xmm3		// jo
	jnz	L$dec_loop		// flags from "subq $1,%rax"

	// Last round: dsbo tables, then schedule-dependent ShiftRows.
	movdqa	96(%r10),%xmm4		// dsbou
.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
	pxor	%xmm0,%xmm4		// + last round key
	movdqa	112(%r10),%xmm0		// dsbot
	movdqa	-352(%r11),%xmm2	// L$k_sr entry selected via r11
.byte	102,15,56,0,195			// pshufb %xmm3,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,194			// pshufb %xmm2,%xmm0 (final permutation)
	ret
391
392
393
394
395
396
397
398
399
//
// _vpaes_schedule_core
//
// Expand an AES key into the vpaes key schedule.
// In:    %rdi  = raw key bytes
//        %esi  = key size in bits (128/192/256)
//        %rdx  = output key-schedule buffer
//        %rcx  = direction: 0 = encrypt schedule, nonzero = decrypt schedule
//        %r8   = initial L$k_sr rotation offset (set up by the callers)
// Clobbers: wide — %xmm0-%xmm8, %rsi, %rdx, %r8, %r10, %r11 (plus whatever
// the called schedule helpers use). Called only from vpaes_set_*_key.
//
.p2align	4
_vpaes_schedule_core:

	// Load the constants and the raw key, then apply the input transform
	// to produce the round-0 schedule entry.
	call	_vpaes_preheat		// load shared tables into %xmm9-%xmm15
	movdqa	L$k_rcon(%rip),%xmm8	// round-constant material for schedule_round
	movdqu	(%rdi),%xmm0		// low 128 bits of the user key

	movdqa	%xmm0,%xmm3		// keep unmodified key for the decrypt path
	leaq	L$k_ipt(%rip),%r11	// transform table for schedule_transform
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm7		// %xmm7 = current schedule "prev" value

	leaq	L$k_sr(%rip),%r10
	testq	%rcx,%rcx
	jnz	L$schedule_am_decrypting

	// Encrypting: store round 0 untransformed-by-sr.
	movdqu	%xmm0,(%rdx)
	jmp	L$schedule_go

L$schedule_am_decrypting:
	// Decrypting: store the raw key permuted by the current sr row.
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3
	movdqu	%xmm3,(%rdx)
	xorq	$0x30,%r8		// flip the sr rotation for the dec schedule

L$schedule_go:
	cmpl	$192,%esi
	ja	L$schedule_256
	je	L$schedule_192
	// else: fall through to the 128-bit schedule

// 128-bit key: 10 plain rounds of schedule_round/schedule_mangle.
L$schedule_128:
	movl	$10,%esi		// 10 rounds to generate

L$oop_schedule_128:
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle	// write the round key
	jmp	L$oop_schedule_128

// 192-bit key: the schedule advances 1.5 blocks per iteration, so each
// loop pass produces three round keys using the 192_smear helper to
// reconstruct the short half.
.p2align	4
L$schedule_192:
	movdqu	8(%rdi),%xmm0		// bytes 8..23 of the key (high part)
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm6		// %xmm6 = short (low) half of the schedule
	pxor	%xmm4,%xmm4
	movhlps	%xmm4,%xmm6		// clear the top 64 bits of %xmm6
	movl	$4,%esi			// 4 iterations x ~3 keys

L$oop_schedule_192:
	call	_vpaes_schedule_round
.byte	102,15,58,15,198,8		// palignr $8,%xmm6,%xmm0 (stitch halves)
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_192_smear
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_192_smear
	jmp	L$oop_schedule_192

// 256-bit key: alternate a normal round (high half) with a low_round
// (low half, no rcon/rotate) per iteration.
.p2align	4
L$schedule_256:
	movdqu	16(%rdi),%xmm0		// high 128 bits of the user key
	call	_vpaes_schedule_transform
	movl	$7,%esi			// 7 iterations

L$oop_schedule_256:
	call	_vpaes_schedule_mangle	// output the previous low key
	movdqa	%xmm0,%xmm6		// save the low result

	// High round.
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle

	// Low round: swap in the saved half and run the no-rcon variant.
	pshufd	$0xFF,%xmm0,%xmm0	// broadcast last word
	movdqa	%xmm7,%xmm5		// stash high-side prev
	movdqa	%xmm6,%xmm7		// prev = saved low result
	call	_vpaes_schedule_low_round
	movdqa	%xmm5,%xmm7		// restore high-side prev

	jmp	L$oop_schedule_256

// Final round key: deskewed (dec) or output-transformed (enc), then
// everything sensitive is wiped.
.p2align	4
L$schedule_mangle_last:

	leaq	L$k_deskew(%rip),%r11	// default: decrypt deskew transform
	testq	%rcx,%rcx
	jnz	L$schedule_mangle_last_dec

	// Encrypting: permute by sr, then use the output transform instead.
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0
	leaq	L$k_opt(%rip),%r11	// output transform
	addq	$32,%rdx

L$schedule_mangle_last_dec:
	addq	$-16,%rdx
	pxor	L$k_s63(%rip),%xmm0	// undo the 0x63 bias
	call	_vpaes_schedule_transform	// apply deskew/opt
	movdqu	%xmm0,(%rdx)		// store the last round key

	// Cleanse key material from registers before returning.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	ret
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
//
// _vpaes_schedule_192_smear
//
// Smear the short, low side of the 192-bit key schedule: mixes words of
// %xmm7 (high/prev) and %xmm6 (low/short) so the next round has full
// 128-bit inputs. Leaves the smeared value in %xmm0 and %xmm6 (with the
// top 64 bits of %xmm6 cleared); clobbers %xmm1.
//
.p2align	4
_vpaes_schedule_192_smear:

	pshufd	$0x80,%xmm6,%xmm1	// d c 0 0 -> c 0 0 0
	pshufd	$0xFE,%xmm7,%xmm0	// b a _ _ -> b b b a
	pxor	%xmm1,%xmm6		// -> c+d c 0 0
	pxor	%xmm1,%xmm1		// zero for the movhlps below
	pxor	%xmm0,%xmm6		// -> b+c+d b+c c 0 (full smear)
	movdqa	%xmm6,%xmm0		// result also returned in %xmm0
	movhlps	%xmm1,%xmm6		// clear the top 64 bits of %xmm6 again
	ret
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
//
// _vpaes_schedule_round
//
// Run one full AES key-schedule round: rotate the input word, apply the
// round constant from %xmm8, then fall through into the sbox/smear logic.
// In:    %xmm0 = high side of the current schedule state
//        %xmm7 = previous round key ("prev")
//        %xmm8 = rcon state (rotated here each call)
// Out:   %xmm0 = %xmm7 = next round key
// Clobbers: %xmm1-%xmm4. The secondary entry point
// _vpaes_schedule_low_round skips the rcon/rotate prefix (used by the
// 256-bit schedule's low half).
//
.p2align	4
_vpaes_schedule_round:

	// Extract the rcon byte and advance the rcon register.
	pxor	%xmm1,%xmm1
.byte	102,65,15,58,15,200,15		// palignr $15,%xmm8,%xmm1
.byte	102,69,15,58,15,192,15		// palignr $15,%xmm8,%xmm8 (rotate rcon)
	pxor	%xmm1,%xmm7		// xor rcon into prev

	// RotWord: rotate the last word of %xmm0 into every position.
	pshufd	$0xFF,%xmm0,%xmm0	// broadcast last dword
.byte	102,15,58,15,192,1		// palignr $1,%xmm0,%xmm0 (byte rotate)

	// Fall through: low-round entry used directly by the 256-bit path.

_vpaes_schedule_low_round:

	// Smear prev into itself: xmm7 |= xmm7 << 32, then << 64.
	movdqa	%xmm7,%xmm1
	pslldq	$4,%xmm7
	pxor	%xmm1,%xmm7
	movdqa	%xmm7,%xmm1
	pslldq	$8,%xmm7
	pxor	%xmm1,%xmm7
	pxor	L$k_s63(%rip),%xmm7	// add the 0x63 sbox bias

	// Substitute: the same GF(2^4) inversion front end as the cipher.
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1		// i
	pand	%xmm9,%xmm0		// k
	movdqa	%xmm11,%xmm2
.byte	102,15,56,0,208			// pshufb %xmm0,%xmm2 = a/k
	pxor	%xmm1,%xmm0		// j
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3 = 1/i
	pxor	%xmm2,%xmm3		// iak
	movdqa	%xmm10,%xmm4
.byte	102,15,56,0,224			// pshufb %xmm0,%xmm4 = 1/j
	pxor	%xmm2,%xmm4		// jak
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211			// pshufb %xmm3,%xmm2 = 1/iak
	pxor	%xmm0,%xmm2		// io
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,220			// pshufb %xmm4,%xmm3 = 1/jak
	pxor	%xmm1,%xmm3		// jo
	movdqa	%xmm13,%xmm4		// sb1u
.byte	102,15,56,0,226			// pshufb %xmm2,%xmm4
	movdqa	%xmm12,%xmm0		// sb1t
.byte	102,15,56,0,195			// pshufb %xmm3,%xmm0
	pxor	%xmm4,%xmm0		// sbox output

	// Add in the smeared prev to produce the new round key.
	pxor	%xmm7,%xmm0
	movdqa	%xmm0,%xmm7		// prev = result for the next call
	ret
676
677
678
679
680
681
682
683
684
685
686
687
688
//
// _vpaes_schedule_transform
//
// Apply a nibble-wise linear transform to %xmm0 using the 32-byte table
// pair at (%r11) (low-nibble table) and 16(%r11) (high-nibble table).
// In:    %xmm0 = value, %r11 = table pair, %xmm9 = 0x0F mask
// Out:   %xmm0 = transformed value; clobbers %xmm1, %xmm2.
//
.p2align	4
_vpaes_schedule_transform:

	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1		// %xmm1 = high nibbles
	pand	%xmm9,%xmm0		// %xmm0 = low nibbles
	movdqa	(%r11),%xmm2		// low-nibble table
.byte	102,15,56,0,208			// pshufb %xmm0,%xmm2
	movdqa	16(%r11),%xmm0		// high-nibble table
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0
	pxor	%xmm2,%xmm0		// combine halves
	ret
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
//
// _vpaes_schedule_mangle
//
// Mangle the current round key in %xmm0 into its stored form and write it
// to (%rdx), advancing %rdx forward (encrypt) or backward (decrypt).
// In:    %xmm0 = round key (preserved), %rcx = direction flag,
//        %r8/%r10 = L$k_sr rotation state, %xmm5 usable as scratch
// Clobbers: %xmm1-%xmm5, %r11. Updates %rdx and %r8.
//
.p2align	4
_vpaes_schedule_mangle:

	movdqa	%xmm0,%xmm4		// work on a copy; %xmm0 must survive
	movdqa	L$k_mc_forward(%rip),%xmm5
	testq	%rcx,%rcx
	jnz	L$schedule_mangle_dec

	// Encrypting: add the s63 bias and smear with three mc_forward rotations.
	addq	$16,%rdx
	pxor	L$k_s63(%rip),%xmm4
.byte	102,15,56,0,229			// pshufb %xmm5,%xmm4
	movdqa	%xmm4,%xmm3
.byte	102,15,56,0,229			// pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3
.byte	102,15,56,0,229			// pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3		// %xmm3 = smeared key

	jmp	L$schedule_mangle_both
.p2align	4
L$schedule_mangle_dec:
	// Decrypting: run the key through the dks inverse-mixcolumns tables
	// (dksd, dksb, dkse, dks9), interleaved with mc_forward rotations.
	leaq	L$k_dksd(%rip),%r11
	movdqa	%xmm9,%xmm1
	pandn	%xmm4,%xmm1
	psrld	$4,%xmm1		// high nibbles
	pand	%xmm9,%xmm4		// low nibbles

	movdqa	0(%r11),%xmm2		// dksd lo
.byte	102,15,56,0,212			// pshufb %xmm4,%xmm2
	movdqa	16(%r11),%xmm3		// dksd hi
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221			// pshufb %xmm5,%xmm3 (rotate)

	movdqa	32(%r11),%xmm2		// dksb lo
.byte	102,15,56,0,212			// pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	48(%r11),%xmm3		// dksb hi
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221			// pshufb %xmm5,%xmm3 (rotate)

	movdqa	64(%r11),%xmm2		// dkse lo
.byte	102,15,56,0,212			// pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	80(%r11),%xmm3		// dkse hi
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221			// pshufb %xmm5,%xmm3 (rotate)

	movdqa	96(%r11),%xmm2		// dks9 lo
.byte	102,15,56,0,212			// pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	112(%r11),%xmm3		// dks9 hi
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3

	addq	$-16,%rdx		// decrypt schedule grows downward

L$schedule_mangle_both:
	// Permute by the current sr row, advance the rotation, and store.
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,217			// pshufb %xmm1,%xmm3
	addq	$-16,%r8
	andq	$0x30,%r8		// keep %r8 in {0x00,0x10,0x20,0x30}
	movdqu	%xmm3,(%rdx)
	ret
796
797
798
799
800
801
//
// int vpaes_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
// SysV AMD64: %rdi = userKey, %esi = bits, %rdx = key. Returns 0 in %eax.
// Stores the round count (bits/32 + 5, i.e. 10/12/14) at 240(%rdx) and
// expands the schedule via _vpaes_schedule_core.
//
.globl	_vpaes_set_encrypt_key
.private_extern _vpaes_set_encrypt_key

.p2align	4
_vpaes_set_encrypt_key:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST

	movb	$1,_BORINGSSL_function_hit+5(%rip)
#endif

	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax			// rounds = bits/32 + 5
	movl	%eax,240(%rdx)		// AES_KEY->rounds

	movl	$0,%ecx			// direction = encrypt
	movl	$0x30,%r8d		// initial L$k_sr rotation
	call	_vpaes_schedule_core
	xorl	%eax,%eax		// return 0
	ret
824
825
826
//
// int vpaes_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
// SysV AMD64: %rdi = userKey, %esi = bits, %rdx = key. Returns 0 in %eax.
// Points %rdx at the END of the schedule (the decrypt schedule is written
// backward) and runs _vpaes_schedule_core in decrypt mode.
//
.globl	_vpaes_set_decrypt_key
.private_extern _vpaes_set_decrypt_key

.p2align	4
_vpaes_set_decrypt_key:

_CET_ENDBR
	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax			// rounds = bits/32 + 5
	movl	%eax,240(%rdx)		// AES_KEY->rounds
	shll	$4,%eax			// rounds*16 bytes
	leaq	16(%rdx,%rax,1),%rdx	// %rdx = last schedule slot

	movl	$1,%ecx			// direction = decrypt
	movl	%esi,%r8d
	shrl	$1,%r8d
	andl	$32,%r8d
	xorl	$32,%r8d		// %r8 = key-size-dependent sr rotation (0 or 32)
	call	_vpaes_schedule_core
	xorl	%eax,%eax		// return 0
	ret
849
850
851
//
// void vpaes_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
// SysV AMD64: %rdi = in, %rsi = out, %rdx = key.
// Single-block ECB encrypt: preheat constants, run the core, store.
//
.globl	_vpaes_encrypt
.private_extern _vpaes_encrypt

.p2align	4
_vpaes_encrypt:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST

	movb	$1,_BORINGSSL_function_hit+4(%rip)
#endif
	movdqu	(%rdi),%xmm0		// load plaintext (unaligned ok)
	call	_vpaes_preheat
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)		// store ciphertext
	ret
868
869
870
//
// void vpaes_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
// SysV AMD64: %rdi = in, %rsi = out, %rdx = key.
// Single-block ECB decrypt: preheat constants, run the core, store.
//
.globl	_vpaes_decrypt
.private_extern _vpaes_decrypt

.p2align	4
_vpaes_decrypt:

_CET_ENDBR
	movdqu	(%rdi),%xmm0		// load ciphertext (unaligned ok)
	call	_vpaes_preheat
	call	_vpaes_decrypt_core
	movdqu	%xmm0,(%rsi)		// store plaintext
	ret
883
884
//
// void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
//                        const AES_KEY *key, uint8_t *ivp, int enc)
// SysV AMD64: %rdi=in, %rsi=out, %rdx=length, %rcx=key, %r8=ivp, %r9d=enc.
// After the xchg below: %rcx = length, %rdx = key (what the cores expect).
// Processes whole 16-byte blocks; a trailing partial block is dropped
// (length is rounded down by the subq/jnc loop). The updated IV is
// written back to (%r8). %rsi is kept as (out - in) so one index serves
// both buffers.
//
.globl	_vpaes_cbc_encrypt
.private_extern _vpaes_cbc_encrypt

.p2align	4
_vpaes_cbc_encrypt:

_CET_ENDBR
	xchgq	%rcx,%rdx		// %rcx = length, %rdx = key
	subq	$16,%rcx		// less than one block? bail
	jc	L$cbc_abort
	movdqu	(%r8),%xmm6		// %xmm6 = IV / running chain value
	subq	%rdi,%rsi		// %rsi = out - in
	call	_vpaes_preheat
	cmpl	$0,%r9d
	je	L$cbc_dec_loop
	jmp	L$cbc_enc_loop
.p2align	4
L$cbc_enc_loop:
	movdqu	(%rdi),%xmm0
	pxor	%xmm6,%xmm0		// xor chain value into plaintext
	call	_vpaes_encrypt_core
	movdqa	%xmm0,%xmm6		// ciphertext becomes the next chain value
	movdqu	%xmm0,(%rsi,%rdi,1)	// store at out + offset
	leaq	16(%rdi),%rdi
	subq	$16,%rcx
	jnc	L$cbc_enc_loop
	jmp	L$cbc_done
.p2align	4
L$cbc_dec_loop:
	movdqu	(%rdi),%xmm0
	movdqa	%xmm0,%xmm7		// keep ciphertext: it is the next chain value
	call	_vpaes_decrypt_core
	pxor	%xmm6,%xmm0		// xor previous chain value -> plaintext
	movdqa	%xmm7,%xmm6
	movdqu	%xmm0,(%rsi,%rdi,1)
	leaq	16(%rdi),%rdi
	subq	$16,%rcx
	jnc	L$cbc_dec_loop
L$cbc_done:
	movdqu	%xmm6,(%r8)		// write back the updated IV
L$cbc_abort:
	ret
927
928
//
// void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
//                                 size_t blocks, const AES_KEY *key,
//                                 const uint8_t *ivec)
// SysV AMD64: %rdi=in, %rsi=out, %rdx=blocks, %rcx=key, %r8=ivec.
// After the xchg: %rcx = block count, %rdx = key. CTR mode with a 32-bit
// big-endian counter in the last word of the IV; an odd leading block is
// handled with the 1x core, the rest two-at-a-time with the 2x core.
// %rsi is kept as (out - in) so one index addresses both buffers.
//
.globl	_vpaes_ctr32_encrypt_blocks
.private_extern _vpaes_ctr32_encrypt_blocks

.p2align	4
_vpaes_ctr32_encrypt_blocks:

_CET_ENDBR

	xchgq	%rcx,%rdx		// %rcx = blocks, %rdx = key
	testq	%rcx,%rcx
	jz	L$ctr32_abort
	movdqu	(%r8),%xmm0		// counter block (big-endian counter)
	movdqa	L$ctr_add_one(%rip),%xmm8
	subq	%rdi,%rsi		// %rsi = out - in
	call	_vpaes_preheat
	movdqa	%xmm0,%xmm6
	pshufb	L$rev_ctr(%rip),%xmm6	// byte-swap counter word to little-endian

	testq	$1,%rcx
	jz	L$ctr32_prep_loop

	// Odd number of blocks: do one block with the 1x core so the main
	// loop can always run in pairs.
	movdqu	(%rdi),%xmm7
	call	_vpaes_encrypt_core
	pxor	%xmm7,%xmm0		// keystream ^ input
	paddd	%xmm8,%xmm6		// counter += 1
	movdqu	%xmm0,(%rsi,%rdi,1)
	subq	$1,%rcx
	leaq	16(%rdi),%rdi
	jz	L$ctr32_done

L$ctr32_prep_loop:
	// Maintain two counters: %xmm14 = n, %xmm15 = n+1.
	movdqa	%xmm6,%xmm14
	movdqa	%xmm6,%xmm15
	paddd	%xmm8,%xmm15

L$ctr32_loop:
	movdqa	L$rev_ctr(%rip),%xmm1
	movdqa	%xmm14,%xmm0
	movdqa	%xmm15,%xmm6
.byte	102,15,56,0,193			// pshufb %xmm1,%xmm0 (back to big-endian)
.byte	102,15,56,0,241			// pshufb %xmm1,%xmm6
	call	_vpaes_encrypt_core_2x
	movdqu	(%rdi),%xmm1
	movdqu	16(%rdi),%xmm2
	movdqa	L$ctr_add_two(%rip),%xmm3
	pxor	%xmm1,%xmm0		// keystream ^ input, block n
	pxor	%xmm2,%xmm6		// keystream ^ input, block n+1
	paddd	%xmm3,%xmm14		// both counters += 2
	paddd	%xmm3,%xmm15
	movdqu	%xmm0,(%rsi,%rdi,1)
	movdqu	%xmm6,16(%rsi,%rdi,1)
	subq	$2,%rcx
	leaq	32(%rdi),%rdi
	jnz	L$ctr32_loop

L$ctr32_done:
L$ctr32_abort:
	ret
991
992
993
994
995
996
997
998
999
//
// _vpaes_preheat
//
// Load the constants shared by the encrypt/decrypt cores into registers
// (addressed relative to L$k_s0F):
//   %xmm9  = L$k_s0F (0x0F nibble mask)     %xmm10 = L$k_inv
//   %xmm11 = L$k_inv+16                     %xmm13 = L$k_sb1 (sb1u)
//   %xmm12 = L$k_sb1+16 (sb1t)              %xmm15 = L$k_sb2 (sb2u)
//   %xmm14 = L$k_sb2+16 (sb2t)
// Clobbers %r10 only.
//
.p2align	4
_vpaes_preheat:

	leaq	L$k_s0F(%rip),%r10
	movdqa	-32(%r10),%xmm10	// L$k_inv
	movdqa	-16(%r10),%xmm11	// L$k_inv+16
	movdqa	0(%r10),%xmm9		// L$k_s0F nibble mask
	movdqa	48(%r10),%xmm13		// L$k_sb1 (sb1u)
	movdqa	64(%r10),%xmm12		// L$k_sb1+16 (sb1t)
	movdqa	80(%r10),%xmm15		// L$k_sb2 (sb2u)
	movdqa	96(%r10),%xmm14		// L$k_sb2+16 (sb2t)
	ret
1012
1013
1014
1015
1016
1017
1018
1019
// Constant tables for the vector-permute AES implementation. All tables
// are 16-byte pshufb lookup vectors (or pairs: low-nibble table followed
// by high-nibble table). Layout order matters: _vpaes_preheat and the
// cores address several tables by fixed offset from L$k_s0F /
// L$k_mc_backward / L$k_dsbd.
.section	__DATA,__const
.p2align	6
_vpaes_consts:
// inv, inva: GF(2^4) inversion tables for the sbox decomposition.
L$k_inv:
.quad	0x0E05060F0D080180, 0x040703090A0B0C02
.quad	0x01040A060F0B0780, 0x030D0E0C02050809

// 0x0F byte mask used to split bytes into nibbles.
L$k_s0F:
.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

// Input transform (maps AES basis into the vpaes working basis).
L$k_ipt:
.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

// sb1/sb2: sbox output tables for the middle encryption rounds.
L$k_sb1:
.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
L$k_sb2:
.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
// sbo: sbox output table for the final encryption round.
L$k_sbo:
.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

// MixColumns byte-rotation permutations, 4 rows each (indexed by round mod 4).
L$k_mc_forward:
.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
.quad	0x080B0A0904070605, 0x000302010C0F0E0D
.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
.quad	0x000302010C0F0E0D, 0x080B0A0904070605

L$k_mc_backward:
.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
.quad	0x020100030E0D0C0F, 0x0A09080B06050407
.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
.quad	0x0A09080B06050407, 0x020100030E0D0C0F

// ShiftRows permutations, 4 rows (first row is the identity).
L$k_sr:
.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad	0x030E09040F0A0500, 0x0B06010C07020D08
.quad	0x0F060D040B020900, 0x070E050C030A0108
.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

// Round-constant material for the key schedule.
L$k_rcon:
.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

// 0x5B = transformed-basis form of the sbox 0x63 bias.
L$k_s63:
.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

// Output transform (working basis back to AES basis, encrypt schedule).
L$k_opt:
.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

// Deskew transform for the last decrypt-schedule key.
L$k_deskew:
.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

// Decrypt key-schedule mangling tables (used by _vpaes_schedule_mangle):
// dksd, dksb, dkse, dks9 — addressed as 0/32/64/96(%r11) from L$k_dksd.
L$k_dksd:
.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
L$k_dksb:
.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
L$k_dkse:
.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
L$k_dks9:
.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE

// Decryption tables: input transform, then the per-round inverse sbox
// tables (dsb9/dsbd/dsbb/dsbe) and final-round table (dsbo), addressed
// by fixed offsets from L$k_dsbd in _vpaes_decrypt_core.
L$k_dipt:
.quad	0x0F505B040B545F00, 0x154A411E114E451A
.quad	0x86E383E660056500, 0x12771772F491F194

L$k_dsb9:
.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
L$k_dsbd:
.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
L$k_dsbb:
.quad	0xD022649296B44200, 0x602646F6B0F2D404
.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
L$k_dsbe:
.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
L$k_dsbo:
.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C

// CTR-mode helpers: byte-reversal of the counter word, and +1/+2
// little-endian counter increments.
L$rev_ctr:
.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908

L$ctr_add_one:
.quad	0x0000000000000000, 0x0000000100000000
L$ctr_add_two:
.quad	0x0000000000000000, 0x0000000200000000

// ASCII credit string: "Vector Permutation AES for x86_64/SSSE3, Mike
// Hamburg (Stanford University)", NUL-terminated.
.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
1128.p2align	6
1129
1130.text
1131#endif
1132