1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <ring-core/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
7.text
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// _vpaes_encrypt_core -- file-local single-block core of the vector-permute
// AES encryption.
//
// In:   %xmm0        = 16-byte input block
//       %rdx         = key schedule: 16-byte round keys starting at offset 0,
//                      32-bit loop count read from offset 240
//       %xmm9-%xmm15 = lookup constants as loaded by _vpaes_preheat
// Out:  %xmm0        = 16-byte result block
// Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
//
// Every ".byte 102,15,56,0,M" line is a hand-encoded SSSE3 pshufb
// (66 0F 38 00 /r); the decoded AT&T form is given on the comment line above
// it. pshufb is used as a 16-entry byte table lookup: the destination holds
// the table and the source holds the per-byte indices.
24.type	_vpaes_encrypt_core,@function
25.align	16
26_vpaes_encrypt_core:
27.cfi_startproc
// r9 = running round-key pointer; r11 = byte offset (0x00..0x30) used to pick
// a 16-byte row of the permutation tables addressed relative to r10.
28	movq	%rdx,%r9
29	movq	$16,%r11
// eax = loop count stored at offset 240 of the key schedule (zero-extended).
30	movl	240(%rdx),%eax
// Split every input byte into nibbles: xmm1 = high nibbles (shifted down),
// xmm0 = low nibbles. xmm9 is the 0x0F byte mask.
31	movdqa	%xmm9,%xmm1
32	movdqa	.Lk_ipt(%rip),%xmm2
33	pandn	%xmm0,%xmm1
// xmm5 = first round key (unaligned load: the schedule has no alignment
// guarantee visible here).
34	movdqu	(%r9),%xmm5
35	psrld	$4,%xmm1
36	pand	%xmm9,%xmm0
// pshufb %xmm0,%xmm2 : xmm2 = .Lk_ipt[low nibble]
37.byte	102,15,56,0,208
38	movdqa	.Lk_ipt+16(%rip),%xmm0
// pshufb %xmm1,%xmm0 : xmm0 = (.Lk_ipt+16)[high nibble]
39.byte	102,15,56,0,193
// Fold in the first round key and combine the two table results.
40	pxor	%xmm5,%xmm2
// Advance to the next round key. This addq is also the last flag-writing
// instruction before the jnz at the bottom of .Lenc_entry on the first pass.
41	addq	$16,%r9
42	pxor	%xmm2,%xmm0
// r10 = base for the r11-indexed tables: .Lk_mc_forward is at -64,
// .Lk_sbo at -96/-80 and .Lk_sr at +64 relative to .Lk_mc_backward.
43	leaq	.Lk_mc_backward(%rip),%r10
44	jmp	.Lenc_entry
45
46.align	16
47.Lenc_loop:
48
// Middle round. On entry xmm2/xmm3 hold the lookup indices produced by
// .Lenc_entry and xmm5 holds the current round key.
// xmm13/xmm12 = .Lk_sb1 / .Lk_sb1+16, xmm15/xmm14 = .Lk_sb2 / .Lk_sb2+16.
49	movdqa	%xmm13,%xmm4
50	movdqa	%xmm12,%xmm0
// pshufb %xmm2,%xmm4
51.byte	102,15,56,0,226
// pshufb %xmm3,%xmm0
52.byte	102,15,56,0,195
// xmm4 ^= round key
53	pxor	%xmm5,%xmm4
54	movdqa	%xmm15,%xmm5
55	pxor	%xmm4,%xmm0
// xmm1 = row of .Lk_mc_forward selected by r11
56	movdqa	-64(%r11,%r10,1),%xmm1
// pshufb %xmm2,%xmm5
57.byte	102,15,56,0,234
// xmm4 = row of .Lk_mc_backward selected by r11
58	movdqa	(%r11,%r10,1),%xmm4
59	movdqa	%xmm14,%xmm2
// pshufb %xmm3,%xmm2
60.byte	102,15,56,0,211
61	movdqa	%xmm0,%xmm3
62	pxor	%xmm5,%xmm2
// pshufb %xmm1,%xmm0 : permute by the .Lk_mc_forward row
63.byte	102,15,56,0,193
// Step to the next round key.
64	addq	$16,%r9
65	pxor	%xmm2,%xmm0
// pshufb %xmm4,%xmm3 : permute by the .Lk_mc_backward row
66.byte	102,15,56,0,220
// Advance the table row; the andq below wraps it to 0x00..0x30.
67	addq	$16,%r11
68	pxor	%xmm0,%xmm3
// pshufb %xmm1,%xmm0
69.byte	102,15,56,0,193
70	andq	$0x30,%r11
// Decrement the loop count. The flags set here are consumed by the jnz at
// the end of .Lenc_entry; no instruction in between writes flags.
71	subq	$1,%rax
72	pxor	%xmm3,%xmm0
73
74.Lenc_entry:
75
// Top of every round: split the state into nibbles again and run the
// .Lk_inv lookups (xmm10 = .Lk_inv, xmm11 = .Lk_inv+16). The results left in
// xmm2 and xmm3 are the indices consumed by the next round's table lookups.
76	movdqa	%xmm9,%xmm1
77	movdqa	%xmm11,%xmm5
78	pandn	%xmm0,%xmm1
79	psrld	$4,%xmm1
80	pand	%xmm9,%xmm0
// pshufb %xmm0,%xmm5
81.byte	102,15,56,0,232
82	movdqa	%xmm10,%xmm3
// xmm0 = low nibble ^ high nibble
83	pxor	%xmm1,%xmm0
// pshufb %xmm1,%xmm3
84.byte	102,15,56,0,217
85	movdqa	%xmm10,%xmm4
86	pxor	%xmm5,%xmm3
// pshufb %xmm0,%xmm4
87.byte	102,15,56,0,224
88	movdqa	%xmm10,%xmm2
89	pxor	%xmm5,%xmm4
// pshufb %xmm3,%xmm2
90.byte	102,15,56,0,211
91	movdqa	%xmm10,%xmm3
92	pxor	%xmm0,%xmm2
// pshufb %xmm4,%xmm3
93.byte	102,15,56,0,220
// xmm5 = round key for the round about to run.
94	movdqu	(%r9),%xmm5
95	pxor	%xmm1,%xmm3
// Loop while the count is non-zero (flags from subq, or from the addq in the
// prologue on the first pass).
96	jnz	.Lenc_loop
97
98
// Final round: output lookups from .Lk_sbo (-96(%r10)) and .Lk_sbo+16
// (-80(%r10)), XOR with the last round key, then a final byte permutation by
// the r11-selected row of .Lk_sr (64 bytes past .Lk_mc_backward).
99	movdqa	-96(%r10),%xmm4
100	movdqa	-80(%r10),%xmm0
// pshufb %xmm2,%xmm4
101.byte	102,15,56,0,226
102	pxor	%xmm5,%xmm4
// pshufb %xmm3,%xmm0
103.byte	102,15,56,0,195
104	movdqa	64(%r11,%r10,1),%xmm1
105	pxor	%xmm4,%xmm0
// pshufb %xmm1,%xmm0
106.byte	102,15,56,0,193
107	ret
108.cfi_endproc
109.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// _vpaes_encrypt_core_2x -- file-local two-block variant of
// _vpaes_encrypt_core. The same instruction sequence is applied to two
// independent blocks, interleaved one instruction per lane.
//
// In:   %xmm0        = first input block
//       %xmm6        = second input block
//       %rdx         = key schedule (round keys at offset 0, 32-bit loop count
//                      at offset 240)
//       %xmm9, %xmm10 = 0x0F mask and .Lk_inv, as loaded by _vpaes_preheat
// Out:  %xmm0        = first result block
//       %xmm6        = second result block
// Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, %xmm7, %xmm8,
//           %xmm11-%xmm13, flags
//
// Lane mapping (first lane -> second lane):
//   xmm0->xmm6, xmm1->xmm7, xmm2->xmm8, xmm3->xmm11, xmm4->xmm12, xmm5->xmm13
// Because xmm11-xmm13 serve as second-lane temporaries, the tables that
// _vpaes_preheat keeps in xmm11-xmm15 are not used here; .Lk_inv+16, .Lk_sb1
// and .Lk_sb2 are reloaded from memory instead.
//
// ".byte 102,[REX,]15,56,0,M" lines are hand-encoded pshufb instructions; the
// decoded AT&T form is on the comment line above each one.
140.type	_vpaes_encrypt_core_2x,@function
141.align	16
142_vpaes_encrypt_core_2x:
143.cfi_startproc
// r9 = running round-key pointer, r11 = table row offset, eax = loop count.
144	movq	%rdx,%r9
145	movq	$16,%r11
146	movl	240(%rdx),%eax
// Nibble split for both lanes (xmm1/xmm7 = high nibbles, xmm0/xmm6 = low).
147	movdqa	%xmm9,%xmm1
148	movdqa	%xmm9,%xmm7
149	movdqa	.Lk_ipt(%rip),%xmm2
150	movdqa	%xmm2,%xmm8
151	pandn	%xmm0,%xmm1
152	pandn	%xmm6,%xmm7
// xmm5 = first round key, shared by both lanes.
153	movdqu	(%r9),%xmm5
154
155	psrld	$4,%xmm1
156	psrld	$4,%xmm7
157	pand	%xmm9,%xmm0
158	pand	%xmm9,%xmm6
// pshufb %xmm0,%xmm2
159.byte	102,15,56,0,208
// pshufb %xmm6,%xmm8
160.byte	102,68,15,56,0,198
161	movdqa	.Lk_ipt+16(%rip),%xmm0
162	movdqa	%xmm0,%xmm6
// pshufb %xmm1,%xmm0
163.byte	102,15,56,0,193
// pshufb %xmm7,%xmm6
164.byte	102,15,56,0,247
165	pxor	%xmm5,%xmm2
166	pxor	%xmm5,%xmm8
// Advance to the next round key; last flag-writing instruction before the
// first jnz at the end of .Lenc2x_entry.
167	addq	$16,%r9
168	pxor	%xmm2,%xmm0
169	pxor	%xmm8,%xmm6
// r10 = base for the r11-indexed permutation tables.
170	leaq	.Lk_mc_backward(%rip),%r10
171	jmp	.Lenc2x_entry
172
173.align	16
174.Lenc2x_loop:
175
// Middle round for both lanes. Indices come from xmm2/xmm3 (lane 1) and
// xmm8/xmm11 (lane 2); xmm5 holds the current round key.
176	movdqa	.Lk_sb1(%rip),%xmm4
177	movdqa	.Lk_sb1+16(%rip),%xmm0
178	movdqa	%xmm4,%xmm12
179	movdqa	%xmm0,%xmm6
// pshufb %xmm2,%xmm4
180.byte	102,15,56,0,226
// pshufb %xmm8,%xmm12
181.byte	102,69,15,56,0,224
// pshufb %xmm3,%xmm0
182.byte	102,15,56,0,195
// pshufb %xmm11,%xmm6
183.byte	102,65,15,56,0,243
184	pxor	%xmm5,%xmm4
185	pxor	%xmm5,%xmm12
186	movdqa	.Lk_sb2(%rip),%xmm5
187	movdqa	%xmm5,%xmm13
188	pxor	%xmm4,%xmm0
189	pxor	%xmm12,%xmm6
// xmm1 = r11-selected row of .Lk_mc_forward (shared by both lanes)
190	movdqa	-64(%r11,%r10,1),%xmm1
191
// pshufb %xmm2,%xmm5
192.byte	102,15,56,0,234
// pshufb %xmm8,%xmm13
193.byte	102,69,15,56,0,232
// xmm4 = r11-selected row of .Lk_mc_backward (shared by both lanes)
194	movdqa	(%r11,%r10,1),%xmm4
195
196	movdqa	.Lk_sb2+16(%rip),%xmm2
197	movdqa	%xmm2,%xmm8
// pshufb %xmm3,%xmm2
198.byte	102,15,56,0,211
// pshufb %xmm11,%xmm8
199.byte	102,69,15,56,0,195
200	movdqa	%xmm0,%xmm3
201	movdqa	%xmm6,%xmm11
202	pxor	%xmm5,%xmm2
203	pxor	%xmm13,%xmm8
// pshufb %xmm1,%xmm0
204.byte	102,15,56,0,193
// pshufb %xmm1,%xmm6
205.byte	102,15,56,0,241
// Step to the next round key.
206	addq	$16,%r9
207	pxor	%xmm2,%xmm0
208	pxor	%xmm8,%xmm6
// pshufb %xmm4,%xmm3
209.byte	102,15,56,0,220
// pshufb %xmm4,%xmm11
210.byte	102,68,15,56,0,220
// Advance the table row; wrapped to 0x00..0x30 by the andq below.
211	addq	$16,%r11
212	pxor	%xmm0,%xmm3
213	pxor	%xmm6,%xmm11
// pshufb %xmm1,%xmm0
214.byte	102,15,56,0,193
// pshufb %xmm1,%xmm6
215.byte	102,15,56,0,241
216	andq	$0x30,%r11
// Decrement the loop count; these flags feed the jnz at the end of
// .Lenc2x_entry (nothing in between writes flags).
217	subq	$1,%rax
218	pxor	%xmm3,%xmm0
219	pxor	%xmm11,%xmm6
220
221.Lenc2x_entry:
222
// Top of every round for both lanes: nibble split followed by the .Lk_inv
// lookups. Leaves next-round indices in xmm2/xmm3 and xmm8/xmm11.
223	movdqa	%xmm9,%xmm1
224	movdqa	%xmm9,%xmm7
// .Lk_inv+16 is reloaded from memory because xmm11 is a lane-2 temporary.
225	movdqa	.Lk_inv+16(%rip),%xmm5
226	movdqa	%xmm5,%xmm13
227	pandn	%xmm0,%xmm1
228	pandn	%xmm6,%xmm7
229	psrld	$4,%xmm1
230	psrld	$4,%xmm7
231	pand	%xmm9,%xmm0
232	pand	%xmm9,%xmm6
// pshufb %xmm0,%xmm5
233.byte	102,15,56,0,232
// pshufb %xmm6,%xmm13
234.byte	102,68,15,56,0,238
235	movdqa	%xmm10,%xmm3
236	movdqa	%xmm10,%xmm11
237	pxor	%xmm1,%xmm0
238	pxor	%xmm7,%xmm6
// pshufb %xmm1,%xmm3
239.byte	102,15,56,0,217
// pshufb %xmm7,%xmm11
240.byte	102,68,15,56,0,223
241	movdqa	%xmm10,%xmm4
242	movdqa	%xmm10,%xmm12
243	pxor	%xmm5,%xmm3
244	pxor	%xmm13,%xmm11
// pshufb %xmm0,%xmm4
245.byte	102,15,56,0,224
// pshufb %xmm6,%xmm12
246.byte	102,68,15,56,0,230
247	movdqa	%xmm10,%xmm2
248	movdqa	%xmm10,%xmm8
249	pxor	%xmm5,%xmm4
250	pxor	%xmm13,%xmm12
// pshufb %xmm3,%xmm2
251.byte	102,15,56,0,211
// pshufb %xmm11,%xmm8
252.byte	102,69,15,56,0,195
253	movdqa	%xmm10,%xmm3
254	movdqa	%xmm10,%xmm11
255	pxor	%xmm0,%xmm2
256	pxor	%xmm6,%xmm8
// pshufb %xmm4,%xmm3
257.byte	102,15,56,0,220
// pshufb %xmm12,%xmm11
258.byte	102,69,15,56,0,220
// xmm5 = round key for the round about to run (shared by both lanes).
259	movdqu	(%r9),%xmm5
260
261	pxor	%xmm1,%xmm3
262	pxor	%xmm7,%xmm11
263	jnz	.Lenc2x_loop
264
265
// Final round for both lanes: .Lk_sbo / .Lk_sbo+16 lookups, XOR with the
// last round key, then permutation by the r11-selected row of .Lk_sr.
266	movdqa	-96(%r10),%xmm4
267	movdqa	-80(%r10),%xmm0
268	movdqa	%xmm4,%xmm12
269	movdqa	%xmm0,%xmm6
// pshufb %xmm2,%xmm4
270.byte	102,15,56,0,226
// pshufb %xmm8,%xmm12
271.byte	102,69,15,56,0,224
272	pxor	%xmm5,%xmm4
273	pxor	%xmm5,%xmm12
// pshufb %xmm3,%xmm0
274.byte	102,15,56,0,195
// pshufb %xmm11,%xmm6
275.byte	102,65,15,56,0,243
276	movdqa	64(%r11,%r10,1),%xmm1
277
278	pxor	%xmm4,%xmm0
279	pxor	%xmm12,%xmm6
// pshufb %xmm1,%xmm0
280.byte	102,15,56,0,193
// pshufb %xmm1,%xmm6
281.byte	102,15,56,0,241
282	ret
283.cfi_endproc
284.size	_vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x
285
286
287
288
289
290
// _vpaes_schedule_core -- file-local key-expansion driver.
//
// In:   %rdi = user key bytes (16 bytes read; 32 bytes on the 256 path)
//       %esi = key size in bits (only compared against 192)
//       %rdx = output key schedule; round keys are written 16 bytes apart
//       %r8  = byte offset (0x00..0x30) selecting a row of .Lk_sr
// Out:  round keys stored through %rdx; %xmm0-%xmm7 zeroed before return
// Clobbers: %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15, flags
//
// Register roles while running:
//   %xmm7 = previous round key state (updated by _vpaes_schedule_round)
//   %xmm8 = rotating copy of .Lk_rcon
//   %r10  = .Lk_sr base (indexed by %r8 in _vpaes_schedule_mangle)
//   %r11  = table pointer for _vpaes_schedule_transform
291.type	_vpaes_schedule_core,@function
292.align	16
293_vpaes_schedule_core:
294.cfi_startproc
295
296
297
298
299
// Load the lookup constants into xmm9-xmm15, then the first 16 key bytes.
300	call	_vpaes_preheat
301	movdqa	.Lk_rcon(%rip),%xmm8
302	movdqu	(%rdi),%xmm0
303
304
// NOTE(review): this copy in xmm3 is overwritten by the helpers below before
// anything in this file reads it; it looks like a leftover -- confirm against
// the generator before removing.
305	movdqa	%xmm0,%xmm3
// Transform the raw key with the .Lk_ipt tables; keep the result in xmm7.
306	leaq	.Lk_ipt(%rip),%r11
307	call	_vpaes_schedule_transform
308	movdqa	%xmm0,%xmm7
309
310	leaq	.Lk_sr(%rip),%r10
311
312
// Round key 0 is the transformed key itself.
313	movdqu	%xmm0,(%rdx)
314
315.Lschedule_go:
// Anything above 192 bits takes the 256-bit path; every other value
// (including 192 itself) falls through to the 128-bit path.
316	cmpl	$192,%esi
317	ja	.Lschedule_256
318
319
320
321
322
323
324
325
326
327
328
329.Lschedule_128:
// esi becomes the round counter: 10 rounds, the last one finished in
// .Lschedule_mangle_last. movl zero-extends, so decq on rsi is safe.
330	movl	$10,%esi
331
332.Loop_schedule_128:
333	call	_vpaes_schedule_round
334	decq	%rsi
335	jz	.Lschedule_mangle_last
// Store this round key (the helper advances rdx by 16 first).
336	call	_vpaes_schedule_mangle
337	jmp	.Loop_schedule_128
338
339
340
341
342
343
344
345
346
347
348
349.align	16
350.Lschedule_256:
// Load and transform the second 16 key bytes (r11 still points at .Lk_ipt).
351	movdqu	16(%rdi),%xmm0
352	call	_vpaes_schedule_transform
// esi becomes the counter for the two-step loop below (7 passes).
353	movl	$7,%esi
354
355.Loop_schedule_256:
// Store the current xmm0 as a round key and keep a copy in xmm6.
356	call	_vpaes_schedule_mangle
357	movdqa	%xmm0,%xmm6
358
359
// Full round (with round constant and rotation).
360	call	_vpaes_schedule_round
361	decq	%rsi
362	jz	.Lschedule_mangle_last
363	call	_vpaes_schedule_mangle
364
365
// Second step: broadcast the top dword of xmm0, then run the low round with
// xmm7 temporarily replaced by the saved xmm6; the real xmm7 is parked in
// xmm5 and restored afterwards.
366	pshufd	$0xFF,%xmm0,%xmm0
367	movdqa	%xmm7,%xmm5
368	movdqa	%xmm6,%xmm7
369	call	_vpaes_schedule_low_round
370	movdqa	%xmm5,%xmm7
371
372	jmp	.Loop_schedule_256
373
374
375
376
377
378
379
380
381
382
383
384
385.align	16
386.Lschedule_mangle_last:
387
// NOTE(review): r11 is overwritten with .Lk_opt below before it is read, so
// this load has no effect in the code visible here.
388	leaq	.Lk_deskew(%rip),%r11
389
390
// Permute the last key by the r8-selected row of .Lk_sr.
391	movdqa	(%r8,%r10,1),%xmm1
// pshufb %xmm1,%xmm0
392.byte	102,15,56,0,193
393	leaq	.Lk_opt(%rip),%r11
// +32 here and -16 just below: net advance of one 16-byte slot.
394	addq	$32,%rdx
395
// No branch to this label is visible in this file; it is reached by
// fall-through only.
396.Lschedule_mangle_last_dec:
397	addq	$-16,%rdx
// XOR with .Lk_s63, apply the .Lk_opt transform and store the final round key.
398	pxor	.Lk_s63(%rip),%xmm0
399	call	_vpaes_schedule_transform
400	movdqu	%xmm0,(%rdx)
401
402
// Zero xmm0-xmm7 before returning, presumably so no key-derived values are
// left behind in registers.
403	pxor	%xmm0,%xmm0
404	pxor	%xmm1,%xmm1
405	pxor	%xmm2,%xmm2
406	pxor	%xmm3,%xmm3
407	pxor	%xmm4,%xmm4
408	pxor	%xmm5,%xmm5
409	pxor	%xmm6,%xmm6
410	pxor	%xmm7,%xmm7
411	ret
412.cfi_endproc
413.size	_vpaes_schedule_core,.-_vpaes_schedule_core
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
// _vpaes_schedule_round -- one key-schedule round, with a second entry point
// _vpaes_schedule_low_round that skips the round-constant and rotation steps.
//
// In:   %xmm0 = input word block for this round
//       %xmm7 = previous round key state
//       %xmm8 = rotating round-constant register (full round only)
//       %xmm9-%xmm13 = constants as loaded by _vpaes_preheat
// Out:  %xmm0 = %xmm7 = new round key state
//       %xmm8 rotated by one byte (full round only)
// Clobbers: %xmm1-%xmm4
//
// ".byte 102,[REX,]15,58,15,M,I" lines are hand-encoded palignr
// (66 0F 3A 0F /r ib); ".byte 102,15,56,0,M" lines are pshufb.
433.type	_vpaes_schedule_round,@function
434.align	16
435_vpaes_schedule_round:
436.cfi_startproc
437
// Extract the top byte of xmm8 into byte 0 of an otherwise-zero xmm1, rotate
// xmm8 by one byte so the next call sees the next byte, and XOR the
// extracted byte into xmm7.
438	pxor	%xmm1,%xmm1
// palignr $15,%xmm8,%xmm1
439.byte	102,65,15,58,15,200,15
// palignr $15,%xmm8,%xmm8
440.byte	102,69,15,58,15,192,15
441	pxor	%xmm1,%xmm7
442
443
// Broadcast the top dword of xmm0 to all four lanes, then rotate the whole
// register right by one byte.
444	pshufd	$0xFF,%xmm0,%xmm0
// palignr $1,%xmm0,%xmm0
445.byte	102,15,58,15,192,1
446
447
448
449
// Second entry point: called directly by the 256-bit schedule path.
450_vpaes_schedule_low_round:
451
// Smear xmm7: xmm7 ^= xmm7 << 4 bytes, then xmm7 ^= xmm7 << 8 bytes, so each
// dword becomes the XOR of itself and all lower dwords; then XOR in .Lk_s63.
452	movdqa	%xmm7,%xmm1
453	pslldq	$4,%xmm7
454	pxor	%xmm1,%xmm7
455	movdqa	%xmm7,%xmm1
456	pslldq	$8,%xmm7
457	pxor	%xmm1,%xmm7
458	pxor	.Lk_s63(%rip),%xmm7
459
460
// Byte substitution on xmm0: nibble split, .Lk_inv lookups (xmm10/xmm11),
// then output lookups from .Lk_sb1 (xmm13) and .Lk_sb1+16 (xmm12). Same
// lookup sequence as .Lenc_entry in the encrypt core.
461	movdqa	%xmm9,%xmm1
462	pandn	%xmm0,%xmm1
463	psrld	$4,%xmm1
464	pand	%xmm9,%xmm0
465	movdqa	%xmm11,%xmm2
// pshufb %xmm0,%xmm2
466.byte	102,15,56,0,208
467	pxor	%xmm1,%xmm0
468	movdqa	%xmm10,%xmm3
// pshufb %xmm1,%xmm3
469.byte	102,15,56,0,217
470	pxor	%xmm2,%xmm3
471	movdqa	%xmm10,%xmm4
// pshufb %xmm0,%xmm4
472.byte	102,15,56,0,224
473	pxor	%xmm2,%xmm4
474	movdqa	%xmm10,%xmm2
// pshufb %xmm3,%xmm2
475.byte	102,15,56,0,211
476	pxor	%xmm0,%xmm2
477	movdqa	%xmm10,%xmm3
// pshufb %xmm4,%xmm3
478.byte	102,15,56,0,220
479	pxor	%xmm1,%xmm3
480	movdqa	%xmm13,%xmm4
// pshufb %xmm2,%xmm4
481.byte	102,15,56,0,226
482	movdqa	%xmm12,%xmm0
// pshufb %xmm3,%xmm0
483.byte	102,15,56,0,195
484	pxor	%xmm4,%xmm0
485
486
// Combine with the smeared previous key; the result is both the return value
// (xmm0) and the new running state (xmm7).
487	pxor	%xmm7,%xmm0
488	movdqa	%xmm0,%xmm7
489	ret
490.cfi_endproc
491.size	_vpaes_schedule_round,.-_vpaes_schedule_round
492
493
494
495
496
497
498
499
500
501
// _vpaes_schedule_transform -- apply a nibble-table transform to %xmm0.
//
// In:   %xmm0 = input
//       %r11  = pointer to two consecutive 16-byte tables (must be 16-byte
//               aligned: they are read with movdqa)
//       %xmm9 = 0x0F byte mask, as loaded by _vpaes_preheat
// Out:  %xmm0 = table0[low nibble] ^ table1[high nibble], per byte
// Clobbers: %xmm1, %xmm2
502.type	_vpaes_schedule_transform,@function
503.align	16
504_vpaes_schedule_transform:
505.cfi_startproc
// xmm1 = high nibbles shifted down, xmm0 = low nibbles.
506	movdqa	%xmm9,%xmm1
507	pandn	%xmm0,%xmm1
508	psrld	$4,%xmm1
509	pand	%xmm9,%xmm0
510	movdqa	(%r11),%xmm2
// pshufb %xmm0,%xmm2 : xmm2 = table0[low nibble]
511.byte	102,15,56,0,208
512	movdqa	16(%r11),%xmm0
// pshufb %xmm1,%xmm0 : xmm0 = table1[high nibble]
513.byte	102,15,56,0,193
514	pxor	%xmm2,%xmm0
515	ret
516.cfi_endproc
517.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
// _vpaes_schedule_mangle -- convert the round key in %xmm0 to its stored form
// and write it to the next slot of the key schedule.
//
// In:   %xmm0 = round key to store (left unchanged)
//       %rdx  = pointer to the previously written slot
//       %r8   = byte offset selecting a row of .Lk_sr
//       %r10  = .Lk_sr base
// Out:  %rdx advanced by 16, 16 bytes stored at the new (%rdx)
//       %r8 = (%r8 - 16) & 0x30
// Clobbers: %xmm1, %xmm3, %xmm4, %xmm5, flags
542.type	_vpaes_schedule_mangle,@function
543.align	16
544_vpaes_schedule_mangle:
545.cfi_startproc
546	movdqa	%xmm0,%xmm4
// xmm5 = first row of .Lk_mc_forward, used as a byte permutation.
547	movdqa	.Lk_mc_forward(%rip),%xmm5
548
549
// Move to the next output slot.
550	addq	$16,%rdx
551	pxor	.Lk_s63(%rip),%xmm4
// Apply the permutation three times in succession and XOR the three
// intermediate results together into xmm3.
// pshufb %xmm5,%xmm4
552.byte	102,15,56,0,229
553	movdqa	%xmm4,%xmm3
// pshufb %xmm5,%xmm4
554.byte	102,15,56,0,229
555	pxor	%xmm4,%xmm3
// pshufb %xmm5,%xmm4
556.byte	102,15,56,0,229
557	pxor	%xmm4,%xmm3
558
// No branch to this label is visible in this file; reached by fall-through.
559.Lschedule_mangle_both:
// Permute by the r8-selected row of .Lk_sr, then step r8 back one row,
// wrapping within 0x00..0x30.
560	movdqa	(%r8,%r10,1),%xmm1
// pshufb %xmm1,%xmm3
561.byte	102,15,56,0,217
562	addq	$-16,%r8
563	andq	$0x30,%r8
564	movdqu	%xmm3,(%rdx)
565	ret
566.cfi_endproc
567.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
568
569
570
571
// vpaes_set_encrypt_key -- exported (hidden visibility) entry point that
// expands a user key into an encryption key schedule.
//
// In:   %rdi = user key bytes
//       %esi = key size in bits
//       %rdx = output key schedule (loop count is written at offset 240)
// Out:  %eax = 0
// Clobbers: everything _vpaes_schedule_core clobbers, plus %ecx and %r8
572.globl	vpaes_set_encrypt_key
573.hidden vpaes_set_encrypt_key
574.type	vpaes_set_encrypt_key,@function
575.align	16
576vpaes_set_encrypt_key:
577.cfi_startproc
578_CET_ENDBR
// Dispatch-test builds record that this implementation was selected.
579#ifdef BORINGSSL_DISPATCH_TEST
580.extern	BORINGSSL_function_hit
581.hidden BORINGSSL_function_hit
582	movb	$1,BORINGSSL_function_hit+5(%rip)
583#endif
584
// Loop count = bits/32 + 5 (9 for 128-bit keys, 13 for 256-bit keys); this is
// the value the encrypt cores read back from offset 240.
585	movl	%esi,%eax
586	shrl	$5,%eax
587	addl	$5,%eax
588	movl	%eax,240(%rdx)
589
// NOTE(review): ecx is not read by the schedule code in this file; it appears
// to be a leftover argument -- confirm against the generator.
590	movl	$0,%ecx
// Initial .Lk_sr row offset consumed by _vpaes_schedule_mangle.
591	movl	$0x30,%r8d
592	call	_vpaes_schedule_core
// Always report success.
593	xorl	%eax,%eax
594	ret
595.cfi_endproc
596.size	vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
597
// vpaes_encrypt -- exported (hidden visibility) single-block encryption.
//
// In:   %rdi = 16-byte input block (unaligned load)
//       %rsi = 16-byte output block (unaligned store)
//       %rdx = key schedule, passed through to _vpaes_encrypt_core
// Clobbers: everything _vpaes_preheat and _vpaes_encrypt_core clobber
598.globl	vpaes_encrypt
599.hidden vpaes_encrypt
600.type	vpaes_encrypt,@function
601.align	16
602vpaes_encrypt:
603.cfi_startproc
604_CET_ENDBR
// Dispatch-test builds record that this implementation was selected.
605#ifdef BORINGSSL_DISPATCH_TEST
606.extern	BORINGSSL_function_hit
607.hidden BORINGSSL_function_hit
608	movb	$1,BORINGSSL_function_hit+4(%rip)
609#endif
// Load the block first; _vpaes_preheat only touches xmm9-xmm15 and r10.
610	movdqu	(%rdi),%xmm0
611	call	_vpaes_preheat
612	call	_vpaes_encrypt_core
613	movdqu	%xmm0,(%rsi)
614	ret
615.cfi_endproc
616.size	vpaes_encrypt,.-vpaes_encrypt
// vpaes_ctr32_encrypt_blocks -- exported (hidden visibility) CTR-mode
// encryption with a 32-bit counter held in the last four bytes of the
// counter block.
//
// In:   %rdi = input pointer
//       %rsi = output pointer
//       %rdx = number of 16-byte blocks
//       %rcx = key schedule
//       %r8  = 16-byte initial counter block (unaligned load)
// Returns immediately if the block count is zero.
//
// An odd block count is handled by encrypting one block with the
// single-block core first; the remainder is processed two blocks at a time.
617.globl	vpaes_ctr32_encrypt_blocks
618.hidden vpaes_ctr32_encrypt_blocks
619.type	vpaes_ctr32_encrypt_blocks,@function
620.align	16
621vpaes_ctr32_encrypt_blocks:
622.cfi_startproc
623_CET_ENDBR
624
// Swap so that rdx = key schedule (where the cores expect it) and
// rcx = remaining block count.
625	xchgq	%rcx,%rdx
626	testq	%rcx,%rcx
627	jz	.Lctr32_abort
// xmm0 = counter block as given; xmm8 = +1 in the top dword.
628	movdqu	(%r8),%xmm0
629	movdqa	.Lctr_add_one(%rip),%xmm8
// rsi = out - in, so (%rsi,%rdi,1) addresses the output while only rdi is
// advanced.
630	subq	%rdi,%rsi
631	call	_vpaes_preheat
// xmm6 = counter block with its last four bytes reversed, so paddd on the
// top dword increments the counter.
632	movdqa	%xmm0,%xmm6
633	pshufb	.Lrev_ctr(%rip),%xmm6
634
// Even count: go straight to the two-block loop.
635	testq	$1,%rcx
636	jz	.Lctr32_prep_loop
637
638
639
// Odd count: encrypt the unmodified counter block still in xmm0 and XOR it
// with one input block. xmm6-xmm8 survive _vpaes_encrypt_core.
640	movdqu	(%rdi),%xmm7
641	call	_vpaes_encrypt_core
642	pxor	%xmm7,%xmm0
643	paddd	%xmm8,%xmm6
644	movdqu	%xmm0,(%rsi,%rdi,1)
// The jz below tests the flags from this subq; leaq does not modify flags.
645	subq	$1,%rcx
646	leaq	16(%rdi),%rdi
647	jz	.Lctr32_done
648
649.Lctr32_prep_loop:
650
651
// xmm14 = counter n, xmm15 = counter n+1 (both in the byte-reversed form).
// This overwrites the tables _vpaes_preheat put in xmm14/xmm15; the 2x core
// does not read them.
652	movdqa	%xmm6,%xmm14
653	movdqa	%xmm6,%xmm15
654	paddd	%xmm8,%xmm15
655
656.Lctr32_loop:
// Convert both counters back to the original byte order as core inputs.
657	movdqa	.Lrev_ctr(%rip),%xmm1
658	movdqa	%xmm14,%xmm0
659	movdqa	%xmm15,%xmm6
// pshufb %xmm1,%xmm0
660.byte	102,15,56,0,193
// pshufb %xmm1,%xmm6
661.byte	102,15,56,0,241
662	call	_vpaes_encrypt_core_2x
// XOR the two encrypted counter blocks with two input blocks, advance both
// counters by two and store the results.
663	movdqu	(%rdi),%xmm1
664	movdqu	16(%rdi),%xmm2
665	movdqa	.Lctr_add_two(%rip),%xmm3
666	pxor	%xmm1,%xmm0
667	pxor	%xmm2,%xmm6
668	paddd	%xmm3,%xmm14
669	paddd	%xmm3,%xmm15
670	movdqu	%xmm0,(%rsi,%rdi,1)
671	movdqu	%xmm6,16(%rsi,%rdi,1)
// Loop while blocks remain; leaq preserves the flags from subq.
672	subq	$2,%rcx
673	leaq	32(%rdi),%rdi
674	jnz	.Lctr32_loop
675
676.Lctr32_done:
677.Lctr32_abort:
678	ret
679.cfi_endproc
680.size	vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
681
682
683
684
685
686
// _vpaes_preheat -- load the shared lookup constants into xmm9-xmm15.
//
// Out:  %xmm9  = .Lk_s0F   (0x0F in every byte)
//       %xmm10 = .Lk_inv
//       %xmm11 = .Lk_inv+16
//       %xmm12 = .Lk_sb1+16
//       %xmm13 = .Lk_sb1
//       %xmm14 = .Lk_sb2+16
//       %xmm15 = .Lk_sb2
// Clobbers: %r10
//
// All tables are addressed by fixed offsets from .Lk_s0F, so this routine
// depends on the order and sizes of the tables in _vpaes_consts.
687.type	_vpaes_preheat,@function
688.align	16
689_vpaes_preheat:
690.cfi_startproc
691	leaq	.Lk_s0F(%rip),%r10
// -32 / -16: the two rows of .Lk_inv, which sit immediately before .Lk_s0F.
692	movdqa	-32(%r10),%xmm10
693	movdqa	-16(%r10),%xmm11
694	movdqa	0(%r10),%xmm9
// +48 skips .Lk_s0F (16 bytes) and .Lk_ipt (32 bytes) to reach .Lk_sb1.
695	movdqa	48(%r10),%xmm13
696	movdqa	64(%r10),%xmm12
// +80 / +96: the two rows of .Lk_sb2.
697	movdqa	80(%r10),%xmm15
698	movdqa	96(%r10),%xmm14
699	ret
700.cfi_endproc
701.size	_vpaes_preheat,.-_vpaes_preheat
702
703
704
705
706
// _vpaes_consts -- read-only lookup tables shared by every routine in this
// file, 64-byte aligned.
//
// LAYOUT IS LOAD-BEARING: the code reaches several tables through fixed
// offsets from a neighbouring label rather than by name:
//   _vpaes_preheat        : .Lk_inv, .Lk_sb1 and .Lk_sb2 relative to .Lk_s0F
//   _vpaes_encrypt_core*  : .Lk_sbo (-96/-80), .Lk_mc_forward (-64) and
//                           .Lk_sr (+64) relative to .Lk_mc_backward
// Do not reorder, resize or insert entries between .Lk_inv and .Lk_sr.
707.type	_vpaes_consts,@object
708.section	.rodata
709.align	64
710_vpaes_consts:
// Two 16-entry tables, kept in xmm10 / xmm11 by _vpaes_preheat.
711.Lk_inv:
712.quad	0x0E05060F0D080180, 0x040703090A0B0C02
713.quad	0x01040A060F0B0780, 0x030D0E0C02050809
714
// Low-nibble mask (0x0F in every byte), kept in xmm9.
715.Lk_s0F:
716.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
717
// Table pair applied to the input block and to the raw user key.
718.Lk_ipt:
719.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
720.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
721
// Table pairs used by the middle rounds (.Lk_sb1, .Lk_sb2) and by the final
// round (.Lk_sbo).
722.Lk_sb1:
723.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
724.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
725.Lk_sb2:
726.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
727.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
728.Lk_sbo:
729.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
730.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
731
// Four 16-byte pshufb permutations each; the row is selected by an offset of
// 0x00/0x10/0x20/0x30 (r11 in the encrypt cores).
732.Lk_mc_forward:
733.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
734.quad	0x080B0A0904070605, 0x000302010C0F0E0D
735.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
736.quad	0x000302010C0F0E0D, 0x080B0A0904070605
737
738.Lk_mc_backward:
739.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
740.quad	0x020100030E0D0C0F, 0x0A09080B06050407
741.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
742.quad	0x0A09080B06050407, 0x020100030E0D0C0F
743
// Four 16-byte permutations; the first row is the identity. Indexed by r11
// in the encrypt cores and by r8 in the key schedule.
744.Lk_sr:
745.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
746.quad	0x030E09040F0A0500, 0x0B06010C07020D08
747.quad	0x0F060D040B020900, 0x070E050C030A0108
748.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
749
// Loaded into xmm8 by _vpaes_schedule_core; _vpaes_schedule_round consumes
// one byte per call, starting from the top byte.
750.Lk_rcon:
751.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
752
// 0x5B in every byte; XORed in by the key-schedule helpers.
753.Lk_s63:
754.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
755
// Table pair applied to the final round key in .Lschedule_mangle_last.
756.Lk_opt:
757.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
758.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
759
// Its address is loaded in .Lschedule_mangle_last, but the table contents are
// never read by the code in this file.
760.Lk_deskew:
761.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
762.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
763
764
// pshufb mask that reverses bytes 12..15 and leaves bytes 0..11 in place;
// converts the last four bytes of the counter block to and from the form
// that paddd can increment.
765.Lrev_ctr:
766.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908
767
768
// Counter increments: 1 or 2 in the top 32-bit lane, zero elsewhere.
769.Lctr_add_one:
770.quad	0x0000000000000000, 0x0000000100000000
771.Lctr_add_two:
772.quad	0x0000000000000000, 0x0000000200000000
773
// NUL-terminated ASCII identification string:
// "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
774.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
775.align	64
776.size	_vpaes_consts,.-_vpaes_consts
777.text
778#endif
779