Lines Matching +full:11 +full:-7
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar instructions
11 # - 26-bit limbs
12 # - handles multiple 64-byte blocks
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
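
The header above describes the 4-block "sum of products" schedule: with r^4, r^3, r^2 and r precomputed, four message blocks are absorbed per step. A minimal Python sketch of that math (not part of the kernel source; the function name is made up):

    P = (1 << 130) - 5   # the Poly1305 prime

    def poly1305_4way(h, blocks, r):
        # blocks: 16-byte message blocks, len(blocks) % 4 == 0
        r2, r3, r4 = r * r % P, pow(r, 3, P), pow(r, 4, P)
        for i in range(0, len(blocks), 4):
            n = [int.from_bytes(b, "little") | (1 << 128) for b in blocks[i:i + 4]]
            # one step absorbs four blocks:
            #   h = ((h + n1)*r^4 + n2*r^3 + n3*r^2 + n4*r) mod p
            h = ((h + n[0]) * r4 + n[1] * r3 + n[2] * r2 + n[3] * r) % P
        return h

This is exactly the serial Horner recurrence h = (h + n) * r mod p, unrolled four blocks at a time.
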
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
222 vmulouw 11, 6, 2
223 vmulouw 12, 7, 1
227 vaddudm 14, 14, 11
229 vmulouw 11, 6, 3
233 vaddudm 15, 15, 11
234 vmulouw 12, 7, 2
240 vmulouw 11, 6, 26
242 vaddudm 16, 16, 11
243 vmulouw 12, 7, 3
249 vmulouw 11, 6, 27
251 vaddudm 17, 17, 11
252 vmulouw 12, 7, 26
258 vmulouw 11, 6, 28
260 vaddudm 18, 18, 11
261 vmulouw 12, 7, 27
270 vmuleuw 11, 6, 2
271 vmuleuw 12, 7, 1
275 vaddudm 14, 14, 11
281 vmuleuw 11, 6, 3
282 vmuleuw 12, 7, 2
286 vaddudm 15, 15, 11
292 vmuleuw 11, 6, 26
293 vmuleuw 12, 7, 3
297 vaddudm 16, 16, 11
303 vmuleuw 11, 6, 27
304 vmuleuw 12, 7, 26
308 vaddudm 17, 17, 11
314 vmuleuw 11, 6, 28
315 vmuleuw 12, 7, 27
319 vaddudm 18, 18, 11
361 vmr 7, 29
377 vsld 11, 29, 13
381 vaddudm 2, 11, 29
388 vmrgow 29, 29, 7
393 vsld 11, 29, 13
397 vaddudm 2, 11, 29
408 xxlor 7, 34, 34
429 vsld 11, 29, 13
433 vaddudm 2, 11, 29
444 vsrd 11, 17, 31
445 vand 7, 17, 25
447 vaddudm 18, 18, 11
451 vsrd 11, 15, 31
456 vaddudm 6, 16, 11
462 vaddudm 7, 7, 13
464 vsrd 11, 7, 31
465 vand 7, 7, 25
468 vaddudm 8, 8, 11
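
The vsrd/vand/vaddudm matches above are pieces of the carry-propagation chain over the 26-bit limbs; the carry out of the top limb re-enters limb 0 multiplied by 5, because 2^130 ≡ 5 (mod p). A sketch of one such round, assuming limbs a[0..4] in radix 2^26:

    MASK26 = (1 << 26) - 1

    def carry_round(a):
        # one pass of lazy carry propagation over five 2^26 limbs
        for i in range(4):
            a[i + 1] += a[i] >> 26
            a[i] &= MASK26
        a[0] += (a[4] >> 26) * 5   # 2^130 = 5 (mod p)
        a[4] &= MASK26
        return a

Limb 0 may still be slightly above 26 bits after one round; later rounds absorb the excess.
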
479 ld 11, 0(10)
486 lvx 25, 0, 10 # v25 - mask
496 and. 9, 9, 11
568 vperm 14, 11, 12, 17
569 vperm 15, 11, 12, 18
572 vsrd 11, 10, 31 # 12 bits left
577 vor 11, 11, 12
578 vand 11, 11, 25 # a2
586 vaddudm 22, 6, 11
587 vaddudm 23, 7, 12
595 vperm 14, 11, 12, 17
596 vperm 15, 11, 12, 18
599 vsrd 11, 10, 31 # 12 bits left
605 vor 11, 11, 12
606 vand 11, 11, 25 # a2
614 vmrgow 6, 11, 22
615 vmrgow 7, 12, 23
619 addi 5, 5, -64 # len -= 64
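
The vperm/vsrd/vor/vand sequence above (note the "12 bits left" and "a2" comments) splits each 128-bit block, plus the 2^128 pad bit, into five 26-bit limbs; limb a2 straddles the two 64-bit halves, taking 12 bits from the low half and 14 from the high half. A Python sketch of the split (to_limbs is a hypothetical helper; d0/d1 are the little-endian 64-bit halves):

    MASK26 = (1 << 26) - 1

    def to_limbs(d0, d1):
        a0 = d0 & MASK26
        a1 = (d0 >> 26) & MASK26
        a2 = ((d0 >> 52) | (d1 << 12)) & MASK26  # 12 bits left in d0
        a3 = (d1 >> 14) & MASK26
        a4 = (d1 >> 40) | (1 << 24)              # pad bit 2^128 -> bit 24 of a4
        return a0, a1, a2, a3, a4
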
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
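
The comment block above (lines 633-636) describes two accumulator lanes that each advance by r^2 per block pair, with the very last step of the second lane using r^1 so that the lane sum equals the serial result. A small Python check of that identity (sketch only, not kernel code):

    P = (1 << 130) - 5

    def serial(h, ms, r):
        for m in ms:
            h = (h + m) * r % P
        return h

    def two_lane(h, ms, r):
        # even-indexed blocks in lane a, odd-indexed in lane b,
        # both lanes stepped by r^2, last step of lane b by r^1
        r2 = r * r % P
        a, b = h, 0
        n = len(ms) // 2
        for i in range(n):
            a = (a + ms[2 * i]) * r2 % P
            b = (b + ms[2 * i + 1]) * (r if i == n - 1 else r2) % P
        return (a + b) % P

    assert serial(0, [1, 2, 3, 4], 12345) == two_lane(0, [1, 2, 3, 4], 12345)
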
646 vsrd 11, 17, 31
647 vand 7, 17, 25
649 vaddudm 18, 18, 11
653 vsrd 11, 15, 31
658 vaddudm 6, 16, 11
664 vaddudm 7, 7, 13
666 vsrd 11, 7, 31
667 vand 7, 7, 25
670 vaddudm 8, 8, 11
679 vperm 14, 11, 12, 17
680 vperm 15, 11, 12, 18
685 vperm 17, 11, 12, 17
686 vperm 18, 11, 12, 18
693 vsrd 11, 10, 31 # 12 bits left
705 vor 11, 11, 12
706 vand 11, 11, 25 # a2
718 vaddudm 7, 7, 23
724 vmrgow 6, 11, 6
725 vmrgow 7, 12, 7
729 addi 5, 5, -64 # len -= 64
742 xxlor 34, 7, 7
757 vaddudm 6, 16, 11
760 vaddudm 7, 17, 12
769 vsrd 11, 7, 31
770 vand 7, 7, 25
772 vaddudm 8, 8, 11
776 vsrd 11, 5, 31
781 vaddudm 6, 6, 11
787 vaddudm 7, 7, 13
789 vsrd 11, 7, 31
790 vand 7, 7, 25
796 vaddudm 8, 8, 11
805 vspltisb 11, 12
806 vsrd 12, 6, 11
810 vspltisb 11, 14
811 vsld 7, 7, 11
812 vor 21, 7, 12
814 vsld 8, 8, 11
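
The vspltisb/vsrd/vsld/vor sequence above repacks the five 26-bit limbs into 64-bit words for the scalar path; the shift counts 12 and 14 appear because limb h2 (weight 2^52) straddles the 64-bit boundary. Equivalent Python (sketch):

    def pack(a0, a1, a2, a3, a4):
        # limbs have weights 2^0, 2^26, 2^52, 2^78, 2^104
        n = a0 | (a1 << 26) | (a2 << 52) | (a3 << 78) | (a4 << 104)
        return n & (2**64 - 1), n >> 64   # low and high parts of h
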
846 ld 11, 0(10)
853 and. 9, 9, 11 # clamp mask r0
857 add 19, 21, 10 # s1: r19 = r1 + (r1 >> 2) = (r1 >> 2) * 5
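
The ld/and./add above apply the r clamp and precompute s1. Because r1's low two bits are clamped to zero, (r1 >> 2) * 5 can be computed as r1 + (r1 >> 2); multiplying by 5 is how a term of weight 2^130 folds back into the low end (2^130 ≡ 5 mod p). A sketch with 64-bit limbs (key16 is a hypothetical 16-byte key half):

    CLAMP = 0x0ffffffc0ffffffc0ffffffc0fffffff

    def setup_r(key16):
        r = int.from_bytes(key16, "little") & CLAMP
        r0, r1 = r & (2**64 - 1), r >> 64
        s1 = r1 + (r1 >> 2)   # == (r1 >> 2) * 5, as r1 % 4 == 0
        return r0, r1, s1
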
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
882 vmsumudm 10, 8, 2, 11 # d1 += h2 * s1
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
897 mfvsrld 27, 32+7
899 mfvsrld 29, 32+11
900 mfvsrd 20, 32+7 # h0.h
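
Per the comments on the vmsumudm lines above, each 130-bit multiply reduces to three dot products over the 64-bit limbs h = h0 + h1*2^64 + h2*2^128, with s1 folding the parts that would overflow 2^130. In Python (sketch):

    def mul_r(h0, h1, h2, r0, r1, s1):
        d0 = h0 * r0 + h1 * s1          # h1*r1 overflows 2^130, folded via s1
        d1 = h0 * r1 + h1 * r0 + h2 * s1
        d2 = h2 * r0                    # h2 is only a few bits wide
        return d0, d1, d2               # then carry-propagate back into h0..h2
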
923 # - no highbit when processing the final leftover block (highbit = 0)
931 stdu 1,-400(1)
957 add 11, 25, 4
975 ld 20, 0(11)
976 ld 21, 8(11)
977 addi 11, 11, 16
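
The comment at line 923 above covers the pad bit: every full 16-byte block gets 2^128 appended, while the padded final leftover block is absorbed with highbit = 0. The scalar per-block loop therefore amounts to the following Python sketch (hypothetical function name):

    P = (1 << 130) - 5

    def blocks(h, msg, r, highbit=1):
        # highbit = 0 only when absorbing the padded final leftover block
        for off in range(0, len(msg), 16):
            n = int.from_bytes(msg[off:off + 16], "little") | (highbit << 128)
            h = (h + n) * r % P
        return h
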
1035 ld 11, 8(3)
1039 # h + 5 + (-p); carry out of bit 130 means h >= p
1041 mr 7, 11
1044 addze 7, 7
1050 mr 11, 7
1055 ld 7, 8(4)
1057 adde 11, 11, 7
1061 std 11, 8(5)
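
The epilogue above implements the final reduction noted at line 1039: adding 5 to h computes h + 5 + (-p) up to the 2^130 borrow, so a carry out of bit 130 means h >= p and the reduced value is kept; the second key half s is then added modulo 2^128. A sketch, assuming h < 2^130 on entry:

    P = (1 << 130) - 5

    def finalize(h, s):
        t = h + 5                        # h + 5 + (-p), up to the 2^130 bit
        if t >> 130:                     # carry out => h >= p
            h = t & ((1 << 130) - 1)     # select h - p
        return (h + s) & ((1 << 128) - 1)
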