Lines Matching +full:3 +full:- +full:31

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar instructions
11 # - 26-bit limbs
12 # - Handles multiple 64-byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
28 # setup r^4, r^3, r^2, r vectors
29 # vs [r^1, r^3, r^2, r^4]
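The comments above describe the 4-way parallel form of Poly1305 over p = 2^130 - 5: four message blocks are absorbed at once by weighting them with descending powers of r. A minimal Python sketch (hypothetical names, not the kernel code) of the identity this vector setup relies on:

import random

p = (1 << 130) - 5

def serial(h, r, blocks):
    # reference definition: h = (h + m) * r mod p, one block at a time
    for m in blocks:
        h = (h + m) * r % p
    return h

def four_way(h, r, m1, m2, m3, m4):
    # the unrolled form the r^4/r^3/r^2/r vectors implement
    return ((h + m1) * pow(r, 4, p) + m2 * pow(r, 3, p)
            + m3 * pow(r, 2, p) + m4 * r) % p

h, r = random.randrange(p), random.randrange(1 << 124)
ms = [random.randrange(1 << 129) for _ in range(4)]
assert serial(h, r, ms) == four_way(h, r, *ms)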
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
114 SAVE_GPR 31, 248, 1
128 SAVE_VRS 31, 176, 9
147 SAVE_VSX 31, 464, 9
163 RESTORE_VRS 31, 176, 9
182 RESTORE_VSX 31, 464, 9
201 RESTORE_GPR 31, 248, 1
212 # p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
215 # [r^2, r^3, r^1, r^4]
221 vmulouw 10, 5, 3
229 vmulouw 11, 6, 3
243 vmulouw 12, 7, 3
253 vmulouw 13, 8, 3
269 vmuleuw 10, 5, 3
281 vmuleuw 11, 6, 3
293 vmuleuw 12, 7, 3
305 vmuleuw 13, 8, 3
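The vmulouw/vmuleuw pairs above are the even/odd 32x32-bit partial products behind the column sums named in the p[3] comment. A hedged Python sketch (hypothetical helper; five 26-bit limbs, value = sum of limb[i] << (26*i)) of those columns: any product whose limb index reaches 5 wraps around with a factor of 5, because 2^130 ≡ 5 (mod p).

def mul_mod_p_limbs(a, r):
    # schoolbook columns; p3 matches the quoted comment exactly
    p0 = a[0]*r[0] + 5*(a[1]*r[4] + a[2]*r[3] + a[3]*r[2] + a[4]*r[1])
    p1 = a[0]*r[1] + a[1]*r[0] + 5*(a[2]*r[4] + a[3]*r[3] + a[4]*r[2])
    p2 = a[0]*r[2] + a[1]*r[1] + a[2]*r[0] + 5*(a[3]*r[4] + a[4]*r[3])
    p3 = a[0]*r[3] + a[1]*r[2] + a[2]*r[1] + a[3]*r[0] + 5*(a[4]*r[4])
    p4 = a[0]*r[4] + a[1]*r[3] + a[2]*r[2] + a[3]*r[1] + a[4]*r[0]
    return [p0, p1, p2, p3, p4]   # wide columns; carries handled afterwards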
327 # setup r^4, r^3, r^2, r vectors
328 # [r, r^3, r^2, r^4]
354 xxlxor 31, 31, 31
356 # [r, r^3, r^2, r^4]
382 vaddudm 3, 12, 30
384 bl do_mul # r^4 r^3
398 vaddudm 3, 12, 30
404 xxlor 3, 61, 61
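Going by the "# r^4 r^3" comment, the do_mul calls build the power table pairwise: square r once, then multiply the lane pair (r, r^2) by r^2 to get r^3 and r^4 in one vector multiply. A hedged scalar sketch (the key value is made up for illustration):

p = (1 << 130) - 5
r = 0x33445566778899aabbccddeeff001122 & 0x0ffffffc0ffffffc0ffffffc0fffffff  # hypothetical clamped r
r2 = r * r % p        # first do_mul: r^2
r3 = r2 * r % p       # second do_mul, both lanes times r^2:
r4 = r2 * r2 % p      #   (r, r^2) * r^2 -> (r^3, r^4)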
411 vspltw 9, 26, 3
414 vspltw 9, 27, 3
417 vspltw 9, 28, 3
420 vspltw 9, 29, 3
423 vspltw 9, 30, 3
434 vaddudm 3, 12, 30
443 vsrd 10, 14, 31
444 vsrd 11, 17, 31
448 vsrd 12, 18, 31
451 vsrd 11, 15, 31
458 vsrd 13, 6, 31
461 vsrd 10, 4, 31
464 vsrd 11, 7, 31
486 lvx 25, 0, 10 # v25 - mask
487 lvx 31, 14, 10 # v31 = 1a
494 ld 9, 24(3)
495 ld 10, 32(3)
518 vmulouw 3, 30, 4 # v3 = rr3
543 ld 9, 0(3)
544 ld 10, 8(3)
545 ld 19, 16(3)
571 vsrd 10, 14, 31 # >> 26
572 vsrd 11, 10, 31 # 12 bits left
581 vsrd 13, 12, 31 # >> 26, a4
598 vsrd 10, 14, 31 # >> 26
599 vsrd 11, 10, 31 # 12 bits left
608 vsrd 13, 12, 31 # >> 26, a4
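The vsrd-by-26 steps above are the carry chain that brings the wide column sums back to 26-bit limbs; the carry out of the top limb re-enters limb 0 multiplied by 5, again because 2^130 ≡ 5 (mod p). A hedged Python equivalent (hypothetical helper):

MASK26 = (1 << 26) - 1

def propagate(columns):
    h = list(columns)
    for i in range(4):            # push carries up limbs 0..3
        h[i + 1] += h[i] >> 26
        h[i] &= MASK26
    h[0] += 5 * (h[4] >> 26)      # wrap the top carry with *5
    h[4] &= MASK26
    h[1] += h[0] >> 26            # one extra pass; h[0] may have regrown
    h[0] &= MASK26
    return h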
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9
625 cmpdi 31, 0
628 mtctr 31
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
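The recurrences above keep four interleaved accumulators: every loop iteration (whose trip count was set from len/64 via mtctr) multiplies each lane by r^4 and adds the next four blocks, so the distinct powers r^4..r are only needed in the final combine. A self-contained Python model (hypothetical names) that checks this against the serial definition:

import random

p = (1 << 130) - 5

def four_lane(r, blocks):                 # assumes len(blocks) % 4 == 0
    lanes = [0, 0, 0, 0]
    r4 = pow(r, 4, p)
    for i in range(0, len(blocks), 4):
        lanes = [(lanes[j] * r4 + blocks[i + j]) % p for j in range(4)]
    # final combine applies r^4, r^3, r^2, r once
    return sum(lanes[j] * pow(r, 4 - j, p) for j in range(4)) % p

r = random.randrange(1 << 124)
blocks = [random.randrange(1 << 129) for _ in range(8)]
h = 0
for m in blocks:                          # serial reference
    h = (h + m) * r % p
assert four_lane(r, blocks) == h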
645 vsrd 10, 14, 31
646 vsrd 11, 17, 31
650 vsrd 12, 18, 31
653 vsrd 11, 15, 31
660 vsrd 13, 6, 31
663 vsrd 10, 4, 31
666 vsrd 11, 7, 31
690 vsrd 21, 14, 31 # >> 26
691 vsrd 22, 21, 31 # 12 bits left
692 vsrd 10, 17, 31 # >> 26
693 vsrd 11, 10, 31 # 12 bits left
709 vsrd 24, 23, 31 # >> 26, a4
712 vsrd 13, 12, 31 # >> 26, a4
729 addi 5, 5, -64 # len -= 64
738 xxlor 61, 3, 3
750 xxpermdi 41, 31, 46, 0
751 xxpermdi 42, 31, 47, 0
753 xxpermdi 36, 31, 36, 3
755 xxpermdi 37, 31, 37, 3
756 xxpermdi 43, 31, 48, 0
758 xxpermdi 38, 31, 38, 3
759 xxpermdi 44, 31, 49, 0
761 xxpermdi 39, 31, 39, 3
762 xxpermdi 45, 31, 50, 0
764 xxpermdi 40, 31, 40, 3
768 vsrd 10, 4, 31
769 vsrd 11, 7, 31
773 vsrd 12, 8, 31
776 vsrd 11, 5, 31
783 vsrd 13, 6, 31
786 vsrd 10, 4, 31
789 vsrd 11, 7, 31
793 vsrd 10, 5, 31
803 vsld 5, 5, 31
807 vsld 6, 6, 31
808 vsld 6, 6, 31
815 vsld 8, 8, 31
821 std 17, 0(3)
822 std 19, 8(3)
823 stw 16, 16(3)
826 li 3, 0
833 li 3, 0
851 ld 9, 24(3)
852 ld 10, 32(3)
857 add 19, 21, 10 # s1: r19 = r1 + (r1 >> 2), i.e. (r1 >> 2) * 5
864 mtvsrdd 32+3, 9, 25 # r0
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
907 add 23, 23, 22 # (h2 & 3) * 5
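This scalar stretch works in two 64-bit limbs plus a small h2, with vmsumudm forming the dot products. s1 = r1 + (r1 >> 2) equals (r1 >> 2) * 5 exactly, since key clamping forces r1 to be a multiple of 4; it substitutes for the 2^130 ≡ 5 wrap so no separate multiply by 5 is needed. A hedged Python sketch (hypothetical helper) of the whole multiply-and-partially-reduce step:

MASK64 = (1 << 64) - 1

def mul_2x64(h0, h1, h2, r0, r1):     # h = h0 + h1*2^64 + h2*2^128
    s1 = r1 + (r1 >> 2)               # (r1 >> 2) * 5
    d0 = h0 * r0 + h1 * s1            # h1*r1 at 2^128 wraps down via s1
    d1 = h0 * r1 + h1 * r0 + h2 * s1  # h2*r1 at 2^192 wraps via s1
    d2 = h2 * r0
    h0 = d0 & MASK64
    d1 += d0 >> 64
    h1 = d1 & MASK64
    h2 = d2 + (d1 >> 64)
    c = (h2 >> 2) * 5                 # fold bits at and above 2^130
    h2 &= 3
    h0 += c
    h1 += h0 >> 64; h0 &= MASK64
    h2 += h1 >> 64; h1 &= MASK64
    return h0, h1, h2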
923 # - no highbit if final leftover block (highbit = 0)
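The highbit mentioned here is the 2^128 pad bit of Poly1305: full 16-byte blocks are loaded with it set, while a short final block arrives from the caller already padded (including its 0x01 byte) and so is passed with highbit = 0. A hedged one-line model (hypothetical helper):

def load_block(block16: bytes, highbit: int) -> int:
    return int.from_bytes(block16, "little") + (highbit << 128)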
931 stdu 1,-400(1)
950 SAVE_GPR 31, 248, 1
961 ld 27, 0(3)
962 ld 28, 8(3)
963 lwz 29, 16(3)
966 divdu 31, 5, 30
968 mtctr 31
993 std 27, 0(3)
994 std 28, 8(3)
995 stw 29, 16(3)
997 li 3, 0
1016 RESTORE_GPR 31, 248, 1
1025 li 3, 0
1034 ld 10, 0(3)
1035 ld 11, 8(3)
1036 ld 12, 16(3)
1039 # h + 5 + (-p)
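"h + 5 + (-p)" is the standard final freeze: because p = 2^130 - 5, adding 5 and checking the carry out of bit 130 is the same as conditionally subtracting p. A hedged Python sketch (hypothetical helper):

def freeze(h):                         # h partially reduced, h < 2*p
    g = h + 5                          # equals h - p + 2^130
    if g >> 130:                       # carry out means h >= p
        h = g & ((1 << 130) - 1)       # keep h - p
    return h & ((1 << 128) - 1)        # low 128 bits feed the tag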