// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Entry point for 1024-bit operands. It loads the word count
// (1024 bits / 64 bits per word = 16) into R5 and tail-jumps to addMulVVWx,
// which reads z, x and y from, and writes c to, this function's argument
// frame ($0-32: no locals, 32 bytes of arguments plus result).
TEXT ·addMulVVW1024(SB), $0-32
	MOVD	$16, R5		// n = 16 words; R5 is the implicit input of addMulVVWx
	JMP	addMulVVWx(SB)	// tail jump: addMulVVWx executes the RET

// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Entry point for 1536-bit operands. It loads the word count
// (1536 bits / 64 bits per word = 24) into R5 and tail-jumps to addMulVVWx,
// which reads z, x and y from, and writes c to, this function's argument
// frame ($0-32: no locals, 32 bytes of arguments plus result).
TEXT ·addMulVVW1536(SB), $0-32
	MOVD	$24, R5		// n = 24 words; R5 is the implicit input of addMulVVWx
	JMP	addMulVVWx(SB)	// tail jump: addMulVVWx executes the RET

// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Entry point for 2048-bit operands. It loads the word count
// (2048 bits / 64 bits per word = 32) into R5 and tail-jumps to addMulVVWx,
// which reads z, x and y from, and writes c to, this function's argument
// frame ($0-32: no locals, 32 bytes of arguments plus result).
TEXT ·addMulVVW2048(SB), $0-32
	MOVD	$32, R5		// n = 32 words; R5 is the implicit input of addMulVVWx
	JMP	addMulVVWx(SB)	// tail jump: addMulVVWx executes the RET

// addMulVVWx is the shared body behind the addMulVVW<bits> entry points.
//
// For i in [0, n) it computes the 128-bit value x[i]*y + z[i] + c, stores the
// low 64 bits back to z[i] and keeps the high 64 bits as the new c. The final
// c is written to the result slot.
//
// It has no frame of its own (NOFRAME, $0) and is reached by a tail jump, so
// the FP-relative names below refer to the argument frame of the entry point
// that jumped here.
//
// Inputs:
//	R5        n, the number of 64-bit words; must be set by the caller
//	          before jumping here (this routine never initialises it)
//	z+0(FP)   pointer to z (read and written)
//	x+8(FP)   pointer to x (read only)
//	y+16(FP)  multiplier word
// Output:
//	c+24(FP)  carry-out word
//
// Register use:
//	R0   constant zero (operand for the add-with-carry steps)
//	R1   byte offset of the current word (i*8)
//	R2   z, R8 x, R9 y
//	R4   running carry word c
//	R6   x[i] on input to MULHDU, high half of the product afterwards
//	R7   word index i
//	R10  z[i]
//	R11  low half of the product / value stored back to z[i]
//	R12  n rounded down to an even number (bound for the unrolled loop)
//
// NOTE(review): R11 is never written explicitly; the code relies on MULHDU
// leaving the low 64 bits of the product in R11. z[i] is loaded into R10 only
// after MULHDU, presumably because the instruction uses R10/R11 as scratch -
// confirm against the s390x assembler before reordering anything here.
TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0
	MOVD z+0(FP), R2
	MOVD x+8(FP), R8
	MOVD y+16(FP), R9

	MOVD $0, R1 // i*8 = 0
	MOVD $0, R7 // i = 0
	MOVD $0, R0 // make sure it's zero
	MOVD $0, R4 // c = 0

	MOVD   R5, R12
	AND    $-2, R12    // R12 = n with the low bit cleared
	CMPBGE R5, $2, A6  // at least two words: enter the unrolled loop
	BR     E6          // n < 2: go straight to the tail check

A6:
	// Unrolled loop: two words per iteration while i < R12.
	MOVD   (R8)(R1*1), R6  // R6 = x[i]
	MULHDU R9, R6          // R6 = high half of x[i]*y (low half in R11, see header note)
	MOVD   (R2)(R1*1), R10 // R10 = z[i]
	ADDC   R10, R11        // add to low order bits
	ADDE   R0, R6          // fold the carry into the high half
	ADDC   R4, R11         // add the incoming carry word c
	ADDE   R0, R6          // fold the carry into the high half
	MOVD   R6, R4          // c = high half
	MOVD   R11, (R2)(R1*1) // z[i] = low half

	MOVD   (8)(R8)(R1*1), R6  // R6 = x[i+1]
	MULHDU R9, R6             // R6 = high half of x[i+1]*y (low half in R11)
	MOVD   (8)(R2)(R1*1), R10 // R10 = z[i+1]
	ADDC   R10, R11           // add to low order bits
	ADDE   R0, R6
	ADDC   R4, R11
	ADDE   R0, R6
	MOVD   R6, R4             // c = high half
	MOVD   R11, (8)(R2)(R1*1) // z[i+1] = low half

	ADD $16, R1 // byte offset += 16 (two words)
	ADD $2, R7  // i += 2

	CMPBLT R7, R12, A6 // more word pairs left
	BR     E6

L6:
	// TODO: drop unused single-step loop.
	// Single-word step for a leftover word. The entry points in this file
	// pass n = 16, 24 or 32 (all even), so for them this body never runs.
	MOVD   (R8)(R1*1), R6  // R6 = x[i]
	MULHDU R9, R6          // R6 = high half of x[i]*y (low half in R11)
	MOVD   (R2)(R1*1), R10 // R10 = z[i]
	ADDC   R10, R11        // add to low order bits
	ADDE   R0, R6
	ADDC   R4, R11
	ADDE   R0, R6
	MOVD   R6, R4          // c = high half
	MOVD   R11, (R2)(R1*1) // z[i] = low half

	ADD $8, R1 // i*8 + 8
	ADD $1, R7 // i++

E6:
	CMPBLT R7, R5, L6 // i < n

	MOVD R4, c+24(FP)
	RET
