1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build !purego && (ppc64 || ppc64le)
6
7#include "textflag.h"
8
// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Entry point for 1024-bit operands. It only selects the iteration
// count and then branches into the shared loop, which handles four
// 8-byte words per iteration: 4 iterations cover 16 words.
TEXT ·addMulVVW1024(SB), $0-32
	MOVD	$4, R6			// R6 = number of 4-word groups in z
	BR	addMulVVWx<>(SB)	// tail branch; the shared code stores c and returns
13
// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Entry point for 1536-bit operands. It only selects the iteration
// count and then branches into the shared loop, which handles four
// 8-byte words per iteration: 6 iterations cover 24 words.
TEXT ·addMulVVW1536(SB), $0-32
	MOVD	$6, R6			// R6 = number of 4-word groups in z
	BR	addMulVVWx<>(SB)	// tail branch; the shared code stores c and returns
18
// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Entry point for 2048-bit operands. It only selects the iteration
// count and then branches into the shared loop, which handles four
// 8-byte words per iteration: 8 iterations cover 32 words.
TEXT ·addMulVVW2048(SB), $0-32
	MOVD	$8, R6			// R6 = number of 4-word groups in z
	BR	addMulVVWx<>(SB)	// tail branch; the shared code stores c and returns
23
// addMulVVWx is the shared body of the fixed-size addMulVVW entry
// points above. It computes z += x*y word by word over R6*4 8-byte
// words and stores the final carry word in c.
//
// It is reached only by a branch from those entry points, so the FP
// references below read the arguments, and write the result, of the
// entry point that branched here.
//
// On entry R6 must hold the number of 4-word groups (z length / 4).
// It must be > 0: there is no check for zero and the loop body runs
// before the counter is tested. The current callers always pass a
// non-zero constant; if other callers are added this function might
// need to change.
//
// Register roles:
//	R3       pointer to the current group of z
//	R4       pointer to the current group of x
//	R5       multiplier y
//	R6       group count (copied to CTR)
//	R9       running carry word c
//	R10-R11  scratch for the first word's product
//	R14-R21  loaded x and z words, reused as product scratch

// MULADD_WORD performs one word of the multiply-accumulate:
//
//	hi:lo = xw*y + zw + R9
//
// The low half is left in lo (the new z word) and the high half,
// including both carries out of the low-half additions, is left in R9
// as the carry into the next word. hi is clobbered. The instruction
// order matters: each ADDZE consumes the carry bit produced by the
// ADDC immediately before it.
#define MULADD_WORD(xw, zw, lo, hi) \
	MULLD	R5, xw, lo; \
	MULHDU	R5, xw, hi; \
	ADDC	zw, lo; \
	ADDZE	hi; \
	ADDC	R9, lo; \
	ADDZE	hi, R9

TEXT addMulVVWx<>(SB), NOSPLIT, $0
	MOVD	z+0(FP), R3
	MOVD	x+8(FP), R4
	MOVD	y+16(FP), R5

	MOVD	$0, R9		// carry starts at zero
	MOVD	R6, CTR		// CTR counts the remaining 4-word groups
	PCALIGN	$16

quad:
	// Fetch four words of x and the matching four words of z.
	MOVD	0(R4), R14	// x[i]
	MOVD	8(R4), R16	// x[i+1]
	MOVD	16(R4), R18	// x[i+2]
	MOVD	24(R4), R20	// x[i+3]
	MOVD	0(R3), R15	// z[i]
	MOVD	8(R3), R17	// z[i+1]
	MOVD	16(R3), R19	// z[i+2]
	MOVD	24(R3), R21	// z[i+3]

	// Each step reuses the registers of the previous step's inputs as
	// its own outputs, since those inputs are dead by then.
	MULADD_WORD(R14, R15, R10, R11)	// R10 = new z[i]
	MULADD_WORD(R16, R17, R14, R15)	// R14 = new z[i+1]
	MULADD_WORD(R18, R19, R16, R17)	// R16 = new z[i+2]
	MULADD_WORD(R20, R21, R18, R19)	// R18 = new z[i+3]

	// Write the updated words back and step both pointers by 32 bytes.
	MOVD	R10, 0(R3)
	MOVD	R14, 8(R3)
	MOVD	R16, 16(R3)
	MOVD	R18, 24(R3)
	ADD	$32, R3
	ADD	$32, R4
	BDNZ	quad		// decrement CTR; repeat while it is non-zero

	MOVD	R9, c+24(FP)	// return the final carry word
	RET
83