// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego && (ppc64 || ppc64le)

#include "textflag.h"

// The three exported entry points below differ only in the iteration count
// they place in R6 before jumping to the shared loop in addMulVVWx. Each
// iteration of that loop handles four 8-byte words, so R6 is the operand
// length in words divided by 4.

// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Computes z += x*y over 16 words (4 iterations of 4 words, i.e. 1024 bits
// of 64-bit words) and returns the final carry word in c.
// Frame is $0-32: no locals, 32 bytes of arguments/results (z, x, y, c).
TEXT ·addMulVVW1024(SB), $0-32
	MOVD	$4, R6	// R6 = z_len/4
	// Jump, not call: the helper reads the arguments and writes the result
	// through the same FP offsets, and its RET returns to our caller.
	JMP	addMulVVWx<>(SB)

// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Computes z += x*y over 24 words (6 iterations of 4 words, i.e. 1536 bits
// of 64-bit words) and returns the final carry word in c.
TEXT ·addMulVVW1536(SB), $0-32
	MOVD	$6, R6	// R6 = z_len/4
	JMP	addMulVVWx<>(SB)	// shared loop; see addMulVVWx

// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Computes z += x*y over 32 words (8 iterations of 4 words, i.e. 2048 bits
// of 64-bit words) and returns the final carry word in c.
TEXT ·addMulVVW2048(SB), $0-32
	MOVD	$8, R6	// R6 = z_len/4
	JMP	addMulVVWx<>(SB)	// shared loop; see addMulVVWx

// addMulVVWx is the shared multiply-accumulate loop. It is file-local and
// expects to be reached only by a jump from the entry points above.
//
// Inputs:
//   R6       number of loop iterations (z length in words / 4). It must be
//            > 0: the loop body runs before the counter is tested, and the
//            callers above only pass 4, 6 or 8.
//   z+0(FP)  pointer to the accumulator words, updated in place
//   x+8(FP)  pointer to the multiplicand words, read only
//   y+16(FP) the single-word multiplier
//
// Output:
//   c+24(FP) the carry word left over after the last word has been processed
//
// For every word the loop computes the 128-bit value x[i]*y + z[i] + c,
// stores its low 64 bits to z[i] and keeps its high 64 bits as the next c.
// That sum is at most 2^128-1, so the high word cannot overflow.
//
// If other callers are added (for example with a length that is zero or not
// a multiple of 4) this function might need to change.
TEXT addMulVVWx<>(SB), NOSPLIT, $0
	MOVD	z+0(FP), R3	// R3 = current position in z
	MOVD	x+8(FP), R4	// R4 = current position in x
	MOVD	y+16(FP), R5	// R5 = y, constant for the whole loop

	MOVD	$0, R9		// R9 = c = 0
	MOVD	R6, CTR		// Initialize loop counter
	PCALIGN	$16		// align the loop entry to a 16-byte boundary

loop:
	// Load all four x and z words up front. The registers holding x[i]
	// and z[i] are reused further down for later products, so these loads
	// must stay ahead of the arithmetic.
	MOVD	0(R4), R14	// x[i]
	MOVD	8(R4), R16	// x[i+1]
	MOVD	16(R4), R18	// x[i+2]
	MOVD	24(R4), R20	// x[i+3]
	MOVD	0(R3), R15	// z[i]
	MOVD	8(R3), R17	// z[i+1]
	MOVD	16(R3), R19	// z[i+2]
	MOVD	24(R3), R21	// z[i+3]

	// Word i: result low in R10, high in R11, new carry word in R9.
	MULLD	R5, R14, R10	// low x[i]*y
	MULHDU	R5, R14, R11	// high x[i]*y
	ADDC	R15, R10	// low += z[i], sets the carry bit
	ADDZE	R11		// high += carry bit
	ADDC	R9, R10		// low += incoming c, sets the carry bit
	ADDZE	R11, R9		// c = high + carry bit

	// Word i+1: R14/R15 (x[i], z[i]) are free now and hold low/high.
	MULLD	R5, R16, R14	// low x[i+1]*y
	MULHDU	R5, R16, R15	// high x[i+1]*y
	ADDC	R17, R14	// low += z[i+1]
	ADDZE	R15		// high += carry bit
	ADDC	R9, R14		// low += c
	ADDZE	R15, R9		// c = high + carry bit

	// Word i+2: R16/R17 are reused for low/high.
	MULLD	R5, R18, R16	// low x[i+2]*y
	MULHDU	R5, R18, R17	// high x[i+2]*y
	ADDC	R19, R16	// low += z[i+2]
	ADDZE	R17		// high += carry bit
	ADDC	R9, R16		// low += c
	ADDZE	R17, R9		// c = high + carry bit

	// Word i+3: R18/R19 are reused for low/high.
	MULLD	R5, R20, R18	// low x[i+3]*y
	MULHDU	R5, R20, R19	// high x[i+3]*y
	ADDC	R21, R18	// low += z[i+3]
	ADDZE	R19		// high += carry bit
	ADDC	R9, R18		// low += c
	ADDZE	R19, R9		// c = high + carry bit

	// Write the four updated words back to z.
	MOVD	R10, 0(R3)	// z[i]
	MOVD	R14, 8(R3)	// z[i+1]
	MOVD	R16, 16(R3)	// z[i+2]
	MOVD	R18, 24(R3)	// z[i+3]
	ADD	$32, R3		// advance z by 4 words
	ADD	$32, R4		// advance x by 4 words
	BDNZ	loop		// decrement CTR, repeat while it is non-zero

done:
	// Reached by falling out of the loop; R9 holds the final carry word.
	MOVD	R9, c+24(FP)
	RET