// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// func xorBytes(dst, a, b *byte, n int)
//
// xorBytes stores a[i] ^ b[i] into dst[i] for each i in [0, n).
//
// The four arguments are read from the caller's frame through FP; the
// routine declares a zero-size frame and is NOSPLIT.
//
// Register roles:
//	R3, R4, R5	dst, a, b base pointers (never modified)
//	R6		bytes still to process (reloaded from R9 after loop64)
//	R8		byte offset of the next unprocessed element
//	R7, CTR		number of 64-byte iterations
//	R9		n mod 64 while in loop64; R8+16 inside xor32
//	R10, R14, R15	R8+16, R8+32, R8+48 inside loop64
//	R14-R16		scalar scratch in the non-POWER10 tail
//	R17-R20		length and effective addresses in the POWER10 tail
//	VS32-VS39, V0, V1	vector scratch
//
// Strategy: 64 bytes per iteration while at least 64 remain, then at most
// one 32-byte step and one 16-byte step, then a tail of fewer than 16 bytes
// handled either by one length-controlled vector load/store (POWER10 build)
// or by 8/4/2/1-byte scalar steps.
TEXT ·xorBytes(SB), NOSPLIT, $0
	MOVD	dst+0(FP), R3	// R3 = dst
	MOVD	a+8(FP), R4	// R4 = a
	MOVD	b+16(FP), R5	// R5 = b
	MOVD	n+24(FP), R6	// R6 = n

	CMPU	R6, $64, CR7	// CR7 = n compared with 64 (unsigned)
	MOVD	R0, R8		// R8 = index, starts at 0 (R0 is the zero register by Go ppc64 convention)
	CMPU	R6, $8, CR6	// CR6 = n compared with 8 (unsigned)
	BLE	CR6, small	// n ≤ 8: no vector work needed
	BLT	CR7, xor32	// 8 < n < 64: skip the 64-byte loop

	// Case for n ≥ 64 bytes
preloop64:
	SRD	$6, R6, R7	// R7 = n / 64 = number of loop64 iterations
	MOVD	R7, CTR
	MOVD	$16, R10	// R10, R14, R15 shadow R8 at +16, +32, +48
	MOVD	$32, R14
	MOVD	$48, R15
	// R9 = n mod 64. The record form also sets CR0, which is consumed by
	// the BC after the loop; nothing inside the loop writes CR0.
	ANDCC	$63, R6, R9
	PCALIGN $16
	// Process 64 bytes per iteration:
	// load 4 vectors of a and 4 vectors of b,
	// XOR the corresponding vectors,
	// store the 4 results to dst.
loop64:
	LXVD2X	(R4)(R8), VS32
	LXVD2X	(R4)(R10), VS34
	LXVD2X	(R4)(R14), VS36
	LXVD2X	(R4)(R15), VS38
	LXVD2X	(R5)(R8), VS33
	LXVD2X	(R5)(R10), VS35
	LXVD2X	(R5)(R14), VS37
	LXVD2X	(R5)(R15), VS39
	XXLXOR	VS32, VS33, VS32
	XXLXOR	VS34, VS35, VS34
	XXLXOR	VS36, VS37, VS36
	XXLXOR	VS38, VS39, VS38
	STXVD2X	VS32, (R3)(R8)
	STXVD2X	VS34, (R3)(R10)
	STXVD2X	VS36, (R3)(R14)
	STXVD2X	VS38, (R3)(R15)
	ADD	$64, R8
	ADD	$64, R10
	ADD	$64, R14
	ADD	$64, R15
	BDNZ	loop64
	BC	12,2,LR		// BEQLR: return if n mod 64 == 0 (CR0 from ANDCC above)
	MOVD	R9, R6		// R6 = remaining tail bytes, 1..63
	CMP	R6, $8
	BLE	small
	// Case for 8 < remaining < 64 bytes
	// Process 32 bytes if available
xor32:
	CMP	R6, $32
	BLT	xor16
	ADD	$16, R8, R9	// R9 = offset of the second 16-byte vector
	LXVD2X	(R4)(R8), VS32
	LXVD2X	(R4)(R9), VS33
	LXVD2X	(R5)(R8), VS34
	LXVD2X	(R5)(R9), VS35
	XXLXOR	VS32, VS34, VS32
	XXLXOR	VS33, VS35, VS33
	STXVD2X	VS32, (R3)(R8)
	STXVD2X	VS33, (R3)(R9)
	ADD	$32, R8
	ADD	$-32, R6
	CMP	R6, $8
	BLE	small
	// Case for 8 < remaining < 32 bytes
	// Process 16 bytes if available
xor16:
	CMP	R6, $16
	BLT	xor8
	LXVD2X	(R4)(R8), VS32
	LXVD2X	(R5)(R8), VS33
	XXLXOR	VS32, VS33, VS32
	STXVD2X	VS32, (R3)(R8)
	ADD	$16, R8
	ADD	$-16, R6
	// Fewer than 16 bytes remain; return immediately if none are left.
small:
	CMP	R6, $0
	BC	12,2,LR		// BEQLR
	// Here 0 < R6 < 16.
xor8:
#ifdef GOPPC64_power10
	// Handle the whole tail with one length-controlled vector load per
	// input and one length-controlled store.
	SLD	$56,R6,R17	// LXVL/STXVL take the byte count from the top byte of the length register
	ADD	R4,R8,R18	// R18 = &a[index]
	ADD	R5,R8,R19	// R19 = &b[index]
	ADD	R3,R8,R20	// R20 = &dst[index]
	LXVL	R18,R17,V0
	LXVL	R19,R17,V1
	VXOR	V0,V1,V1	// V1 = V0 ^ V1
	STXVL	V1,R20,R17
	RET
#else
	CMP	R6, $8
	BLT	xor4
	// Case for 8 ≤ remaining < 16 bytes
	MOVD	(R4)(R8), R14   // R14 = a[i,...,i+7]
	MOVD	(R5)(R8), R15   // R15 = b[i,...,i+7]
	XOR	R14, R15, R16   // R16 = a[] ^ b[]
	SUB	$8, R6          // n = n - 8
	MOVD	R16, (R3)(R8)   // Store to dst
	ADD	$8, R8
	// Process 4 bytes if available
xor4:
	CMP	R6, $4
	BLT	xor2
	MOVWZ	(R4)(R8), R14
	MOVWZ	(R5)(R8), R15
	XOR	R14, R15, R16
	MOVW	R16, (R3)(R8)
	ADD	$4,R8
	ADD	$-4,R6
	// Process 2 bytes if available
xor2:
	CMP	R6, $2
	BLT	xor1
	MOVHZ	(R4)(R8), R14
	MOVHZ	(R5)(R8), R15
	XOR	R14, R15, R16
	MOVH	R16, (R3)(R8)
	ADD	$2,R8
	ADD	$-2,R6
	// At most one byte remains.
xor1:
	CMP	R6, $0
	BC	12,2,LR		// BEQLR
	MOVBZ	(R4)(R8), R14	// R14 = a[i]
	MOVBZ	(R5)(R8), R15	// R15 = b[i]
	XOR	R14, R15, R16	// R16 = a[i] ^ b[i]
	MOVB	R16, (R3)(R8)	// Store to dst
#endif
done:
	RET
143