1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ppc64 || ppc64le
6
7#include "textflag.h"
8
// For details on how various memory models are enforced on
// POWER, see the paper below. It describes how C/C++-like
// memory models are implemented on this architecture, which
// gives context for why the strange looking code sequences
// in this file work.
//
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
16
// uint32 ·Load(uint32 volatile* ptr)
//
// Loads the 32-bit word at *ptr and stores it in ret. A SYNC is
// issued before the load; afterwards the loaded value is compared
// with itself, a conditional branch targets the next instruction,
// and an ISYNC follows. The instruction order is significant; do
// not reorder.
TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// barrier ahead of the load
	MOVWZ	0(R3), R3	// R3 = *ptr, zero-extended
	CMPW	R3, R3, CR7	// CR7 now depends on the loaded value
	// Branch on CR7 to the very next instruction: taken or not,
	// execution continues at ISYNC.
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVW	R3, ret+8(FP)	// return the loaded word
	RET
27
// uint8 ·Load8(uint8 volatile* ptr)
//
// Loads the byte at *ptr and stores it in ret. A SYNC is issued
// before the load; afterwards the loaded value is compared with
// itself, a conditional branch targets the next instruction, and
// an ISYNC follows. The instruction order is significant.
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// barrier ahead of the load
	MOVBZ	0(R3), R3	// R3 = *ptr, zero-extended
	CMP	R3, R3, CR7	// CR7 now depends on the loaded value
	// Branch on CR7 to the very next instruction: taken or not,
	// execution continues at ISYNC.
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVB	R3, ret+8(FP)	// return the loaded byte
	RET
38
// uint64 ·Load64(uint64 volatile* ptr)
//
// Loads the 64-bit doubleword at *ptr and stores it in ret. A SYNC
// is issued before the load; afterwards the loaded value is
// compared with itself, a conditional branch targets the next
// instruction, and an ISYNC follows. The instruction order is
// significant.
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// barrier ahead of the load
	MOVD	0(R3), R3	// R3 = *ptr
	CMP	R3, R3, CR7	// CR7 now depends on the loaded value
	// Branch on CR7 to the very next instruction: taken or not,
	// execution continues at ISYNC.
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)	// return the loaded doubleword
	RET
49
// void *·Loadp(void *volatile *ptr)
//
// Loads the pointer-sized (64-bit) value at *ptr and stores it in
// ret. The instruction sequence is the same as in ·Load64: SYNC,
// load, compare of the loaded value with itself, conditional
// branch to the next instruction, ISYNC.
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3	// R3 = ptr
	SYNC			// barrier ahead of the load
	MOVD	0(R3), R3	// R3 = *ptr
	CMP	R3, R3, CR7	// CR7 now depends on the loaded value
	// Branch on CR7 to the very next instruction: taken or not,
	// execution continues at ISYNC.
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)	// return the loaded pointer
	RET
60
// uint32 ·LoadAcq(uint32 volatile* ptr)
//
// Loads the 32-bit word at *ptr and stores it in ret. Unlike ·Load
// there is no SYNC before the load; the load is still followed by
// the compare / branch-to-next-instruction / ISYNC sequence.
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD   ptr+0(FP), R3
	MOVWZ  0(R3), R3
	// Compare the loaded value with itself so that the branch
	// below depends on the load.
	CMPW   R3, R3, CR7
	BC     4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVW   R3, ret+8(FP)
	RET
70
// uint64 ·LoadAcq64(uint64 volatile* ptr)
//
// Loads the 64-bit doubleword at *ptr and stores it in ret. Unlike
// ·Load64 there is no SYNC before the load; the load is still
// followed by the compare / branch-to-next-instruction / ISYNC
// sequence.
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD   ptr+0(FP), R3
	MOVD   0(R3), R3
	// Compare the loaded value with itself so that the branch
	// below depends on the load.
	CMP    R3, R3, CR7
	BC     4, 30, 1(PC) // bne- cr7, 0x4
	ISYNC
	MOVD   R3, ret+8(FP)
	RET
80
// bool ·Cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else
//		return 0;
//
// Implemented as a load-and-reserve / store-conditional retry loop.
// An LWSYNC is issued before the loop and again on both the
// success and the failure exit.
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
cas_again:
	LWAR	(R3), R6	// R6 = *ptr, with reservation
	CMPW	R6, R4		// current value == old?
	BNE	cas_fail
	STWCCC	R5, (R3)	// try to store new
	BNE	cas_again	// store-conditional failed: retry
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+16(FP)	// return 1
	RET
cas_fail:
	LWSYNC
	MOVB	R0, ret+16(FP)	// return 0 (stored from R0)
	RET
107
// bool	·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else {
//		return 0;
//	}
//
// 64-bit counterpart of ·Cas: a load-and-reserve / store-conditional
// retry loop with an LWSYNC before the loop and on both exits.
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD	ptr+0(FP), R3
	MOVD	old+8(FP), R4
	MOVD	new+16(FP), R5
	LWSYNC
cas64_again:
	LDAR	(R3), R6	// R6 = *ptr, with reservation
	CMP	R6, R4		// current value == old?
	BNE	cas64_fail
	STDCCC	R5, (R3)	// try to store new
	BNE	cas64_again	// store-conditional failed: retry
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+24(FP)	// return 1
	RET
cas64_fail:
	LWSYNC
	MOVB	R0, ret+24(FP)	// return 0 (stored from R0)
	RET
135
// bool ·CasRel(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else
//		return 0;
//
// Same compare-and-swap loop as ·Cas, except that LWSYNC is issued
// only before the loop (not on the exit paths) and the reserving
// load carries an explicit hint operand of 0.
TEXT ·CasRel(SB), NOSPLIT, $0-17
	MOVD    ptr+0(FP), R3
	MOVWZ   old+8(FP), R4
	MOVWZ   new+12(FP), R5
	LWSYNC
cas_again:
	LWAR    (R3), $0, R6        // 0 = Mutex release hint
	CMPW    R6, R4
	BNE     cas_fail
	STWCCC  R5, (R3)
	// Store-conditional failed: retry.
	BNE     cas_again
	MOVD    $1, R3
	// Return 1; no trailing barrier on this path.
	MOVB    R3, ret+16(FP)
	RET
cas_fail:
	// Return 0 (stored from R0); no trailing barrier on this path.
	MOVB    R0, ret+16(FP)
	RET
153
// ·Casint32 tail-branches to ·Cas, which uses the same 17-byte
// argument layout.
TEXT ·Casint32(SB), NOSPLIT, $0-17
	JMP	·Cas(SB)
156
// ·Casint64 tail-branches to ·Cas64, which uses the same 25-byte
// argument layout.
TEXT ·Casint64(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
159
// ·Casuintptr tail-branches to ·Cas64, which uses the same 25-byte
// argument layout.
TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
162
// ·Loaduintptr tail-branches to ·Load64, which uses the same
// 16-byte argument layout.
TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)
165
// ·LoadAcquintptr tail-branches to ·LoadAcq64, which uses the same
// 16-byte argument layout.
TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·LoadAcq64(SB)
168
// ·Loaduint tail-branches to ·Load64, which uses the same 16-byte
// argument layout.
TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)
171
// ·Storeint32 tail-branches to ·Store, which uses the same 12-byte
// argument layout.
TEXT ·Storeint32(SB), NOSPLIT, $0-12
	JMP	·Store(SB)
174
// ·Storeint64 tail-branches to ·Store64, which uses the same
// 16-byte argument layout.
TEXT ·Storeint64(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
177
// ·Storeuintptr tail-branches to ·Store64, which uses the same
// 16-byte argument layout.
TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
180
// ·StoreReluintptr tail-branches to ·StoreRel64, which uses the
// same 16-byte argument layout.
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	JMP	·StoreRel64(SB)
183
// ·Xadduintptr tail-branches to ·Xadd64, which uses the same
// 24-byte argument layout.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)
186
// ·Loadint32 tail-branches to ·Load, which uses the same 12-byte
// argument layout.
TEXT ·Loadint32(SB), NOSPLIT, $0-12
	JMP	·Load(SB)
189
// ·Loadint64 tail-branches to ·Load64, which uses the same 16-byte
// argument layout.
TEXT ·Loadint64(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)
192
// ·Xaddint32 tail-branches to ·Xadd, which uses the same 20-byte
// argument layout.
TEXT ·Xaddint32(SB), NOSPLIT, $0-20
	JMP	·Xadd(SB)
195
// ·Xaddint64 tail-branches to ·Xadd64, which uses the same 24-byte
// argument layout.
TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)
198
// bool ·Casp1(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
//
// Pointer compare-and-swap; tail-branches to ·Cas64, which uses
// the same 25-byte argument layout.
TEXT ·Casp1(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
208
// uint32 ·Xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
//
// Load-and-reserve / store-conditional retry loop, preceded by an
// LWSYNC. The value returned is the updated one.
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
	LWSYNC
xadd_again:
	LWAR	(R4), R3	// R3 = *ptr, with reservation
	ADD	R5, R3		// R3 += delta
	STWCCC	R3, (R4)	// try to store the sum
	BNE	xadd_again	// store-conditional failed: retry
	MOVW	R3, ret+16(FP)
	RET
223
// uint64 ·Xadd64(uint64 volatile *ptr, int64 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
//
// Load-and-reserve / store-conditional retry loop, preceded by an
// LWSYNC. The value returned is the updated one.
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	delta+8(FP), R5
	LWSYNC
xadd64_again:
	LDAR	(R4), R3	// R3 = *ptr, with reservation
	ADD	R5, R3		// R3 += delta
	STDCCC	R3, (R4)	// try to store the sum
	BNE	xadd64_again	// store-conditional failed: retry
	MOVD	R3, ret+16(FP)
	RET
238
// uint32 ·Xchg(ptr *uint32, new uint32)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
//
// Load-and-reserve / store-conditional retry loop, with an LWSYNC
// before the loop and an ISYNC after it.
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	new+8(FP), R5
	LWSYNC
xchg_again:
	LWAR	(R4), R3	// R3 = old value, with reservation
	STWCCC	R5, (R4)	// try to store new
	BNE	xchg_again	// store-conditional failed: retry
	ISYNC
	MOVW	R3, ret+16(FP)	// return the old value
	RET
254
// uint64 ·Xchg64(ptr *uint64, new uint64)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
//
// Load-and-reserve / store-conditional retry loop, with an LWSYNC
// before the loop and an ISYNC after it.
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	new+8(FP), R5
	LWSYNC
xchg64_again:
	LDAR	(R4), R3	// R3 = old value, with reservation
	STDCCC	R5, (R4)	// try to store new
	BNE	xchg64_again	// store-conditional failed: retry
	ISYNC
	MOVD	R3, ret+16(FP)	// return the old value
	RET
270
// ·Xchgint32 tail-branches to ·Xchg, which uses the same 20-byte
// argument layout.
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
	JMP	·Xchg(SB)
273
// ·Xchgint64 tail-branches to ·Xchg64, which uses the same 24-byte
// argument layout.
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)
276
// ·Xchguintptr tail-branches to ·Xchg64, which uses the same
// 24-byte argument layout.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)
279
// ·StorepNoWB tail-branches to ·Store64, which uses the same
// 16-byte argument layout.
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
282
// void ·Store(uint32 volatile *ptr, uint32 val)
//
// Stores the 32-bit val to *ptr. A SYNC is issued before the
// store; the barrier must stay ahead of the store.
TEXT ·Store(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	SYNC			// barrier ahead of the store
	MOVW	R4, 0(R3)	// *ptr = val
	RET
289
// void ·Store8(uint8 volatile *ptr, uint8 val)
//
// Stores the byte val to *ptr. A SYNC is issued before the store;
// the barrier must stay ahead of the store.
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVB	val+8(FP), R4
	SYNC			// barrier ahead of the store
	MOVB	R4, 0(R3)	// *ptr = val
	RET
296
// void ·Store64(uint64 volatile *ptr, uint64 val)
//
// Stores the 64-bit val to *ptr. A SYNC is issued before the
// store; the barrier must stay ahead of the store.
TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	SYNC			// barrier ahead of the store
	MOVD	R4, 0(R3)	// *ptr = val
	RET
303
// void ·StoreRel(uint32 volatile *ptr, uint32 val)
//
// Stores the 32-bit val to *ptr. Differs from ·Store only in the
// barrier issued before the store: LWSYNC instead of SYNC.
TEXT ·StoreRel(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC			// barrier ahead of the store
	MOVW	R4, 0(R3)	// *ptr = val
	RET
310
// void ·StoreRel64(uint64 volatile *ptr, uint64 val)
//
// Stores the 64-bit val to *ptr. Differs from ·Store64 only in the
// barrier issued before the store: LWSYNC instead of SYNC.
TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC			// barrier ahead of the store
	MOVD	R4, 0(R3)	// *ptr = val
	RET
317
// void ·Or8(byte volatile *ptr, byte val);
//
// Atomically performs *ptr |= val on a single byte using a
// load-and-reserve / store-conditional retry loop preceded by an
// LWSYNC. Nothing is returned.
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6	// R6 = *ptr, with reservation
	OR	R4, R6		// R6 |= val
	STBCCC	R6, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	RET
329
// void ·And8(byte volatile *ptr, byte val);
//
// Atomically performs *ptr &= val on a single byte using a
// load-and-reserve / store-conditional retry loop preceded by an
// LWSYNC. Nothing is returned.
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6	// R6 = *ptr, with reservation
	AND	R4, R6		// R6 &= val
	STBCCC	R6, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	RET
341
// func Or(ptr *uint32, val uint32)
//
// Atomically performs *ptr |= val on a 32-bit word using a
// load-and-reserve / store-conditional retry loop preceded by an
// LWSYNC. Nothing is returned.
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6	// R6 = *ptr, with reservation
	OR	R4, R6		// R6 |= val
	STWCCC	R6, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	RET
353
// func And(ptr *uint32, val uint32)
//
// Atomically performs *ptr &= val on a 32-bit word using a
// load-and-reserve / store-conditional retry loop preceded by an
// LWSYNC. Nothing is returned.
TEXT ·And(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3),R6		// R6 = *ptr, with reservation
	AND	R4, R6		// R6 &= val
	STWCCC	R6, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	RET
365
// func Or32(ptr *uint32, val uint32) old uint32
//
// Atomically performs *ptr |= val on a 32-bit word and returns the
// value *ptr held before the update. Uses a load-and-reserve /
// store-conditional retry loop preceded by an LWSYNC.
TEXT ·Or32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6	// R6 = old value, with reservation
	OR	R4, R6, R7	// R7 = old | val; R6 is kept for the return
	STWCCC	R7, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	MOVW	R6, ret+16(FP)	// return the old value
	RET
378
// func And32(ptr *uint32, val uint32) old uint32
//
// Atomically performs *ptr &= val on a 32-bit word and returns the
// value *ptr held before the update. Uses a load-and-reserve /
// store-conditional retry loop preceded by an LWSYNC.
TEXT ·And32(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3),R6		// R6 = old value, with reservation
	AND	R4, R6, R7	// R7 = old & val; R6 is kept for the return
	STWCCC	R7, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	MOVW	R6, ret+16(FP)	// return the old value
	RET
391
// func Or64(ptr *uint64, val uint64) old uint64
//
// Atomically performs *ptr |= val on a 64-bit doubleword and
// returns the value *ptr held before the update. Uses a
// load-and-reserve / store-conditional retry loop preceded by an
// LWSYNC.
TEXT ·Or64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
again:
	LDAR	(R3), R6	// R6 = old value, with reservation
	OR	R4, R6, R7	// R7 = old | val; R6 is kept for the return
	STDCCC	R7, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	MOVD	R6, ret+16(FP)	// return the old value
	RET
404
// func And64(ptr *uint64, val uint64) old uint64
//
// Atomically performs *ptr &= val on a 64-bit doubleword and
// returns the value *ptr held before the update. Uses a
// load-and-reserve / store-conditional retry loop preceded by an
// LWSYNC.
TEXT ·And64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
again:
	LDAR	(R3),R6		// R6 = old value, with reservation
	AND	R4, R6, R7	// R7 = old & val; R6 is kept for the return
	STDCCC	R7, (R3)	// try to store the result
	BNE	again		// store-conditional failed: retry
	MOVD	R6, ret+16(FP)	// return the old value
	RET
417
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
//
// Tail-branches to ·And64, which uses the same 24-byte argument
// layout.
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
	JMP	·And64(SB)
421
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
//
// Tail-branches to ·Or64, which uses the same 24-byte argument
// layout.
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
	JMP	·Or64(SB)
425