1// Copyright 2022 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "textflag.h"
6
// bool cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else
//		return 0;
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVV	ptr+0(FP), R4
	MOVW	old+8(FP), R5
	MOVW	new+12(FP), R6
	DBAR				// memory barrier: order earlier accesses before the CAS
cas_again:
	MOVV	R6, R7			// copy new into R7; SC consumes its source register
	LL	(R4), R8		// load-linked *ptr
	BNE	R5, R8, cas_fail	// *ptr != old: fail
	SC	R7, (R4)		// store-conditional; R7 = 1 on success, 0 on failure
	BEQ	R7, cas_again		// reservation lost: retry
	MOVV	$1, R4
	MOVB	R4, ret+16(FP)
	DBAR				// memory barrier: order the CAS before later accesses
	RET
cas_fail:
	MOVV	$0, R4
	JMP	-4(PC)			// back to the MOVB above: store 0, barrier, RET
32
// bool	cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVV	ptr+0(FP), R4
	MOVV	old+8(FP), R5
	MOVV	new+16(FP), R6
	DBAR				// memory barrier: order earlier accesses before the CAS
cas64_again:
	MOVV	R6, R7			// copy new into R7; SCV consumes its source register
	LLV	(R4), R8		// 64-bit load-linked *ptr
	BNE	R5, R8, cas64_fail	// *ptr != old: fail
	SCV	R7, (R4)		// 64-bit store-conditional; R7 = 1 on success, 0 on failure
	BEQ	R7, cas64_again		// reservation lost: retry
	MOVV	$1, R4
	MOVB	R4, ret+24(FP)
	DBAR				// memory barrier: order the CAS before later accesses
	RET
cas64_fail:
	MOVV	$0, R4
	JMP	-4(PC)			// back to the MOVB above: store 0, barrier, RET
59
// uintptr is 64 bits on loong64, so delegate to Cas64.
TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
62
// CasRel needs only release ordering; Cas's full barriers are stronger,
// so delegating is correct (if conservative).
TEXT ·CasRel(SB), NOSPLIT, $0-17
	JMP	·Cas(SB)
65
// uintptr is 64 bits on loong64, so delegate to Load64.
TEXT ·Loaduintptr(SB),  NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)
68
// uint is 64 bits on loong64, so delegate to Load64.
TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)
71
// uintptr is 64 bits on loong64, so delegate to Store64.
TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
74
// uintptr is 64 bits on loong64, so delegate to Xadd64.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)
77
// int64 load is the same machine operation as uint64: delegate to Load64.
TEXT ·Loadint64(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)
80
// int64 add is the same machine operation as uint64: delegate to Xadd64.
TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)
83
// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
// Pointers are 64 bits on loong64, so delegate to Cas64.
TEXT ·Casp1(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)
93
// uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LL	(R4), R6		// load-linked old value
	ADDU	R6, R5, R7		// R7 = old + delta (32-bit add)
	MOVV	R7, R6			// keep the result in R6; SC consumes R7
	SC	R7, (R4)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SC failed: retry from the LL
	MOVW	R6, ret+16(FP)		// return the new value
	DBAR				// memory barrier after the read-modify-write
	RET
110
// uint64 Xadd64(uint64 volatile *ptr, int64 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	delta+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LLV	(R4), R6		// 64-bit load-linked old value
	ADDVU	R6, R5, R7		// R7 = old + delta (64-bit add)
	MOVV	R7, R6			// keep the result in R6; SCV consumes R7
	SCV	R7, (R4)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SCV failed: retry from the LLV
	MOVV	R6, ret+16(FP)		// return the new value
	DBAR				// memory barrier after the read-modify-write
	RET
123
// uint32 Xchg(uint32 volatile *ptr, uint32 new)
// Atomically:
//	old = *ptr; *ptr = new; return old;
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	new+8(FP), R5

	DBAR				// memory barrier before the exchange
	MOVV	R5, R6			// copy new into R6; SC consumes its source register
	LL	(R4), R7		// R7 = old *ptr
	SC	R6, (R4)		// try to store new; R6 = success flag
	BEQ	R6, -3(PC)		// SC failed: retry from the MOVV (recopy new)
	MOVW	R7, ret+16(FP)		// return the old value
	DBAR				// memory barrier after the exchange
	RET
136
// uint64 Xchg64(uint64 volatile *ptr, uint64 new)
// Atomically:
//	old = *ptr; *ptr = new; return old;
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	new+8(FP), R5

	DBAR				// memory barrier before the exchange
	MOVV	R5, R6			// copy new into R6; SCV consumes its source register
	LLV	(R4), R7		// R7 = old *ptr
	SCV	R6, (R4)		// try to store new; R6 = success flag
	BEQ	R6, -3(PC)		// SCV failed: retry from the MOVV (recopy new)
	MOVV	R7, ret+16(FP)		// return the old value
	DBAR				// memory barrier after the exchange
	RET
149
// uintptr is 64 bits on loong64, so delegate to Xchg64.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)
152
// Pointer store without a GC write barrier (caller's responsibility);
// pointers are 64 bits on loong64, so delegate to Store64.
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
155
// StoreRel needs only release ordering; Store's full barriers are stronger,
// so delegating is correct (if conservative).
TEXT ·StoreRel(SB), NOSPLIT, $0-12
	JMP	·Store(SB)
158
// Release-ordered 64-bit store; Store64's full barriers are sufficient.
TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)
161
// uintptr is 64 bits on loong64, so delegate to Store64.
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	JMP     ·Store64(SB)
164
// void Store(uint32 volatile *ptr, uint32 val)
// Sequentially-consistent 32-bit store: barrier, plain store, barrier.
TEXT ·Store(SB), NOSPLIT, $0-12
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	DBAR				// order earlier accesses before the store
	MOVW	R5, 0(R4)
	DBAR				// order the store before later accesses
	RET
172
// void Store8(uint8 volatile *ptr, uint8 val)
// Sequentially-consistent 8-bit store: barrier, plain store, barrier.
TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVV	ptr+0(FP), R4
	MOVB	val+8(FP), R5
	DBAR				// order earlier accesses before the store
	MOVB	R5, 0(R4)
	DBAR				// order the store before later accesses
	RET
180
// void Store64(uint64 volatile *ptr, uint64 val)
// Sequentially-consistent 64-bit store: barrier, plain store, barrier.
TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVV	ptr+0(FP), R4
	MOVV	val+8(FP), R5
	DBAR				// order earlier accesses before the store
	MOVV	R5, 0(R4)
	DBAR				// order the store before later accesses
	RET
188
// void	Or8(byte volatile*, byte);
// Atomically ORs val into the byte at ptr using a 32-bit LL/SC on the
// containing aligned word (loong64 is little-endian).
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVV	ptr+0(FP), R4
	MOVBU	val+8(FP), R5
	// Align ptr down to 4 bytes so we can use 32-bit load/store.
	MOVV	$~3, R6
	AND	R4, R6			// R6 = ptr &^ 3 (aligned word address)
	// R7 = ((ptr & 3) * 8)
	AND	$3, R4, R7
	SLLV	$3, R7			// bit offset of the byte within the word
	// Shift val for aligned ptr. R5 = val << R7
	SLLV	R7, R5

	DBAR				// memory barrier before the read-modify-write
	LL	(R6), R7		// load-linked containing word
	OR	R5, R7			// OR the shifted byte into the word
	SC	R7, (R6)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SC failed: retry from the LL
	DBAR				// memory barrier after the read-modify-write
	RET
209
// void	And8(byte volatile*, byte);
// Atomically ANDs val into the byte at ptr using a 32-bit LL/SC on the
// containing aligned word (loong64 is little-endian). Bits outside the
// target byte are masked with 1s so they are preserved by the AND.
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVV	ptr+0(FP), R4
	MOVBU	val+8(FP), R5
	// Align ptr down to 4 bytes so we can use 32-bit load/store.
	MOVV	$~3, R6
	AND	R4, R6			// R6 = ptr &^ 3 (aligned word address)
	// R7 = ((ptr & 3) * 8)
	AND	$3, R4, R7
	SLLV	$3, R7			// bit offset of the byte within the word
	// Shift val for aligned ptr. R5 = val << R7 | ^(0xFF << R7)
	MOVV	$0xFF, R8
	SLLV	R7, R5			// val into byte position
	SLLV	R7, R8			// byte mask into position
	NOR	R0, R8			// R8 = ^(0xFF << R7): 1s everywhere but the target byte
	OR	R8, R5			// full-word AND operand

	DBAR				// memory barrier before the read-modify-write
	LL	(R6), R7		// load-linked containing word
	AND	R5, R7			// AND, touching only the target byte
	SC	R7, (R6)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SC failed: retry from the LL
	DBAR				// memory barrier after the read-modify-write
	RET
234
// func Or(addr *uint32, v uint32)
// Atomically: *addr |= v (no return value).
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LL	(R4), R6		// load-linked old value
	OR	R5, R6			// R6 = old | v
	SC	R6, (R4)		// store-conditional; R6 = success flag
	BEQ	R6, -4(PC)		// SC failed: retry from the LL
	DBAR				// memory barrier after the read-modify-write
	RET
246
// func And(addr *uint32, v uint32)
// Atomically: *addr &= v (no return value).
TEXT ·And(SB), NOSPLIT, $0-12
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LL	(R4), R6		// load-linked old value
	AND	R5, R6			// R6 = old & v
	SC	R6, (R4)		// store-conditional; R6 = success flag
	BEQ	R6, -4(PC)		// SC failed: retry from the LL
	DBAR				// memory barrier after the read-modify-write
	RET
258
// func Or32(addr *uint32, v uint32) old uint32
// Atomically: old = *addr; *addr |= v; return old.
TEXT ·Or32(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LL	(R4), R6		// R6 = old value (kept for the return)
	OR	R5, R6, R7		// R7 = old | v
	SC	R7, (R4)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SC failed: retry from the LL
	DBAR				// memory barrier after the read-modify-write
	MOVW R6, ret+16(FP)		// return the old value
	RET
271
// func And32(addr *uint32, v uint32) old uint32
// Atomically: old = *addr; *addr &= v; return old.
TEXT ·And32(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LL	(R4), R6		// R6 = old value (kept for the return)
	AND	R5, R6, R7		// R7 = old & v
	SC	R7, (R4)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SC failed: retry from the LL
	DBAR				// memory barrier after the read-modify-write
	MOVW R6, ret+16(FP)		// return the old value
	RET
284
// func Or64(addr *uint64, v uint64) old uint64
// Atomically: old = *addr; *addr |= v; return old.
TEXT ·Or64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	val+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LLV	(R4), R6		// R6 = old value (kept for the return)
	OR	R5, R6, R7		// R7 = old | v
	SCV	R7, (R4)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SCV failed: retry from the LLV
	DBAR				// memory barrier after the read-modify-write
	MOVV R6, ret+16(FP)		// return the old value
	RET
297
// func And64(addr *uint64, v uint64) old uint64
// Atomically: old = *addr; *addr &= v; return old.
TEXT ·And64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	val+8(FP), R5
	DBAR				// memory barrier before the read-modify-write
	LLV	(R4), R6		// R6 = old value (kept for the return)
	AND	R5, R6, R7		// R7 = old & v
	SCV	R7, (R4)		// store-conditional; R7 = success flag
	BEQ	R7, -4(PC)		// SCV failed: retry from the LLV
	DBAR				// memory barrier after the read-modify-write
	MOVV R6, ret+16(FP)		// return the old value
	RET
310
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
// uintptr is 64 bits on loong64, so delegate to And64.
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
	JMP	·And64(SB)
314
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
// uintptr is 64 bits on loong64, so delegate to Or64.
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
	JMP	·Or64(SB)
318
// uint32 internal∕runtime∕atomic·Load(uint32 volatile* ptr)
// Sequentially-consistent 32-bit load: barrier, plain load, barrier.
TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
	MOVV	ptr+0(FP), R19
	DBAR				// order earlier accesses before the load
	MOVWU	0(R19), R19		// zero-extending 32-bit load
	DBAR				// order the load before later accesses
	MOVW	R19, ret+8(FP)
	RET
327
// uint8 internal∕runtime∕atomic·Load8(uint8 volatile* ptr)
// Sequentially-consistent 8-bit load: barrier, plain load, barrier.
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
	MOVV	ptr+0(FP), R19
	DBAR				// order earlier accesses before the load
	MOVBU	0(R19), R19		// zero-extending byte load
	DBAR				// order the load before later accesses
	MOVB	R19, ret+8(FP)
	RET
336
// uint64 internal∕runtime∕atomic·Load64(uint64 volatile* ptr)
// Sequentially-consistent 64-bit load: barrier, plain load, barrier.
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
	MOVV	ptr+0(FP), R19
	DBAR				// order earlier accesses before the load
	MOVV	0(R19), R19
	DBAR				// order the load before later accesses
	MOVV	R19, ret+8(FP)
	RET
345
// void *internal∕runtime∕atomic·Loadp(void *volatile *ptr)
// Sequentially-consistent pointer load (pointers are 64 bits on loong64).
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
	MOVV	ptr+0(FP), R19
	DBAR				// order earlier accesses before the load
	MOVV	0(R19), R19
	DBAR				// order the load before later accesses
	MOVV	R19, ret+8(FP)
	RET
354
// uint32 internal∕runtime∕atomic·LoadAcq(uint32 volatile* ptr)
// LoadAcq needs only acquire ordering; Load's full barriers are stronger.
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
	JMP	·Load(SB)
358
// uint64 ·LoadAcq64(uint64 volatile* ptr)
// Acquire-ordered 64-bit load; Load64's full barriers are sufficient.
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
	JMP	·Load64(SB)
362
// uintptr ·LoadAcquintptr(uintptr volatile* ptr)
// uintptr is 64 bits on loong64, so delegate to Load64.
TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
	JMP	·Load64(SB)
366
367