// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.
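//
// For orientation: with -race, the compiler instruments memory accesses
// roughly like
//
//	// x = *p becomes:
//	runtime.raceread(uintptr(unsafe.Pointer(p)))
//	x = *p
//
// and brackets instrumented function bodies with racefuncenter/racefuncexit.
// (A sketch of the idea, not the exact generated code.)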

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif
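// TP_ALIGN rounds the thread pointer down to an 8-byte boundary:
// AND $~7, R0 clears the low three bits of R0.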

// Load g from TLS. (See tls_arm64.s)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD    runtime·tls_g(SB), R11 \
	MOVD    (R0)(R11), g
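
// In effect, load_g computes g = *(TLS base + tls_g): MRS_TPIDR_R0 reads
// the thread pointer (TPIDR_EL0) into R0, runtime·tls_g holds the offset
// of g's slot from that base, and the indexed load fetches g.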

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
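//
// In pseudocode, mirroring the branches below:
//
//	if racearenastart <= addr && addr < racearenaend { goto call }
//	if racedatastart <= addr && addr <= racedataend { goto call }
//	return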
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGT	ret
call:
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for the sync/atomic package.
// R3 = addr of the arguments passed to this function; it can
// be fetched at 40(RSP) in racecallatomic after the two BLs.
// R0, R1, R2 are set in racecallatomic.
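//
// Why 40(RSP): the incoming arguments sit 8 bytes above the entry SP of the
// sync∕atomic function (above its frame-pointer slot), and each of the two
// intervening frames (the sync∕atomic entry point and racecallatomic) is 16
// bytes, so 8 + 16 + 16 = 40. This assumes the usual arm64 frame layout for
// these NOSPLIT $0 functions.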

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
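//
// Note: __tsan_go_atomic{32,64}_fetch_add returns the old value, while Go's
// atomic.Add* returns the new value, so after the call the functions below
// add the delta to tsan's result before storing it back ("convert fetch_add
// to add_fetch").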
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after the two BLs it is at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g)
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
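//
// On the Go side this is declared along the lines of (see race.go; the
// exact declaration there may differ):
//
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)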
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (R9). Arguments are already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	// Decrement SP past where the frame pointer is saved in the Go arm64
	// ABI (one word below the stack pointer) so the race detector library
	// code doesn't clobber it.
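	// (Subtracting 16 rather than 8 also keeps RSP 16-byte aligned, as
	// required.)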
	SUB	$16, RSP
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call has only switched SP, so finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12 // save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
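	// Frame layout (176 bytes), derived from the offsets below:
	//	0(RSP)    saved LR
	//	8(RSP)    arg 0 for racecallback
	//	16(RSP)   arg 1 for racecallback
	//	24(RSP)   R19...R28 (10 words)
	//	104(RSP)  F8...F15 (8 words)
	//	168(RSP)  R29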
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0; save R0.
	MOVD	R0, R13
	load_g
	// restore R0
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif