// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.
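//
// For example, a three-argument call like __tsan_read(thr, addr, pc)
// is marshaled as thr->RARG0, addr->RARG1, pc->RARG2, i.e. DI, SI, DX
// on SysV and CX, DX, R8 on Windows (see the defines below).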

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
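// In ABIInternal, addr arrives in AX (the first integer argument
// register of the Go internal ABI), and (SP) holds the caller's return
// address, which is passed to tsan as the pc of the racing access.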
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
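// In C-like pseudocode, the checks below amount to (a sketch):
//
//	if ((addr >= racearenastart && addr < racearenaend) ||
//	    (addr >= racedatastart && addr < racedataend))
//		racecall();	// invoke the __tsan_* function in AX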
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter.
// R11 = caller's return address
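// DX holds the closure context pointer in the Go internal ABI, and the
// C race runtime is free to clobber it, so it is saved in BX (which is
// callee-saved in both C ABIs, see above) around the call.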
TEXT	racefuncenter<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	DX, BX		// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
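//
// Each thunk mirrors the corresponding sync/atomic signature, and the
// frame size encodes its args+results: e.g. LoadInt32 is $0-12 for an
// 8-byte pointer argument plus a 4-byte result.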

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
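// tsan's fetch_add returns the old value, while sync/atomic's Add
// returns the new one, so the thunks below add the delta to the result
// slot in place. For example, with old=5 and add+8(FP)=3, tsan stores
// 5 into ret+16(FP) and the ADDL/ADDQ turns it into 8.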
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_and(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_and(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_or(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_or(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT|NOFRAME, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT|NOFRAME, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
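// The sync∕atomic thunks above CALL here with zero-size frames, so on
// entry the stack looks like:
//	0(SP)  - return address into the thunk (pc)
//	8(SP)  - return address into the thunk's caller (caller pc)
//	16(SP) - the thunk's arguments, starting with addr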
TEXT	racecallatomic<>(SB), NOSPLIT|NOFRAME, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVBLZX	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
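// On the Go side this is declared (in race.go) roughly as
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
// so all four argument slots are always present on the stack.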
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to Go world, set special registers.
	// The g register (R14) is preserved in C.
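	// X15 is the fixed zero register of the Go internal ABI; the C
	// code above may have clobbered it, so re-zero it here.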
	XORPS	X15, X15
	RET

// C->Go callback thunk that allows calling runtime·racecallback from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
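// PUSH_REGS_HOST_TO_ABI0 spills the registers that the host C ABI
// treats as callee-saved, since the Go code called below is free to
// clobber them; POP_REGS_HOST_TO_ABI0 restores them on the way out.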
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	CMPQ	R14, R15	// compare g (R14) against g0 (R15)
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret