// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "asm_ppc64x.h"
#include "cgo/abi_ppc64x.h"

// The following functions allow calling the clang-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the ppc64le calling convention.
// Arguments are passed in R3, R4, R5 ...
// SP must be 16-byte aligned.

// Note that for ppc64x, LLVM follows the standard ABI and
// expects arguments in registers, so these functions move
// the arguments from storage to the registers expected
// by the ABI.

// When calling from Go to Clang tsan code:
// R3 is the 1st argument and is usually the ThreadState*
// R4-? are the 2nd, 3rd, 4th, etc. arguments
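// For example, the call __tsan_read(thr, addr, pc) made below ends up with
// thr in R3, addr in R4, and pc in R5 (see raceread and racecalladdr for how
// those registers are loaded).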

// When calling racecalladdr:
// R8 is the call target address

// The race ctx is passed in R3 and loaded in
// racecalladdr.
//
// The sequence used to get the race ctx:
//    MOVD    runtime·tls_g(SB), R10 // Address of TLS variable
//    MOVD    0(R10), g              // g = R30
//    MOVD    g_racectx(g), R3       // racectx == ThreadState
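// Note: the C race runtime may clobber the g register (R30), so g is always
// reloaded from TLS as above rather than trusted across calls into tsan
// (see the comments in racecallatomic and racecall below).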

// func runtime·raceread(addr uintptr)
// Called from instrumented Go code
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R3, R4 // addr
	MOVD	LR, R5 // caller pc; LR was set by the BL that called us
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R8
	BR	racecalladdr<>(SB)

TEXT    runtime·RaceRead(SB), NOSPLIT, $0-8
	BR	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R4
	MOVD	callpc+8(FP), R5
	MOVD	pc+16(FP), R6
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R8
	BR	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented Go code
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R3, R4 // addr
	MOVD	LR, R5 // caller has set LR via BL inst
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R8
	BR	racecalladdr<>(SB)

TEXT    runtime·RaceWrite(SB), NOSPLIT, $0-8
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R4
	MOVD	callpc+8(FP), R5
	MOVD	pc+16(FP), R6
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R8
	BR	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented Go code.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R4, R5 // size
	MOVD	R3, R4 // addr
	MOVD	LR, R6 // caller pc
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R8
	BR	racecalladdr<>(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD    addr+0(FP), R4
	MOVD    size+8(FP), R5
	MOVD    pc+16(FP), R6
	ADD	$4, R6		// tsan wants return addr
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD    $__tsan_read_range(SB), R8
	BR	racecalladdr<>(SB)

TEXT    runtime·RaceReadRange(SB), NOSPLIT, $0-16
	BR	runtime·racereadrange(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented Go code.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R4, R5 // size
	MOVD	R3, R4 // addr
	MOVD	LR, R6 // caller pc
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R8
	BR	racecalladdr<>(SB)

TEXT    runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	BR	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
// Called from instrumented Go code
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R4
	MOVD	size+8(FP), R5
	MOVD	pc+16(FP), R6
	ADD	$4, R6			// tsan wants return addr
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R8
	BR	racecalladdr<>(SB)

// Call a __tsan function from Go code.
// R8 = tsan function address
// R3 = *ThreadState a.k.a. g_racectx from g
// R4 = addr passed to __tsan function
//
// If addr (R4) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
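//
// In C-like pseudocode, the range check below is roughly (a sketch, not runtime code):
//   if ((racearenastart <= addr && addr < racearenaend) ||
//       (racedatastart <= addr && addr < racedataend))
//           tsanfn(racectx, addr, ...);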
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVD    runtime·tls_g(SB), R10
	MOVD	0(R10), g
	MOVD	g_racectx(g), R3	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R9
	CMP	R4, R9
	BLT	data
	MOVD	runtime·racearenaend(SB), R9
	CMP	R4, R9
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R9
	CMP	R4, R9
	BLT	ret
	MOVD	runtime·racedataend(SB), R9
	CMP	R4, R9
	BGE	ret
call:
	// Careful!! racecall will save LR on its
	// stack, which is OK as long as racecalladdr
	// doesn't change in a way that generates a stack frame.
	// racecall should return to the caller of
	// racecalladdr.
	BR	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented Go code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R8
	BR	racefuncenter<>(SB)

// Common code for racefuncenter
// R8 = caller's return address (set by the caller)
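// (__tsan_func_enter/__tsan_func_exit record function entry and exit so the
// race runtime can reconstruct call stacks in race reports; instrumented code
// is expected to call them in matching pairs.)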
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVD    runtime·tls_g(SB), R10
	MOVD    0(R10), g
	MOVD    g_racectx(g), R3        // goroutine racectx aka *ThreadState
	MOVD	R8, R4			// caller pc set by caller in R8
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R8
	BR	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented Go code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVD    runtime·tls_g(SB), R10
	MOVD    0(R10), g
	MOVD    g_racectx(g), R3        // goroutine racectx aka *ThreadState
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R8
	BR	racecall<>(SB)

// Atomic operations for the sync/atomic package.
// These are implemented by calling the corresponding __tsan_go_atomic* functions.
// R6 = addr of arguments passed to this function
// R3, R4, R5 are set in racecallatomic
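// Each __tsan_go_atomicNN_* entry point takes (thr, cpc, pc, a), where 'a'
// points at the Go argument block of the sync/atomic function: the address
// operand first, then the remaining arguments and the result slot. tsan
// performs the operation and writes the result back through 'a'.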

// Load atomic in tsan
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	// void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic32_load(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	// void __tsan_go_atomic64_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic64_load(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	BR	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	BR	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	BR	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	BR	sync∕atomic·LoadInt64(SB)

// Store atomic in tsan
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	// void __tsan_go_atomic32_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic32_store(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	// void __tsan_go_atomic64_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic64_store(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	BR	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	BR	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	BR	sync∕atomic·StoreInt64(SB)

// Swap in tsan
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	// void __tsan_go_atomic32_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic32_exchange(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	// void __tsan_go_atomic64_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic64_exchange(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	BR	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·SwapInt64(SB)

// Add atomic in tsan
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	// void __tsan_go_atomic32_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R8
	ADD	$64, R1, R6	// addr of caller's 1st arg
	BL	racecallatomic<>(SB)
	// The tsan fetch_add result is not as expected by Go,
	// so the 'add' must be added to the result.
	MOVW	add+8(FP), R3	// The tsan fetch_add does not return the
	MOVW	ret+16(FP), R4	// result as expected by Go, so fix it.
	ADD	R3, R4, R3
	MOVW	R3, ret+16(FP)
	RET
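// Worked example (illustrative): if *addr held 5 and add is 3, the tsan
// fetch_add leaves the old value 5 in the ret slot; adding 'add' stores 8,
// the new value that Go's AddInt32 returns.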

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	// void __tsan_go_atomic64_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R8
	ADD	$64, R1, R6	// addr of caller's 1st arg
	BL	racecallatomic<>(SB)
	// The tsan fetch_add result is not as expected by Go,
	// so the 'add' must be added to the result.
	MOVD	add+8(FP), R3
	MOVD	ret+16(FP), R4
	ADD	R3, R4, R3
	MOVD	R3, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	BR	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	// void __tsan_go_atomic32_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic32_fetch_and(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	// void __tsan_go_atomic64_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic64_fetch_and(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	BR	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	// void __tsan_go_atomic32_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic32_fetch_or(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	// void __tsan_go_atomic64_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
	MOVD	$__tsan_go_atomic64_fetch_or(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	BR	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	BR	sync∕atomic·OrInt64(SB)

// CompareAndSwap in tsan
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	// void __tsan_go_atomic32_compare_exchange(
	//   ThreadState *thr, uptr cpc, uptr pc, u8 *a)
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	// void __tsan_go_atomic64_compare_exchange(
	//   ThreadState *thr, uptr cpc, uptr pc, u8 *a)
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R8
	ADD	$32, R1, R6	// addr of caller's 1st arg
	BR	racecallatomic<>(SB)

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	BR	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	BR	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	BR	sync∕atomic·CompareAndSwapInt64(SB)

// Common function used to call tsan's atomic functions
// R3 = *ThreadState (loaded below from the goroutine's racectx)
// R4 = caller pc, loaded from the stack below
// R5 = pc of the tsan function being called (copied from R8 below)
// R6 = addr of incoming arg list (set by the caller)
// R8 contains addr of target function.
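//
// In outline: fault early on a bad address, check it against the arena and
// data ranges, then either call the tsan atomic function directly or, for an
// out-of-range address, bracket the call with __tsan_go_ignore_sync_begin/end.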
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early if address passed to atomic function is bad.
	MOVD	(R6), R7	// 1st arg is addr
	MOVB	(R7), R9	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R9
	CMP	R7, R9
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R9
	CMP	R7, R9
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R9
	CMP	R7, R9
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R9
	CMP	R7, R9
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVD    runtime·tls_g(SB), R10
	MOVD    0(R10), g
	MOVD    g_racectx(g), R3        // goroutine racectx aka *ThreadState
	MOVD	R8, R5			// pc is the function called
	MOVD	(R1), R4		// caller pc from stack
	BL	racecall<>(SB)		// BL needed to maintain stack consistency
	RET
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R8, R15	// save the original function
	MOVD	R6, R17 // save the original arg list addr
	MOVD	$__tsan_go_ignore_sync_begin(SB), R8 // func addr to call
	MOVD    runtime·tls_g(SB), R10
	MOVD    0(R10), g
	MOVD    g_racectx(g), R3        // goroutine context
	BL	racecall<>(SB)
	MOVD	R15, R8	// restore the original function
	MOVD	R17, R6 // restore arg list addr
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber r30 (g)
	MOVD	runtime·tls_g(SB), R10
	MOVD	0(R10), g

	MOVD	g_racectx(g), R3
	MOVD	R8, R5		// pc is the tsan function being called, as above
	MOVD	(R1), R4	// caller pc from stack
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R8
	MOVD	g_racectx(g), R3	// goroutine context; g was reloaded from TLS by racecall
	BL	racecall<>(SB)
	RET

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
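// (The Go-side callers, typically in runtime/race.go, pass a __tsan_* function
// pointer plus up to four integer arguments here.)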
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R8
	MOVD	arg0+8(FP), R3
	MOVD	arg1+16(FP), R4
	MOVD	arg2+24(FP), R5
	MOVD	arg3+32(FP), R6
	JMP	racecall<>(SB)

// Switches to g0's stack (if not already on it) and calls the C function in R8.
// The arguments were already loaded into registers for the Go->C call.
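// In outline: save LR in both the Go and C ABI slots, switch R1 to g0's stack
// unless already on it, align R1 to 16 bytes, call the function in R8 via CTR
// (with R12 set as the ABI expects), then restore R1, g (from TLS), and LR.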
TEXT	racecall<>(SB), NOSPLIT, $0-0
	// Set the LR slot for the ppc64 ABI
	MOVD	LR, R10
	MOVD	R10, 0(R1)	// Go expectation
	MOVD	R10, 16(R1)	// C ABI
	// Get info from the current goroutine
	MOVD    runtime·tls_g(SB), R10	// g offset in TLS
	MOVD    0(R10), g
	MOVD	g_m(g), R7		// m for g
	MOVD	R1, R16			// callee-saved, preserved across C call
	MOVD	m_g0(R7), R10		// g0 for m
	CMP	R10, g			// same g0?
	BEQ	call			// already on g0
	MOVD	(g_sched+gobuf_sp)(R10), R1 // switch R1
call:
	// prepare frame for C ABI
	SUB	$32, R1			// create frame for callee saving LR, CR, R2 etc.
	RLDCR   $0, R1, $~15, R1	// align SP to 16 bytes
	MOVD	R8, CTR			// R8 = target function addr
	MOVD	R8, R12			// expected by PPC64 ABI
	BL	(CTR)
	XOR     R0, R0			// clear R0 on return from Clang
	MOVD	R16, R1			// restore R1; R16 is nonvolatile in Clang
	MOVD    runtime·tls_g(SB), R10	// find correct g
	MOVD    0(R10), g
	MOVD	16(R1), R10		// LR was saved away, restore for return
	MOVD	R10, LR
	RET

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// The direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
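//
// In outline: command 0 (raceGetProcCmd) is answered inline on the fast path
// below; any other command switches to g0 (if needed) and is dispatched
// through runtime·racecallback.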
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	MOVD	$0, R0		// clear R0 since we came from C code
	CMP	R3, $0
	BNE	rest
	// Inline raceGetProcCmd without clobbering callee-save registers.
	MOVD	runtime·tls_g(SB), R10
	MOVD	0(R10), R11
	MOVD	g_m(R11), R3
	MOVD	m_p(R3), R3
	MOVD	p_raceprocctx(R3), R3
	MOVD	R3, (R4)
	RET

rest:
	// Save registers according to the host PPC64 ABI
	// and reserve 16B for argument storage.
	STACK_AND_SAVE_HOST_TO_GO_ABI(16)

	// Load g, and switch to g0 if not already on it.
	MOVD	runtime·tls_g(SB), R10
	MOVD	0(R10), g

	MOVD	g_m(g), R7
	MOVD	m_g0(R7), R8
	CMP	g, R8
	BEQ	noswitch

	MOVD	R8, g // set g = m->g0

noswitch:
	BL	runtime·racecallback<ABIInternal>(SB)

	UNSTACK_AND_RESTORE_GO_TO_HOST_ABI(16)
	RET

// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8