// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif
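
// For orientation, a sketch (illustrative only, not part of the runtime
// sources) of what -race instrumentation conceptually inserts at a plain Go
// store; it is these compiler-inserted calls that land in the thunks below:
//
//	// x = 42, compiled with -race, becomes roughly:
//	runtime.racewrite(uintptr(unsafe.Pointer(&x))) // report the write to tsan
//	x = 42
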
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads the caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is a function start; tsan wants a return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads the caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is a function start; tsan wants a return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads the caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is a function start; tsan wants a return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads the caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is a function start; tsan wants a return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX	// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
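
// The address filter implemented by racecalladdr above, as a Go sketch
// (illustrative only; the globals correspond to runtime·racearenastart and
// friends referenced above):
//
//	func shadowed(addr uintptr) bool {
//		return (addr >= racearenastart && addr < racearenaend) ||
//			(addr >= racedatastart && addr < racedataend)
//	}
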
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	DX, BX	// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)
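
// The Add* thunks below call __tsan_go_atomic*_fetch_add, which returns the
// old value, while sync/atomic's Add* must return the new value; hence the
// fixup applied to the result slot after racecallatomic returns. As a Go
// sketch (illustrative only):
//
//	old := fetchAdd(addr, delta) // what the tsan callout writes to ret
//	ret = old + delta            // the fixup: convert fetch_add to add_fetch
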
// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_and(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_and(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_or(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_or(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT|NOFRAME, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT|NOFRAME, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
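
// racecallatomic below depends on the stack layout at its entry, which holds
// because the sync∕atomic thunks above are NOFRAME: (SP) is the return
// address into the thunk, 8(SP) is the thunk's own return address (the user
// caller's pc), and the Go argument block starts at 16(SP) and is passed to
// tsan by address. The C-side shape is then roughly (signature assumed by
// analogy with the prototypes quoted elsewhere in this file):
//
//	// void __tsan_go_atomic32_load(ThreadState *thr, void *callpc, void *pc, void *args);
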
// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT|NOFRAME, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVBLZX	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12	// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to the Go world; restore special registers.
	// The g register (R14) is preserved in C.
	XORPS	X15, X15
	RET

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Transition from the C ABI to the Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	CMPQ	R13, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret
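
// For reference, a sketch of the Go-side dispatcher that racecallbackthunk
// reaches (the real definition lives in runtime/race.go; this is recalled
// from it, not quoted verbatim):
//
//	func racecallback(cmd uintptr, ctx unsafe.Pointer) {
//		switch cmd {
//		case raceGetProcCmd:
//			throw("should have been handled by racecallbackthunk")
//		case raceSymbolizeCodeCmd:
//			raceSymbolizeCode((*symbolizeCodeContext)(ctx))
//		case raceSymbolizeDataCmd:
//			raceSymbolizeData((*symbolizeDataContext)(ctx))
//		default:
//			throw("unknown command")
//		}
//	}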