// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest are on the stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s.)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads the caller's pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads the caller's pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)
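// For orientation, a rough sketch (illustrative only, not part of this
// file) of what the compiler's race instrumentation amounts to in Go
// terms: before a load of x it inserts the equivalent of
//
//	runtime.raceread(uintptr(unsafe.Pointer(&x)))
//
// and before a store, the matching runtime.racewrite call. Because the
// thunks above are ABIInternal and tail-jump to racecalladdr, LR still
// holds the instrumented caller's PC at the point of the __tsan_* call,
// which is the PC tsan attributes the access to.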
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads the caller's pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants the return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads the caller's pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants the return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGT	ret
call:
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)
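// For readability, the range check performed by racecalladdr above is,
// in Go-like pseudocode (a sketch mirroring the intent stated in its
// comment, not part of the build):
//
//	if (racearenastart <= addr && addr < racearenaend) ||
//		(racedatastart <= addr && addr < racedataend) {
//		racecall(fn, racectx, ...)
//	}
//	// otherwise: do nothing
//
// Out-of-range addresses are silently ignored rather than reported,
// since the race runtime maintains shadow state only for those ranges.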
// Atomic operations for sync/atomic package.
// R3 = addr of arguments passed to this function; it can
// be fetched at 40(RSP) in racecallatomic after the two BLs.
// R0, R1, R2 are set in racecallatomic.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)
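// Note on the Add wrappers above: __tsan_go_atomic{32,64}_fetch_add
// returns the old value, while sync/atomic's Add returns the new one,
// hence the fixup that re-adds the delta to the stored result. For
// example (illustrative), AddInt32 on *addr == 1 with add == 2 gets
// old == 1 back from tsan, and the MOVW/ADD/MOVW sequence rewrites
// ret to 1 + 2 == 3, matching sync∕atomic.AddInt32's contract.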
// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after the two BLs it is at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET
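// A note on racecallatomic's magic offsets above: by the time it runs,
// two BLs have happened (caller -> sync∕atomic wrapper -> racecallatomic),
// so relative to RSP the wrapper's caller PC sits at 16(RSP) and the
// wrapper's incoming argument area at 40(RSP). These constants are tied
// to the wrappers' frame layout; if that layout changes, they must be
// updated in step.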
// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls (R9). Arguments are already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	// Decrement SP past where the frame pointer is saved in the Go arm64
	// ABI (one word below the stack pointer) so the race detector library
	// code doesn't clobber it.
	SUB	$16, RSP
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// The direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0; save R0.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code; reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif
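// For reference, the layout of racecallbackthunk's 176-byte frame,
// derived from the offsets used above:
//
//	  0(RSP)	saved LR
//	  8(RSP)	racecallback arg (command code)
//	 16(RSP)	racecallback arg (command context)
//	 24(RSP)	R19...R28 (SAVE_R19_TO_R28 at 8*3, 10 words)
//	104(RSP)	F8...F15 (SAVE_F8_TO_F15 at 8*13, 8 words)
//	168(RSP)	R29 (frame pointer)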