1// Copyright 2019 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Goroutine preemption 6// 7// A goroutine can be preempted at any safe-point. Currently, there 8// are a few categories of safe-points: 9// 10// 1. A blocked safe-point occurs for the duration that a goroutine is 11// descheduled, blocked on synchronization, or in a system call. 12// 13// 2. Synchronous safe-points occur when a running goroutine checks 14// for a preemption request. 15// 16// 3. Asynchronous safe-points occur at any instruction in user code 17// where the goroutine can be safely paused and a conservative 18// stack and register scan can find stack roots. The runtime can 19// stop a goroutine at an async safe-point using a signal. 20// 21// At both blocked and synchronous safe-points, a goroutine's CPU 22// state is minimal and the garbage collector has complete information 23// about its entire stack. This makes it possible to deschedule a 24// goroutine with minimal space, and to precisely scan a goroutine's 25// stack. 26// 27// Synchronous safe-points are implemented by overloading the stack 28// bound check in function prologues. To preempt a goroutine at the 29// next synchronous safe-point, the runtime poisons the goroutine's 30// stack bound to a value that will cause the next stack bound check 31// to fail and enter the stack growth implementation, which will 32// detect that it was actually a preemption and redirect to preemption 33// handling. 34// 35// Preemption at asynchronous safe-points is implemented by suspending 36// the thread using an OS mechanism (e.g., signals) and inspecting its 37// state to determine if the goroutine was at an asynchronous 38// safe-point. Since the thread suspension itself is generally 39// asynchronous, it also checks if the running goroutine wants to be 40// preempted, since this could have changed. If all conditions are 41// satisfied, it adjusts the signal context to make it look like the 42// signaled thread just called asyncPreempt and resumes the thread. 43// asyncPreempt spills all registers and enters the scheduler. 44// 45// (An alternative would be to preempt in the signal handler itself. 46// This would let the OS save and restore the register state and the 47// runtime would only need to know how to extract potentially 48// pointer-containing registers from the signal context. However, this 49// would consume an M for every preempted G, and the scheduler itself 50// is not designed to run from a signal handler, as it tends to 51// allocate memory and start threads in the preemption path.) 52 53package runtime 54 55import ( 56 "internal/abi" 57 "internal/goarch" 58 "internal/stringslite" 59) 60 61type suspendGState struct { 62 g *g 63 64 // dead indicates the goroutine was not suspended because it 65 // is dead. This goroutine could be reused after the dead 66 // state was observed, so the caller must not assume that it 67 // remains dead. 68 dead bool 69 70 // stopped indicates that this suspendG transitioned the G to 71 // _Gwaiting via g.preemptStop and thus is responsible for 72 // readying it when done. 73 stopped bool 74} 75 76// suspendG suspends goroutine gp at a safe-point and returns the 77// state of the suspended goroutine. The caller gets read access to 78// the goroutine until it calls resumeG. 79// 80// It is safe for multiple callers to attempt to suspend the same 81// goroutine at the same time. The goroutine may execute between 82// subsequent successful suspend operations. The current 83// implementation grants exclusive access to the goroutine, and hence 84// multiple callers will serialize. However, the intent is to grant 85// shared read access, so please don't depend on exclusive access. 86// 87// This must be called from the system stack and the user goroutine on 88// the current M (if any) must be in a preemptible state. This 89// prevents deadlocks where two goroutines attempt to suspend each 90// other and both are in non-preemptible states. There are other ways 91// to resolve this deadlock, but this seems simplest. 92// 93// TODO(austin): What if we instead required this to be called from a 94// user goroutine? Then we could deschedule the goroutine while 95// waiting instead of blocking the thread. If two goroutines tried to 96// suspend each other, one of them would win and the other wouldn't 97// complete the suspend until it was resumed. We would have to be 98// careful that they couldn't actually queue up suspend for each other 99// and then both be suspended. This would also avoid the need for a 100// kernel context switch in the synchronous case because we could just 101// directly schedule the waiter. The context switch is unavoidable in 102// the signal case. 103// 104//go:systemstack 105func suspendG(gp *g) suspendGState { 106 if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning { 107 // Since we're on the system stack of this M, the user 108 // G is stuck at an unsafe point. If another goroutine 109 // were to try to preempt m.curg, it could deadlock. 110 throw("suspendG from non-preemptible goroutine") 111 } 112 113 // See https://golang.org/cl/21503 for justification of the yield delay. 114 const yieldDelay = 10 * 1000 115 var nextYield int64 116 117 // Drive the goroutine to a preemption point. 118 stopped := false 119 var asyncM *m 120 var asyncGen uint32 121 var nextPreemptM int64 122 for i := 0; ; i++ { 123 switch s := readgstatus(gp); s { 124 default: 125 if s&_Gscan != 0 { 126 // Someone else is suspending it. Wait 127 // for them to finish. 128 // 129 // TODO: It would be nicer if we could 130 // coalesce suspends. 131 break 132 } 133 134 dumpgstatus(gp) 135 throw("invalid g status") 136 137 case _Gdead: 138 // Nothing to suspend. 139 // 140 // preemptStop may need to be cleared, but 141 // doing that here could race with goroutine 142 // reuse. Instead, goexit0 clears it. 143 return suspendGState{dead: true} 144 145 case _Gcopystack: 146 // The stack is being copied. We need to wait 147 // until this is done. 148 149 case _Gpreempted: 150 // We (or someone else) suspended the G. Claim 151 // ownership of it by transitioning it to 152 // _Gwaiting. 153 if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) { 154 break 155 } 156 157 // We stopped the G, so we have to ready it later. 158 stopped = true 159 160 s = _Gwaiting 161 fallthrough 162 163 case _Grunnable, _Gsyscall, _Gwaiting: 164 // Claim goroutine by setting scan bit. 165 // This may race with execution or readying of gp. 166 // The scan bit keeps it from transition state. 167 if !castogscanstatus(gp, s, s|_Gscan) { 168 break 169 } 170 171 // Clear the preemption request. It's safe to 172 // reset the stack guard because we hold the 173 // _Gscan bit and thus own the stack. 174 gp.preemptStop = false 175 gp.preempt = false 176 gp.stackguard0 = gp.stack.lo + stackGuard 177 178 // The goroutine was already at a safe-point 179 // and we've now locked that in. 180 // 181 // TODO: It would be much better if we didn't 182 // leave it in _Gscan, but instead gently 183 // prevented its scheduling until resumption. 184 // Maybe we only use this to bump a suspended 185 // count and the scheduler skips suspended 186 // goroutines? That wouldn't be enough for 187 // {_Gsyscall,_Gwaiting} -> _Grunning. Maybe 188 // for all those transitions we need to check 189 // suspended and deschedule? 190 return suspendGState{g: gp, stopped: stopped} 191 192 case _Grunning: 193 // Optimization: if there is already a pending preemption request 194 // (from the previous loop iteration), don't bother with the atomics. 195 if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && asyncM.preemptGen.Load() == asyncGen { 196 break 197 } 198 199 // Temporarily block state transitions. 200 if !castogscanstatus(gp, _Grunning, _Gscanrunning) { 201 break 202 } 203 204 // Request synchronous preemption. 205 gp.preemptStop = true 206 gp.preempt = true 207 gp.stackguard0 = stackPreempt 208 209 // Prepare for asynchronous preemption. 210 asyncM2 := gp.m 211 asyncGen2 := asyncM2.preemptGen.Load() 212 needAsync := asyncM != asyncM2 || asyncGen != asyncGen2 213 asyncM = asyncM2 214 asyncGen = asyncGen2 215 216 casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning) 217 218 // Send asynchronous preemption. We do this 219 // after CASing the G back to _Grunning 220 // because preemptM may be synchronous and we 221 // don't want to catch the G just spinning on 222 // its status. 223 if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync { 224 // Rate limit preemptM calls. This is 225 // particularly important on Windows 226 // where preemptM is actually 227 // synchronous and the spin loop here 228 // can lead to live-lock. 229 now := nanotime() 230 if now >= nextPreemptM { 231 nextPreemptM = now + yieldDelay/2 232 preemptM(asyncM) 233 } 234 } 235 } 236 237 // TODO: Don't busy wait. This loop should really only 238 // be a simple read/decide/CAS loop that only fails if 239 // there's an active race. Once the CAS succeeds, we 240 // should queue up the preemption (which will require 241 // it to be reliable in the _Grunning case, not 242 // best-effort) and then sleep until we're notified 243 // that the goroutine is suspended. 244 if i == 0 { 245 nextYield = nanotime() + yieldDelay 246 } 247 if nanotime() < nextYield { 248 procyield(10) 249 } else { 250 osyield() 251 nextYield = nanotime() + yieldDelay/2 252 } 253 } 254} 255 256// resumeG undoes the effects of suspendG, allowing the suspended 257// goroutine to continue from its current safe-point. 258func resumeG(state suspendGState) { 259 if state.dead { 260 // We didn't actually stop anything. 261 return 262 } 263 264 gp := state.g 265 switch s := readgstatus(gp); s { 266 default: 267 dumpgstatus(gp) 268 throw("unexpected g status") 269 270 case _Grunnable | _Gscan, 271 _Gwaiting | _Gscan, 272 _Gsyscall | _Gscan: 273 casfrom_Gscanstatus(gp, s, s&^_Gscan) 274 } 275 276 if state.stopped { 277 // We stopped it, so we need to re-schedule it. 278 ready(gp, 0, true) 279 } 280} 281 282// canPreemptM reports whether mp is in a state that is safe to preempt. 283// 284// It is nosplit because it has nosplit callers. 285// 286//go:nosplit 287func canPreemptM(mp *m) bool { 288 return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning 289} 290 291//go:generate go run mkpreempt.go 292 293// asyncPreempt saves all user registers and calls asyncPreempt2. 294// 295// When stack scanning encounters an asyncPreempt frame, it scans that 296// frame and its parent frame conservatively. 297// 298// asyncPreempt is implemented in assembly. 299func asyncPreempt() 300 301//go:nosplit 302func asyncPreempt2() { 303 gp := getg() 304 gp.asyncSafePoint = true 305 if gp.preemptStop { 306 mcall(preemptPark) 307 } else { 308 mcall(gopreempt_m) 309 } 310 gp.asyncSafePoint = false 311} 312 313// asyncPreemptStack is the bytes of stack space required to inject an 314// asyncPreempt call. 315var asyncPreemptStack = ^uintptr(0) 316 317func init() { 318 f := findfunc(abi.FuncPCABI0(asyncPreempt)) 319 total := funcMaxSPDelta(f) 320 f = findfunc(abi.FuncPCABIInternal(asyncPreempt2)) 321 total += funcMaxSPDelta(f) 322 // Add some overhead for return PCs, etc. 323 asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize 324 if asyncPreemptStack > stackNosplit { 325 // We need more than the nosplit limit. This isn't 326 // unsafe, but it may limit asynchronous preemption. 327 // 328 // This may be a problem if we start using more 329 // registers. In that case, we should store registers 330 // in a context object. If we pre-allocate one per P, 331 // asyncPreempt can spill just a few registers to the 332 // stack, then grab its context object and spill into 333 // it. When it enters the runtime, it would allocate a 334 // new context for the P. 335 print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n") 336 throw("async stack too large") 337 } 338} 339 340// wantAsyncPreempt returns whether an asynchronous preemption is 341// queued for gp. 342func wantAsyncPreempt(gp *g) bool { 343 // Check both the G and the P. 344 return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning 345} 346 347// isAsyncSafePoint reports whether gp at instruction PC is an 348// asynchronous safe point. This indicates that: 349// 350// 1. It's safe to suspend gp and conservatively scan its stack and 351// registers. There are no potentially hidden pointer values and it's 352// not in the middle of an atomic sequence like a write barrier. 353// 354// 2. gp has enough stack space to inject the asyncPreempt call. 355// 356// 3. It's generally safe to interact with the runtime, even if we're 357// in a signal handler stopped here. For example, there are no runtime 358// locks held, so acquiring a runtime lock won't self-deadlock. 359// 360// In some cases the PC is safe for asynchronous preemption but it 361// also needs to adjust the resumption PC. The new PC is returned in 362// the second result. 363func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) { 364 mp := gp.m 365 366 // Only user Gs can have safe-points. We check this first 367 // because it's extremely common that we'll catch mp in the 368 // scheduler processing this G preemption. 369 if mp.curg != gp { 370 return false, 0 371 } 372 373 // Check M state. 374 if mp.p == 0 || !canPreemptM(mp) { 375 return false, 0 376 } 377 378 // Check stack space. 379 if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack { 380 return false, 0 381 } 382 383 // Check if PC is an unsafe-point. 384 f := findfunc(pc) 385 if !f.valid() { 386 // Not Go code. 387 return false, 0 388 } 389 if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc) == 0 { 390 // We probably stopped at a half-executed CALL instruction, 391 // where the LR is updated but the PC has not. If we preempt 392 // here we'll see a seemingly self-recursive call, which is in 393 // fact not. 394 // This is normally ok, as we use the return address saved on 395 // stack for unwinding, not the LR value. But if this is a 396 // call to morestack, we haven't created the frame, and we'll 397 // use the LR for unwinding, which will be bad. 398 return false, 0 399 } 400 up, startpc := pcdatavalue2(f, abi.PCDATA_UnsafePoint, pc) 401 if up == abi.UnsafePointUnsafe { 402 // Unsafe-point marked by compiler. This includes 403 // atomic sequences (e.g., write barrier) and nosplit 404 // functions (except at calls). 405 return false, 0 406 } 407 if fd := funcdata(f, abi.FUNCDATA_LocalsPointerMaps); fd == nil || f.flag&abi.FuncFlagAsm != 0 { 408 // This is assembly code. Don't assume it's well-formed. 409 // TODO: Empirically we still need the fd == nil check. Why? 410 // 411 // TODO: Are there cases that are safe but don't have a 412 // locals pointer map, like empty frame functions? 413 // It might be possible to preempt any assembly functions 414 // except the ones that have funcFlag_SPWRITE set in f.flag. 415 return false, 0 416 } 417 // Check the inner-most name 418 u, uf := newInlineUnwinder(f, pc) 419 name := u.srcFunc(uf).name() 420 if stringslite.HasPrefix(name, "runtime.") || 421 stringslite.HasPrefix(name, "runtime/internal/") || 422 stringslite.HasPrefix(name, "reflect.") { 423 // For now we never async preempt the runtime or 424 // anything closely tied to the runtime. Known issues 425 // include: various points in the scheduler ("don't 426 // preempt between here and here"), much of the defer 427 // implementation (untyped info on stack), bulk write 428 // barriers (write barrier check), 429 // reflect.{makeFuncStub,methodValueCall}. 430 // 431 // TODO(austin): We should improve this, or opt things 432 // in incrementally. 433 return false, 0 434 } 435 switch up { 436 case abi.UnsafePointRestart1, abi.UnsafePointRestart2: 437 // Restartable instruction sequence. Back off PC to 438 // the start PC. 439 if startpc == 0 || startpc > pc || pc-startpc > 20 { 440 throw("bad restart PC") 441 } 442 return true, startpc 443 case abi.UnsafePointRestartAtEntry: 444 // Restart from the function entry at resumption. 445 return true, f.entry() 446 } 447 return true, pc 448} 449