// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
// 1. A blocked safe-point occurs for the duration that a goroutine is
//    descheduled, blocked on synchronization, or in a system call.
//
// 2. Synchronous safe-points occur when a running goroutine checks
//    for a preemption request.
//
// 3. Asynchronous safe-points occur at any instruction in user code
//    where the goroutine can be safely paused and a conservative
//    stack and register scan can find stack roots. The runtime can
//    stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
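//
// For illustration, the prologue check behaves roughly like the
// following sketch (the real check is emitted by the compiler in
// assembly, accounts for the frame size, and varies by architecture):
//
//	if SP < getg().stackguard0 {
//		morestack() // newstack sees stackguard0 == stackPreempt and preempts
//	}
//
// stackPreempt is larger than any valid stack pointer, so poisoning
// the stack bound forces the very next prologue check to fail.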
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
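//
// In rough outline, the signal-handler side amounts to the following
// sketch (on Unix-like platforms the real code is doSigPreempt in the
// signal handling path, and the context rewrite in pushCall is
// architecture-specific):
//
//	if wantAsyncPreempt(gp) {
//		if ok, newpc := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()); ok {
//			// Rewrite the signal context so that, on resume, the
//			// thread appears to have called asyncPreempt from newpc.
//			ctxt.pushCall(abi.FuncPCABI0(asyncPreempt), newpc)
//		}
//	}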
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"internal/stringslite"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + stackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && asyncM.preemptGen.Load() == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := asyncM2.preemptGen.Load()
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}
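
// A caller typically brackets its read-only access with the pair, for
// example (a simplified sketch; the real callers live in the garbage
// collector's stack scanning code):
//
//	state := suspendG(gp)
//	if !state.dead {
//		// gp is parked at a safe-point; its stack and scheduling
//		// state can be read here.
//	}
//	resumeG(state)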

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly.
func asyncPreempt()
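
// As a rough pseudo-Go rendering of what the generated assembly does
// (saveAllRegs and restoreAllRegs are hypothetical stand-ins for the
// per-architecture register spills, not real runtime functions):
//
//	func asyncPreempt() {
//		saveAllRegs()    // spill every user register (and flags) to the stack
//		asyncPreempt2()  // park or yield via the scheduler
//		restoreAllRegs() // reload the saved registers
//		return           // to the PC the signal handler pushed
//	}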

//go:nosplit
func asyncPreempt2() {
	gp := getg()
	gp.asyncSafePoint = true
	if gp.preemptStop {
		mcall(preemptPark)
	} else {
		mcall(gopreempt_m)
	}
	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(abi.FuncPCABI0(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(asyncPreempt2))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize
	if asyncPreemptStack > stackNosplit {
		// We need more than the nosplit limit. This isn't
		// unsafe, but it may limit asynchronous preemption.
		//
		// This may be a problem if we start using more
		// registers. In that case, we should store registers
		// in a context object. If we pre-allocate one per P,
		// asyncPreempt can spill just a few registers to the
		// stack, then grab its context object and spill into
		// it. When it enters the runtime, it would allocate a
		// new context for the P.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt reports whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
// 1. It's safe to suspend gp and conservatively scan its stack and
// registers. There are no potentially hidden pointer values and it's
// not in the middle of an atomic sequence like a write barrier.
//
// 2. gp has enough stack space to inject the asyncPreempt call.
//
// 3. It's generally safe to interact with the runtime, even if we're
// in a signal handler stopped here. For example, there are no runtime
// locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	up, startpc := pcdatavalue2(f, abi.PCDATA_UnsafePoint, pc)
	if up == abi.UnsafePointUnsafe {
		// Unsafe-point marked by compiler. This includes
		// atomic sequences (e.g., write barrier) and nosplit
		// functions (except at calls).
		return false, 0
	}
	if fd := funcdata(f, abi.FUNCDATA_LocalsPointerMaps); fd == nil || f.flag&abi.FuncFlagAsm != 0 {
		// This is assembly code. Don't assume it's well-formed.
		// TODO: Empirically we still need the fd == nil check. Why?
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		// It might be possible to preempt any assembly functions
		// except the ones that have funcFlag_SPWRITE set in f.flag.
		return false, 0
	}
	// Check the inner-most name
	u, uf := newInlineUnwinder(f, pc)
	name := u.srcFunc(uf).name()
	if stringslite.HasPrefix(name, "runtime.") ||
		stringslite.HasPrefix(name, "runtime/internal/") ||
		stringslite.HasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check),
		// reflect.{makeFuncStub,methodValueCall}.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	switch up {
	case abi.UnsafePointRestart1, abi.UnsafePointRestart2:
		// Restartable instruction sequence. Back off PC to
		// the start PC.
		if startpc == 0 || startpc > pc || pc-startpc > 20 {
			throw("bad restart PC")
		}
		return true, startpc
	case abi.UnsafePointRestartAtEntry:
		// Restart from the function entry at resumption.
		return true, f.entry()
	}
	return true, pc
}