// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Trace stack table and acquisition.

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"unsafe"
)

const (
	// Maximum number of PCs in a single stack trace.
	// Since events contain only a stack ID rather than the whole stack trace,
	// we can allow quite large values here.
	traceStackSize = 128

	// logicalStackSentinel is a sentinel value at pcBuf[0] signifying that
	// pcBuf[1:] holds a logical stack requiring no further processing. Any other
	// value at pcBuf[0] represents a skip value to apply to the physical stack in
	// pcBuf[1:] after inline expansion.
	logicalStackSentinel = ^uintptr(0)
)

// traceStack captures a stack trace from a goroutine and registers it in the trace
// stack table. It then returns its unique ID. If gp == nil, then traceStack will
// attempt to use the current execution context.
//
// skip controls the number of leaf frames to omit in order to hide tracer internals
// from stack traces; see CL 5523.
//
// Avoid calling this function directly. gen needs to be the current generation
// that this stack trace is being written out for, which needs to be synchronized with
// generations moving forward. Prefer traceEventWriter.stack.
func traceStack(skip int, gp *g, gen uintptr) uint64 {
	var pcBuf [traceStackSize]uintptr

	// Figure out gp and mp for the backtrace.
	var mp *m
	if gp == nil {
		mp = getg().m
		gp = mp.curg
	}

	// Double-check that we own the stack we're about to trace.
	if debug.traceCheckStackOwnership != 0 && gp != nil {
		status := readgstatus(gp)
		// If the scan bit is set, assume we're the ones that acquired it.
		if status&_Gscan == 0 {
			// Use the trace status to check this. There are a number of cases
			// where a running goroutine might be in _Gwaiting, and these cases
			// are totally fine for taking a stack trace. They're captured
			// correctly in goStatusToTraceGoStatus.
			switch goStatusToTraceGoStatus(status, gp.waitreason) {
			case traceGoRunning, traceGoSyscall:
				if getg() == gp || mp.curg == gp {
					break
				}
				fallthrough
			default:
				print("runtime: gp=", unsafe.Pointer(gp), " gp.goid=", gp.goid, " status=", gStatusStrings[status], "\n")
				throw("attempted to trace stack of a goroutine this thread does not own")
			}
		}
	}

	if gp != nil && mp == nil {
		// We're getting the backtrace for a G that's not currently executing.
		// It may still have an M, if it's locked to some M.
		mp = gp.lockedm.ptr()
	}
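	// nstk starts at 1 because pcBuf[0] is reserved for the sentinel or skip
	// value and is counted as part of the recorded stack.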
	nstk := 1
	if tracefpunwindoff() || (mp != nil && mp.hasCgoOnStack()) {
		// Slow path: Unwind using default unwinder. Used when frame pointer
		// unwinding is unavailable or disabled (tracefpunwindoff), or might
		// produce incomplete results or crashes (hasCgoOnStack). Note that no
		// cgo callback related crashes have been observed yet. The main
		// motivation is to take advantage of a potentially registered cgo
		// symbolizer.
		pcBuf[0] = logicalStackSentinel
		if getg() == gp {
			nstk += callers(skip+1, pcBuf[1:])
		} else if gp != nil {
			nstk += gcallers(gp, skip, pcBuf[1:])
		}
	} else {
		// Fast path: Unwind using frame pointers.
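		// Leave the skip count in pcBuf[0]; fpunwindExpand applies it after
		// inline expansion when the stack is dumped.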
		pcBuf[0] = uintptr(skip)
		if getg() == gp {
			nstk += fpTracebackPCs(unsafe.Pointer(getfp()), pcBuf[1:])
		} else if gp != nil {
			// Three cases:
			//
			// (1) We're called on the g0 stack through mcall(fn) or systemstack(fn). To
			// behave like gcallers above, we start unwinding from sched.bp, which
			// points to the caller frame of the leaf frame on g's stack. The return
			// address of the leaf frame is stored in sched.pc, which we manually
			// capture here.
			//
			// (2) We're called against a gp that we're not currently executing on, but that isn't
			// in a syscall, in which case it's currently not executing. gp.sched contains the most
			// up-to-date information about where it stopped, and like case (1), we match gcallers
			// here.
			//
			// (3) We're called against a gp that we're not currently executing on, but that is in
			// a syscall, in which case gp.syscallsp != 0. gp.syscall* contains the most up-to-date
			// information about where it stopped, and like case (1), we match gcallers here.
			if gp.syscallsp != 0 {
				pcBuf[1] = gp.syscallpc
				nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.syscallbp), pcBuf[2:])
			} else {
				pcBuf[1] = gp.sched.pc
				nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.sched.bp), pcBuf[2:])
			}
		}
	}
	if nstk > 0 {
		nstk-- // skip runtime.goexit
	}
	if nstk > 0 && gp.goid == 1 {
		nstk-- // skip runtime.main
	}
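	// The stack table is double-buffered by generation parity, so register the
	// stack in the table for the generation this trace is being written for.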
	id := trace.stackTab[gen%2].put(pcBuf[:nstk])
	return id
}

// traceStackTable maps stack traces (arrays of PC's) to unique uint64 ids.
// It is lock-free for reading.
type traceStackTable struct {
	tab traceMap
}

// put returns a unique id for the stack trace pcs, adding the trace to the
// table if it is being seen for the first time.
func (t *traceStackTable) put(pcs []uintptr) uint64 {
	if len(pcs) == 0 {
		return 0
	}
	id, _ := t.tab.put(noescape(unsafe.Pointer(&pcs[0])), uintptr(len(pcs))*unsafe.Sizeof(uintptr(0)))
	return id
}

// dump writes all previously cached stacks to trace buffers,
// releases all memory and resets state. It must only be called once the caller
// can guarantee that there are no more writers to the table.
func (t *traceStackTable) dump(gen uintptr) {
	stackBuf := make([]uintptr, traceStackSize)
	w := unsafeTraceWriter(gen, nil)
	if root := (*traceMapNode)(t.tab.root.Load()); root != nil {
		w = dumpStacksRec(root, w, stackBuf)
	}
	w.flush().end()
	t.tab.reset()
}

func dumpStacksRec(node *traceMapNode, w traceWriter, stackBuf []uintptr) traceWriter {
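	// The node's key bytes are the raw PCs of the stack; reinterpret them as a
	// []uintptr.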
	stack := unsafe.Slice((*uintptr)(unsafe.Pointer(&node.data[0])), uintptr(len(node.data))/unsafe.Sizeof(uintptr(0)))

	// N.B. This might allocate, but that's OK because we're not writing to the M's buffer,
	// but one we're about to create (with ensure).
	n := fpunwindExpand(stackBuf, stack)
	frames := makeTraceFrames(w.gen, stackBuf[:n])

	// The maximum number of bytes required to hold the encoded stack, given that
	// it contains N frames.
	maxBytes := 1 + (2+4*len(frames))*traceBytesPerNumber

	// Estimate the size of this record. This bound is pretty loose, but avoids
	// counting lots of varint sizes.
	//
	// Add 1 because we might also write traceEvStacks.
	var flushed bool
	w, flushed = w.ensure(1 + maxBytes)
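	// If ensure started a new buffer, begin it with traceEvStacks to mark it as
	// a batch of stack data.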
	if flushed {
		w.byte(byte(traceEvStacks))
	}

	// Emit stack event.
	w.byte(byte(traceEvStack))
	w.varint(uint64(node.id))
	w.varint(uint64(len(frames)))
	for _, frame := range frames {
		w.varint(uint64(frame.PC))
		w.varint(frame.funcID)
		w.varint(frame.fileID)
		w.varint(frame.line)
	}

	// Recursively walk all child nodes.
	for i := range node.children {
		child := node.children[i].Load()
		if child == nil {
			continue
		}
		w = dumpStacksRec((*traceMapNode)(child), w, stackBuf)
	}
	return w
}

// makeTraceFrames returns the frames corresponding to pcs. It may
// allocate and may emit trace events.
func makeTraceFrames(gen uintptr, pcs []uintptr) []traceFrame {
	frames := make([]traceFrame, 0, len(pcs))
	ci := CallersFrames(pcs)
	for {
		f, more := ci.Next()
		frames = append(frames, makeTraceFrame(gen, f))
		if !more {
			return frames
		}
	}
}

type traceFrame struct {
	PC     uintptr
	funcID uint64
	fileID uint64
	line   uint64
}

// makeTraceFrame sets up a traceFrame for a frame.
func makeTraceFrame(gen uintptr, f Frame) traceFrame {
	var frame traceFrame
	frame.PC = f.PC

	fn := f.Function
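	// Bound the lengths of function and file names recorded in the string table.
	// Keep the suffix, which carries the most identifying information.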
	const maxLen = 1 << 10
	if len(fn) > maxLen {
		fn = fn[len(fn)-maxLen:]
	}
	frame.funcID = trace.stringTab[gen%2].put(gen, fn)
	frame.line = uint64(f.Line)
	file := f.File
	if len(file) > maxLen {
		file = file[len(file)-maxLen:]
	}
	frame.fileID = trace.stringTab[gen%2].put(gen, file)
	return frame
}

// tracefpunwindoff returns true if frame pointer unwinding for the tracer is
// disabled via GODEBUG or not supported by the architecture.
func tracefpunwindoff() bool {
	return debug.tracefpunwindoff != 0 || (goarch.ArchFamily != goarch.AMD64 && goarch.ArchFamily != goarch.ARM64)
}

// fpTracebackPCs populates pcBuf with the return addresses for each frame and
// returns the number of PCs written to pcBuf. The returned PCs correspond to
// "physical frames" rather than "logical frames"; that is, if A is inlined into
// B, this will return a PC only for B.
func fpTracebackPCs(fp unsafe.Pointer, pcBuf []uintptr) (i int) {
	for i = 0; i < len(pcBuf) && fp != nil; i++ {
		// The return address sits one word above the frame pointer.
		pcBuf[i] = *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
		// Follow the frame pointer to the next one.
		fp = unsafe.Pointer(*(*uintptr)(fp))
	}
	return i
}

//go:linkname pprof_fpunwindExpand
func pprof_fpunwindExpand(dst, src []uintptr) int {
	return fpunwindExpand(dst, src)
}

// fpunwindExpand expands a call stack from pcBuf into dst,
// returning the number of PCs written to dst.
// pcBuf and dst should not overlap.
//
// fpunwindExpand checks if pcBuf contains logical frames (which include inlined
// frames) or physical frames (produced by frame pointer unwinding) using a
// sentinel value in pcBuf[0]. Logical frames are simply returned without the
// sentinel. Physical frames are turned into logical frames via inline unwinding
// and by applying the skip value that's stored in pcBuf[0].
func fpunwindExpand(dst, pcBuf []uintptr) int {
	if len(pcBuf) == 0 {
		return 0
	} else if pcBuf[0] == logicalStackSentinel {
		// pcBuf contains logical rather than physical frames; skip has already
		// been applied, so just return it without the sentinel value in pcBuf[0].
		return copy(dst, pcBuf[1:])
	}

	var (
		n          int
		lastFuncID = abi.FuncIDNormal
		skip       = pcBuf[0]
		// skipOrAdd skips or appends retPC to dst and returns true if more
		// PCs can be added.
		skipOrAdd = func(retPC uintptr) bool {
			if skip > 0 {
				skip--
			} else if n < len(dst) {
				dst[n] = retPC
				n++
			}
			return n < len(dst)
		}
	)

outer:
	for _, retPC := range pcBuf[1:] {
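		// retPC is a return address, which points to the instruction after the
		// call; step back by one byte so symbolization attributes the PC to the
		// call site itself.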
		callPC := retPC - 1
		fi := findfunc(callPC)
		if !fi.valid() {
			// There is no funcInfo if callPC belongs to a C function. In this case
			// we still keep the PC, but don't attempt to expand inlined frames.
			if more := skipOrAdd(retPC); !more {
				break outer
			}
			continue
		}

		u, uf := newInlineUnwinder(fi, callPC)
		for ; uf.valid(); uf = u.next(uf) {
			sf := u.srcFunc(uf)
			if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
				// ignore wrappers
			} else if more := skipOrAdd(uf.pc + 1); !more {
				break outer
			}
			lastFuncID = sf.funcID
		}
	}
	return n
}

// startPCForTrace returns the start PC of a goroutine for tracing purposes.
// If pc is a wrapper, it returns the PC of the wrapped function. Otherwise it
// returns pc.
func startPCForTrace(pc uintptr) uintptr {
	f := findfunc(pc)
	if !f.valid() {
		return pc // may happen for a locked g in an extra M, since its pc is 0.
	}
	w := funcdata(f, abi.FUNCDATA_WrapInfo)
	if w == nil {
		return pc // not a wrapper
	}
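	// The wrapinfo funcdata holds the text-section offset of the wrapped
	// function's entry PC; resolve it to an address in this module.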
	return f.datap.textAddr(*(*uint32)(w))
}