// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Trace stack table and acquisition.

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"unsafe"
)

const (
	// Maximum number of PCs in a single stack trace.
	// Since events contain only stack id rather than whole stack trace,
	// we can allow quite large values here.
	traceStackSize = 128

	// logicalStackSentinel is a sentinel value at pcBuf[0] signifying that
	// pcBuf[1:] holds a logical stack requiring no further processing. Any other
	// value at pcBuf[0] represents a skip value to apply to the physical stack in
	// pcBuf[1:] after inline expansion.
	logicalStackSentinel = ^uintptr(0)
)

// traceStack captures a stack trace from a goroutine and registers it in the trace
// stack table. It then returns its unique ID. If gp == nil, then traceStack will
// attempt to use the current execution context.
//
// skip controls the number of leaf frames to omit in order to hide tracer internals
// from stack traces, see CL 5523.
//
// Avoid calling this function directly. gen needs to be the current generation
// that this stack trace is being written out for, which needs to be synchronized with
// generations moving forward. Prefer traceEventWriter.stack.
func traceStack(skip int, gp *g, gen uintptr) uint64 {
	var pcBuf [traceStackSize]uintptr

	// Figure out gp and mp for the backtrace.
	var mp *m
	if gp == nil {
		mp = getg().m
		gp = mp.curg
	}

	// Double-check that we own the stack we're about to trace.
	if debug.traceCheckStackOwnership != 0 && gp != nil {
		status := readgstatus(gp)
		// If the scan bit is set, assume we're the ones that acquired it.
		if status&_Gscan == 0 {
			// Use the trace status to check this. There are a number of cases
			// where a running goroutine might be in _Gwaiting, and these cases
			// are totally fine for taking a stack trace. They're captured
			// correctly in goStatusToTraceGoStatus.
			switch goStatusToTraceGoStatus(status, gp.waitreason) {
			case traceGoRunning, traceGoSyscall:
				if getg() == gp || mp.curg == gp {
					break
				}
				fallthrough
			default:
				print("runtime: gp=", unsafe.Pointer(gp), " gp.goid=", gp.goid, " status=", gStatusStrings[status], "\n")
				throw("attempted to trace stack of a goroutine this thread does not own")
			}
		}
	}

	if gp != nil && mp == nil {
		// We're getting the backtrace for a G that's not currently executing.
		// It may still have an M, if it's locked to some M.
		mp = gp.lockedm.ptr()
	}
	nstk := 1
	if tracefpunwindoff() || (mp != nil && mp.hasCgoOnStack()) {
		// Slow path: Unwind using default unwinder. Used when frame pointer
		// unwinding is unavailable or disabled (tracefpunwindoff), or might
		// produce incomplete results or crashes (hasCgoOnStack). Note that no
		// cgo callback related crashes have been observed yet. The main
		// motivation is to take advantage of a potentially registered cgo
		// symbolizer.
		pcBuf[0] = logicalStackSentinel
		if getg() == gp {
			nstk += callers(skip+1, pcBuf[1:])
		} else if gp != nil {
			nstk += gcallers(gp, skip, pcBuf[1:])
		}
	} else {
		// Fast path: Unwind using frame pointers.
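		// Unlike the slow path, the skip count is not applied here; it is
		// recorded at pcBuf[0] and applied during inline expansion in
		// fpunwindExpand (see the logicalStackSentinel documentation above).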
		pcBuf[0] = uintptr(skip)
		if getg() == gp {
			nstk += fpTracebackPCs(unsafe.Pointer(getfp()), pcBuf[1:])
		} else if gp != nil {
			// Three cases:
			//
			// (1) We're called on the g0 stack through mcall(fn) or systemstack(fn). To
			// behave like gcallers above, we start unwinding from sched.bp, which
			// points to the caller frame of the leaf frame on g's stack. The return
			// address of the leaf frame is stored in sched.pc, which we manually
			// capture here.
			//
			// (2) We're called against a gp that we're not currently executing on, but that isn't
			// in a syscall, in which case it's currently not executing. gp.sched contains the most
			// up-to-date information about where it stopped, and like case (1), we match gcallers
			// here.
			//
			// (3) We're called against a gp that we're not currently executing on, but that is in
			// a syscall, in which case gp.syscallsp != 0. gp.syscall* contains the most up-to-date
			// information about where it stopped, and like case (1), we match gcallers here.
			if gp.syscallsp != 0 {
				pcBuf[1] = gp.syscallpc
				nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.syscallbp), pcBuf[2:])
			} else {
				pcBuf[1] = gp.sched.pc
				nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.sched.bp), pcBuf[2:])
			}
		}
	}
	if nstk > 0 {
		nstk-- // skip runtime.goexit
	}
	if nstk > 0 && gp.goid == 1 {
		nstk-- // skip runtime.main
	}
	id := trace.stackTab[gen%2].put(pcBuf[:nstk])
	return id
}

// traceStackTable maps stack traces (arrays of PCs) to unique uint64 ids.
// It is lock-free for reading.
type traceStackTable struct {
	tab traceMap
}

// put returns a unique id for the stack trace pcs and caches it in the table,
// if it sees the trace for the first time.
func (t *traceStackTable) put(pcs []uintptr) uint64 {
	if len(pcs) == 0 {
		return 0
	}
	id, _ := t.tab.put(noescape(unsafe.Pointer(&pcs[0])), uintptr(len(pcs))*unsafe.Sizeof(uintptr(0)))
	return id
}

// dump writes all previously cached stacks to trace buffers,
// releases all memory and resets state. It must only be called once the caller
// can guarantee that there are no more writers to the table.
func (t *traceStackTable) dump(gen uintptr) {
	stackBuf := make([]uintptr, traceStackSize)
	w := unsafeTraceWriter(gen, nil)
	if root := (*traceMapNode)(t.tab.root.Load()); root != nil {
		w = dumpStacksRec(root, w, stackBuf)
	}
	w.flush().end()
	t.tab.reset()
}

func dumpStacksRec(node *traceMapNode, w traceWriter, stackBuf []uintptr) traceWriter {
	stack := unsafe.Slice((*uintptr)(unsafe.Pointer(&node.data[0])), uintptr(len(node.data))/unsafe.Sizeof(uintptr(0)))

	// N.B. This might allocate, but that's OK because we're not writing to the M's buffer,
	// but one we're about to create (with ensure).
	n := fpunwindExpand(stackBuf, stack)
	frames := makeTraceFrames(w.gen, stackBuf[:n])

	// The maximum number of bytes required to hold the encoded stack, given that
	// it contains N frames.
	maxBytes := 1 + (2+4*len(frames))*traceBytesPerNumber

	// Estimate the size of this record. This
	// bound is pretty loose, but avoids counting
	// lots of varint sizes.
	//
	// Add 1 because we might also write traceEvStacks.
	var flushed bool
	w, flushed = w.ensure(1 + maxBytes)
	if flushed {
		w.byte(byte(traceEvStacks))
	}

	// Emit stack event.
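	// Layout: traceEvStack, the stack ID, the number of frames, then one
	// (PC, func string ID, file string ID, line) tuple per frame.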
	w.byte(byte(traceEvStack))
	w.varint(uint64(node.id))
	w.varint(uint64(len(frames)))
	for _, frame := range frames {
		w.varint(uint64(frame.PC))
		w.varint(frame.funcID)
		w.varint(frame.fileID)
		w.varint(frame.line)
	}

	// Recursively walk all child nodes.
	for i := range node.children {
		child := node.children[i].Load()
		if child == nil {
			continue
		}
		w = dumpStacksRec((*traceMapNode)(child), w, stackBuf)
	}
	return w
}

// makeTraceFrames returns the frames corresponding to pcs. It may
// allocate and may emit trace events.
func makeTraceFrames(gen uintptr, pcs []uintptr) []traceFrame {
	frames := make([]traceFrame, 0, len(pcs))
	ci := CallersFrames(pcs)
	for {
		f, more := ci.Next()
		frames = append(frames, makeTraceFrame(gen, f))
		if !more {
			return frames
		}
	}
}

type traceFrame struct {
	PC     uintptr
	funcID uint64
	fileID uint64
	line   uint64
}

// makeTraceFrame sets up a traceFrame for a frame.
func makeTraceFrame(gen uintptr, f Frame) traceFrame {
	var frame traceFrame
	frame.PC = f.PC

	fn := f.Function
	const maxLen = 1 << 10
	if len(fn) > maxLen {
		fn = fn[len(fn)-maxLen:]
	}
	frame.funcID = trace.stringTab[gen%2].put(gen, fn)
	frame.line = uint64(f.Line)
	file := f.File
	if len(file) > maxLen {
		file = file[len(file)-maxLen:]
	}
	frame.fileID = trace.stringTab[gen%2].put(gen, file)
	return frame
}

// tracefpunwindoff returns true if frame pointer unwinding for the tracer is
// disabled via GODEBUG or not supported by the architecture.
func tracefpunwindoff() bool {
	return debug.tracefpunwindoff != 0 || (goarch.ArchFamily != goarch.AMD64 && goarch.ArchFamily != goarch.ARM64)
}

// fpTracebackPCs populates pcBuf with the return addresses for each frame and
// returns the number of PCs written to pcBuf. The returned PCs correspond to
// "physical frames" rather than "logical frames"; that is, if A is inlined into
// B, this will return a PC for only B.
func fpTracebackPCs(fp unsafe.Pointer, pcBuf []uintptr) (i int) {
	for i = 0; i < len(pcBuf) && fp != nil; i++ {
		// return addr sits one word above the frame pointer
		pcBuf[i] = *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
		// follow the frame pointer to the next one
		fp = unsafe.Pointer(*(*uintptr)(fp))
	}
	return i
}

// pprof_fpunwindExpand is exposed via go:linkname for use by runtime/pprof.
//
//go:linkname pprof_fpunwindExpand
func pprof_fpunwindExpand(dst, src []uintptr) int {
	return fpunwindExpand(dst, src)
}

// fpunwindExpand expands a call stack from pcBuf into dst,
// returning the number of PCs written to dst.
// pcBuf and dst should not overlap.
//
// fpunwindExpand checks if pcBuf contains logical frames (which include inlined
// frames) or physical frames (produced by frame pointer unwinding) using a
// sentinel value in pcBuf[0]. Logical frames are simply returned without the
// sentinel. Physical frames are turned into logical frames via inline unwinding
// and by applying the skip value that's stored in pcBuf[0].
func fpunwindExpand(dst, pcBuf []uintptr) int {
	if len(pcBuf) == 0 {
		return 0
	} else if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
		// pcBuf contains logical rather than inlined frames, skip has already been
		// applied, just return it without the sentinel value in pcBuf[0].
		return copy(dst, pcBuf[1:])
	}

	var (
		n          int
		lastFuncID = abi.FuncIDNormal
		skip       = pcBuf[0]
		// skipOrAdd skips or appends retPC to dst and returns true if more
		// pcs can be added.
		skipOrAdd = func(retPC uintptr) bool {
			if skip > 0 {
				skip--
			} else if n < len(dst) {
				dst[n] = retPC
				n++
			}
			return n < len(dst)
		}
	)

outer:
	for _, retPC := range pcBuf[1:] {
		callPC := retPC - 1
		fi := findfunc(callPC)
		if !fi.valid() {
			// There is no funcInfo if callPC belongs to a C function. In this case
			// we still keep the pc, but don't attempt to expand inlined frames.
			if more := skipOrAdd(retPC); !more {
				break outer
			}
			continue
		}

		u, uf := newInlineUnwinder(fi, callPC)
		for ; uf.valid(); uf = u.next(uf) {
			sf := u.srcFunc(uf)
			if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
				// ignore wrappers
			} else if more := skipOrAdd(uf.pc + 1); !more {
				break outer
			}
			lastFuncID = sf.funcID
		}
	}
	return n
}

// startPCForTrace returns the start PC of a goroutine for tracing purposes.
// If pc is a wrapper, it returns the PC of the wrapped function. Otherwise it
// returns pc.
func startPCForTrace(pc uintptr) uintptr {
	f := findfunc(pc)
	if !f.valid() {
		return pc // may happen for locked g in extra M since its pc is 0.
	}
	w := funcdata(f, abi.FUNCDATA_WrapInfo)
	if w == nil {
		return pc // not a wrapper
	}
	return f.datap.textAddr(*(*uint32)(w))
}