// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Trace buffer management.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

// Maximum number of bytes required to encode uint64 in base-128.
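// A uint64 has 64 bits and base-128 encoding carries 7 bits per byte, so at
// most 10 bytes (64/7 rounded up) are needed.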
const traceBytesPerNumber = 10

// traceWriter is the interface for writing all trace data.
//
// This type is passed around as a value, and all of its methods return
// a new traceWriter. This allows for chaining together calls in a fluent-style
// API. This is partly stylistic, and very slightly for performance, since
// the compiler can destructure this value and pass it between calls as
// just regular arguments. However, this style is not load-bearing, and
// we can change it if it's deemed too error-prone.
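//
// A typical write looks roughly like the following sketch (illustrative only;
// tl is a traceLocker, and ev and arg are placeholders, not names defined in
// this file):
//
//	w := tl.writer()
//	w, _ = w.ensure(1 + traceBytesPerNumber)
//	w.byte(byte(ev))
//	w.varint(arg)
//	w.end()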
type traceWriter struct {
	traceLocker
	*traceBuf
}

// writer returns a traceWriter that writes into the current M's stream.
func (tl traceLocker) writer() traceWriter {
	return traceWriter{traceLocker: tl, traceBuf: tl.mp.trace.buf[tl.gen%2]}
}

// unsafeTraceWriter produces a traceWriter that doesn't lock the trace.
//
// It should only be used in contexts where either:
// - Another traceLocker is held.
// - trace.gen is prevented from advancing.
//
// buf may be nil.
func unsafeTraceWriter(gen uintptr, buf *traceBuf) traceWriter {
	return traceWriter{traceLocker: traceLocker{gen: gen}, traceBuf: buf}
}

// end writes the buffer back into the m.
func (w traceWriter) end() {
	if w.mp == nil {
		// Tolerate a nil mp. It makes code that creates traceWriters directly
		// less error-prone.
		return
	}
	w.mp.trace.buf[w.gen%2] = w.traceBuf
}

// ensure makes sure that at least maxSize bytes are available to write.
//
// Returns whether the buffer was flushed.
func (w traceWriter) ensure(maxSize int) (traceWriter, bool) {
	refill := w.traceBuf == nil || !w.available(maxSize)
	if refill {
		w = w.refill(traceNoExperiment)
	}
	return w, refill
}

// flush puts w.traceBuf on the queue of full buffers.
func (w traceWriter) flush() traceWriter {
	systemstack(func() {
		lock(&trace.lock)
		if w.traceBuf != nil {
			traceBufFlush(w.traceBuf, w.gen)
		}
		unlock(&trace.lock)
	})
	w.traceBuf = nil
	return w
}

// refill puts w.traceBuf on the queue of full buffers and refreshes w's buffer.
//
// exp indicates whether the refilled batch should be EvExperimentalBatch.
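//
// The refreshed buffer starts with a batch header: the batch event type
// (traceEvEventBatch, or traceEvExperimentalBatch followed by the experiment
// ID), then the generation, the M ID, and the batch timestamp as varints,
// followed by traceBytesPerNumber bytes reserved for the batch length.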
func (w traceWriter) refill(exp traceExperiment) traceWriter {
	systemstack(func() {
		lock(&trace.lock)
		if w.traceBuf != nil {
			traceBufFlush(w.traceBuf, w.gen)
		}
		if trace.empty != nil {
			w.traceBuf = trace.empty
			trace.empty = w.traceBuf.link
			unlock(&trace.lock)
		} else {
			unlock(&trace.lock)
			w.traceBuf = (*traceBuf)(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys))
			if w.traceBuf == nil {
				throw("trace: out of memory")
			}
		}
	})
	// Initialize the buffer.
	ts := traceClockNow()
	if ts <= w.traceBuf.lastTime {
		ts = w.traceBuf.lastTime + 1
	}
	w.traceBuf.lastTime = ts
	w.traceBuf.link = nil
	w.traceBuf.pos = 0

	// Tolerate a nil mp.
	mID := ^uint64(0)
	if w.mp != nil {
		mID = uint64(w.mp.procid)
	}

	// Write the buffer's header.
	if exp == traceNoExperiment {
		w.byte(byte(traceEvEventBatch))
	} else {
		w.byte(byte(traceEvExperimentalBatch))
		w.byte(byte(exp))
	}
	w.varint(uint64(w.gen))
	w.varint(uint64(mID))
	w.varint(uint64(ts))
	w.traceBuf.lenPos = w.varintReserve()
	return w
}

// traceBufQueue is a FIFO of traceBufs.
type traceBufQueue struct {
	head, tail *traceBuf
}

// push queues buf into the queue of buffers.
func (q *traceBufQueue) push(buf *traceBuf) {
	buf.link = nil
	if q.head == nil {
		q.head = buf
	} else {
		q.tail.link = buf
	}
	q.tail = buf
}

// pop dequeues from the queue of buffers.
func (q *traceBufQueue) pop() *traceBuf {
	buf := q.head
	if buf == nil {
		return nil
	}
	q.head = buf.link
	if q.head == nil {
		q.tail = nil
	}
	buf.link = nil
	return buf
}

func (q *traceBufQueue) empty() bool {
	return q.head == nil
}

// traceBufHeader is the header of a per-M tracing buffer.
type traceBufHeader struct {
	link     *traceBuf // in trace.empty/full
	lastTime traceTime // when we wrote the last event
	pos      int       // next write offset in arr
	lenPos   int       // position of batch length value
}

// traceBuf is a per-M tracing buffer.
//
// TODO(mknyszek): Rename traceBuf to traceBatch, since they map 1:1 with event batches.
type traceBuf struct {
	_ sys.NotInHeap
	traceBufHeader
	arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for trace event data
}

// byte appends v to buf.
func (buf *traceBuf) byte(v byte) {
	buf.arr[buf.pos] = v
	buf.pos++
}

// varint appends v to buf in little-endian-base-128 encoding.
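// For example, the value 300 (0x12c) encodes as the two bytes 0xac 0x02: each
// byte carries seven bits, least-significant group first, with the high bit
// set on every byte except the last.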
func (buf *traceBuf) varint(v uint64) {
	pos := buf.pos
	arr := buf.arr[pos : pos+traceBytesPerNumber]
	for i := range arr {
		if v < 0x80 {
			pos += i + 1
			arr[i] = byte(v)
			break
		}
		arr[i] = 0x80 | byte(v)
		v >>= 7
	}
	buf.pos = pos
}

// varintReserve reserves enough space in buf to hold any varint.
//
// Space reserved this way can be filled in with the varintAt method.
func (buf *traceBuf) varintReserve() int {
	p := buf.pos
	buf.pos += traceBytesPerNumber
	return p
}

// stringData appends s's data directly to buf.
func (buf *traceBuf) stringData(s string) {
	buf.pos += copy(buf.arr[buf.pos:], s)
}

func (buf *traceBuf) available(size int) bool {
	return len(buf.arr)-buf.pos >= size
}

// varintAt writes varint v at byte position pos in buf. This always
// consumes traceBytesPerNumber bytes. This is intended for when the caller
// needs to reserve space for a varint but can't populate it until later.
// Use varintReserve to reserve this space.
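//
// For example, refill reserves space for the batch length with varintReserve
// and records its position in lenPos; traceBufFlush later fills that space in
// with varintAt once the final size of the batch is known.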
func (buf *traceBuf) varintAt(pos int, v uint64) {
	for i := 0; i < traceBytesPerNumber; i++ {
		if i < traceBytesPerNumber-1 {
			buf.arr[pos] = 0x80 | byte(v)
		} else {
			buf.arr[pos] = byte(v)
		}
		v >>= 7
		pos++
	}
	if v != 0 {
		throw("v could not fit in traceBytesPerNumber")
	}
}

// traceBufFlush flushes a trace buffer.
//
// Must run on the system stack because trace.lock must be held.
//
//go:systemstack
func traceBufFlush(buf *traceBuf, gen uintptr) {
	assertLockHeld(&trace.lock)

	// Write out the non-header length of the batch in the header.
	//
	// Note: the length of the header is not included to make it easier
	// to calculate this value when deserializing and reserializing the
	// trace. Varints can have additional padding of zero bits that is
	// quite difficult to preserve, and if we include the header we
	// force serializers to do more work. Nothing else actually needs
	// padding.
	buf.varintAt(buf.lenPos, uint64(buf.pos-(buf.lenPos+traceBytesPerNumber)))
	trace.full[gen%2].push(buf)

	// Notify the scheduler that there's work available and that the trace
	// reader should be scheduled.
	if !trace.workAvailable.Load() {
		trace.workAvailable.Store(true)
	}
}