// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This implements the write barrier buffer. The write barrier itself
// is gcWriteBarrier and is implemented in assembly.
//
// See mbarrier.go for algorithmic details on the write barrier. This
// file deals only with the buffer.
//
// The write barrier has a fast path and a slow path. The fast path
// simply enqueues to a per-P write barrier buffer. It's written in
// assembly and doesn't clobber any general purpose registers, so it
// doesn't have the usual overheads of a Go call.
//
// When the buffer fills up, the write barrier invokes the slow path
// (wbBufFlush) to flush the buffer to the GC work queues. In this
// path, since the compiler didn't spill registers, we spill *all*
// registers and disallow any GC safe points that could observe the
// stack frame (since we don't know the types of the spilled
// registers).
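//
// Illustratively, the fast path behaves like the following sketch
// (compare get1 below; this is a minimal sketch, not the actual
// assembly):
//
//	if buf.next+goarch.PtrSize > buf.end {
//		wbBufFlush() // slow path; resets buf.next
//	}
//	*(*uintptr)(unsafe.Pointer(buf.next)) = ptr
//	buf.next += goarch.PtrSize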

package runtime

import (
	"internal/goarch"
	"internal/runtime/atomic"
	"unsafe"
)

// testSmallBuf forces a small write barrier buffer to stress write
// barrier flushing.
const testSmallBuf = false

// wbBuf is a per-P buffer of pointers queued by the write barrier.
// This buffer is flushed to the GC workbufs when it fills up and on
// various GC transitions.
//
// This is closely related to a "sequential store buffer" (SSB),
// except that SSBs are usually used for maintaining remembered sets,
// while this is used for marking.
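//
// The queued pointers occupy [&buf[0], next); the space still
// available to the write barrier is [next, end).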
type wbBuf struct {
	// next points to the next slot in buf. It must not be a
	// pointer type because it can point past the end of buf and
	// must be updated without write barriers.
	//
	// This is a pointer rather than an index to optimize the
	// write barrier assembly.
	next uintptr

	// end points to just past the end of buf. It must not be a
	// pointer type because it points past the end of buf and must
	// be updated without write barriers.
	end uintptr

	// buf stores a series of pointers to execute write barriers on.
	buf [wbBufEntries]uintptr
}

const (
	// wbBufEntries is the maximum number of pointers that can be
	// stored in the write barrier buffer.
	//
	// This trades latency for throughput amortization. Higher
	// values amortize flushing overhead more, but increase the
	// latency of flushing. Higher values also increase the cache
	// footprint of the buffer.
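	//
	// As a point of reference, on a 64-bit platform each per-P
	// buffer occupies wbBufEntries * goarch.PtrSize =
	// 512 * 8 bytes = 4KiB.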
	//
	// TODO: What is the latency cost of this? Tune this value.
	wbBufEntries = 512

	// wbMaxEntriesPerCall is the maximum number of entries a
	// single write barrier call may request from the buffer.
	wbMaxEntriesPerCall = 8
)

// reset empties b by resetting its next and end pointers.
func (b *wbBuf) reset() {
	start := uintptr(unsafe.Pointer(&b.buf[0]))
	b.next = start
	if testSmallBuf {
		// For testing, make the buffer smaller but more than
		// one write barrier call's worth, so it tests both the
		// immediate flush and delayed flush cases.
		b.end = uintptr(unsafe.Pointer(&b.buf[wbMaxEntriesPerCall+1]))
	} else {
		b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
	}

	if (b.end-b.next)%unsafe.Sizeof(b.buf[0]) != 0 {
		throw("bad write barrier buffer bounds")
	}
}

// discard resets b's next pointer, but not its end pointer.
//
// This must be nosplit because it's called by wbBufFlush.
//
//go:nosplit
func (b *wbBuf) discard() {
	b.next = uintptr(unsafe.Pointer(&b.buf[0]))
}

// empty reports whether b contains no pointers.
func (b *wbBuf) empty() bool {
	return b.next == uintptr(unsafe.Pointer(&b.buf[0]))
}

// getX returns space in the write barrier buffer to store X pointers.
// getX will flush the buffer if necessary. Callers should use this as:
//
//	buf := &getg().m.p.ptr().wbBuf
//	p := buf.get2()
//	p[0], p[1] = old, new
//	... actual memory write ...
//
// The caller must ensure there are no preemption points during the
// above sequence. There must be no preemption points while buf is in
// use because it is a per-P resource. There must be no preemption
// points between the buffer put and the write to memory because this
// could allow a GC phase change, which could result in missed write
// barriers.
//
// getX must be nowritebarrierrec because write barriers here would
// corrupt the write barrier buffer. It (and everything it calls, if
// it called anything) has to be nosplit to avoid scheduling onto a
// different P and a different buffer.
//
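// get1 follows the same pattern with a single slot (an illustrative
// sketch; the buffered pointer may be an old or a new value):
//
//	buf := &getg().m.p.ptr().wbBuf
//	p := buf.get1()
//	p[0] = ptr
//	... actual memory write ...
//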
//go:nowritebarrierrec
//go:nosplit
func (b *wbBuf) get1() *[1]uintptr {
	if b.next+goarch.PtrSize > b.end {
		wbBufFlush()
	}
	p := (*[1]uintptr)(unsafe.Pointer(b.next))
	b.next += goarch.PtrSize
	return p
}

//go:nowritebarrierrec
//go:nosplit
func (b *wbBuf) get2() *[2]uintptr {
	if b.next+2*goarch.PtrSize > b.end {
		wbBufFlush()
	}
	p := (*[2]uintptr)(unsafe.Pointer(b.next))
	b.next += 2 * goarch.PtrSize
	return p
}

// wbBufFlush flushes the current P's write barrier buffer to the GC
// workbufs.
//
// This must not have write barriers because it is part of the write
// barrier implementation.
//
// This and everything it calls must be nosplit because (1) the stack
// contains untyped slots from gcWriteBarrier and (2) there must not be
// a GC safe point between the write barrier test in the caller and
// flushing the buffer.
//
// TODO: A "go:nosplitrec" annotation would be perfect for this.
//
//go:nowritebarrierrec
//go:nosplit
func wbBufFlush() {
	// Note: Every possible return from this function must reset
	// the buffer's next pointer to prevent buffer overflow.

	if getg().m.dying > 0 {
		// We're going down. There's not much point in write
		// barriers, and this way we can allow write barriers
		// in the panic path.
		getg().m.p.ptr().wbBuf.discard()
		return
	}

	// Switch to the system stack so we don't have to worry about
	// safe points.
	systemstack(func() {
		wbBufFlush1(getg().m.p.ptr())
	})
}

// wbBufFlush1 flushes p's write barrier buffer to the GC work queue.
//
// This must not have write barriers because it is part of the write
// barrier implementation; write barriers here could lead to infinite
// loops or buffer corruption.
//
// This must be non-preemptible because it uses the P's workbuf.
//
//go:nowritebarrierrec
//go:systemstack
func wbBufFlush1(pp *p) {
	// Get the buffered pointers.
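	// (For example, if three pointers are buffered on a 64-bit
	// system, next-start is 24 bytes, so n is 3.)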
	start := uintptr(unsafe.Pointer(&pp.wbBuf.buf[0]))
	n := (pp.wbBuf.next - start) / unsafe.Sizeof(pp.wbBuf.buf[0])
	ptrs := pp.wbBuf.buf[:n]

	// Poison the buffer to make extra sure nothing is enqueued
	// while we're processing the buffer.
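	//
	// With next == 0, a write barrier that fired now would try to
	// store through a nil pointer and crash, rather than silently
	// corrupting the buffer.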
	pp.wbBuf.next = 0

	if useCheckmark {
		// Slow path for checkmark mode.
		for _, ptr := range ptrs {
			shade(ptr)
		}
		pp.wbBuf.reset()
		return
	}

	// Mark all of the pointers in the buffer and record only the
	// pointers we greyed. We use the buffer itself to temporarily
	// record greyed pointers.
	//
	// TODO: Should scanobject/scanblock just stuff pointers into
	// the wbBuf? Then this would become the sole greying path.
	//
	// TODO: We could avoid shading any of the "new" pointers in
	// the buffer if the stack has been shaded, or even avoid
	// putting them in the buffer at all (which would double its
	// capacity). This is slightly complicated with the buffer; we
	// could track whether any un-shaded goroutine has used the
	// buffer, or just track globally whether there are any
	// un-shaded stacks and flush after each stack scan.
	gcw := &pp.gcw
	pos := 0
	for _, ptr := range ptrs {
		if ptr < minLegalPointer {
			// nil pointers are very common, especially
			// for the "old" values. Filter out these and
			// other "obvious" non-heap pointers ASAP.
			//
			// TODO: Should we filter out nils in the fast
			// path to reduce the rate of flushes?
			continue
		}
		obj, span, objIndex := findObject(ptr, 0, 0)
		if obj == 0 {
			continue
		}
		// TODO: Consider making two passes where the first
		// just prefetches the mark bits.
		mbits := span.markBitsForIndex(objIndex)
		if mbits.isMarked() {
			continue
		}
		mbits.setMarked()

		// Mark span.
		arena, pageIdx, pageMask := pageIndexOf(span.base())
		if arena.pageMarks[pageIdx]&pageMask == 0 {
			atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
		}

		if span.spanclass.noscan() {
			gcw.bytesMarked += uint64(span.elemsize)
			continue
		}
		ptrs[pos] = obj
		pos++
	}

	// Enqueue the greyed objects.
	gcw.putBatch(ptrs[:pos])

	pp.wbBuf.reset()
}