// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Implementation of (safe) user arenas.
//
// This file contains the implementation of user arenas wherein Go values can
// be manually allocated and freed in bulk. The act of manually freeing memory,
// potentially before a GC cycle, means that a garbage collection cycle can be
// delayed, improving efficiency by reducing GC cycle frequency. There are other
// potential efficiency benefits, such as improved locality and access to a more
// efficient allocation strategy.
//
// What makes the arenas here safe is that once they are freed, accessing the
// arena's memory will cause an explicit program fault, and the arena's address
// space will not be reused until no more pointers into it are found. There's one
// exception to this: if an arena's active chunk isn't exhausted when the arena is
// freed, that chunk is placed back into a pool for reuse. This means that a crash
// is not always guaranteed.
//
// While this may seem unsafe, it still prevents memory corruption, and is in fact
// necessary in order to make new(T) a valid implementation of arenas. Such a property
// is desirable to allow for a trivial implementation. (It also avoids complexities
// that arise from synchronization with the GC when trying to set the arena chunks to
// fault while the GC is active.)
//
// The implementation works in layers. At the bottom, arenas are managed in chunks.
// Each chunk must be a multiple of the heap arena size, or the heap arena size must
// be divisible by the arena chunk size. The address space for each chunk, and each
// corresponding heapArena for that address space, are eternally reserved for use as
// arena chunks. That is, they can never be used for the general heap. Each chunk
// is also represented by a single mspan, and is modeled as a single large heap
// allocation. It must be, because each chunk contains ordinary Go values that may
// point into the heap, so it must be scanned just like any other object. Any
// pointer into a chunk will therefore always cause the whole chunk to be scanned
// while its corresponding arena is still live.
//
// Chunks may be allocated either from new memory mapped by the OS on our behalf,
// or by reusing old freed chunks. When chunks are freed, their underlying memory
// is returned to the OS, set to fault on access, and may not be reused until the
// program doesn't point into the chunk anymore (the code refers to this state as
// "quarantined"), a property checked by the GC.
//
// The sweeper handles moving chunks out of this quarantine state to be ready for
// reuse. When the chunk is placed into the quarantine state, its corresponding
// span is marked as noscan so that the GC doesn't try to scan memory that would
// cause a fault.
//
// At the next layer are the user arenas themselves. They consist of a single
// active chunk which new Go values are bump-allocated into and a list of chunks
// that were exhausted when allocating into the arena. Once the arena is freed,
// it frees all full chunks it references, and places the active one onto a reuse
// list for a future arena to use. Each arena keeps its list of referenced chunks
// explicitly live until it is freed. Each user arena also maps to an object which
// has a finalizer attached that ensures the arena's chunks are all freed even if
// the arena itself is never explicitly freed.
//
// Pointer-ful memory is bump-allocated from low addresses to high addresses in each
// chunk, while pointer-free memory is bump-allocated from high addresses to low
// addresses. The reason for this is to take advantage of a GC optimization wherein
// the GC will stop scanning an object when there are no more pointers in it, which
// also allows us to elide clearing the heap bitmap for pointer-free Go values
// allocated into arenas.
//
// Note that arenas are not safe to use concurrently.
//
// In summary, there are 2 resources: arenas, and arena chunks. They exist in the
// following lifecycle:
//
// (1) A new arena is created via newArena.
// (2) Chunks are allocated to hold memory allocated into the arena with new or slice.
//    (a) Chunks are first allocated from the reuse list of partially-used chunks.
//    (b) If there are no such chunks, then chunks on the ready list are taken.
//    (c) Failing all the above, memory for a new chunk is mapped.
// (3) The arena is freed, or all references to it are dropped, triggering its finalizer.
//    (a) If the GC is not active, exhausted chunks are set to fault and placed on a
//        quarantine list.
//    (b) If the GC is active, exhausted chunks are placed on a fault list and will
//        go through step (a) at a later point in time.
//    (c) Any remaining partially-used chunk is placed on a reuse list.
// (4) Once no more pointers are found into quarantined arena chunks, the sweeper
//     takes these chunks out of quarantine and places them on the ready list.
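//
// For orientation, the lifecycle above is typically driven from user code along
// the following lines. This is a minimal sketch assuming the experimental arena
// package API (GOEXPERIMENT=arenas), which wraps the arena_* functions below;
// the exact surface is owned by that package, not by this file:
//
//	a := arena.NewArena()              // (1) creates the arena via newArena.
//	p := arena.New[int](a)             // (2) allocates into the active chunk.
//	s := arena.MakeSlice[byte](a, 1024, 1024)
//	*p = 42
//	_ = s
//	a.Free()                           // (3) chunks are faulted or queued for reuse.
//	// (4) The sweeper later moves quarantined chunks onto the ready list.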

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"internal/runtime/atomic"
	"runtime/internal/math"
	"runtime/internal/sys"
	"unsafe"
)

// Functions starting with arena_ are meant to be exported to downstream users
// of arenas. They should wrap these functions in a higher-level API.
//
// The underlying arena and its resources are managed through an opaque unsafe.Pointer.

// arena_newArena is a wrapper around newUserArena.
//
//go:linkname arena_newArena arena.runtime_arena_newArena
func arena_newArena() unsafe.Pointer {
	return unsafe.Pointer(newUserArena())
}

// arena_arena_New is a wrapper around (*userArena).new, except that typ
// is an any (must be a *_type, still) and typ must be a type descriptor
// for a pointer to the type to actually be allocated, i.e. pass a *T
// to allocate a T. This is necessary because this function returns a *T.
//
//go:linkname arena_arena_New arena.runtime_arena_arena_New
func arena_arena_New(arena unsafe.Pointer, typ any) any {
	t := (*_type)(efaceOf(&typ).data)
	if t.Kind_&abi.KindMask != abi.Pointer {
		throw("arena_New: non-pointer type")
	}
	te := (*ptrtype)(unsafe.Pointer(t)).Elem
	x := ((*userArena)(arena)).new(te)
	var result any
	e := efaceOf(&result)
	e._type = t
	e.data = x
	return result
}
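
// To illustrate the *T convention documented above, a downstream wrapper might
// look roughly like the sketch below. The generic signature and the use of
// package reflect are illustrative assumptions about the wrapping package (the
// real wrapper lives in the arena package); only the linkname target name is
// taken from the directive above.
//
//	//go:linkname runtime_arena_arena_New
//	func runtime_arena_arena_New(arena unsafe.Pointer, typ any) any
//
//	func New[T any](a unsafe.Pointer) *T {
//		// reflect.TypeOf((*T)(nil)) produces an interface whose data word
//		// is the type descriptor for *T, which is what arena_arena_New
//		// reads out of its empty interface argument.
//		return runtime_arena_arena_New(a, reflect.TypeOf((*T)(nil))).(*T)
//	}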

// arena_arena_Slice is a wrapper around (*userArena).slice.
//
//go:linkname arena_arena_Slice arena.runtime_arena_arena_Slice
func arena_arena_Slice(arena unsafe.Pointer, slice any, cap int) {
	((*userArena)(arena)).slice(slice, cap)
}

// arena_arena_Free is a wrapper around (*userArena).free.
//
//go:linkname arena_arena_Free arena.runtime_arena_arena_Free
func arena_arena_Free(arena unsafe.Pointer) {
	((*userArena)(arena)).free()
}

// arena_heapify takes a value that lives in an arena and makes a copy
// of it on the heap. Values that don't live in an arena are returned unmodified.
//
//go:linkname arena_heapify arena.runtime_arena_heapify
func arena_heapify(s any) any {
	var v unsafe.Pointer
	e := efaceOf(&s)
	t := e._type
	switch t.Kind_ & abi.KindMask {
	case abi.String:
		v = stringStructOf((*string)(e.data)).str
	case abi.Slice:
		v = (*slice)(e.data).array
	case abi.Pointer:
		v = e.data
	default:
		panic("arena: Clone only supports pointers, slices, and strings")
	}
	span := spanOf(uintptr(v))
	if span == nil || !span.isUserArenaChunk {
		// Not stored in a user arena chunk.
		return s
	}
	// Heap-allocate storage for a copy.
	var x any
	switch t.Kind_ & abi.KindMask {
	case abi.String:
		s1 := s.(string)
		s2, b := rawstring(len(s1))
		copy(b, s1)
		x = s2
	case abi.Slice:
		len := (*slice)(e.data).len
		et := (*slicetype)(unsafe.Pointer(t)).Elem
		sl := new(slice)
		*sl = slice{makeslicecopy(et, len, len, (*slice)(e.data).array), len, len}
		xe := efaceOf(&x)
		xe._type = t
		xe.data = unsafe.Pointer(sl)
	case abi.Pointer:
		et := (*ptrtype)(unsafe.Pointer(t)).Elem
		e2 := newobject(et)
		typedmemmove(et, e2, e.data)
		xe := efaceOf(&x)
		xe._type = t
		xe.data = e2
	}
	return x
}
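
// A minimal usage sketch for the path above, assuming the experimental arena
// package API where Clone is the exported wrapper around arena_heapify: values
// that must outlive the arena are copied to the heap before the arena is freed.
//
//	a := arena.NewArena()
//	p := arena.New[[64]byte](a)
//	keep := arena.Clone(p) // copies *p into a heap allocation, returns the new *[64]byte
//	a.Free()
//	_ = keep               // safe: keep no longer points into faulted arena memory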

const (
	// userArenaChunkBytes is the size of a user arena chunk.
	userArenaChunkBytesMax = 8 << 20
	userArenaChunkBytes    = uintptr(int64(userArenaChunkBytesMax-heapArenaBytes)&(int64(userArenaChunkBytesMax-heapArenaBytes)>>63) + heapArenaBytes) // min(userArenaChunkBytesMax, heapArenaBytes)

	// userArenaChunkPages is the number of pages a user arena chunk uses.
	userArenaChunkPages = userArenaChunkBytes / pageSize

	// userArenaChunkMaxAllocBytes is the maximum size of an object that can
	// be allocated from an arena. This number is chosen to cap worst-case
	// fragmentation of user arenas to 25%. Larger allocations are redirected
	// to the heap.
	userArenaChunkMaxAllocBytes = userArenaChunkBytes / 4
)
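
// The userArenaChunkBytes expression above is a branchless min, written out
// because constant expressions cannot call functions. As a sketch of the same
// identity in ordinary (non-constant) code, with a and b standing in for
// userArenaChunkBytesMax and heapArenaBytes (and assuming a-b cannot overflow,
// which holds for these values):
//
//	func constMin(a, b int64) int64 {
//		d := a - b
//		// d>>63 is all ones iff d < 0, so d&(d>>63) is d when a < b and 0 otherwise.
//		return d&(d>>63) + b
//	}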

func init() {
	if userArenaChunkPages*pageSize != userArenaChunkBytes {
		throw("user arena chunk size is not a multiple of the page size")
	}
	if userArenaChunkBytes%physPageSize != 0 {
		throw("user arena chunk size is not a multiple of the physical page size")
	}
	if userArenaChunkBytes < heapArenaBytes {
		if heapArenaBytes%userArenaChunkBytes != 0 {
			throw("user arena chunk size is smaller than a heap arena, but doesn't divide it")
		}
	} else {
		if userArenaChunkBytes%heapArenaBytes != 0 {
			throw("user arena chunk size is larger than a heap arena, but not a multiple")
		}
	}
	lockInit(&userArenaState.lock, lockRankUserArenaState)
}

// userArenaChunkReserveBytes returns the number of additional bytes to reserve for
// heap metadata.
func userArenaChunkReserveBytes() uintptr {
	// In the allocation headers experiment, we reserve the end of the chunk for
	// a pointer/scalar bitmap. We also reserve space for a dummy _type that
	// refers to the bitmap. The PtrBytes field of the dummy _type indicates how
	// many of those bits are valid.
	return userArenaChunkBytes/goarch.PtrSize/8 + unsafe.Sizeof(_type{})
}
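
// For a concrete sense of scale (illustrative numbers, assuming the common case
// of 8 MiB chunks on a 64-bit platform): one bitmap bit covers one pointer-sized
// word, so the reservation is
//
//	(8 << 20) / 8 / 8 = 128 KiB of bitmap, plus unsafe.Sizeof(_type{}) for the dummy type,
//
// leaving the remainder of the chunk available for allocation.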

type userArena struct {
	// fullList is a list of full chunks that don't have enough free memory left, and
	// that we'll free once this user arena is freed.
	//
	// Can't use mSpanList here because it's not-in-heap.
	fullList *mspan

	// active is the user arena chunk we're currently allocating into.
	active *mspan

	// refs is a set of references to the arena chunks so that they're kept alive.
	//
	// The last reference in the list always refers to active, while the rest of
	// them correspond to fullList. Specifically, the head of fullList is the
	// second-to-last one, fullList.next is the third-to-last, and so on.
	//
	// In other words, every time a new chunk becomes active, it's appended to this
	// list.
	refs []unsafe.Pointer

	// defunct is true if free has been called on this arena.
	//
	// This is just a best-effort way to discover a concurrent allocation
	// and free. Also used to detect a double-free.
	defunct atomic.Bool
}
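
// The correspondence between refs and the chunk lists described above can be
// stated as a small invariant. This is a hypothetical debugging sketch, not a
// method that exists in the runtime:
//
//	func (a *userArena) checkRefs() {
//		n := 0
//		for s := a.fullList; s != nil; s = s.next {
//			n++
//		}
//		if a.active != nil {
//			n++ // active always owns the last entry in refs.
//		}
//		if n != len(a.refs) {
//			throw("userArena refs out of sync with chunk lists")
//		}
//	}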

// newUserArena creates a new userArena ready to be used.
func newUserArena() *userArena {
	a := new(userArena)
	SetFinalizer(a, func(a *userArena) {
		// If the arena handle is dropped without being freed, then call
		// free on the arena, so the arena's chunks are eventually released
		// even though the garbage collector will never reclaim them on its own.
		a.free()
	})
	a.refill()
	return a
}

// new allocates a new object of the provided type into the arena, and returns
// its pointer.
//
// This operation is not safe to call concurrently with other operations on the
// same arena.
func (a *userArena) new(typ *_type) unsafe.Pointer {
	return a.alloc(typ, -1)
}

// slice allocates a new slice backing store. sl must be a pointer to a slice
// (i.e. *[]T), because slice updates the slice header directly.
//
// cap determines the capacity of the slice backing store and must be non-negative.
//
// This operation is not safe to call concurrently with other operations on the
// same arena.
func (a *userArena) slice(sl any, cap int) {
	if cap < 0 {
		panic("userArena.slice: negative cap")
	}
	i := efaceOf(&sl)
	typ := i._type
	if typ.Kind_&abi.KindMask != abi.Pointer {
		panic("slice result of non-ptr type")
	}
	typ = (*ptrtype)(unsafe.Pointer(typ)).Elem
	if typ.Kind_&abi.KindMask != abi.Slice {
		panic("slice of non-ptr-to-slice type")
	}
	typ = (*slicetype)(unsafe.Pointer(typ)).Elem
	// typ is now the element type of the slice we want to allocate.

	*((*slice)(i.data)) = slice{a.alloc(typ, cap), cap, cap}
}
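
// As with new, downstream code is expected to reach this method through the
// arena_arena_Slice linkname wrapper. A rough sketch of a higher-level wrapper,
// where the generic signature is an illustrative assumption about the wrapping
// package:
//
//	func MakeSlice[T any](a unsafe.Pointer, len, cap int) []T {
//		var sl []T
//		// Pass *[]T so the callee can write the backing store pointer,
//		// length, and capacity into the slice header in place.
//		runtime_arena_arena_Slice(a, &sl, cap)
//		return sl[:len]
//	}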

// free returns the userArena's chunks back to mheap and marks it as defunct.
//
// Must be called at most once for any given arena.
//
// This operation is not safe to call concurrently with other operations on the
// same arena.
func (a *userArena) free() {
	// Check for a double-free.
	if a.defunct.Load() {
		panic("arena double free")
	}

	// Mark ourselves as defunct.
	a.defunct.Store(true)
	SetFinalizer(a, nil)

	// Free all the full arena chunks.
	//
	// The refs on this list correspond to the chunks on fullList in reverse
	// order, starting from the second-to-last ref (the last ref is active's).
	s := a.fullList
	i := len(a.refs) - 2
	for s != nil {
		a.fullList = s.next
		s.next = nil
		freeUserArenaChunk(s, a.refs[i])
		s = a.fullList
		i--
	}
	if a.fullList != nil || i >= 0 {
		// There's still something left on the full list, or we
		// failed to actually iterate over the entire refs list.
		throw("full list doesn't match refs list in length")
	}

	// Put the active chunk onto the reuse list.
	//
	// Note that active's reference is always the last reference in refs.
	s = a.active
	if s != nil {
		if raceenabled || msanenabled || asanenabled {
			// Don't reuse arenas with sanitizers enabled. We want to catch
			// any use-after-free errors aggressively.
			freeUserArenaChunk(s, a.refs[len(a.refs)-1])
		} else {
			lock(&userArenaState.lock)
			userArenaState.reuse = append(userArenaState.reuse, liveUserArenaChunk{s, a.refs[len(a.refs)-1]})
			unlock(&userArenaState.lock)
		}
	}
	// nil out a.active so that a race with freeing will more likely cause a crash.
	a.active = nil
	a.refs = nil
}

// alloc reserves space in the current chunk or calls refill and reserves space
// in a new chunk. If cap is negative, the type will be taken literally, otherwise
// it will be considered as an element type for a slice backing store with capacity
// cap.
func (a *userArena) alloc(typ *_type, cap int) unsafe.Pointer {
	s := a.active
	var x unsafe.Pointer
	for {
		x = s.userArenaNextFree(typ, cap)
		if x != nil {
			break
		}
		s = a.refill()
	}
	return x
}

// refill inserts the current arena chunk onto the full list and obtains a new
// one, either from the reuse list of partially-used chunks or by allocating a
// new chunk from mheap.
func (a *userArena) refill() *mspan {
	// If there's an active chunk, assume it's full.
	s := a.active
	if s != nil {
		if s.userArenaChunkFree.size() > userArenaChunkMaxAllocBytes {
			// It's difficult to tell when we're actually out of memory
			// in a chunk because the allocation that failed may still leave
			// some free space available. However, that amount of free space
			// should never exceed the maximum allocation size.
			throw("wasted too much memory in an arena chunk")
		}
		s.next = a.fullList
		a.fullList = s
		a.active = nil
		s = nil
	}
	var x unsafe.Pointer

	// Check the partially-used list.
	lock(&userArenaState.lock)
	if len(userArenaState.reuse) > 0 {
		// Pick off the last arena chunk from the list.
		n := len(userArenaState.reuse) - 1
		x = userArenaState.reuse[n].x
		s = userArenaState.reuse[n].mspan
		userArenaState.reuse[n].x = nil
		userArenaState.reuse[n].mspan = nil
		userArenaState.reuse = userArenaState.reuse[:n]
	}
	unlock(&userArenaState.lock)
	if s == nil {
		// Allocate a new one.
		x, s = newUserArenaChunk()
		if s == nil {
			throw("out of memory")
		}
	}
	a.refs = append(a.refs, x)
	a.active = s
	return s
}

type liveUserArenaChunk struct {
	*mspan // Must represent a user arena chunk.

	// Reference to mspan.base() to keep the chunk alive.
	x unsafe.Pointer
}

var userArenaState struct {
	lock mutex

	// reuse contains a list of partially-used and already-live
	// user arena chunks that can be quickly reused for another
	// arena.
	//
	// Protected by lock.
	reuse []liveUserArenaChunk

	// fault contains full user arena chunks that need to be faulted.
	//
	// Protected by lock.
	fault []liveUserArenaChunk
}

// userArenaNextFree reserves space in the user arena for an item of the specified
// type. If cap is not -1, this is for an array of cap elements of type typ.
func (s *mspan) userArenaNextFree(typ *_type, cap int) unsafe.Pointer {
	size := typ.Size_
	if cap > 0 {
		if size > ^uintptr(0)/uintptr(cap) {
			// Overflow.
			throw("out of memory")
		}
		size *= uintptr(cap)
	}
	if size == 0 || cap == 0 {
		return unsafe.Pointer(&zerobase)
	}
	if size > userArenaChunkMaxAllocBytes {
		// Redirect allocations that don't fit well into a chunk to be
		// allocated directly from the heap.
		if cap >= 0 {
			return newarray(typ, cap)
		}
		return newobject(typ)
	}

	// Prevent preemption as we set up the space for a new object.
	//
	// Act like we're allocating.
	mp := acquirem()
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	mp.mallocing = 1

	var ptr unsafe.Pointer
	if !typ.Pointers() {
		// Allocate pointer-less objects from the tail end of the chunk.
		v, ok := s.userArenaChunkFree.takeFromBack(size, typ.Align_)
		if ok {
			ptr = unsafe.Pointer(v)
		}
	} else {
		v, ok := s.userArenaChunkFree.takeFromFront(size, typ.Align_)
		if ok {
			ptr = unsafe.Pointer(v)
		}
	}
	if ptr == nil {
		// Failed to allocate.
		mp.mallocing = 0
		releasem(mp)
		return nil
	}
	if s.needzero != 0 {
		throw("arena chunk needs zeroing, but should already be zeroed")
	}
	// Set up heap bitmap and do extra accounting.
	if typ.Pointers() {
		if cap >= 0 {
			userArenaHeapBitsSetSliceType(typ, cap, ptr, s)
		} else {
			userArenaHeapBitsSetType(typ, ptr, s)
		}
		c := getMCache(mp)
		if c == nil {
			throw("mallocgc called without a P or outside bootstrapping")
		}
		if cap > 0 {
			c.scanAlloc += size - (typ.Size_ - typ.PtrBytes)
		} else {
			c.scanAlloc += typ.PtrBytes
		}
	}

	// Ensure that the stores above that initialize the allocation to
	// type-safe memory and set the heap bits occur before
	// the caller can make ptr observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to the allocation,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	mp.mallocing = 0
	releasem(mp)

	return ptr
}
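
// The two takeFromBack/takeFromFront calls above implement the double-ended
// bump allocation described at the top of this file: pointer-ful data grows
// from the front of the chunk, pointer-free data from the back. A toy sketch
// of the same idea over a plain [lo, hi) range, with all names hypothetical
// and alignment ignored:
//
//	type bump struct{ lo, hi uintptr } // free space is [lo, hi)
//
//	func (b *bump) fromFront(n uintptr) (uintptr, bool) { // pointer-ful data
//		if b.hi-b.lo < n {
//			return 0, false
//		}
//		p := b.lo
//		b.lo += n
//		return p, true
//	}
//
//	func (b *bump) fromBack(n uintptr) (uintptr, bool) { // pointer-free data
//		if b.hi-b.lo < n {
//			return 0, false
//		}
//		b.hi -= n
//		return b.hi, true
//	}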

// userArenaHeapBitsSetSliceType is the equivalent of heapSetType but for
// Go slice backing store values allocated in a user arena chunk. It sets up the
// heap bitmap for n consecutive values with type typ allocated at address ptr.
func userArenaHeapBitsSetSliceType(typ *_type, n int, ptr unsafe.Pointer, s *mspan) {
	mem, overflow := math.MulUintptr(typ.Size_, uintptr(n))
	if overflow || n < 0 || mem > maxAlloc {
		panic(plainError("runtime: allocation size out of range"))
	}
	for i := 0; i < n; i++ {
		userArenaHeapBitsSetType(typ, add(ptr, uintptr(i)*typ.Size_), s)
	}
}

// userArenaHeapBitsSetType is the equivalent of heapSetType but for
// non-slice-backing-store Go values allocated in a user arena chunk. It
// sets up the type metadata for the value with type typ allocated at address ptr.
// The base address of the arena chunk is taken from the span s.
func userArenaHeapBitsSetType(typ *_type, ptr unsafe.Pointer, s *mspan) {
	base := s.base()
	h := s.writeUserArenaHeapBits(uintptr(ptr))

	p := typ.GCData // start of 1-bit pointer mask (or GC program)
	var gcProgBits uintptr
	if typ.Kind_&abi.KindGCProg != 0 {
		// Expand gc program, using the object itself for storage.
		gcProgBits = runGCProg(addb(p, 4), (*byte)(ptr))
		p = (*byte)(ptr)
	}
	nb := typ.PtrBytes / goarch.PtrSize

	for i := uintptr(0); i < nb; i += ptrBits {
		k := nb - i
		if k > ptrBits {
			k = ptrBits
		}
		// N.B. On big endian platforms we byte swap the data that we
		// read from GCData, which is always stored in little-endian order
		// by the compiler. writeUserArenaHeapBits handles data in
		// a platform-ordered way for efficiency, but stores back the
		// data in little endian order, since we expose the bitmap through
		// a dummy type.
		h = h.write(s, readUintptr(addb(p, i/8)), k)
	}
	// Note: we call pad here to ensure we emit explicit 0 bits
	// for the pointerless tail of the object. This ensures that
	// there's only a single noMorePtrs mark for the next object
	// to clear. We don't need to do this to clear stale noMorePtrs
	// markers from previous uses because arena chunk pointer bitmaps
	// are always fully cleared when reused.
	h = h.pad(s, typ.Size_-typ.PtrBytes)
	h.flush(s, uintptr(ptr), typ.Size_)

	if typ.Kind_&abi.KindGCProg != 0 {
		// Zero out temporary ptrmask buffer inside object.
		memclrNoHeapPointers(ptr, (gcProgBits+7)/8)
	}

	// Update the PtrBytes value in the type information. After this
	// point, the GC will observe the new bitmap.
	s.largeType.PtrBytes = uintptr(ptr) - base + typ.PtrBytes

	// Double-check that the bitmap was written out correctly.
	const doubleCheck = false
	if doubleCheck {
		doubleCheckHeapPointersInterior(uintptr(ptr), uintptr(ptr), typ.Size_, typ.Size_, typ, &s.largeType, s)
	}
}

type writeUserArenaHeapBits struct {
	offset uintptr // offset in the span that the low bit of mask represents the pointer state of.
	mask   uintptr // accumulated pointer bits for the current bitmap word.
	valid  uintptr // number of bits in mask that are valid (including low).
	low    uintptr // number of low-order bits to not overwrite.
}

func (s *mspan) writeUserArenaHeapBits(addr uintptr) (h writeUserArenaHeapBits) {
	offset := addr - s.base()

	// We may start writing bits in the middle of a heap bitmap word.
	// Remember how many bits into the word we started, so we can be sure
	// not to overwrite the previous bits.
	h.low = offset / goarch.PtrSize % ptrBits

	// Round down to the heap word that starts the bitmap word.
	h.offset = offset - h.low*goarch.PtrSize

	// We don't have any bits yet.
	h.mask = 0
	h.valid = h.low

	return
}

// write appends the pointerness of the next valid pointer slots
// using the low valid bits of bits. 1=pointer, 0=scalar.
func (h writeUserArenaHeapBits) write(s *mspan, bits, valid uintptr) writeUserArenaHeapBits {
	if h.valid+valid <= ptrBits {
		// Fast path - just accumulate the bits.
		h.mask |= bits << h.valid
		h.valid += valid
		return h
	}
	// Too many bits to fit in this word. Write the current word
	// out and move on to the next word.

	data := h.mask | bits<<h.valid       // mask for this word
	h.mask = bits >> (ptrBits - h.valid) // leftover for next word
	h.valid += valid - ptrBits           // have h.valid+valid bits, writing ptrBits of them

	// Flush mask to the memory bitmap.
	idx := h.offset / (ptrBits * goarch.PtrSize)
	m := uintptr(1)<<h.low - 1
	bitmap := s.heapBits()
	bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx])&m | data)
	// Note: no synchronization required for this write because
	// the allocator has exclusive access to the page, and the bitmap
	// entries are all for a single page. Also, visibility of these
	// writes is guaranteed by the publication barrier in mallocgc.

	// Move to next word of bitmap.
	h.offset += ptrBits * goarch.PtrSize
	h.low = 0
	return h
}

// Add padding of size bytes.
func (h writeUserArenaHeapBits) pad(s *mspan, size uintptr) writeUserArenaHeapBits {
	if size == 0 {
		return h
	}
	words := size / goarch.PtrSize
	for words > ptrBits {
		h = h.write(s, 0, ptrBits)
		words -= ptrBits
	}
	return h.write(s, 0, words)
}

// Flush the bits that have been written, and add zeros as needed
// to cover the full object [addr, addr+size).
func (h writeUserArenaHeapBits) flush(s *mspan, addr, size uintptr) {
	offset := addr - s.base()

	// zeros counts the number of bits needed to represent the object minus the
	// number of bits we've already written. This is the number of 0 bits
	// that need to be added.
	zeros := (offset+size-h.offset)/goarch.PtrSize - h.valid

	// Add zero bits up to the bitmap word boundary.
	if zeros > 0 {
		z := ptrBits - h.valid
		if z > zeros {
			z = zeros
		}
		h.valid += z
		zeros -= z
	}

	// Find the word in the bitmap that we're going to write.
	bitmap := s.heapBits()
	idx := h.offset / (ptrBits * goarch.PtrSize)

	// Write remaining bits.
	if h.valid != h.low {
		m := uintptr(1)<<h.low - 1      // don't clear existing bits below "low"
		m |= ^(uintptr(1)<<h.valid - 1) // don't clear existing bits above "valid"
		bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx])&m | h.mask)
	}
	if zeros == 0 {
		return
	}

	// Advance to the next bitmap word.
	h.offset += ptrBits * goarch.PtrSize

	// Continue on writing zeros for the rest of the object.
	// For standard use of the ptr bits this is not required, as
	// the bits are read from the beginning of the object. Some uses,
	// like noscan spans, oblets, bulk write barriers, and cgocheck, might
	// start mid-object, so these writes are still required.
	for {
		// Write zero bits.
		idx := h.offset / (ptrBits * goarch.PtrSize)
		if zeros < ptrBits {
			bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx]) &^ (uintptr(1)<<zeros - 1))
			break
		} else if zeros == ptrBits {
			bitmap[idx] = 0
			break
		} else {
			bitmap[idx] = 0
			zeros -= ptrBits
		}
		h.offset += ptrBits * goarch.PtrSize
	}
}

// bswapIfBigEndian swaps the byte order of the uintptr on goarch.BigEndian platforms,
// and leaves it alone elsewhere.
func bswapIfBigEndian(x uintptr) uintptr {
	if goarch.BigEndian {
		if goarch.PtrSize == 8 {
			return uintptr(sys.Bswap64(uint64(x)))
		}
		return uintptr(sys.Bswap32(uint32(x)))
	}
	return x
}

// newUserArenaChunk allocates a user arena chunk, which maps to a single
// heap arena and single span. Returns a pointer to the base of the chunk
// (this is really important: we need to keep the chunk alive) and the span.
func newUserArenaChunk() (unsafe.Pointer, *mspan) {
	if gcphase == _GCmarktermination {
		throw("newUserArenaChunk called with gcphase == _GCmarktermination")
	}

	// Deduct assist credit. Because user arena chunks are modeled as one
	// giant heap object which counts toward heapLive, we're obligated to
	// assist the GC proportionally (and it's worth noting that the arena
	// does represent additional work for the GC, but we also have no idea
	// what that looks like until we actually allocate things into the
	// arena).
	deductAssistCredit(userArenaChunkBytes)

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	mp.mallocing = 1

	// Allocate a new user arena.
	var span *mspan
	systemstack(func() {
		span = mheap_.allocUserArenaChunk()
	})
	if span == nil {
		throw("out of memory")
	}
	x := unsafe.Pointer(span.base())

	// Allocate black during GC.
	// All slots hold nil so no scanning is needed.
	// This may be racing with GC so do it atomically if there can be
	// a race marking the bit.
	if gcphase != _GCoff {
		gcmarknewobject(span, span.base())
	}

	if raceenabled {
		// TODO(mknyszek): Track individual objects.
		racemalloc(unsafe.Pointer(span.base()), span.elemsize)
	}

	if msanenabled {
		// TODO(mknyszek): Track individual objects.
		msanmalloc(unsafe.Pointer(span.base()), span.elemsize)
	}

	if asanenabled {
		// TODO(mknyszek): Track individual objects.
		rzSize := computeRZlog(span.elemsize)
		span.elemsize -= rzSize
		span.largeType.Size_ = span.elemsize
		rzStart := span.base() + span.elemsize
		span.userArenaChunkFree = makeAddrRange(span.base(), rzStart)
		asanpoison(unsafe.Pointer(rzStart), span.limit-rzStart)
		asanunpoison(unsafe.Pointer(span.base()), span.elemsize)
	}

	if rate := MemProfileRate; rate > 0 {
		c := getMCache(mp)
		if c == nil {
			throw("newUserArenaChunk called without a P or outside bootstrapping")
		}
		// Note cache c only valid while m acquired; see #47302
		if rate != 1 && userArenaChunkBytes < c.nextSample {
			c.nextSample -= userArenaChunkBytes
		} else {
			profilealloc(mp, unsafe.Pointer(span.base()), userArenaChunkBytes)
		}
	}
	mp.mallocing = 0
	releasem(mp)

	// Again, because this chunk counts toward heapLive, potentially trigger a GC.
	if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
		gcStart(t)
	}

	if debug.malloc {
		if inittrace.active && inittrace.id == getg().goid {
			// Init functions are executed sequentially in a single goroutine.
			inittrace.bytes += uint64(userArenaChunkBytes)
		}
	}

	// Double-check it's aligned to the physical page size. Based on the current
	// implementation this is trivially true, but it need not be in the future.
	// However, if it's not aligned to the physical page size then we can't properly
	// set it to fault later.
	if uintptr(x)%physPageSize != 0 {
		throw("user arena chunk is not aligned to the physical page size")
	}

	return x, span
}

// isUnusedUserArenaChunk reports whether the arena chunk has been set to fault
// and doesn't contain any scannable memory anymore. However, it might still be
// mSpanInUse as it sits on the quarantine list, since it needs to be swept.
//
// This is not safe to execute unless the caller has ownership of the mspan or
// the world is stopped (preemption is prevented while the relevant state changes).
//
// This is really only meant to be used by accounting tests in the runtime to
// distinguish when a span shouldn't be counted (since mSpanInUse might not be
// enough).
func (s *mspan) isUnusedUserArenaChunk() bool {
	return s.isUserArenaChunk && s.spanclass == makeSpanClass(0, true)
}

// setUserArenaChunkToFault sets the address space for the user arena chunk to fault
// and releases any underlying memory resources.
//
// Must be in a non-preemptible state to ensure the consistency of statistics
// exported to MemStats.
func (s *mspan) setUserArenaChunkToFault() {
	if !s.isUserArenaChunk {
		throw("invalid span in heapArena for user arena")
	}
	if s.npages*pageSize != userArenaChunkBytes {
		throw("span on userArena.faultList has invalid size")
	}

	// Update the span class to be noscan. What we want to happen is that
	// any pointer into the span keeps it from getting recycled, so we want
	// the mark bit to get set, but we're about to set the address space to fault,
	// so we have to prevent the GC from scanning this memory.
	//
	// It's OK to set it here because (1) a GC isn't in progress, so the scanning code
	// won't make a bad decision, (2) we're currently non-preemptible and in the runtime,
	// so a GC is blocked from starting. We might race with sweeping, which could
	// put it on the "wrong" sweep list, but really don't care because the chunk is
	// treated as a large object span and there's no meaningful difference between scan
	// and noscan large objects in the sweeper. The STW at the start of the GC acts as a
	// barrier for this update.
	s.spanclass = makeSpanClass(0, true)

	// Actually set the arena chunk to fault, so we'll get dangling pointer errors.
	// sysFault currently uses a method on each OS that forces it to evacuate all
	// memory backing the chunk.
	sysFault(unsafe.Pointer(s.base()), s.npages*pageSize)

	// Everything on the list is counted as in-use; however, sysFault transitions to
	// Reserved, not Prepared, so we skip updating heapFree or heapReleased and just
	// remove the memory from the total altogether; it's just address space now.
	gcController.heapInUse.add(-int64(s.npages * pageSize))

	// Count this as a free of an object right now as opposed to when
	// the span gets off the quarantine list. The main reason is so that the
	// amount of bytes allocated doesn't exceed how much is counted as
	// "mapped ready," which could cause a deadlock in the pacer.
	gcController.totalFree.Add(int64(s.elemsize))

	// Update consistent stats to match.
	//
	// We're non-preemptible, so it's safe to update consistent stats (our P
	// won't change out from under us).
	stats := memstats.heapStats.acquire()
	atomic.Xaddint64(&stats.committed, -int64(s.npages*pageSize))
	atomic.Xaddint64(&stats.inHeap, -int64(s.npages*pageSize))
	atomic.Xadd64(&stats.largeFreeCount, 1)
	atomic.Xadd64(&stats.largeFree, int64(s.elemsize))
	memstats.heapStats.release()

	// This counts as a free, so update heapLive.
	gcController.update(-int64(s.elemsize), 0)

	// Mark it as free for the race detector.
	if raceenabled {
		racefree(unsafe.Pointer(s.base()), s.elemsize)
	}

	systemstack(func() {
		// Add the user arena to the quarantine list.
		lock(&mheap_.lock)
		mheap_.userArena.quarantineList.insert(s)
		unlock(&mheap_.lock)
	})
}

// inUserArenaChunk returns true if p points to a user arena chunk.
func inUserArenaChunk(p uintptr) bool {
	s := spanOf(p)
	if s == nil {
		return false
	}
	return s.isUserArenaChunk
}

// freeUserArenaChunk releases the user arena represented by s back to the runtime.
//
// x must be a live pointer within s.
//
// The runtime will set the user arena to fault once it's safe (the GC is no longer running)
// and then once the user arena is no longer referenced by the application, will allow it to
// be reused.
func freeUserArenaChunk(s *mspan, x unsafe.Pointer) {
	if !s.isUserArenaChunk {
		throw("span is not for a user arena")
	}
	if s.npages*pageSize != userArenaChunkBytes {
		throw("invalid user arena span size")
	}

	// Mark the region as free to various sanitizers immediately instead
	// of handling them at sweep time.
	if raceenabled {
		racefree(unsafe.Pointer(s.base()), s.elemsize)
	}
	if msanenabled {
		msanfree(unsafe.Pointer(s.base()), s.elemsize)
	}
	if asanenabled {
		asanpoison(unsafe.Pointer(s.base()), s.elemsize)
	}

	// Make ourselves non-preemptible as we manipulate state and statistics.
	//
	// Also required by setUserArenaChunkToFault.
	mp := acquirem()

	// We can only set user arenas to fault if we're in the _GCoff phase.
	if gcphase == _GCoff {
		lock(&userArenaState.lock)
		faultList := userArenaState.fault
		userArenaState.fault = nil
		unlock(&userArenaState.lock)

		s.setUserArenaChunkToFault()
		for _, lc := range faultList {
			lc.mspan.setUserArenaChunkToFault()
		}

		// Until the chunks are set to fault, keep them alive via the fault list.
		KeepAlive(x)
		KeepAlive(faultList)
	} else {
		// Put the user arena on the fault list.
		lock(&userArenaState.lock)
		userArenaState.fault = append(userArenaState.fault, liveUserArenaChunk{s, x})
		unlock(&userArenaState.lock)
	}
	releasem(mp)
}

// allocUserArenaChunk attempts to reuse a free user arena chunk represented
// as a span.
//
// Must be in a non-preemptible state to ensure the consistency of statistics
// exported to MemStats.
//
// Acquires the heap lock. Must run on the system stack for that reason.
//
//go:systemstack
func (h *mheap) allocUserArenaChunk() *mspan {
	var s *mspan
	var base uintptr

	// First check the free list.
	lock(&h.lock)
	if !h.userArena.readyList.isEmpty() {
		s = h.userArena.readyList.first
		h.userArena.readyList.remove(s)
		base = s.base()
	} else {
		// Free list was empty, so allocate a new arena.
		hintList := &h.userArena.arenaHints
		if raceenabled {
			// In race mode just use the regular heap hints. We might fragment
			// the address space, but the race detector requires that the heap
			// is mapped contiguously.
			hintList = &h.arenaHints
		}
		v, size := h.sysAlloc(userArenaChunkBytes, hintList, false)
		if size%userArenaChunkBytes != 0 {
			throw("sysAlloc size is not divisible by userArenaChunkBytes")
		}
		if size > userArenaChunkBytes {
			// We got more than we asked for. This can happen if
			// heapArenaBytes > userArenaChunkBytes, or if sysAlloc just returns
			// some extra as a result of trying to find an aligned region.
			//
			// Divide it up and put it on the ready list.
			for i := userArenaChunkBytes; i < size; i += userArenaChunkBytes {
				s := h.allocMSpanLocked()
				s.init(uintptr(v)+i, userArenaChunkPages)
				h.userArena.readyList.insertBack(s)
			}
			size = userArenaChunkBytes
		}
		base = uintptr(v)
		if base == 0 {
			// Out of memory.
			unlock(&h.lock)
			return nil
		}
		s = h.allocMSpanLocked()
	}
	unlock(&h.lock)

	// sysAlloc returns Reserved address space, and any span we're
	// reusing is set to fault (so, also Reserved), so transition
	// it to Prepared and then Ready.
	//
	// Unlike (*mheap).grow, just map in everything that we
	// asked for. We're likely going to use it all.
	sysMap(unsafe.Pointer(base), userArenaChunkBytes, &gcController.heapReleased)
	sysUsed(unsafe.Pointer(base), userArenaChunkBytes, userArenaChunkBytes)

	// Model the user arena as a heap span for a large object.
	spc := makeSpanClass(0, false)
	h.initSpan(s, spanAllocHeap, spc, base, userArenaChunkPages)
	s.isUserArenaChunk = true
	s.elemsize -= userArenaChunkReserveBytes()
	s.limit = s.base() + s.elemsize
	s.freeindex = 1
	s.allocCount = 1

	// Account for this new arena chunk memory.
	gcController.heapInUse.add(int64(userArenaChunkBytes))
	gcController.heapReleased.add(-int64(userArenaChunkBytes))

	stats := memstats.heapStats.acquire()
	atomic.Xaddint64(&stats.inHeap, int64(userArenaChunkBytes))
	atomic.Xaddint64(&stats.committed, int64(userArenaChunkBytes))

	// Model the arena as a single large malloc.
	atomic.Xadd64(&stats.largeAlloc, int64(s.elemsize))
	atomic.Xadd64(&stats.largeAllocCount, 1)
	memstats.heapStats.release()

	// Count the alloc in inconsistent, internal stats.
	gcController.totalAlloc.Add(int64(s.elemsize))

	// Update heapLive.
	gcController.update(int64(s.elemsize), 0)

	// This must clear the entire heap bitmap so that it's safe
	// to allocate noscan data without writing anything out.
	s.initHeapBits(true)

	// Clear the span preemptively. It's an arena chunk, so let's assume
	// everything is going to be used.
	//
	// This also seems to make a massive difference as to whether or
	// not Linux decides to back this memory with transparent huge
	// pages. There's latency involved in this zeroing, but the hugepage
	// gains are almost always worth it. Note: it's important that we
	// clear even if it's freshly mapped and we know there's no point
	// to zeroing as *that* is the critical signal to use huge pages.
	memclrNoHeapPointers(unsafe.Pointer(s.base()), s.elemsize)
	s.needzero = 0

	s.freeIndexForScan = 1

	// Set up the range for allocation.
	s.userArenaChunkFree = makeAddrRange(base, base+s.elemsize)

	// Put the large span in the mcentral swept list so that it's
	// visible to the background sweeper.
	h.central[spc].mcentral.fullSwept(h.sweepgen).push(s)

	// Set up an allocation header. Avoid write barriers here because this type
	// is not a real type, and it exists in an invalid location.
	*(*uintptr)(unsafe.Pointer(&s.largeType)) = uintptr(unsafe.Pointer(s.limit))
	*(*uintptr)(unsafe.Pointer(&s.largeType.GCData)) = s.limit + unsafe.Sizeof(_type{})
	s.largeType.PtrBytes = 0
	s.largeType.Size_ = s.elemsize

	return s
}