1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime
6
7import (
8	"internal/runtime/atomic"
9	"unsafe"
10)
11
// Errno values used when inspecting the error result of mmap in this file.
const (
	_EACCES = 13 // reported by sysAllocOS as "access denied"
	_EINVAL = 22 // not referenced in this part of the file; presumably used elsewhere
)
16
17// Don't split the stack as this method may be invoked without a valid G, which
18// prevents us from allocating more stack.
19//
20//go:nosplit
21func sysAllocOS(n uintptr) unsafe.Pointer {
22	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
23	if err != 0 {
24		if err == _EACCES {
25			print("runtime: mmap: access denied\n")
26			exit(2)
27		}
28		if err == _EAGAIN {
29			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
30			exit(2)
31		}
32		return nil
33	}
34	return p
35}
36
37var adviseUnused = uint32(_MADV_FREE)
38
39const madviseUnsupported = 0
40
41func sysUnusedOS(v unsafe.Pointer, n uintptr) {
42	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
43		// madvise will round this to any physical page
44		// *covered* by this range, so an unaligned madvise
45		// will release more memory than intended.
46		throw("unaligned sysUnused")
47	}
48
49	advise := atomic.Load(&adviseUnused)
50	if debug.madvdontneed != 0 && advise != madviseUnsupported {
51		advise = _MADV_DONTNEED
52	}
53	switch advise {
54	case _MADV_FREE:
55		if madvise(v, n, _MADV_FREE) == 0 {
56			break
57		}
58		atomic.Store(&adviseUnused, _MADV_DONTNEED)
59		fallthrough
60	case _MADV_DONTNEED:
61		// MADV_FREE was added in Linux 4.5. Fall back on MADV_DONTNEED if it's
62		// not supported.
63		if madvise(v, n, _MADV_DONTNEED) == 0 {
64			break
65		}
66		atomic.Store(&adviseUnused, madviseUnsupported)
67		fallthrough
68	case madviseUnsupported:
69		// Since Linux 3.18, support for madvise is optional.
70		// Fall back on mmap if it's not supported.
71		// _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE will unmap all the
72		// pages in the old mapping, and remap the memory region.
73		mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
74	}
75
76	if debug.harddecommit > 0 {
77		p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
78		if p != v || err != 0 {
79			throw("runtime: cannot disable permissions in address space")
80		}
81	}
82}
83
84func sysUsedOS(v unsafe.Pointer, n uintptr) {
85	if debug.harddecommit > 0 {
86		p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
87		if err == _ENOMEM {
88			throw("runtime: out of memory")
89		}
90		if p != v || err != 0 {
91			throw("runtime: cannot remap pages in address space")
92		}
93		return
94	}
95}
96
97func sysHugePageOS(v unsafe.Pointer, n uintptr) {
98	if physHugePageSize != 0 {
99		// Round v up to a huge page boundary.
100		beg := alignUp(uintptr(v), physHugePageSize)
101		// Round v+n down to a huge page boundary.
102		end := alignDown(uintptr(v)+n, physHugePageSize)
103
104		if beg < end {
105			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
106		}
107	}
108}
109
110func sysNoHugePageOS(v unsafe.Pointer, n uintptr) {
111	if uintptr(v)&(physPageSize-1) != 0 {
112		// The Linux implementation requires that the address
113		// addr be page-aligned, and allows length to be zero.
114		throw("unaligned sysNoHugePageOS")
115	}
116	madvise(v, n, _MADV_NOHUGEPAGE)
117}
118
119func sysHugePageCollapseOS(v unsafe.Pointer, n uintptr) {
120	if uintptr(v)&(physPageSize-1) != 0 {
121		// The Linux implementation requires that the address
122		// addr be page-aligned, and allows length to be zero.
123		throw("unaligned sysHugePageCollapseOS")
124	}
125	if physHugePageSize == 0 {
126		return
127	}
128	// N.B. If you find yourself debugging this code, note that
129	// this call can fail with EAGAIN because it's best-effort.
130	// Also, when it returns an error, it's only for the last
131	// huge page in the region requested.
132	//
133	// It can also sometimes return EINVAL if the corresponding
134	// region hasn't been backed by physical memory. This is
135	// difficult to guarantee in general, and it also means
136	// there's no way to distinguish whether this syscall is
137	// actually available. Oops.
138	//
139	// Anyway, that's why this call just doesn't bother checking
140	// any errors.
141	madvise(v, n, _MADV_COLLAPSE)
142}
143
144// Don't split the stack as this function may be invoked without a valid G,
145// which prevents us from allocating more stack.
146//
147//go:nosplit
148func sysFreeOS(v unsafe.Pointer, n uintptr) {
149	munmap(v, n)
150}
151
152func sysFaultOS(v unsafe.Pointer, n uintptr) {
153	mprotect(v, n, _PROT_NONE)
154	madvise(v, n, _MADV_DONTNEED)
155}
156
157func sysReserveOS(v unsafe.Pointer, n uintptr) unsafe.Pointer {
158	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
159	if err != 0 {
160		return nil
161	}
162	return p
163}
164
165func sysMapOS(v unsafe.Pointer, n uintptr) {
166	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
167	if err == _ENOMEM {
168		throw("runtime: out of memory")
169	}
170	if p != v || err != 0 {
171		print("runtime: mmap(", v, ", ", n, ") returned ", p, ", ", err, "\n")
172		throw("runtime: cannot map pages in arena address space")
173	}
174
175	// Disable huge pages if the GODEBUG for it is set.
176	//
177	// Note that there are a few sysHugePage calls that can override this, but
178	// they're all for GC metadata.
179	if debug.disablethp != 0 {
180		sysNoHugePageOS(v, n)
181	}
182}
183