xref: /aosp_15_r20/external/ltp/testcases/kernel/syscalls/madvise/madvise06.c (revision 49cdfc7efb34551c7342be41a7384b9c40d7cab7)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2016 Red Hat, Inc.
4  */
5 
6 /*\
7  * [Description]
8  *
9  * A page fault occurs even though the madvise(WILLNEED) system call was
10  * called to prefetch the page. This issue is reproduced by running a
11  * program which sequentially accesses a shared memory region and calls
12  * madvise(WILLNEED) on the next page upon each page fault.
13  *
14  * This bug is present in all RHEL7 versions. It looks like this was fixed in
15  * mainline kernel > v3.15 by the following patch:
16  *
17  *  commit 55231e5c898c5c03c14194001e349f40f59bd300
18  *  Author: Johannes Weiner <[email protected]>
19  *  Date:   Thu May 22 11:54:17 2014 -0700
20  *
21  *     mm: madvise: fix MADV_WILLNEED on shmem swapouts
22  *
23  * Two checks are performed, the first looks at how SwapCache
24  * changes during madvise. When the pages are dirtied, about half
25  * will be accounted for under Cached and the other half will be
26  * moved into Swap. When madvise is run it will cause the pages
27  * under Cached to also be moved to Swap while rotating the pages
28  * already in Swap into SwapCached. So we expect that SwapCached has
29  * roughly MEM_LIMIT bytes added to it, but for reliability the
30  * PASS_THRESHOLD is much lower than that.
31  *
32  * Secondly we run madvise again, but only on the first
33  * PASS_THRESHOLD bytes to ensure these are entirely in RAM. Then we
34  * dirty these pages and check there were (almost) no page
35  * faults. Two faults are allowed in case some tasklet or other
36  * unexpected, but irrelevant, procedure registers a fault against
37  * our process.
38  *
39  * It can also reproduce the MADV_WILLNEED performance problem,
40  * which was introduced in kernel 5.9 by commit
41  *   e6e88712e43b ("mm: optimise madvise WILLNEED")
42  * and fixed in kernel 5.10-rc5 by commit
43  *   66383800df9c ("mm: fix madvise WILLNEED performance problem").
44  */
45 
46 #include <errno.h>
47 #include <stdio.h>
48 #include <sys/mount.h>
49 #include <sys/sysinfo.h>
50 #include "tst_test.h"
51 
52 #define CHUNK_SZ (400*1024*1024L)
53 #define MEM_LIMIT (CHUNK_SZ / 2)
54 #define MEMSW_LIMIT (2 * CHUNK_SZ)
55 #define PASS_THRESHOLD (CHUNK_SZ / 4)
56 #define PASS_THRESHOLD_KB (PASS_THRESHOLD / 1024)
57 
58 static const char drop_caches_fname[] = "/proc/sys/vm/drop_caches";
59 static int pg_sz, stat_refresh_sup;
60 
61 static long init_swap, init_swap_cached, init_cached;
62 
check_path(const char * path)63 static void check_path(const char *path)
64 {
65 	if (access(path, R_OK | W_OK))
66 		tst_brk(TCONF, "file needed: %s", path);
67 }
68 
print_cgmem(const char * name)69 static void print_cgmem(const char *name)
70 {
71 	long ret;
72 
73 	if (!SAFE_CG_HAS(tst_cg, name))
74 		return;
75 
76 	SAFE_CG_SCANF(tst_cg, name, "%ld", &ret);
77 	tst_res(TINFO, "\t%s: %ld Kb", name, ret / 1024);
78 }
79 
meminfo_diag(const char * point)80 static void meminfo_diag(const char *point)
81 {
82 	if (stat_refresh_sup)
83 		SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1");
84 
85 	tst_res(TINFO, "%s", point);
86 	tst_res(TINFO, "\tSwap: %ld Kb",
87 		SAFE_READ_MEMINFO("SwapTotal:") - SAFE_READ_MEMINFO("SwapFree:") - init_swap);
88 	tst_res(TINFO, "\tSwapCached: %ld Kb",
89 		SAFE_READ_MEMINFO("SwapCached:") - init_swap_cached);
90 	tst_res(TINFO, "\tCached: %ld Kb",
91 		SAFE_READ_MEMINFO("Cached:") - init_cached);
92 
93 	print_cgmem("memory.current");
94 	print_cgmem("memory.swap.current");
95 	print_cgmem("memory.kmem.usage_in_bytes");
96 }
97 
setup(void)98 static void setup(void)
99 {
100 	struct sysinfo sys_buf_start;
101 
102 	pg_sz = getpagesize();
103 
104 	tst_res(TINFO, "dropping caches");
105 	sync();
106 	SAFE_FILE_PRINTF(drop_caches_fname, "3");
107 
108 	sysinfo(&sys_buf_start);
109 	if (sys_buf_start.freeram < 2 * CHUNK_SZ) {
110 		tst_brk(TCONF, "System RAM is too small (%li bytes needed)",
111 			2 * CHUNK_SZ);
112 	}
113 	if (sys_buf_start.freeswap < 2 * CHUNK_SZ) {
114 		tst_brk(TCONF, "System swap is too small (%li bytes needed)",
115 			2 * CHUNK_SZ);
116 	}
117 
118 	check_path("/proc/self/oom_score_adj");
119 	SAFE_FILE_PRINTF("/proc/self/oom_score_adj", "%d", -1000);
120 
121 	SAFE_CG_PRINTF(tst_cg, "memory.max", "%ld", MEM_LIMIT);
122 	if (SAFE_CG_HAS(tst_cg, "memory.swap.max"))
123 		SAFE_CG_PRINTF(tst_cg, "memory.swap.max", "%ld", MEMSW_LIMIT);
124 
125 	if (SAFE_CG_HAS(tst_cg, "memory.swappiness")) {
126 		SAFE_CG_PRINT(tst_cg, "memory.swappiness", "60");
127 	} else {
128 		check_path("/proc/sys/vm/swappiness");
129 		SAFE_FILE_PRINTF("/proc/sys/vm/swappiness", "%d", 60);
130 	}
131 
132 	SAFE_CG_PRINTF(tst_cg, "cgroup.procs", "%d", getpid());
133 
134 	meminfo_diag("Initial meminfo, later values are relative to this (except memcg)");
135 	init_swap = SAFE_READ_MEMINFO("SwapTotal:") - SAFE_READ_MEMINFO("SwapFree:");
136 	init_swap_cached = SAFE_READ_MEMINFO("SwapCached:");
137 	init_cached = SAFE_READ_MEMINFO("Cached:");
138 
139 	if (!access("/proc/sys/vm/stat_refresh", W_OK))
140 		stat_refresh_sup = 1;
141 
142 	tst_res(TINFO, "mapping %ld Kb (%ld pages), limit %ld Kb, pass threshold %ld Kb",
143 		CHUNK_SZ / 1024, CHUNK_SZ / pg_sz, MEM_LIMIT / 1024, PASS_THRESHOLD_KB);
144 }
145 
dirty_pages(char * ptr,long size)146 static void dirty_pages(char *ptr, long size)
147 {
148 	long i;
149 	long pages = size / pg_sz;
150 
151 	for (i = 0; i < pages; i++)
152 		ptr[i * pg_sz] = 'x';
153 }
154 
get_page_fault_num(void)155 static int get_page_fault_num(void)
156 {
157 	int pg;
158 
159 	SAFE_FILE_SCANF("/proc/self/stat",
160 			"%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d",
161 			&pg);
162 	return pg;
163 }
164 
test_advice_willneed(void)165 static void test_advice_willneed(void)
166 {
167 	int loops = 100, res;
168 	char *target;
169 	long swapcached_start, swapcached;
170 	int page_fault_num_1, page_fault_num_2;
171 
172 	meminfo_diag("Before mmap");
173 	tst_res(TINFO, "PageFault(before mmap): %d", get_page_fault_num());
174 	target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE,
175 			MAP_SHARED | MAP_ANONYMOUS,
176 			-1, 0);
177 	meminfo_diag("Before dirty");
178 	tst_res(TINFO, "PageFault(before dirty): %d", get_page_fault_num());
179 	dirty_pages(target, CHUNK_SZ);
180 	tst_res(TINFO, "PageFault(after dirty): %d", get_page_fault_num());
181 
182 	meminfo_diag("Before madvise");
183 	SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
184 		&swapcached_start);
185 
186 	TEST(madvise(target, MEM_LIMIT, MADV_WILLNEED));
187 	if (TST_RET == -1)
188 		tst_brk(TBROK | TTERRNO, "madvise failed");
189 
190 	do {
191 		loops--;
192 		usleep(100000);
193 		if (stat_refresh_sup)
194 			SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1");
195 		SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
196 			&swapcached);
197 	} while (swapcached < swapcached_start + PASS_THRESHOLD_KB && loops > 0);
198 
199 	meminfo_diag("After madvise");
200 	res = swapcached > swapcached_start + PASS_THRESHOLD_KB;
201 	tst_res(res ? TPASS : TINFO,
202 		"%s than %ld Kb were moved to the swap cache",
203 		res ? "more" : "less", PASS_THRESHOLD_KB);
204 
205 	loops = 100;
206 	SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", &swapcached_start);
207 	TEST(madvise(target, pg_sz * 3, MADV_WILLNEED));
208 	if (TST_RET == -1)
209 		tst_brk(TBROK | TTERRNO, "madvise failed");
210 	do {
211 		loops--;
212 		usleep(100000);
213 		if (stat_refresh_sup)
214 			SAFE_FILE_PRINTF("/proc/sys/vm/stat_refresh", "1");
215 		SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
216 				&swapcached);
217 	} while (swapcached < swapcached_start + pg_sz*3/1024 && loops > 0);
218 
219 	page_fault_num_1 = get_page_fault_num();
220 	tst_res(TINFO, "PageFault(madvice / no mem access): %d",
221 			page_fault_num_1);
222 	dirty_pages(target, pg_sz * 3);
223 	page_fault_num_2 = get_page_fault_num();
224 	tst_res(TINFO, "PageFault(madvice / mem access): %d",
225 			page_fault_num_2);
226 	meminfo_diag("After page access");
227 
228 	res = page_fault_num_2 - page_fault_num_1;
229 	tst_res(res == 0 ? TPASS : TINFO,
230 		"%d pages were faulted out of 3 max", res);
231 
232 	SAFE_MUNMAP(target, CHUNK_SZ);
233 
234 	if (tst_taint_check())
235 		tst_res(TFAIL, "Kernel tainted");
236 	else
237 		tst_res(TPASS, "No kernel taints");
238 }
239 
240 static struct tst_test test = {
241 	.test_all = test_advice_willneed,
242 	.setup = setup,
243 	.needs_tmpdir = 1,
244 	.needs_root = 1,
245 	.max_runtime = 60,
246 	.taint_check = TST_TAINT_W | TST_TAINT_D,
247 	.save_restore = (const struct tst_path_val[]) {
248 		{"/proc/sys/vm/swappiness", NULL,
249 			TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
250 		{}
251 	},
252 	.needs_cgroup_ctrls = (const char *const []){ "memory", NULL },
253 	.tags = (const struct tst_tag[]) {
254 		{"linux-git", "55231e5c898c"},
255 		{"linux-git", "8de15e920dc8"},
256 		{"linux-git", "66383800df9c"},
257 		{}
258 	}
259 };
260