1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2023 Oracle and/or its affiliates.
4 */
5
6 /*\
7 * [Description]
8 *
9 * Stress a possible race condition between memory pages allocation
10 * and soft-offline of unrelated pages as explained in the commit:
11 * d4ae9916ea29 (mm: soft-offline: close the race against page allocation)
12 *
 * Check that soft-offlined pages get correctly replaced: with the
 * same content and without a SIGBUS being generated when accessed.
15 */
16
17 #include <errno.h>
18 #include <mntent.h>
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <time.h>
23 #include <unistd.h>
24 #include <sys/types.h>
25 #include <sys/klog.h>
26
27 #include "tst_test.h"
28 #include "tst_safe_pthread.h"
29 #include "tst_safe_stdio.h"
30 #include "lapi/mmap.h"
31
/* Number of allocate / soft-offline / verify rounds run by each thread */
#define NUM_LOOPS 5
/* Number of pages mapped and soft-offlined by a thread in one round */
#define NUM_PAGES 32
/*
 * Left shift applied to the thread number when building a page sentinel,
 * the page index being stored in the low bits (1 << 5 == NUM_PAGES).
 */
#define NUM_PAGES_OFFSET 5

/* Needed module to online back memory pages */
#define HW_MODULE "hwpoison_inject"

/* Worker thread handles, allocated in setup() */
static pthread_t *thread_ids;
static int number_threads;
/* Number of times the test function has been run; sizes the pfn array on cleanup */
static int run_iterations;
/* Upper bound of pages soft-offlined by one run of the test function */
static int maximum_pfns;

/*
 * SIGBUS notification state, protected by sigbus_received_mtx.
 * The condition variable is statically initialized: waiting on or signalling
 * an uninitialized condition variable is undefined behavior per POSIX.
 */
static volatile int sigbus_received;
static pthread_cond_t sigbus_received_cv = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t sigbus_received_mtx = PTHREAD_MUTEX_INITIALIZER;

static long pagesize;
/* Marker line written to the kernel log at setup and searched for at cleanup */
static char beginning_tag[BUFSIZ];
/* Set when this test had to modprobe HW_MODULE, so that cleanup removes it */
static int hwpoison_probe;
51
/* Issue a zero-duration nanosleep(); used as a pause point between test loops. */
static void my_yield(void)
{
	static const struct timespec no_delay = { .tv_sec = 0, .tv_nsec = 0 };

	nanosleep(&no_delay, NULL);
}
58
/*
 * SIGBUS handler: a SIGBUS received is a confirmation of test failure.
 *
 * Increments sigbus_received under sigbus_received_mtx, signals
 * sigbus_received_cv (waited on by sigbus_monitor()), then blocks in pause().
 *
 * NOTE(review): pthread_mutex_lock() and pthread_cond_signal() are not in the
 * POSIX list of async-signal-safe functions -- confirm this is acceptable for
 * a handler that is only expected to run on test failure.
 */
static void sigbus_handler(int signum LTP_ATTRIBUTE_UNUSED)
{
	pthread_mutex_lock(&sigbus_received_mtx);
	sigbus_received++;
	pthread_cond_signal(&sigbus_received_cv);
	pthread_mutex_unlock(&sigbus_received_mtx);
	/* presumably parks the faulting thread while the monitor reports and exits */
	pause();
}
68
/*
 * Thread body: sleep on sigbus_received_cv until sigbus_received becomes
 * non-zero, then report TFAIL and terminate the process with exit(1).
 * The function never returns to its caller.
 */
static void *sigbus_monitor(void *arg LTP_ATTRIBUTE_UNUSED)
{
	pthread_mutex_lock(&sigbus_received_mtx);
	for (;;) {
		if (sigbus_received != 0)
			break;
		pthread_cond_wait(&sigbus_received_cv, &sigbus_received_mtx);
	}
	pthread_mutex_unlock(&sigbus_received_mtx);

	tst_res(TFAIL, "SIGBUS Received");
	exit(1);
}
78
79 /*
80 * Allocate a page and write a sentinel value into it.
81 */
allocate_write(int sentinel)82 static void *allocate_write(int sentinel)
83 {
84 void *p;
85 int *s;
86
87 p = SAFE_MMAP(NULL, pagesize, PROT_READ|PROT_WRITE,
88 MAP_SHARED|MAP_ANONYMOUS, -1, 0);
89 s = (int *)p;
90 *s = sentinel;
91 return p;
92 }
93
94 /*
95 * Verify and unmap the given page.
96 */
verif_unmap(void * page,int sentinel)97 static int verif_unmap(void *page, int sentinel)
98 {
99 int *s = (int *)page;
100
101 if (*s != sentinel) {
102 tst_res(TFAIL, "pid[%d]: fail: bad sentinel value seen: %d expected: %d\n", getpid(), *s, sentinel);
103 return 1;
104 }
105
106 return SAFE_MUNMAP(page, pagesize);
107 }
108
109 /*
110 * allocate_offline() - Allocate and offline test called per-thread
111 *
112 * This function does the allocation and offline by mmapping an
113 * anonymous page and offlining it.
114 */
allocate_offline(int tnum)115 static int allocate_offline(int tnum)
116 {
117 int loop;
118
119 for (loop = 0; loop < NUM_LOOPS; loop++) {
120 long *ptrs[NUM_PAGES];
121 int num_alloc;
122 int i;
123
124 for (num_alloc = 0; num_alloc < NUM_PAGES; num_alloc++) {
125
126 ptrs[num_alloc] = allocate_write((tnum << NUM_PAGES_OFFSET) | num_alloc);
127 if (ptrs[num_alloc] == NULL)
128 return -1;
129
130 if (madvise(ptrs[num_alloc], pagesize, MADV_SOFT_OFFLINE) == -1) {
131 if (errno == EBUSY)
132 continue;
133 if (errno != EINVAL)
134 tst_res(TFAIL | TERRNO, "madvise failed");
135 if (errno == EINVAL)
136 tst_res(TCONF, "madvise() didn't support MADV_SOFT_OFFLINE");
137 return errno;
138 }
139 }
140
141 for (i = 0; i < num_alloc; i++) {
142 if (verif_unmap(ptrs[i], (tnum << NUM_PAGES_OFFSET) | i) != 0)
143 return 1;
144 }
145
146 my_yield();
147 if (!tst_remaining_runtime()) {
148 tst_res(TINFO, "Thread [%d]: Test runtime is over, exiting", tnum);
149 break;
150 }
151 }
152
153 return 0;
154 }
155
alloc_mem(void * threadnum)156 static void *alloc_mem(void *threadnum)
157 {
158 int err;
159 int tnum = (int)(uintptr_t)threadnum;
160
161 /* waiting for other threads starting */
162 TST_CHECKPOINT_WAIT(0);
163
164 err = allocate_offline(tnum);
165 tst_res(TINFO,
166 "Thread [%d] returned %d, %s.", tnum, err, (err ? "failed" : "succeeded"));
167 return (void *)(uintptr_t) (err ? -1 : 0);
168 }
169
stress_alloc_offl(void)170 static void stress_alloc_offl(void)
171 {
172 int thread_index;
173 int thread_failure = 0;
174 pthread_t sigbus_monitor_t;
175
176 run_iterations++;
177
178 SAFE_PTHREAD_CREATE(&sigbus_monitor_t, NULL, sigbus_monitor, NULL);
179 pthread_detach(sigbus_monitor_t);
180
181 for (thread_index = 0; thread_index < number_threads; thread_index++) {
182 SAFE_PTHREAD_CREATE(&thread_ids[thread_index], NULL, alloc_mem,
183 (void *)(uintptr_t)thread_index);
184 }
185
186 TST_CHECKPOINT_WAKE2(0, number_threads);
187
188 for (thread_index = 0; thread_index < number_threads; thread_index++) {
189 void *status;
190
191 SAFE_PTHREAD_JOIN(thread_ids[thread_index], &status);
192 if ((intptr_t)status != 0) {
193 tst_res(TFAIL, "thread [%d] - exited with errors",
194 thread_index);
195 thread_failure++;
196 }
197 }
198
199 if (thread_failure == 0)
200 tst_res(TPASS, "soft-offline / mmap race still clean");
201 }
202
/*
 * ------------
 * Cleanup code:
 * The idea is to retrieve all the pfn numbers that have been soft-offlined
 * (generating a "Soft offlining pfn 0x..." message in the kernel ring buffer)
 * by the current test (since a "beginning_tag" message we write when starting).
 * And to put these pages back online by writing the pfn number to the
 * <debugfs>/hwpoison/unpoison-pfn special file.
 * ------------
 */
#define OFFLINE_PATTERN "Soft offlining pfn 0x"
#define OFFLINE_PATTERN_LEN (sizeof(OFFLINE_PATTERN))

/*
 * Return the pfn if the kmsg message is a soft-offline indication.
 *
 * line: NUL-terminated kernel log record
 * len:  number of valid bytes in line
 *
 * Returns the hexadecimal value following OFFLINE_PATTERN, or 0 when the
 * pattern is absent, lies beyond len, or the value overflows unsigned long.
 */
static unsigned long parse_kmsg_soft_offlined_pfn(char *line, ssize_t len)
{
	char *pos;
	unsigned long addr;

	pos = strstr(line, OFFLINE_PATTERN);
	if (pos == NULL)
		return 0UL;

	/* sizeof() counts the terminating NUL, hence the -1 */
	pos += OFFLINE_PATTERN_LEN - 1;
	if (pos > (line + len))
		return 0UL;

	/*
	 * strtoul() only sets errno on failure: clear it first so that a
	 * stale ERANGE is not mistaken for an overflow of this conversion.
	 */
	errno = 0;
	addr = strtoul(pos, NULL, 16);
	if ((addr == ULONG_MAX) && (errno == ERANGE))
		return 0UL;

	return addr;
}
236
/*
 * Collect the soft-offlined pfns seen in the kernel message log.
 *
 * begin_tag: text of the marker record; records read before it are ignored
 * pfns:      output array receiving the pfns found
 * max:       capacity of pfns
 *
 * Reads /dev/kmsg in non-blocking mode until a read fails or returns no
 * data, or until max pfns have been stored.
 *
 * Returns the number of pfns stored in pfns.
 */
static int populate_from_klog(char *begin_tag, unsigned long *pfns, int max)
{
	int found = 0, fd, beginning_tag_found = 0;
	ssize_t sz;
	unsigned long pfn;
	/* one extra byte so that a full-size record can still be terminated */
	char buf[BUFSIZ + 1];

	fd = SAFE_OPEN("/dev/kmsg", O_RDONLY|O_NONBLOCK);

	while (found < max) {
		sz = read(fd, buf, sizeof(buf) - 1);
		/* kmsg returns EPIPE if record was modified while reading */
		if (sz < 0 && errno == EPIPE)
			continue;
		if (sz <= 0)
			break;

		/*
		 * read() does not terminate the data: without this, strstr()
		 * would run past the record into the remains of a previous,
		 * longer one (or past the end of the buffer).
		 */
		buf[sz] = '\0';

		if (!beginning_tag_found) {
			if (strstr(buf, begin_tag))
				beginning_tag_found = 1;
			continue;
		}
		pfn = parse_kmsg_soft_offlined_pfn(buf, sz);
		if (pfn)
			pfns[found++] = pfn;
	}
	SAFE_CLOSE(fd);
	return found;
}
266
/*
 * Scan the file at path line by line, looking for key as a substring.
 * Reading stops at the first matching line.
 * Return 1 if the key is found, 0 otherwise.
 */
static int find_in_file(char *path, char *key)
{
	char line[4096];
	int found = 0;
	FILE *file;

	file = SAFE_FOPEN(path, "r");

	while (!found && fgets(line, sizeof(line), file))
		found = (strstr(line, key) != NULL);

	SAFE_FCLOSE(file);
	return found;
}
286
/*
 * Write pfn, formatted as "0x<hex>", to the file descriptor fd.
 * The buffer holds "0x", up to 16 hexadecimal digits and the final NUL.
 */
static void unpoison_this_pfn(unsigned long pfn, int fd)
{
	char pfn_str[19];
	size_t len;

	snprintf(pfn_str, sizeof(pfn_str), "0x%lx", pfn);
	len = strlen(pfn_str);

	SAFE_WRITE(0, fd, pfn_str, len);
}
294
295 /* Find and open the <debugfs>/hwpoison/unpoison-pfn special file */
open_unpoison_pfn(void)296 static int open_unpoison_pfn(void)
297 {
298 char *added_file_path = "/hwpoison/unpoison-pfn";
299 const char *const cmd_modprobe[] = {"modprobe", HW_MODULE, NULL};
300 char debugfs_fp[4096];
301 struct mntent *mnt;
302 FILE *mntf;
303
304 if (!find_in_file("/proc/modules", HW_MODULE) && tst_check_builtin_driver(HW_MODULE))
305 hwpoison_probe = 1;
306
307 /* probe hwpoison only if it isn't already there */
308 if (hwpoison_probe)
309 SAFE_CMD(cmd_modprobe, NULL, NULL);
310
311 /* debugfs mount point */
312 mntf = setmntent("/proc/mounts", "r");
313 if (!mntf) {
314 tst_brk(TBROK | TERRNO, "Can't open /proc/mounts");
315 return -1;
316 }
317 while ((mnt = getmntent(mntf)) != NULL) {
318 if (strcmp(mnt->mnt_type, "debugfs") == 0) {
319 strcpy(debugfs_fp, mnt->mnt_dir);
320 strcat(debugfs_fp, added_file_path);
321 break;
322 }
323 }
324 endmntent(mntf);
325 if (!mnt)
326 return -1;
327
328 TEST(open(debugfs_fp, O_WRONLY));
329
330 if (TST_RET == -1 && TST_ERR == EPERM && tst_lockdown_enabled() > 0) {
331 tst_res(TINFO,
332 "Cannot restore soft-offlined memory due to lockdown");
333 return TST_RET;
334 }
335
336 if (TST_RET == -1) {
337 tst_brk(TBROK | TTERRNO, "open(%s) failed", debugfs_fp);
338 } else if (TST_RET < 0) {
339 tst_brk(TBROK | TTERRNO, "Invalid open() return value %ld",
340 TST_RET);
341 }
342
343 return TST_RET;
344 }
345
346 /*
347 * Get all the Offlined PFNs indicated in the dmesg output
348 * starting after the given beginning tag, and request a debugfs
349 * hwpoison/unpoison-pfn for each of them.
350 */
unpoison_pfn(char * begin_tag)351 static void unpoison_pfn(char *begin_tag)
352 {
353 unsigned long *pfns;
354 const char *const cmd_rmmod[] = {"rmmod", HW_MODULE, NULL};
355 int found_pfns, fd;
356
357 pfns = SAFE_MALLOC(sizeof(pfns) * maximum_pfns * run_iterations);
358
359 fd = open_unpoison_pfn();
360 if (fd >= 0) {
361 found_pfns = populate_from_klog(begin_tag, pfns, maximum_pfns * run_iterations);
362
363 tst_res(TINFO, "Restore %d Soft-offlined pages", found_pfns);
364 /* unpoison in reverse order */
365 while (found_pfns-- > 0)
366 unpoison_this_pfn(pfns[found_pfns], fd);
367
368 SAFE_CLOSE(fd);
369 }
370 /* remove hwpoison only if we probed it */
371 if (hwpoison_probe)
372 SAFE_CMD(cmd_rmmod, NULL, NULL);
373 }
374
375 /*
376 * Create and write a beginning tag to the kernel buffer to be used on cleanup
377 * when trying to restore the soft-offlined pages of our test run.
378 */
write_beginning_tag_to_kmsg(void)379 static void write_beginning_tag_to_kmsg(void)
380 {
381 int fd;
382
383 fd = SAFE_OPEN("/dev/kmsg", O_WRONLY);
384 snprintf(beginning_tag, sizeof(beginning_tag),
385 "Soft-offlining pages test starting (pid: %ld)",
386 (long)getpid());
387 SAFE_WRITE(1, fd, beginning_tag, strlen(beginning_tag));
388 SAFE_CLOSE(fd);
389 }
390
setup(void)391 static void setup(void)
392 {
393 struct sigaction my_sigaction;
394
395 number_threads = (int)sysconf(_SC_NPROCESSORS_ONLN) * 2;
396 if (number_threads <= 1)
397 number_threads = 2;
398 else if (number_threads > 5)
399 number_threads = 5;
400
401 maximum_pfns = number_threads * NUM_LOOPS * NUM_PAGES;
402 thread_ids = SAFE_MALLOC(sizeof(pthread_t) * number_threads);
403 pagesize = sysconf(_SC_PAGESIZE);
404
405 /* SIGBUS is the main failure criteria */
406 my_sigaction.sa_handler = sigbus_handler;
407 if (sigaction(SIGBUS, &my_sigaction, NULL) == -1)
408 tst_res(TFAIL | TERRNO, "Signal handler attach failed");
409
410 write_beginning_tag_to_kmsg();
411 tst_res(TINFO, "Spawning %d threads, with a total of %d memory pages",
412 number_threads, maximum_pfns);
413 }
414
/*
 * Test cleanup callback: calls unpoison_pfn() with beginning_tag, the marker
 * that setup() wrote to the kernel log.
 */
static void cleanup(void)
{
	unpoison_pfn(beginning_tag);
}
419
420 static struct tst_test test = {
421 .needs_root = 1,
422 .needs_drivers = (const char *const []) {
423 HW_MODULE,
424 NULL
425 },
426 .needs_cmds = (const char *[]) {
427 "modprobe",
428 "rmmod",
429 NULL
430 },
431 .needs_kconfigs = (const char *[]) {
432 "CONFIG_MEMORY_FAILURE=y",
433 NULL
434 },
435 .max_runtime = 30,
436 .needs_checkpoints = 1,
437 .setup = setup,
438 .cleanup = cleanup,
439 .test_all = stress_alloc_offl,
440 .tags = (const struct tst_tag[]) {
441 {"linux-git", "d4ae9916ea29"},
442 {}
443 }
444 };
445