1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * xsave/xrstor support.
4 *
5 * Author: Suresh Siddha <[email protected]>
6 */
7 #include <linux/bitops.h>
8 #include <linux/compat.h>
9 #include <linux/cpu.h>
10 #include <linux/mman.h>
11 #include <linux/nospec.h>
12 #include <linux/pkeys.h>
13 #include <linux/seq_file.h>
14 #include <linux/proc_fs.h>
15 #include <linux/vmalloc.h>
16 #include <linux/coredump.h>
17
18 #include <asm/fpu/api.h>
19 #include <asm/fpu/regset.h>
20 #include <asm/fpu/signal.h>
21 #include <asm/fpu/xcr.h>
22
23 #include <asm/cpuid.h>
24 #include <asm/tlbflush.h>
25 #include <asm/prctl.h>
26 #include <asm/elf.h>
27
28 #include <uapi/asm/elf.h>
29
30 #include "context.h"
31 #include "internal.h"
32 #include "legacy.h"
33 #include "xstate.h"
34
35 #define for_each_extended_xfeature(bit, mask) \
36 (bit) = FIRST_EXTENDED_XFEATURE; \
37 for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
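
/*
 * Usage sketch for the iterator above (illustrative only): walk every
 * enabled extended feature bit in a mask, starting at
 * FIRST_EXTENDED_XFEATURE, the way the enumeration loops below do.
 *
 *	u64 mask = fpu_kernel_cfg.max_features;
 *	unsigned int i;
 *
 *	for_each_extended_xfeature(i, mask)
 *		pr_info("x86/fpu: xfeature %u enabled\n", i);
 */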
38
39 /*
40 * Although we spell it out in here, the Processor Trace
41 * xfeature is completely unused. We use other mechanisms
42 * to save/restore PT state in Linux.
43 */
44 static const char *xfeature_names[] =
45 {
46 "x87 floating point registers",
47 "SSE registers",
48 "AVX registers",
49 "MPX bounds registers",
50 "MPX CSR",
51 "AVX-512 opmask",
52 "AVX-512 Hi256",
53 "AVX-512 ZMM_Hi256",
54 "Processor Trace (unused)",
55 "Protection Keys User registers",
56 "PASID state",
57 "Control-flow User registers",
58 "Control-flow Kernel registers (unused)",
59 "unknown xstate feature",
60 "unknown xstate feature",
61 "unknown xstate feature",
62 "unknown xstate feature",
63 "AMX Tile config",
64 "AMX Tile data",
65 "unknown xstate feature",
66 };
67
68 static unsigned short xsave_cpuid_features[] __initdata = {
69 [XFEATURE_FP] = X86_FEATURE_FPU,
70 [XFEATURE_SSE] = X86_FEATURE_XMM,
71 [XFEATURE_YMM] = X86_FEATURE_AVX,
72 [XFEATURE_BNDREGS] = X86_FEATURE_MPX,
73 [XFEATURE_BNDCSR] = X86_FEATURE_MPX,
74 [XFEATURE_OPMASK] = X86_FEATURE_AVX512F,
75 [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F,
76 [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F,
77 [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
78 [XFEATURE_PKRU] = X86_FEATURE_OSPKE,
79 [XFEATURE_PASID] = X86_FEATURE_ENQCMD,
80 [XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
81 [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
82 [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
83 };
84
85 static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
86 { [ 0 ... XFEATURE_MAX - 1] = -1};
87 static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
88 { [ 0 ... XFEATURE_MAX - 1] = -1};
89 static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
90
91 #define XSTATE_FLAG_SUPERVISOR BIT(0)
92 #define XSTATE_FLAG_ALIGNED64 BIT(1)
93
94 /*
95 * Return whether the system supports a given xfeature.
96 *
97 * Also return the name of the (most advanced) feature that the caller requested:
98 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
100 {
101 u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
102
103 if (unlikely(feature_name)) {
104 long xfeature_idx, max_idx;
105 u64 xfeatures_print;
106 /*
* Use fls64() here so that we can print the most advanced feature
* that was requested but is missing. If a driver asks about
* "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we print the missing
* AVX feature - this is the most informative message for users:
112 */
113 if (xfeatures_missing)
114 xfeatures_print = xfeatures_missing;
115 else
116 xfeatures_print = xfeatures_needed;
117
118 xfeature_idx = fls64(xfeatures_print)-1;
119 max_idx = ARRAY_SIZE(xfeature_names)-1;
120 xfeature_idx = min(xfeature_idx, max_idx);
121
122 *feature_name = xfeature_names[xfeature_idx];
123 }
124
125 if (xfeatures_missing)
126 return 0;
127
128 return 1;
129 }
130 EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
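
/*
 * Example (a sketch of a typical caller such as a driver probing for AVX;
 * not part of this file's build):
 *
 *	const char *name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &name)) {
 *		pr_info("CPU lacks required xfeature: '%s'\n", name);
 *		return -ENODEV;
 *	}
 */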
131
static bool xfeature_is_aligned64(int xfeature_nr)
133 {
134 return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
135 }
136
static bool xfeature_is_supervisor(int xfeature_nr)
138 {
139 return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
140 }
141
static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
143 {
144 unsigned int offs, i;
145
146 /*
147 * Non-compacted format and legacy features use the cached fixed
148 * offsets.
149 */
150 if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
151 xfeature <= XFEATURE_SSE)
152 return xstate_offsets[xfeature];
153
154 /*
155 * Compacted format offsets depend on the actual content of the
156 * compacted xsave area which is determined by the xcomp_bv header
157 * field.
158 */
159 offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
160 for_each_extended_xfeature(i, xcomp_bv) {
161 if (xfeature_is_aligned64(i))
162 offs = ALIGN(offs, 64);
163 if (i == xfeature)
164 break;
165 offs += xstate_sizes[i];
166 }
167 return offs;
168 }
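
/*
 * Worked example of the compacted walk above (sizes are illustrative; the
 * real values come from CPUID): the extended area starts at
 * FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576. If xcomp_bv contains YMM
 * and OPMASK, YMM sits at offset 576 and OPMASK follows at
 * 576 + xstate_sizes[XFEATURE_YMM], rounded up to the next 64-byte boundary
 * if its ALIGNED64 flag is set.
 */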
169
170 /*
171 * Enable the extended processor state save/restore feature.
172 * Called once per CPU onlining.
173 */
void fpu__init_cpu_xstate(void)
175 {
176 if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
177 return;
178
179 cr4_set_bits(X86_CR4_OSXSAVE);
180
181 /*
182 * Must happen after CR4 setup and before xsetbv() to allow KVM
183 * lazy passthrough. Write independent of the dynamic state static
184 * key as that does not work on the boot CPU. This also ensures
185 * that any stale state is wiped out from XFD. Reset the per CPU
186 * xfd cache too.
187 */
188 if (cpu_feature_enabled(X86_FEATURE_XFD))
189 xfd_set_state(init_fpstate.xfd);
190
191 /*
192 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
193 * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
194 * states can be set here.
195 */
196 xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
197
198 /*
199 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
200 */
201 if (boot_cpu_has(X86_FEATURE_XSAVES)) {
202 wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
203 xfeatures_mask_independent());
204 }
205 }
206
static bool xfeature_enabled(enum xfeature xfeature)
208 {
209 return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
210 }
211
212 /*
213 * Record the offsets and sizes of various xstates contained
214 * in the XSAVE state memory layout.
215 */
static void __init setup_xstate_cache(void)
217 {
218 u32 eax, ebx, ecx, edx, i;
219 /* start at the beginning of the "extended state" */
220 unsigned int last_good_offset = offsetof(struct xregs_state,
221 extended_state_area);
222 /*
223 * The FP xstates and SSE xstates are legacy states. They are always
224 * in the fixed offsets in the xsave area in either compacted form
225 * or standard form.
226 */
227 xstate_offsets[XFEATURE_FP] = 0;
228 xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
229 xmm_space);
230
231 xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
232 xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
233 xmm_space);
234
235 for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
236 cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
237
238 xstate_sizes[i] = eax;
239 xstate_flags[i] = ecx;
240
241 /*
242 * If an xfeature is supervisor state, the offset in EBX is
243 * invalid, leave it to -1.
244 */
245 if (xfeature_is_supervisor(i))
246 continue;
247
248 xstate_offsets[i] = ebx;
249
250 /*
251 * In our xstate size checks, we assume that the highest-numbered
252 * xstate feature has the highest offset in the buffer. Ensure
253 * it does.
254 */
255 WARN_ONCE(last_good_offset > xstate_offsets[i],
256 "x86/fpu: misordered xstate at %d\n", last_good_offset);
257
258 last_good_offset = xstate_offsets[i];
259 }
260 }
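
/*
 * For reference: CPUID(0xd, ECX=i) enumerates each extended feature 'i' as
 * consumed above - EAX is the size of the state component, EBX its offset
 * in the non-compacted format (not valid for supervisor states), and ECX
 * carries the supervisor and 64-byte alignment flags cached in
 * xstate_flags[].
 */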
261
static void __init print_xstate_feature(u64 xstate_mask)
263 {
264 const char *feature_name;
265
266 if (cpu_has_xfeatures(xstate_mask, &feature_name))
267 pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
268 }
269
270 /*
271 * Print out all the supported xstate features:
272 */
static void __init print_xstate_features(void)
274 {
275 print_xstate_feature(XFEATURE_MASK_FP);
276 print_xstate_feature(XFEATURE_MASK_SSE);
277 print_xstate_feature(XFEATURE_MASK_YMM);
278 print_xstate_feature(XFEATURE_MASK_BNDREGS);
279 print_xstate_feature(XFEATURE_MASK_BNDCSR);
280 print_xstate_feature(XFEATURE_MASK_OPMASK);
281 print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
282 print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
283 print_xstate_feature(XFEATURE_MASK_PKRU);
284 print_xstate_feature(XFEATURE_MASK_PASID);
285 print_xstate_feature(XFEATURE_MASK_CET_USER);
286 print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
287 print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
288 }
289
290 /*
291 * This check is important because it is easy to get XSTATE_*
292 * confused with XSTATE_BIT_*.
293 */
294 #define CHECK_XFEATURE(nr) do { \
295 WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \
296 WARN_ON(nr >= XFEATURE_MAX); \
297 } while (0)
298
299 /*
300 * Print out xstate component offsets and sizes
301 */
static void __init print_xstate_offset_size(void)
303 {
304 int i;
305
306 for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
307 pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
308 i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
309 i, xstate_sizes[i]);
310 }
311 }
312
313 /*
* This function is called only during boot time when x86 caps are not set
* up and alternatives cannot be used yet.
316 */
static __init void os_xrstor_booting(struct xregs_state *xstate)
318 {
319 u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
320 u32 lmask = mask;
321 u32 hmask = mask >> 32;
322 int err;
323
324 if (cpu_feature_enabled(X86_FEATURE_XSAVES))
325 XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
326 else
327 XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
328
329 /*
330 * We should never fault when copying from a kernel buffer, and the FPU
331 * state we set at boot time should be valid.
332 */
333 WARN_ON_FPU(err);
334 }
335
336 /*
337 * All supported features have either init state all zeros or are
338 * handled in setup_init_fpu() individually. This is an explicit
339 * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
340 * newly added supported features at build time and make people
341 * actually look at the init state for the new feature.
342 */
343 #define XFEATURES_INIT_FPSTATE_HANDLED \
344 (XFEATURE_MASK_FP | \
345 XFEATURE_MASK_SSE | \
346 XFEATURE_MASK_YMM | \
347 XFEATURE_MASK_OPMASK | \
348 XFEATURE_MASK_ZMM_Hi256 | \
349 XFEATURE_MASK_Hi16_ZMM | \
350 XFEATURE_MASK_PKRU | \
351 XFEATURE_MASK_BNDREGS | \
352 XFEATURE_MASK_BNDCSR | \
353 XFEATURE_MASK_PASID | \
354 XFEATURE_MASK_CET_USER | \
355 XFEATURE_MASK_XTILE)
356
357 /*
358 * setup the xstate image representing the init state
359 */
static void __init setup_init_fpu_buf(void)
361 {
362 BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
363 XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
364 XFEATURES_INIT_FPSTATE_HANDLED);
365
366 if (!boot_cpu_has(X86_FEATURE_XSAVE))
367 return;
368
369 print_xstate_features();
370
371 xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
372
373 /*
374 * Init all the features state with header.xfeatures being 0x0
375 */
376 os_xrstor_booting(&init_fpstate.regs.xsave);
377
378 /*
379 * All components are now in init state. Read the state back so
380 * that init_fpstate contains all non-zero init state. This only
381 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
382 * those use the init optimization which skips writing data for
383 * components in init state.
384 *
385 * XSAVE could be used, but that would require to reshuffle the
386 * data when XSAVEC/S is available because XSAVEC/S uses xstate
387 * compaction. But doing so is a pointless exercise because most
388 * components have an all zeros init state except for the legacy
389 * ones (FP and SSE). Those can be saved with FXSAVE into the
390 * legacy area. Adding new features requires to ensure that init
391 * state is all zeroes or if not to add the necessary handling
392 * here.
393 */
394 fxsave(&init_fpstate.regs.fxsave);
395 }
396
int xfeature_size(int xfeature_nr)
398 {
399 u32 eax, ebx, ecx, edx;
400
401 CHECK_XFEATURE(xfeature_nr);
402 cpuid_count(CPUID_LEAF_XSTATE, xfeature_nr, &eax, &ebx, &ecx, &edx);
403 return eax;
404 }
405
406 /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
static int validate_user_xstate_header(const struct xstate_header *hdr,
				       struct fpstate *fpstate)
409 {
410 /* No unknown or supervisor features may be set */
411 if (hdr->xfeatures & ~fpstate->user_xfeatures)
412 return -EINVAL;
413
414 /* Userspace must use the uncompacted format */
415 if (hdr->xcomp_bv)
416 return -EINVAL;
417
418 /*
419 * If 'reserved' is shrunken to add a new field, make sure to validate
420 * that new field here!
421 */
422 BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
423
424 /* No reserved bits may be set */
425 if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
426 return -EINVAL;
427
428 return 0;
429 }
430
static void __init __xstate_dump_leaves(void)
432 {
433 int i;
434 u32 eax, ebx, ecx, edx;
435 static int should_dump = 1;
436
437 if (!should_dump)
438 return;
439 should_dump = 0;
440 /*
441 * Dump out a few leaves past the ones that we support
442 * just in case there are some goodies up there
443 */
444 for (i = 0; i < XFEATURE_MAX + 10; i++) {
445 cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
446 pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
447 CPUID_LEAF_XSTATE, i, eax, ebx, ecx, edx);
448 }
449 }
450
451 #define XSTATE_WARN_ON(x, fmt, ...) do { \
452 if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \
453 __xstate_dump_leaves(); \
454 } \
455 } while (0)
456
457 #define XCHECK_SZ(sz, nr, __struct) ({ \
458 if (WARN_ONCE(sz != sizeof(__struct), \
459 "[%s]: struct is %zu bytes, cpu state %d bytes\n", \
460 xfeature_names[nr], sizeof(__struct), sz)) { \
461 __xstate_dump_leaves(); \
462 } \
463 true; \
464 })
465
466
467 /**
468 * check_xtile_data_against_struct - Check tile data state size.
469 *
* Calculate the state size by multiplying the single tile size, which is
* recorded in a C struct, by the number of tiles that the CPU reports.
* Compare the provided size with that calculation.
473 *
474 * @size: The tile data state size
475 *
476 * Returns: 0 on success, -EINVAL on mismatch.
477 */
static int __init check_xtile_data_against_struct(int size)
479 {
480 u32 max_palid, palid, state_size;
481 u32 eax, ebx, ecx, edx;
482 u16 max_tile;
483
484 /*
485 * Check the maximum palette id:
486 * eax: the highest numbered palette subleaf.
487 */
488 cpuid_count(CPUID_LEAF_TILE, 0, &max_palid, &ebx, &ecx, &edx);
489
490 /*
491 * Cross-check each tile size and find the maximum number of
492 * supported tiles.
493 */
494 for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
495 u16 tile_size, max;
496
497 /*
498 * Check the tile size info:
* eax[31:16]: bytes per tile
500 * ebx[31:16]: the max names (or max number of tiles)
501 */
502 cpuid_count(CPUID_LEAF_TILE, palid, &eax, &ebx, &edx, &edx);
503 tile_size = eax >> 16;
504 max = ebx >> 16;
505
506 if (tile_size != sizeof(struct xtile_data)) {
507 pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
508 __stringify(XFEATURE_XTILE_DATA),
509 sizeof(struct xtile_data), tile_size);
510 __xstate_dump_leaves();
511 return -EINVAL;
512 }
513
514 if (max > max_tile)
515 max_tile = max;
516 }
517
518 state_size = sizeof(struct xtile_data) * max_tile;
519 if (size != state_size) {
520 pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
521 __stringify(XFEATURE_XTILE_DATA), state_size, size);
522 __xstate_dump_leaves();
523 return -EINVAL;
524 }
525 return 0;
526 }
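
/*
 * Worked example, assuming the initial AMX palette (illustrative numbers):
 * a palette describing 8 tiles of 1024 bytes each yields
 * state_size = sizeof(struct xtile_data) * 8 = 8192 bytes, which must match
 * the XFEATURE_XTILE_DATA size enumerated by CPUID.
 */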
527
528 /*
529 * We have a C struct for each 'xstate'. We need to ensure
530 * that our software representation matches what the CPU
531 * tells us about the state's size.
532 */
static bool __init check_xstate_against_struct(int nr)
534 {
535 /*
536 * Ask the CPU for the size of the state.
537 */
538 int sz = xfeature_size(nr);
539
540 /*
541 * Match each CPU state with the corresponding software
542 * structure.
543 */
544 switch (nr) {
545 case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct);
546 case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
547 case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
548 case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
549 case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
550 case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
551 case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state);
552 case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
553 case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
554 case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
555 case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
556 default:
557 XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
558 return false;
559 }
560
561 return true;
562 }
563
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
565 {
566 unsigned int topmost = fls64(xfeatures) - 1;
567 unsigned int offset = xstate_offsets[topmost];
568
569 if (topmost <= XFEATURE_SSE)
570 return sizeof(struct xregs_state);
571
572 if (compacted)
573 offset = xfeature_get_offset(xfeatures, topmost);
574 return offset + xstate_sizes[topmost];
575 }
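
/*
 * Example of the calculation above: if the topmost enabled feature is
 * XFEATURE_PKRU, the non-compacted size is
 * xstate_offsets[XFEATURE_PKRU] + xstate_sizes[XFEATURE_PKRU]; with
 * compaction the offset is recomputed by xfeature_get_offset() from the
 * actual feature walk instead of the cached fixed offset.
 */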
576
577 /*
578 * This essentially double-checks what the cpu told us about
579 * how large the XSAVE buffer needs to be. We are recalculating
580 * it to be safe.
581 *
582 * Independent XSAVE features allocate their own buffers and are not
583 * covered by these checks. Only the size of the buffer for task->fpu
584 * is checked here.
585 */
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
587 {
588 bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
589 bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
590 unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
591 int i;
592
593 for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
594 if (!check_xstate_against_struct(i))
595 return false;
596 /*
597 * Supervisor state components can be managed only by
598 * XSAVES.
599 */
600 if (!xsaves && xfeature_is_supervisor(i)) {
601 XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
602 return false;
603 }
604 }
605 size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
606 XSTATE_WARN_ON(size != kernel_size,
607 "size %u != kernel_size %u\n", size, kernel_size);
608 return size == kernel_size;
609 }
610
611 /*
612 * Get total size of enabled xstates in XCR0 | IA32_XSS.
613 *
614 * Note the SDM's wording here. "sub-function 0" only enumerates
615 * the size of the *user* states. If we use it to size a buffer
616 * that we use 'XSAVES' on, we could potentially overflow the
617 * buffer because 'XSAVES' saves system states too.
618 *
619 * This also takes compaction into account. So this works for
620 * XSAVEC as well.
621 */
static unsigned int __init get_compacted_size(void)
623 {
624 unsigned int eax, ebx, ecx, edx;
625 /*
626 * - CPUID function 0DH, sub-function 1:
627 * EBX enumerates the size (in bytes) required by
628 * the XSAVES instruction for an XSAVE area
629 * containing all the state components
630 * corresponding to bits currently set in
631 * XCR0 | IA32_XSS.
632 *
633 * When XSAVES is not available but XSAVEC is (virt), then there
634 * are no supervisor states, but XSAVEC still uses compacted
635 * format.
636 */
637 cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
638 return ebx;
639 }
640
641 /*
642 * Get the total size of the enabled xstates without the independent supervisor
643 * features.
644 */
static unsigned int __init get_xsave_compacted_size(void)
646 {
647 u64 mask = xfeatures_mask_independent();
648 unsigned int size;
649
650 if (!mask)
651 return get_compacted_size();
652
653 /* Disable independent features. */
654 wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
655
656 /*
657 * Ask the hardware what size is required of the buffer.
658 * This is the size required for the task->fpu buffer.
659 */
660 size = get_compacted_size();
661
662 /* Re-enable independent features so XSAVES will work on them again. */
663 wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
664
665 return size;
666 }
667
static unsigned int __init get_xsave_size_user(void)
669 {
670 unsigned int eax, ebx, ecx, edx;
671 /*
672 * - CPUID function 0DH, sub-function 0:
673 * EBX enumerates the size (in bytes) required by
674 * the XSAVE instruction for an XSAVE area
675 * containing all the *user* state components
676 * corresponding to bits currently set in XCR0.
677 */
678 cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
679 return ebx;
680 }
681
static int __init init_xstate_size(void)
683 {
684 /* Recompute the context size for enabled features: */
685 unsigned int user_size, kernel_size, kernel_default_size;
686 bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
687
688 /* Uncompacted user space size */
689 user_size = get_xsave_size_user();
690
691 /*
692 * XSAVES kernel size includes supervisor states and uses compacted
693 * format. XSAVEC uses compacted format, but does not save
694 * supervisor states.
695 *
696 * XSAVE[OPT] do not support supervisor states so kernel and user
697 * size is identical.
698 */
699 if (compacted)
700 kernel_size = get_xsave_compacted_size();
701 else
702 kernel_size = user_size;
703
704 kernel_default_size =
705 xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
706
707 if (!paranoid_xstate_size_valid(kernel_size))
708 return -EINVAL;
709
710 fpu_kernel_cfg.max_size = kernel_size;
711 fpu_user_cfg.max_size = user_size;
712
713 fpu_kernel_cfg.default_size = kernel_default_size;
714 fpu_user_cfg.default_size =
715 xstate_calculate_size(fpu_user_cfg.default_features, false);
716
717 return 0;
718 }
719
720 /*
721 * We enabled the XSAVE hardware, but something went wrong and
722 * we can not use it. Disable it.
723 */
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
725 {
726 fpu_kernel_cfg.max_features = 0;
727 cr4_clear_bits(X86_CR4_OSXSAVE);
728 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
729
730 /* Restore the legacy size.*/
731 fpu_kernel_cfg.max_size = legacy_size;
732 fpu_kernel_cfg.default_size = legacy_size;
733 fpu_user_cfg.max_size = legacy_size;
734 fpu_user_cfg.default_size = legacy_size;
735
736 /*
737 * Prevent enabling the static branch which enables writes to the
738 * XFD MSR.
739 */
740 init_fpstate.xfd = 0;
741
fpstate_reset(&current->thread.fpu);
743 }
744
745 /*
746 * Enable and initialize the xsave feature.
747 * Called once per system bootup.
748 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
750 {
751 unsigned int eax, ebx, ecx, edx;
752 u64 xfeatures;
753 int err;
754 int i;
755
756 if (!boot_cpu_has(X86_FEATURE_FPU)) {
757 pr_info("x86/fpu: No FPU detected\n");
758 return;
759 }
760
761 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
762 pr_info("x86/fpu: x87 FPU will use %s\n",
763 boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
764 return;
765 }
766
767 /*
768 * Find user xstates supported by the processor.
769 */
770 cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
771 fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
772
773 /*
774 * Find supervisor xstates supported by the processor.
775 */
776 cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
777 fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
778
779 if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
780 /*
781 * This indicates that something really unexpected happened
782 * with the enumeration. Disable XSAVE and try to continue
783 * booting without it. This is too early to BUG().
784 */
785 pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
786 fpu_kernel_cfg.max_features);
787 goto out_disable;
788 }
789
790 fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
791 XFEATURE_MASK_INDEPENDENT;
792
793 /*
794 * Clear XSAVE features that are disabled in the normal CPUID.
795 */
796 for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
797 unsigned short cid = xsave_cpuid_features[i];
798
799 /* Careful: X86_FEATURE_FPU is 0! */
800 if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
801 fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
802 }
803
804 if (!cpu_feature_enabled(X86_FEATURE_XFD))
805 fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
806
807 if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
808 fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
809 else
810 fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
811 XFEATURE_MASK_SUPERVISOR_SUPPORTED;
812
813 fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
814 fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
815
816 /* Clean out dynamic features from default */
817 fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
818 fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
819
820 fpu_user_cfg.default_features = fpu_user_cfg.max_features;
821 fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
822
823 /* Store it for paranoia check at the end */
824 xfeatures = fpu_kernel_cfg.max_features;
825
826 /*
827 * Initialize the default XFD state in initfp_state and enable the
828 * dynamic sizing mechanism if dynamic states are available. The
829 * static key cannot be enabled here because this runs before
830 * jump_label_init(). This is delayed to an initcall.
831 */
832 init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
833
834 /* Set up compaction feature bit */
835 if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
836 cpu_feature_enabled(X86_FEATURE_XSAVES))
837 setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
838
839 /* Enable xstate instructions to be able to continue with initialization: */
840 fpu__init_cpu_xstate();
841
842 /* Cache size, offset and flags for initialization */
843 setup_xstate_cache();
844
845 err = init_xstate_size();
846 if (err)
847 goto out_disable;
848
849 /* Reset the state for the current task */
fpstate_reset(&current->thread.fpu);
851
852 /*
853 * Update info used for ptrace frames; use standard-format size and no
854 * supervisor xstates:
855 */
856 update_regset_xstate_info(fpu_user_cfg.max_size,
857 fpu_user_cfg.max_features);
858
859 /*
860 * init_fpstate excludes dynamic states as they are large but init
861 * state is zero.
862 */
863 init_fpstate.size = fpu_kernel_cfg.default_size;
864 init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
865
866 if (init_fpstate.size > sizeof(init_fpstate.regs)) {
867 pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
868 sizeof(init_fpstate.regs), init_fpstate.size);
869 goto out_disable;
870 }
871
872 setup_init_fpu_buf();
873
874 /*
875 * Paranoia check whether something in the setup modified the
876 * xfeatures mask.
877 */
878 if (xfeatures != fpu_kernel_cfg.max_features) {
879 pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
880 xfeatures, fpu_kernel_cfg.max_features);
881 goto out_disable;
882 }
883
884 /*
885 * CPU capabilities initialization runs before FPU init. So
886 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
887 * functional, set the feature bit so depending code works.
888 */
889 setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
890
891 print_xstate_offset_size();
892 pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
893 fpu_kernel_cfg.max_features,
894 fpu_kernel_cfg.max_size,
895 boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
896 return;
897
898 out_disable:
899 /* something went wrong, try to boot without any XSAVE support */
900 fpu__init_disable_system_xstate(legacy_size);
901 }
902
903 /*
904 * Restore minimal FPU state after suspend:
905 */
void fpu__resume_cpu(void)
907 {
908 /*
909 * Restore XCR0 on xsave capable CPUs:
910 */
911 if (cpu_feature_enabled(X86_FEATURE_XSAVE))
912 xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
913
914 /*
915 * Restore IA32_XSS. The same CPUID bit enumerates support
916 * of XSAVES and MSR_IA32_XSS.
917 */
918 if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
919 wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
920 xfeatures_mask_independent());
921 }
922
923 if (fpu_state_size_dynamic())
924 wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
925 }
926
927 /*
928 * Given an xstate feature nr, calculate where in the xsave
929 * buffer the state is. Callers should ensure that the buffer
930 * is valid.
931 */
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
933 {
934 u64 xcomp_bv = xsave->header.xcomp_bv;
935
936 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
937 return NULL;
938
939 if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
940 if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
941 return NULL;
942 }
943
944 return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
945 }
946
947 /*
948 * Given the xsave area and a state inside, this function returns the
949 * address of the state.
950 *
951 * This is the API that is called to get xstate address in either
952 * standard format or compacted format of xsave area.
953 *
954 * Note that if there is no data for the field in the xsave buffer
955 * this will return NULL.
956 *
957 * Inputs:
958 * xstate: the thread's storage area for all FPU data
959 * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
960 * XFEATURE_SSE, etc...)
961 * Output:
962 * address of the state in the xsave area, or NULL if the
963 * field is not present in the xsave buffer.
964 */
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
966 {
967 /*
968 * Do we even *have* xsave state?
969 */
970 if (!boot_cpu_has(X86_FEATURE_XSAVE))
971 return NULL;
972
973 /*
974 * We should not ever be requesting features that we
975 * have not enabled.
976 */
977 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
978 return NULL;
979
980 /*
981 * This assumes the last 'xsave*' instruction to
982 * have requested that 'xfeature_nr' be saved.
* If it did not, we might be seeing an old value
984 * of the field in the buffer.
985 *
986 * This can happen because the last 'xsave' did not
987 * request that this feature be saved (unlikely)
988 * or because the "init optimization" caused it
989 * to not be saved.
990 */
991 if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
992 return NULL;
993
994 return __raw_xsave_addr(xsave, xfeature_nr);
995 }
996 EXPORT_SYMBOL_GPL(get_xsave_addr);
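
/*
 * Usage sketch (illustrative only): given a struct fpu *fpu, read a single
 * component out of its saved buffer, treating a NULL return as "component
 * is in its all-zeros init state".
 *
 *	struct pkru_state *pk;
 *	u32 pkru_val = 0;
 *
 *	pk = get_xsave_addr(&fpu->fpstate->regs.xsave, XFEATURE_PKRU);
 *	if (pk)
 *		pkru_val = pk->pkru;
 */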
997
998 /*
999 * Given an xstate feature nr, calculate where in the xsave buffer the state is.
1000 * The xsave buffer should be in standard format, not compacted (e.g. user mode
1001 * signal frames).
1002 */
void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
1004 {
1005 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
1006 return NULL;
1007
1008 return (void __user *)xsave + xstate_offsets[xfeature_nr];
1009 }
1010
1011 #ifdef CONFIG_ARCH_HAS_PKEYS
1012
1013 /*
1014 * This will go out and modify PKRU register to set the access
1015 * rights for @pkey to @init_val.
1016 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val)
1019 {
1020 u32 old_pkru, new_pkru_bits = 0;
1021 int pkey_shift;
1022
1023 /*
1024 * This check implies XSAVE support. OSPKE only gets
1025 * set if we enable XSAVE and we enable PKU in XCR0.
1026 */
1027 if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
1028 return -EINVAL;
1029
1030 /*
1031 * This code should only be called with valid 'pkey'
1032 * values originating from in-kernel users. Complain
1033 * if a bad value is observed.
1034 */
1035 if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
1036 return -EINVAL;
1037
1038 /* Set the bits we need in PKRU: */
1039 if (init_val & PKEY_DISABLE_ACCESS)
1040 new_pkru_bits |= PKRU_AD_BIT;
1041 if (init_val & PKEY_DISABLE_WRITE)
1042 new_pkru_bits |= PKRU_WD_BIT;
1043
1044 /* Shift the bits in to the correct place in PKRU for pkey: */
1045 pkey_shift = pkey * PKRU_BITS_PER_PKEY;
1046 new_pkru_bits <<= pkey_shift;
1047
1048 /* Get old PKRU and mask off any old bits in place: */
1049 old_pkru = read_pkru();
1050 old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
1051
1052 /* Write old part along with new part: */
1053 write_pkru(old_pkru | new_pkru_bits);
1054
1055 return 0;
1056 }
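
/*
 * Worked example of the bit placement above: PKRU_BITS_PER_PKEY is 2, so
 * for pkey 2 with PKEY_DISABLE_WRITE the write-disable bit lands at
 * PKRU_WD_BIT << (2 * 2), i.e. bit 5 of PKRU, and the old AD/WD pair at
 * bits 4-5 is cleared before the new value is merged in.
 */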
1057 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
1058
static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
1061 {
1062 membuf_write(to, from_xstate ? xstate : init_xstate, size);
1063 }
1064
1065 /**
1066 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1067 * @to: membuf descriptor
1068 * @fpstate: The fpstate buffer from which to copy
1069 * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
1070 * @pkru_val: The PKRU value to store in the PKRU component
1071 * @copy_mode: The requested copy mode
1072 *
1073 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1074 * format, i.e. from the kernel internal hardware dependent storage format
1075 * to the requested @mode. UABI XSTATE is always uncompacted!
1076 *
1077 * It supports partial copy but @to.pos always starts from zero.
1078 */
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
1082 {
1083 const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
1084 struct xregs_state *xinit = &init_fpstate.regs.xsave;
1085 struct xregs_state *xsave = &fpstate->regs.xsave;
1086 struct xstate_header header;
1087 unsigned int zerofrom;
1088 u64 mask;
1089 int i;
1090
1091 memset(&header, 0, sizeof(header));
1092 header.xfeatures = xsave->header.xfeatures;
1093
1094 /* Mask out the feature bits depending on copy mode */
1095 switch (copy_mode) {
1096 case XSTATE_COPY_FP:
1097 header.xfeatures &= XFEATURE_MASK_FP;
1098 break;
1099
1100 case XSTATE_COPY_FX:
1101 header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
1102 break;
1103
1104 case XSTATE_COPY_XSAVE:
1105 header.xfeatures &= fpstate->user_xfeatures & xfeatures;
1106 break;
1107 }
1108
1109 /* Copy FP state up to MXCSR */
1110 copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
1111 &xinit->i387, off_mxcsr);
1112
1113 /* Copy MXCSR when SSE or YMM are set in the feature mask */
1114 copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
1115 &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
1116 MXCSR_AND_FLAGS_SIZE);
1117
1118 /* Copy the remaining FP state */
1119 copy_feature(header.xfeatures & XFEATURE_MASK_FP,
1120 &to, &xsave->i387.st_space, &xinit->i387.st_space,
1121 sizeof(xsave->i387.st_space));
1122
1123 /* Copy the SSE state - shared with YMM, but independently managed */
1124 copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
1125 &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
1126 sizeof(xsave->i387.xmm_space));
1127
1128 if (copy_mode != XSTATE_COPY_XSAVE)
1129 goto out;
1130
1131 /* Zero the padding area */
1132 membuf_zero(&to, sizeof(xsave->i387.padding));
1133
1134 /* Copy xsave->i387.sw_reserved */
1135 membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
1136
1137 /* Copy the user space relevant state of @xsave->header */
1138 membuf_write(&to, &header, sizeof(header));
1139
1140 zerofrom = offsetof(struct xregs_state, extended_state_area);
1141
1142 /*
1143 * This 'mask' indicates which states to copy from fpstate.
1144 * Those extended states that are not present in fpstate are
1145 * either disabled or initialized:
1146 *
1147 * In non-compacted format, disabled features still occupy
1148 * state space but there is no state to copy from in the
1149 * compacted init_fpstate. The gap tracking will zero these
1150 * states.
1151 *
1152 * The extended features have an all zeroes init state. Thus,
1153 * remove them from 'mask' to zero those features in the user
1154 * buffer instead of retrieving them from init_fpstate.
1155 */
1156 mask = header.xfeatures;
1157
1158 for_each_extended_xfeature(i, mask) {
1159 /*
1160 * If there was a feature or alignment gap, zero the space
1161 * in the destination buffer.
1162 */
1163 if (zerofrom < xstate_offsets[i])
1164 membuf_zero(&to, xstate_offsets[i] - zerofrom);
1165
1166 if (i == XFEATURE_PKRU) {
1167 struct pkru_state pkru = {0};
1168 /*
1169 * PKRU is not necessarily up to date in the
1170 * XSAVE buffer. Use the provided value.
1171 */
1172 pkru.pkru = pkru_val;
1173 membuf_write(&to, &pkru, sizeof(pkru));
1174 } else {
1175 membuf_write(&to,
1176 __raw_xsave_addr(xsave, i),
1177 xstate_sizes[i]);
1178 }
1179 /*
1180 * Keep track of the last copied state in the non-compacted
1181 * target buffer for gap zeroing.
1182 */
1183 zerofrom = xstate_offsets[i] + xstate_sizes[i];
1184 }
1185
1186 out:
1187 if (to.left)
1188 membuf_zero(&to, to.left);
1189 }
1190
1191 /**
1192 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1193 * @to: membuf descriptor
1194 * @tsk: The task from which to copy the saved xstate
1195 * @copy_mode: The requested copy mode
1196 *
1197 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1198 * format, i.e. from the kernel internal hardware dependent storage format
1199 * to the requested @mode. UABI XSTATE is always uncompacted!
1200 *
1201 * It supports partial copy but @to.pos always starts from zero.
1202 */
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
1205 {
1206 __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
1207 tsk->thread.fpu.fpstate->user_xfeatures,
1208 tsk->thread.pkru, copy_mode);
1209 }
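
/*
 * Usage sketch (illustrative; the regset core normally provides the membuf):
 * export a task's xstate in UABI format into a caller-supplied buffer of
 * buf_size bytes.
 *
 *	struct membuf to = { .p = buf, .left = buf_size };
 *
 *	copy_xstate_to_uabi_buf(to, task, XSTATE_COPY_XSAVE);
 */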
1210
static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
1213 {
1214 if (kbuf) {
1215 memcpy(dst, kbuf + offset, size);
1216 } else {
1217 if (copy_from_user(dst, ubuf + offset, size))
1218 return -EFAULT;
1219 }
1220 return 0;
1221 }
1222
1223
1224 /**
1225 * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
1226 * @fpstate: The fpstate buffer to copy to
1227 * @kbuf: The UABI format buffer, if it comes from the kernel
1228 * @ubuf: The UABI format buffer, if it comes from userspace
1229 * @pkru: The location to write the PKRU value to
1230 *
1231 * Converts from the UABI format into the kernel internal hardware
1232 * dependent format.
1233 *
1234 * This function ultimately has three different callers with distinct PKRU
1235 * behavior.
1236 * 1. When called from sigreturn the PKRU register will be restored from
1237 * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to
1238 * @fpstate is sufficient to cover this case, but the caller will also
1239 * pass a pointer to the thread_struct's pkru field in @pkru and updating
1240 * it is harmless.
1241 * 2. When called from ptrace the PKRU register will be restored from the
1242 * thread_struct's pkru field. A pointer to that is passed in @pkru.
1243 * The kernel will restore it manually, so the XRSTOR behavior that resets
1244 * the PKRU register to the hardware init value (0) if the corresponding
1245 * xfeatures bit is not set is emulated here.
1246 * 3. When called from KVM the PKRU register will be restored from the vcpu's
1247 * pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1248 * XRSTOR and hasn't had the PKRU resetting behavior described above. To
1249 * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1250 * bit is not set.
1251 */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf, u32 *pkru)
1254 {
1255 struct xregs_state *xsave = &fpstate->regs.xsave;
1256 unsigned int offset, size;
1257 struct xstate_header hdr;
1258 u64 mask;
1259 int i;
1260
1261 offset = offsetof(struct xregs_state, header);
1262 if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
1263 return -EFAULT;
1264
1265 if (validate_user_xstate_header(&hdr, fpstate))
1266 return -EINVAL;
1267
1268 /* Validate MXCSR when any of the related features is in use */
1269 mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
1270 if (hdr.xfeatures & mask) {
1271 u32 mxcsr[2];
1272
1273 offset = offsetof(struct fxregs_state, mxcsr);
1274 if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
1275 return -EFAULT;
1276
1277 /* Reserved bits in MXCSR must be zero. */
1278 if (mxcsr[0] & ~mxcsr_feature_mask)
1279 return -EINVAL;
1280
1281 /* SSE and YMM require MXCSR even when FP is not in use. */
1282 if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
1283 xsave->i387.mxcsr = mxcsr[0];
1284 xsave->i387.mxcsr_mask = mxcsr[1];
1285 }
1286 }
1287
1288 for (i = 0; i < XFEATURE_MAX; i++) {
1289 mask = BIT_ULL(i);
1290
1291 if (hdr.xfeatures & mask) {
1292 void *dst = __raw_xsave_addr(xsave, i);
1293
1294 offset = xstate_offsets[i];
1295 size = xstate_sizes[i];
1296
1297 if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
1298 return -EFAULT;
1299 }
1300 }
1301
1302 if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
1303 struct pkru_state *xpkru;
1304
1305 xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
1306 *pkru = xpkru->pkru;
1307 } else {
1308 /*
1309 * KVM may pass NULL here to indicate that it does not need
1310 * PKRU updated.
1311 */
1312 if (pkru)
1313 *pkru = 0;
1314 }
1315
1316 /*
1317 * The state that came in from userspace was user-state only.
1318 * Mask all the user states out of 'xfeatures':
1319 */
1320 xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
1321
1322 /*
1323 * Add back in the features that came in from userspace:
1324 */
1325 xsave->header.xfeatures |= hdr.xfeatures;
1326
1327 return 0;
1328 }
1329
1330 /*
1331 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
1332 * format and copy to the target thread. Used by ptrace and KVM.
1333 */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
1335 {
1336 return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
1337 }
1338
1339 /*
1340 * Convert from a sigreturn standard-format user-space buffer to kernel
1341 * XSAVE[S] format and copy to the target thread. This is called from the
1342 * sigreturn() and rt_sigreturn() system calls.
1343 */
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
				      const void __user *ubuf)
1346 {
1347 return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
1348 }
1349
static bool validate_independent_components(u64 mask)
1351 {
1352 u64 xchk;
1353
1354 if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1355 return false;
1356
1357 xchk = ~xfeatures_mask_independent();
1358
1359 if (WARN_ON_ONCE(!mask || mask & xchk))
1360 return false;
1361
1362 return true;
1363 }
1364
1365 /**
1366 * xsaves - Save selected components to a kernel xstate buffer
1367 * @xstate: Pointer to the buffer
1368 * @mask: Feature mask to select the components to save
1369 *
1370 * The @xstate buffer must be 64 byte aligned and correctly initialized as
1371 * XSAVES does not write the full xstate header. Before first use the
1372 * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1373 * can #GP.
1374 *
1375 * The feature mask must be a subset of the independent features.
1376 */
void xsaves(struct xregs_state *xstate, u64 mask)
1378 {
1379 int err;
1380
1381 if (!validate_independent_components(mask))
1382 return;
1383
1384 XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
1385 WARN_ON_ONCE(err);
1386 }
1387
1388 /**
1389 * xrstors - Restore selected components from a kernel xstate buffer
1390 * @xstate: Pointer to the buffer
1391 * @mask: Feature mask to select the components to restore
1392 *
1393 * The @xstate buffer must be 64 byte aligned and correctly initialized
1394 * otherwise XRSTORS from that buffer can #GP.
1395 *
1396 * Proper usage is to restore the state which was saved with
1397 * xsaves() into @xstate.
1398 *
1399 * The feature mask must be a subset of the independent features.
1400 */
void xrstors(struct xregs_state *xstate, u64 mask)
1402 {
1403 int err;
1404
1405 if (!validate_independent_components(mask))
1406 return;
1407
1408 XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
1409 WARN_ON_ONCE(err);
1410 }
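
/*
 * Usage sketch for the two helpers above (illustrative, assuming
 * XFEATURE_MASK_LBR is among the independent features): save and later
 * restore the LBR component into a dedicated, zeroed, 64-byte aligned
 * buffer 'xstate'.
 *
 *	xsaves(xstate, XFEATURE_MASK_LBR);
 *	...
 *	xrstors(xstate, XFEATURE_MASK_LBR);
 */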
1411
1412 #if IS_ENABLED(CONFIG_KVM)
void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1414 {
1415 void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1416
1417 if (addr)
1418 memset(addr, 0, xstate_sizes[xfeature]);
1419 }
1420 EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1421 #endif
1422
1423 #ifdef CONFIG_X86_64
1424
1425 #ifdef CONFIG_X86_DEBUG_FPU
1426 /*
1427 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
1428 * can safely operate on the @fpstate buffer.
1429 */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
1431 {
1432 u64 xfd = __this_cpu_read(xfd_state);
1433
1434 if (fpstate->xfd == xfd)
1435 return true;
1436
1437 /*
1438 * The XFD MSR does not match fpstate->xfd. That's invalid when
1439 * the passed in fpstate is current's fpstate.
1440 */
1441 if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
1442 return false;
1443
1444 /*
1445 * XRSTOR(S) from init_fpstate are always correct as it will just
1446 * bring all components into init state and not read from the
1447 * buffer. XSAVE(S) raises #PF after init.
1448 */
1449 if (fpstate == &init_fpstate)
1450 return rstor;
1451
1452 /*
1453 * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
1454 * XRSTORS(S): fpu_swap_kvm_fpstate()
1455 */
1456
1457 /*
1458 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
1459 * the buffer area for XFD-disabled state components.
1460 */
1461 mask &= ~xfd;
1462
1463 /*
1464 * Remove features which are valid in fpstate. They
1465 * have space allocated in fpstate.
1466 */
1467 mask &= ~fpstate->xfeatures;
1468
1469 /*
1470 * Any remaining state components in 'mask' might be written
* by XSAVE/XRSTOR. Fail validation if any are found.
1472 */
1473 return !mask;
1474 }
1475
void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
1477 {
1478 WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
1479 }
1480 #endif /* CONFIG_X86_DEBUG_FPU */
1481
static int __init xfd_update_static_branch(void)
1483 {
1484 /*
1485 * If init_fpstate.xfd has bits set then dynamic features are
1486 * available and the dynamic sizing must be enabled.
1487 */
1488 if (init_fpstate.xfd)
1489 static_branch_enable(&__fpu_state_size_dynamic);
1490 return 0;
1491 }
arch_initcall(xfd_update_static_branch)
1493
1494 void fpstate_free(struct fpu *fpu)
1495 {
1496 if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1497 vfree(fpu->fpstate);
1498 }
1499
1500 /**
1501 * fpstate_realloc - Reallocate struct fpstate for the requested new features
1502 *
1503 * @xfeatures: A bitmap of xstate features which extend the enabled features
1504 * of that task
1505 * @ksize: The required size for the kernel buffer
1506 * @usize: The required size for user space buffers
1507 * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations
1508 *
1509 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
1510 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
1511 * with large states are likely to live longer.
1512 *
1513 * Returns: 0 on success, -ENOMEM on allocation error.
1514 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
struct fpu *fpu = &current->thread.fpu;
1519 struct fpstate *curfps, *newfps = NULL;
1520 unsigned int fpsize;
1521 bool in_use;
1522
1523 fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
1524
1525 newfps = vzalloc(fpsize);
1526 if (!newfps)
1527 return -ENOMEM;
1528 newfps->size = ksize;
1529 newfps->user_size = usize;
1530 newfps->is_valloc = true;
1531
1532 /*
1533 * When a guest FPU is supplied, use @guest_fpu->fpstate
* as the reference, independent of whether it is in use or not.
1535 */
1536 curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
1537
1538 /* Determine whether @curfps is the active fpstate */
1539 in_use = fpu->fpstate == curfps;
1540
1541 if (guest_fpu) {
1542 newfps->is_guest = true;
1543 newfps->is_confidential = curfps->is_confidential;
1544 newfps->in_use = curfps->in_use;
1545 guest_fpu->xfeatures |= xfeatures;
1546 guest_fpu->uabi_size = usize;
1547 }
1548
1549 fpregs_lock();
1550 /*
1551 * If @curfps is in use, ensure that the current state is in the
1552 * registers before swapping fpstate as that might invalidate it
1553 * due to layout changes.
1554 */
1555 if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
1556 fpregs_restore_userregs();
1557
1558 newfps->xfeatures = curfps->xfeatures | xfeatures;
1559 newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
1560 newfps->xfd = curfps->xfd & ~xfeatures;
1561
1562 /* Do the final updates within the locked region */
1563 xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
1564
1565 if (guest_fpu) {
1566 guest_fpu->fpstate = newfps;
1567 /* If curfps is active, update the FPU fpstate pointer */
1568 if (in_use)
1569 fpu->fpstate = newfps;
1570 } else {
1571 fpu->fpstate = newfps;
1572 }
1573
1574 if (in_use)
1575 xfd_update_state(fpu->fpstate);
1576 fpregs_unlock();
1577
1578 /* Only free valloc'ed state */
1579 if (curfps && curfps->is_valloc)
1580 vfree(curfps);
1581
1582 return 0;
1583 }
1584
static int validate_sigaltstack(unsigned int usize)
1586 {
1587 struct task_struct *thread, *leader = current->group_leader;
1588 unsigned long framesize = get_sigframe_size();
1589
lockdep_assert_held(&current->sighand->siglock);
1591
1592 /* get_sigframe_size() is based on fpu_user_cfg.max_size */
1593 framesize -= fpu_user_cfg.max_size;
1594 framesize += usize;
1595 for_each_thread(leader, thread) {
1596 if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1597 return -ENOSPC;
1598 }
1599 return 0;
1600 }
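
/*
 * Worked example with made-up numbers: if get_sigframe_size() is currently
 * 8k with fpu_user_cfg.max_size at 3k, and the requested user state size is
 * 11k, the prospective frame grows to 16k and any thread whose registered
 * sigaltstack is smaller than that fails the permission request with
 * -ENOSPC.
 */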
1601
static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
1603 {
1604 /*
1605 * This deliberately does not exclude !XSAVES as we still might
1606 * decide to optionally context switch XCR0 or talk the silicon
1607 * vendors into extending XFD for the pre AMX states, especially
1608 * AVX512.
1609 */
1610 bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
struct fpu *fpu = &current->group_leader->thread.fpu;
1612 struct fpu_state_perm *perm;
1613 unsigned int ksize, usize;
1614 u64 mask;
1615 int ret = 0;
1616
1617 /* Check whether fully enabled */
1618 if ((permitted & requested) == requested)
1619 return 0;
1620
1621 /* Calculate the resulting kernel state size */
1622 mask = permitted | requested;
1623 /* Take supervisor states into account on the host */
1624 if (!guest)
1625 mask |= xfeatures_mask_supervisor();
1626 ksize = xstate_calculate_size(mask, compacted);
1627
1628 /* Calculate the resulting user state size */
1629 mask &= XFEATURE_MASK_USER_SUPPORTED;
1630 usize = xstate_calculate_size(mask, false);
1631
1632 if (!guest) {
1633 ret = validate_sigaltstack(usize);
1634 if (ret)
1635 return ret;
1636 }
1637
1638 perm = guest ? &fpu->guest_perm : &fpu->perm;
1639 /* Pairs with the READ_ONCE() in xstate_get_group_perm() */
1640 WRITE_ONCE(perm->__state_perm, mask);
1641 /* Protected by sighand lock */
1642 perm->__state_size = ksize;
1643 perm->__user_state_size = usize;
1644 return ret;
1645 }
1646
1647 /*
1648 * Permissions array to map facilities with more than one component
1649 */
1650 static const u64 xstate_prctl_req[XFEATURE_MAX] = {
1651 [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
1652 };
1653
static int xstate_request_perm(unsigned long idx, bool guest)
1655 {
1656 u64 permitted, requested;
1657 int ret;
1658
1659 if (idx >= XFEATURE_MAX)
1660 return -EINVAL;
1661
1662 /*
1663 * Look up the facility mask which can require more than
1664 * one xstate component.
1665 */
1666 idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
1667 requested = xstate_prctl_req[idx];
1668 if (!requested)
1669 return -EOPNOTSUPP;
1670
1671 if ((fpu_user_cfg.max_features & requested) != requested)
1672 return -EOPNOTSUPP;
1673
1674 /* Lockless quick check */
1675 permitted = xstate_get_group_perm(guest);
1676 if ((permitted & requested) == requested)
1677 return 0;
1678
1679 /* Protect against concurrent modifications */
spin_lock_irq(&current->sighand->siglock);
1681 permitted = xstate_get_group_perm(guest);
1682
1683 /* First vCPU allocation locks the permissions. */
1684 if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
1685 ret = -EBUSY;
1686 else
1687 ret = __xstate_request_perm(permitted, requested, guest);
spin_unlock_irq(&current->sighand->siglock);
1689 return ret;
1690 }
1691
int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
1693 {
1694 u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
1695 struct fpu_state_perm *perm;
1696 unsigned int ksize, usize;
1697 struct fpu *fpu;
1698
1699 if (!xfd_event) {
1700 if (!guest_fpu)
1701 pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
1702 return 0;
1703 }
1704
1705 /* Protect against concurrent modifications */
spin_lock_irq(&current->sighand->siglock);
1707
1708 /* If not permitted let it die */
1709 if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
spin_unlock_irq(&current->sighand->siglock);
1711 return -EPERM;
1712 }
1713
fpu = &current->group_leader->thread.fpu;
1715 perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
1716 ksize = perm->__state_size;
1717 usize = perm->__user_state_size;
1718
1719 /*
* The feature is permitted. State size is sufficient. Dropping
* the lock is safe here even if more features are added from
* another task; the retrieved buffer sizes are valid for the
* currently requested feature(s).
1724 */
spin_unlock_irq(&current->sighand->siglock);
1726
1727 /*
1728 * Try to allocate a new fpstate. If that fails there is no way
1729 * out.
1730 */
1731 if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
1732 return -EFAULT;
1733 return 0;
1734 }
1735
int xfd_enable_feature(u64 xfd_err)
1737 {
1738 return __xfd_enable_feature(xfd_err, NULL);
1739 }
1740
1741 #else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
1743 {
1744 return -EPERM;
1745 }
1746 #endif /* !CONFIG_X86_64 */
1747
u64 xstate_get_guest_group_perm(void)
1749 {
1750 return xstate_get_group_perm(true);
1751 }
1752 EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1753
1754 /**
1755 * fpu_xstate_prctl - xstate permission operations
1756 * @option: A subfunction of arch_prctl()
1757 * @arg2: option argument
1758 * Return: 0 if successful; otherwise, an error code
1759 *
1760 * Option arguments:
1761 *
1762 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1763 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1764 * ARCH_REQ_XCOMP_PERM: Facility number requested
1765 *
1766 * For facilities which require more than one XSTATE component, the request
1767 * must be the highest state component number related to that facility,
1768 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1769 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1770 */
long fpu_xstate_prctl(int option, unsigned long arg2)
1772 {
1773 u64 __user *uptr = (u64 __user *)arg2;
1774 u64 permitted, supported;
1775 unsigned long idx = arg2;
1776 bool guest = false;
1777
1778 switch (option) {
1779 case ARCH_GET_XCOMP_SUPP:
1780 supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
1781 return put_user(supported, uptr);
1782
1783 case ARCH_GET_XCOMP_PERM:
1784 /*
* Lockless snapshot as it can also change right after
* dropping the lock.
1787 */
1788 permitted = xstate_get_host_group_perm();
1789 permitted &= XFEATURE_MASK_USER_SUPPORTED;
1790 return put_user(permitted, uptr);
1791
1792 case ARCH_GET_XCOMP_GUEST_PERM:
1793 permitted = xstate_get_guest_group_perm();
1794 permitted &= XFEATURE_MASK_USER_SUPPORTED;
1795 return put_user(permitted, uptr);
1796
1797 case ARCH_REQ_XCOMP_GUEST_PERM:
1798 guest = true;
1799 fallthrough;
1800
1801 case ARCH_REQ_XCOMP_PERM:
1802 if (!IS_ENABLED(CONFIG_X86_64))
1803 return -EOPNOTSUPP;
1804
1805 return xstate_request_perm(idx, guest);
1806
1807 default:
1808 return -EINVAL;
1809 }
1810 }
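
/*
 * Userspace view of the above (illustrative only, not kernel code): a task
 * that wants to use AMX tile data requests permission for the highest
 * component of the facility, i.e. component number 18 (XFEATURE_XTILE_DATA),
 * before touching the TMM registers:
 *
 *	syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, 18);
 */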
1811
1812 #ifdef CONFIG_PROC_PID_ARCH_STATUS
1813 /*
* Report the amount of time, in milliseconds, elapsed since the last
* AVX512 use by the task.
1816 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
1818 {
1819 unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
1820 long delta;
1821
1822 if (!timestamp) {
1823 /*
1824 * Report -1 if no AVX512 usage
1825 */
1826 delta = -1;
1827 } else {
1828 delta = (long)(jiffies - timestamp);
1829 /*
1830 * Cap to LONG_MAX if time difference > LONG_MAX
1831 */
1832 if (delta < 0)
1833 delta = LONG_MAX;
1834 delta = jiffies_to_msecs(delta);
1835 }
1836
1837 seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1838 seq_putc(m, '\n');
1839 }
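
/*
 * Example /proc/<pid>/arch_status output produced above (the value is
 * illustrative); -1 means the task never used AVX512:
 *
 *	AVX512_elapsed_ms:	120
 */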
1840
1841 /*
1842 * Report architecture specific information
1843 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
1846 {
1847 /*
* Report AVX512 state if the processor and the build option support it.
1849 */
1850 if (cpu_feature_enabled(X86_FEATURE_AVX512F))
1851 avx512_status(m, task);
1852
1853 return 0;
1854 }
1855 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
1856
1857 #ifdef CONFIG_COREDUMP
1858 static const char owner_name[] = "LINUX";
1859
1860 /*
1861 * Dump type, size, offset and flag values for every xfeature that is present.
1862 */
static int dump_xsave_layout_desc(struct coredump_params *cprm)
1864 {
1865 int num_records = 0;
1866 int i;
1867
1868 for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
1869 struct x86_xfeat_component xc = {
1870 .type = i,
1871 .size = xstate_sizes[i],
1872 .offset = xstate_offsets[i],
1873 /* reserved for future use */
1874 .flags = 0,
1875 };
1876
1877 if (!dump_emit(cprm, &xc, sizeof(xc)))
1878 return 0;
1879
1880 num_records++;
1881 }
1882 return num_records;
1883 }
1884
static u32 get_xsave_desc_size(void)
1886 {
1887 u32 cnt = 0;
1888 u32 i;
1889
1890 for_each_extended_xfeature(i, fpu_user_cfg.max_features)
1891 cnt++;
1892
1893 return cnt * (sizeof(struct x86_xfeat_component));
1894 }
1895
int elf_coredump_extra_notes_write(struct coredump_params *cprm)
1897 {
1898 int num_records = 0;
1899 struct elf_note en;
1900
1901 if (!fpu_user_cfg.max_features)
1902 return 0;
1903
1904 en.n_namesz = sizeof(owner_name);
1905 en.n_descsz = get_xsave_desc_size();
1906 en.n_type = NT_X86_XSAVE_LAYOUT;
1907
1908 if (!dump_emit(cprm, &en, sizeof(en)))
1909 return 1;
1910 if (!dump_emit(cprm, owner_name, en.n_namesz))
1911 return 1;
1912 if (!dump_align(cprm, 4))
1913 return 1;
1914
1915 num_records = dump_xsave_layout_desc(cprm);
1916 if (!num_records)
1917 return 1;
1918
/* The pre-computed descriptor size must match the number of records actually emitted */
1920 if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
1921 return 1;
1922
1923 return 0;
1924 }
1925
int elf_coredump_extra_notes_size(void)
1927 {
1928 int size;
1929
1930 if (!fpu_user_cfg.max_features)
1931 return 0;
1932
1933 /* .note header */
1934 size = sizeof(struct elf_note);
1935 /* Name plus alignment to 4 bytes */
1936 size += roundup(sizeof(owner_name), 4);
1937 size += get_xsave_desc_size();
1938
1939 return size;
1940 }
1941 #endif /* CONFIG_COREDUMP */
1942