/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman  <[email protected]>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function; in particular
 * there must be a plain RET and not a jump to the return thunk.
 */
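
/*
 * For reference, a rough sketch of the C-side view of this entry point,
 * matching the register assignments documented at relocate_kernel below
 * (System V AMD64: %rdi, %rsi, %rdx, %rcx, %r8). The exact declaration
 * lives in the arch headers and may differ in spelling and types:
 *
 *	unsigned long relocate_kernel(unsigned long indirection_page,
 *				      unsigned long pa_control_page,
 *				      unsigned long start_address,
 *				      unsigned int preserve_context,
 *				      unsigned int host_mem_enc_active);
 *
 * The return value is the re-entry point handed back in %rax by
 * virtual_mapped() on the preserve_context path.
 */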

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */
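
/*
 * Rough layout of the control page after the copy (a sketch only; the
 * actual section sizes and ordering come from the linker script):
 *
 *	+0x000		.text..relocate_kernel	(this code)
 *			.data..relocate_kernel	(the variables below)
 *			...unused...
 *	+PAGE_SIZE	<- initial %rsp ("lea PAGE_SIZE(%rsi), %rsp")
 *
 * The stack grows down from the end of the page into the unused space.
 */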

	.section .data..relocate_kernel,"a";
/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
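
/*
 * Note: these variables live in .data..relocate_kernel so that they are
 * copied into the control page together with the code above and stay
 * reachable via %rip-relative addressing once execution continues from
 * the identity mapped copy.
 */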

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/* zero out flags, and disable interrupts */
	pushq $0
	popfq

	/* Switch to the identity mapped page tables */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4
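	/*
	 * Clearing CR4.PGE flushes all TLB entries, including global ones,
	 * so no stale translation can override the identity mapping
	 * installed via %cr3 above.
	 */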

	/* Save %rsp and CRs. */
	movq	%r13, saved_cr4(%rip)
	movq	%rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save preserve_context in %r11, as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* set up a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp

	/* jump to identity mapped page */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi
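	/*
	 * %rsi now holds the physical address of identity_mapped within the
	 * control page: pa_control_page plus the offset of identity_mapped
	 * from __relocate_kernel_start, computed position-independently via
	 * the local label 0 (the "- 0b" terms cancel out).
	 */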
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi	indirection page
	 * %rdx start address
	 * %r8 host_mem_enc_active
	 * %r9 page table page
	 * %r11 preserve_context
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq   %rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4
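	/*
	 * CR4.CET may only be set while CR0.WP is set; clearing CR0.WP with
	 * CET still enabled would raise #GP, hence CET is dropped first.
	 */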

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use %r13, which contains the original CR4 value read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4
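	/*
	 * The ALTERNATIVE above is patched at boot: on TDX guests
	 * (X86_FEATURE_TDX_GUEST) the "orl $X86_CR4_MCE" is applied so MCE
	 * stays set; everywhere else it remains a no-op.
	 */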

	/* Flush the TLB by reloading CR3 with the identity mapped tables */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz .Lsme_off
	wbinvd
.Lsme_off:

	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code,
	 * a serializing instruction is needed here, so the TLB is flushed
	 * by reloading %cr3: a CR3 write is serializing, handy, and not
	 * processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11	/* preserve_context */
	jnz .Lrelocate

	/*
	 * Set all of the registers to known values;
	 * leave %rsp alone.
	 */

	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d
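
	/*
	 * The start address pushed near the top of identity_mapped() is the
	 * only thing left on this stack; RET pops it and jumps straight into
	 * the new kernel's entry point.
	 */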
	ANNOTATE_UNRET_SAFE
	ret
	int3
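
/*
 * preserve_context (kexec jump) path: call into the new kernel, expect it
 * to eventually hand control back here, then swap the original pages back
 * into place and return to the old kernel through its virtual mapping.
 */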
.Lrelocate:
	popq	%rdx

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	popq	%rbp
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/* Find the start (and end) of this physical mapping of the control page */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 preserve_context
	 */
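	/*
	 * Each 8-byte entry in the indirection list is a page address with
	 * flag bits in the low bits, matching IND_DESTINATION (0x1),
	 * IND_INDIRECTION (0x2), IND_DONE (0x4) and IND_SOURCE (0x8) from
	 * include/linux/kexec.h; the tests below check them in that order.
	 */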
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	.Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word from the indirection page */

	movq	(%rbx), %rcx
	addq	$8,	%rbx
.Lstart:
	testb	$0x1,	%cl   /* is it a destination page? */
	jz	.Lnotdest
	movq	%rcx,	%rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	.Lloop
.Lnotdest:
	testb	$0x2,	%cl   /* is it an indirection page? */
	jz	.Lnotind
	movq	%rcx,   %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lloop
.Lnotind:
	testb	$0x4,	%cl   /* is it the done indicator? */
	jz	.Lnotdone
	jmp	.Ldone
.Lnotdone:
	testb	$0x8,	%cl   /* is it the source indicator? */
	jz	.Lloop	      /* Ignore it otherwise */
	movq	%rcx,   %rsi  /* For every source page, do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx    /* Save destination page to %rdx */
	movq	%rsi, %rax    /* Save source page to %rax */

	testq	%r11, %r11    /* Only actually swap for ::preserve_context */
	jz	.Lnoswap
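
	/*
	 * Three-way exchange via the swap page: the destination page's old
	 * contents end up preserved in the source page rather than being
	 * overwritten, so the old kernel's image can be restored when
	 * jumping back in the preserve_context case.
	 */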
	/* copy source page to swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$512, %ecx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl	$512, %ecx
	rep ; movsq

	lea	PAGE_SIZE(%rax), %rsi
	jmp	.Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)