/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <[email protected]>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushf

	/* zero out flags, and disable interrupts */
	pushq	$0
	popfq

	/* Switch to the identity mapped page tables */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4

	/* Save %rsp and CRs. */
	movq	%r13, saved_cr4(%rip)
	movq	%rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp

	/* jump to identity mapped page */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)
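
/*
 * A worked sketch of the position-independent jump above, with invented
 * example numbers (illustration only, not part of the code): the '0:'
 * label is a known anchor inside this code, so
 * (identity_mapped - 0b) - (__relocate_kernel_start - 0b) reduces to the
 * offset of identity_mapped within the relocated sections. If the control
 * page was allocated at physical 0x1000000 (%rsi on entry) and
 * identity_mapped sits 0xa0 bytes past __relocate_kernel_start, the two
 * add/sub instructions leave %rsi == 0x10000a0 and the jmp lands in the
 * copy of identity_mapped inside the control page, not the original in
 * the kernel image.
 */
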
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %rdx start address
	 * %r8  host_mem_enc_active
	 * %r9  page table page
	 * %r11 preserve_context
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq	%rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz	.Lsme_off
	wbinvd
.Lsme_off:

	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11	/* preserve_context */
	jnz	.Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	popq	%rdx

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	popq	%rbp
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/* Find start (and end) of this physical mapping of control page */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)
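
/*
 * Overview of the preserve_context (CONFIG_KEXEC_JUMP) round trip through
 * .Lrelocate above -- a descriptive summary of the existing code, not new
 * behaviour:
 *
 *   1. swap_pages has exchanged the destination pages with the source
 *      pages, so the peer image is in place while this kernel's contents
 *      are parked in the source pages.
 *   2. 'call *%rdx' enters the peer at its start address; when the peer
 *      comes back it leaves its own re-entry point on the stack, which
 *      is popped into %rbp.
 *   3. A second swap_pages call (with %r11 forced to 1) undoes the
 *      exchange, restoring this kernel's memory.
 *   4. The virtual address of virtual_mapped is pushed and RET'd to,
 *      leaving the identity-mapped control page behind.
 */
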
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 preserve_context
	 */
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	.Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word from the indirection page */
	movq	(%rbx), %rcx
	addq	$8, %rbx
.Lstart:
	testb	$0x1, %cl	/* is it a destination page? */
	jz	.Lnotdest
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	.Lloop
.Lnotdest:
	testb	$0x2, %cl	/* is it an indirection page? */
	jz	.Lnotind
	movq	%rcx, %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lloop
.Lnotind:
	testb	$0x4, %cl	/* is it the done indicator? */
	jz	.Lnotdone
	jmp	.Ldone
.Lnotdone:
	testb	$0x8, %cl	/* is it the source indicator? */
	jz	.Lloop		/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx	/* Save destination page to %rdx */
	movq	%rsi, %rax	/* Save source page to %rax */

	testq	%r11, %r11	/* Only actually swap for ::preserve_context */
	jz	.Lnoswap

	/* copy source page to swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$512, %ecx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl	$512, %ecx
	rep ; movsq

	lea	PAGE_SIZE(%rax), %rsi
	jmp	.Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)
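
/*
 * For reference, the layout of the indirection list that swap_pages walks.
 * The flag bits tested above (0x1/0x2/0x4/0x8) correspond to
 * IND_DESTINATION, IND_INDIRECTION, IND_DONE and IND_SOURCE from
 * <linux/kexec.h>; each 8-byte entry is a page-aligned physical address
 * with the flag in the low bits. An illustrative list (the addresses are
 * invented for the example):
 *
 *   0x0000000005000001	IND_DESTINATION: start copying to 0x5000000
 *   0x0000000006000008	IND_SOURCE: copy page 0x6000000; the destination
 *			then advances by PAGE_SIZE (left behind by rep movsq)
 *   0x0000000007000008	IND_SOURCE: copy page 0x7000000
 *   0x0000000008000002	IND_INDIRECTION: continue with the list at 0x8000000
 *   0x0000000000000004	IND_DONE: return to the caller
 */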