/*
 * Copyright (c) 2009 Corey Tabaka
 * Copyright (c) 2015 Intel Corporation
 * Copyright (c) 2016 Travis Geiselbrecht
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <asm.h>
#include <arch/x86/descriptor.h>
#include <arch/x86/mmu.h>

#define MSR_EFER    0xc0000080
#define EFER_NXE    0x00000800
#define EFER_SCE    0x00000001
#define MSR_GS_BASE 0xc0000101

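/* Translate a link-time kernel virtual address into the physical address it is loaded at */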
#define PHYS(x) ((x) - KERNEL_BASE + MEMBASE)

#define PAGE_MASK               0xfff
#define ONE_GB                  0x40000000
#define ADDR_OFFSET_MASK        ((1 << ADDR_OFFSET) - 1)
#define MAX_MEM_SIZE            (0xFFFFFFFFFFFFFFFF - KERNEL_BASE)
#define X86_PAGE_ALIGN          (0xFFFFFFFFFFFFF000)

.section ".text.boot"
.code64

/*
 * Macro used to fill in an entry in the page table for the region of memory
 * beginning at virtual address KERNEL_BASE. It can be used to fill in either
 * a PML4E or a PDPTE.
 *
 * Input:
 *  table: PML4 or PDPT
 *  entry: base address of the PDPT or PD to be installed in the PML4E or PDPTE
 *  shift: number of extra bits to shift the address right by when computing
 *         the table index. Each PML4 entry covers 512GB, nine bits more than
 *         the 1GB a PDPT entry covers, so pass 9 when filling the PML4 and 0
 *         when filling the PDPT.
 */
.macro map_kernel_base table, entry, shift
    movq $KERNEL_BASE, %rcx
    shrq $30, %rcx
    shrq \shift, %rcx
    andq $ADDR_OFFSET_MASK, %rcx
    shlq $3, %rcx
    movq $PHYS(\entry), %rax
    orq  $X86_KERNEL_PD_FLAGS, %rax
    movq $PHYS(\table), %rdi
    addq %rcx, %rdi
    movq %rax, (%rdi)
.endm
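
/*
 * Worked example (illustrative only; the actual values come from the build):
 * with KERNEL_BASE = 0xffffffff80000000 and ADDR_OFFSET = 9 (so
 * ADDR_OFFSET_MASK is 0x1ff),
 *   PML4 index = (KERNEL_BASE >> 30 >> 9) & 0x1ff = 511
 *   PDPT index = (KERNEL_BASE >> 30) & 0x1ff      = 510
 * hence "map_kernel_base pml4, pdpt, $9" fills PML4[511] and
 * "map_kernel_base pdpt, pd, $0" fills PDPT[510].
 */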

/*
 * Macro used to fill in entries of a PD or PT.
 * Before invoking this macro, fill in:
 *   RDI: address of the first entry to be filled
 *   RCX: number of entries to fill
 * Input:
 *   src_addr: base address of the PT when filling PDEs,
 *             base physical address when filling PTEs
 *   attr:     X86_KERNEL_PD_FLAGS when filling PDEs,
 *             X86_KERNEL_PT_FLAGS when filling PTEs
 */
.macro fill_page_table_entry src_addr, attr
    movq \src_addr, %rsi
    orq  \attr, %rsi
    xorq %rax, %rax
.Lfill_entry_\@:
    movq %rax, %rdx
    shlq $PAGE_DIV_SHIFT, %rdx
    addq %rsi, %rdx
    movq %rdx, (%rdi)
    incq %rax
    addq $8, %rdi
    loop .Lfill_entry_\@
.endm
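
/*
 * Usage sketch (mirrors the calls in paging_setup below): to fill the first
 * N PDEs, point RDI at the first PDE, load N into RCX, then invoke
 *   fill_page_table_entry $PHYS(pt), $X86_KERNEL_PD_FLAGS
 * Each iteration writes one 8-byte entry whose target address advances by
 * 4KB: one PT page per PDE, or one page frame per PTE.
 */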

.macro map_trampoline
    /* install the trampoline PDPT in PML4[0]: PML4E 0 covers the low 512GB */
    movq $PHYS(pdpt_trampoline), %rsi
    orq  $X86_KERNEL_PD_FLAGS, %rsi
    movq %rsi, PHYS(pml4)

    /* update the corresponding trampoline PDPTE, which covers 1GB */
    movq $MEMBASE, %rax
    shrq $30, %rax
    andq $0x1ff, %rax
    shlq $3, %rax
    movq $PHYS(pdpt_trampoline), %rdi
    addq %rax, %rdi
    movq $PHYS(pd_trampoline), %rsi
    orq  $X86_KERNEL_PD_FLAGS, %rsi
    movq %rsi, (%rdi)

    /* update the corresponding trampoline PDE for the bootstrap code, covering 2MB */
    movq $MEMBASE, %rax
    addq $KERNEL_LOAD_OFFSET, %rax
    shrq $21, %rax
    andq $0x1ff, %rax
    shlq $3, %rax
    movq $PHYS(pd_trampoline), %rdi
    addq %rax, %rdi
    movq $PHYS(pt_trampoline), %rsi
    orq  $X86_KERNEL_PD_FLAGS, %rsi
    movq %rsi, (%rdi)

    /* update the corresponding trampoline PTE for the bootstrap code, covering 4KB */
    movq $KERNEL_LOAD_OFFSET, %rax
    shrq $PAGE_DIV_SHIFT, %rax
    andq $0x1ff, %rax
    shlq $3, %rax
    movq $PHYS(pt_trampoline), %rdi
    addq %rax, %rdi
    movq $MEMBASE, %rsi
    addq $KERNEL_LOAD_OFFSET, %rsi
    andq $X86_PAGE_ALIGN, %rsi
    orq  $X86_KERNEL_PT_FLAGS, %rsi
    movq %rsi, (%rdi)

    /*
     * Check whether the GDT and the bootstrap code share the same PDE.
     * Compare their 2MB frame numbers rather than their raw difference, so
     * that addresses straddling a 2MB boundary are not treated as sharing
     * a PDE.
     */
    movq $MEMBASE, %rax
    addq $KERNEL_LOAD_OFFSET, %rax
    shrq $21, %rax
    movq $PHYS(_gdtr_phys), %rcx
    shrq $21, %rcx
    subq %rax, %rcx
    /* nonzero: _gdtr_phys and the bootstrap code use different PDEs */
    jnz .Luse_different_pde
    /* zero: _gdtr_phys and the bootstrap code share the same PDE and PT */
    movq $PHYS(pt_trampoline), %rdi
    jmp .Lshare_same_pde

.Luse_different_pde:
    /* update the corresponding trampoline PDE for the GDT, covering 2MB */
    movq $PHYS(_gdtr_phys), %rax
    shrq $21, %rax
    andq $0x1ff, %rax
    shlq $3, %rax
    movq $PHYS(pd_trampoline), %rdi
    addq %rax, %rdi
    movq $PHYS(pt_trampoline_gdt), %rsi
    orq  $X86_KERNEL_PD_FLAGS, %rsi
    movq %rsi, (%rdi)

    movq $PHYS(pt_trampoline_gdt), %rdi
.Lshare_same_pde:
    /* update the corresponding trampoline PTE for the GDT, covering 4KB */
    movq $PHYS(_gdtr_phys), %rax
    shrq $PAGE_DIV_SHIFT, %rax
    andq $0x1ff, %rax
    shlq $3, %rax
    addq %rax, %rdi
    movq $PHYS(_gdtr_phys), %rsi
    orq  $X86_KERNEL_PT_FLAGS, %rsi
    movq %rsi, (%rdi)
.endm
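
/*
 * Worked example (illustrative; MEMBASE and KERNEL_LOAD_OFFSET are build
 * parameters): with MEMBASE = 0x200000 and KERNEL_LOAD_OFFSET = 0, the
 * bootstrap code at physical 0x200000 identity-maps through
 * PML4[0] -> pdpt_trampoline[0] -> pd_trampoline[1] -> pt_trampoline[0],
 * since 0x200000 >> 30 = 0, 0x200000 >> 21 = 1 and
 * (0x200000 >> 12) & 0x1ff = 0.
 */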

/* Macro used to update the mapping attributes of each kernel section */
.macro update_mapping_attribute_of_each_section
    /* point RDI at the PTE covering the start of the kernel image */
    leaq PHYS(pt), %rdi
    movq $KERNEL_LOAD_OFFSET, %rcx
    shrq $PAGE_DIV_SHIFT, %rcx
    shlq $3, %rcx
    addq %rcx, %rdi

    movq $__code_start, %rax
    movq $__code_end, %rcx
    addq $PAGE_MASK, %rcx
    subq %rax, %rcx
    shrq $PAGE_DIV_SHIFT, %rcx

    /* code: clear the R/W bit in each PTE */
.Lupdate_code_section:
    movq (%rdi), %rdx
    andq $~X86_MMU_PG_RW, %rdx
    movq %rdx, (%rdi)
    addq $8, %rdi
    loop .Lupdate_code_section

    /* RDI now points at the PTE for __rodata_start */
    movq $__rodata_start, %rax
    movq $__rodata_end, %rcx
    addq $PAGE_MASK, %rcx
    subq %rax, %rcx
    shrq $PAGE_DIV_SHIFT, %rcx

    /* rodata: clear the R/W bit and set the XD bit in each PTE */
.Lupdate_rodata_section:
    movq (%rdi), %rdx
    andq $~X86_MMU_PG_RW, %rdx
    btsq $63, %rdx
    movq %rdx, (%rdi)
    addq $8, %rdi
    loop .Lupdate_rodata_section

    /* RDI now points at the PTE for __data_start */
    movq $__data_start, %rax
    movq $__data_end, %rcx
    addq $PAGE_MASK, %rcx
    subq %rax, %rcx
    shrq $PAGE_DIV_SHIFT, %rcx

    /* data: set the XD bit in each PTE */
.Lupdate_data_section:
    movq (%rdi), %rdx
    btsq $63, %rdx
    movq %rdx, (%rdi)
    addq $8, %rdi
    loop .Lupdate_data_section

    /* RDI now points at the PTE for __bss_start */
    movq $__bss_start, %rax
    movq $__bss_end, %rcx
    addq $PAGE_MASK, %rcx
    subq %rax, %rcx
    shrq $PAGE_DIV_SHIFT, %rcx

    /* bss: set the XD bit in each PTE */
.Lupdate_bss_section:
    movq (%rdi), %rdx
    btsq $63, %rdx
    movq %rdx, (%rdi)
    addq $8, %rdi
    loop .Lupdate_bss_section
.endm
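
/*
 * Resulting section permissions:
 *   code    read-only,  executable      (R/W clear, XD clear)
 *   rodata  read-only,  non-executable  (R/W clear, XD set)
 *   data    read-write, non-executable  (R/W set,   XD set)
 *   bss     read-write, non-executable  (R/W set,   XD set)
 */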

/* Map memory above 1GB, up to mmu_initial_mappings[0].size (at most 512GB) */
.macro map_up_to_upper_memory
    leaq PHYS(mmu_initial_mappings), %rdi
    movq 0x10(%rdi), %r9

    subq $ONE_GB, %r9
    /* R9 stores how many 4KB pages are still unmapped */
    shrq $PAGE_DIV_SHIFT, %r9

    movq %r9, %rcx
    /* round the remaining memory size up to whole GBs */
    addq $0x3ffff, %rcx
    /* RCX indicates how many GB should be mapped */
    shrq $18, %rcx

    /* start to allocate boot memory for PDs */
    movq $PHYS(boot_alloc_end), %rdi
    movq (%rdi), %rdx
    /* round up to page size */
    addq $PAGE_MASK, %rdx
    /* RDX indicates the new PD base address (page-aligned address of _end) */
    andq $X86_PAGE_ALIGN, %rdx

    /* store the first new PD base address in RSI */
    movq %rdx, %rsi
    orq  $X86_KERNEL_PD_FLAGS, %rdx

    movq $PHYS(pdpt), %rdi
    movq $KERNEL_BASE, %rax
    shrq $30, %rax
    /* RAX indicates the PDPTE we have already mapped */
    andq $ADDR_OFFSET_MASK, %rax
    /* RAX indicates the PDPTE we should start to fill in now */
    incq %rax
    shlq $3, %rax
    addq %rax, %rdi
.Lfill_upper_mem_pdpte:
    movq %rdx, (%rdi)
    addq $PAGE_SIZE, %rdx
    addq $8, %rdi
    loop .Lfill_upper_mem_pdpte

    /* RDI indicates the PDE we should start to fill in */
    movq %rsi, %rdi
    /* RDX indicates the new PT base address with page directory attributes */
    movq %rdx, %rsi
    /* RSI indicates the base address of the first PTE now */
    andq $X86_PAGE_ALIGN, %rsi

    movq %r9, %rcx
    /* round up to whole 2MB units */
    addq $ADDR_OFFSET_MASK, %rcx
    /* RCX indicates how many 2MB regions should be mapped */
    shrq $9, %rcx

.Lfill_upper_mem_pde:
    movq %rdx, (%rdi)
    addq $PAGE_SIZE, %rdx
    addq $8, %rdi
    loop .Lfill_upper_mem_pde

    movq $MEMBASE, %rax
    addq $ONE_GB, %rax
    /* we have already mapped up to PA MEMBASE+KERNEL_LOAD_OFFSET+1GB */
    addq $KERNEL_LOAD_OFFSET, %rax
    /* RAX indicates the first 4KB page frame number to fill into the PTEs */
    shrq $PAGE_DIV_SHIFT, %rax
    /* RCX indicates how many PTEs should be filled */
    movq %r9, %rcx
    movq %rdx, %r9
    /* R9 indicates the page-aligned address behind the last PT */
    andq $X86_PAGE_ALIGN, %r9
.Lfill_upper_mem_pte:
    movq %rax, %rdx
    shlq $PAGE_DIV_SHIFT, %rdx
    addq $X86_KERNEL_PT_FLAGS, %rdx
    movq %rdx, (%rsi)
    incq %rax
    addq $8, %rsi
    loop .Lfill_upper_mem_pte

    /* record the end of the boot-allocated page tables */
    movq $PHYS(boot_alloc_end), %rdi
    movq %r9, (%rdi)
.endm
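
/*
 * Worked sizing example (illustrative): for mmu_initial_mappings[0].size =
 * 4GB, R9 = 3GB / 4KB = 786432 pages remain after the first GB, which need
 * 786432 / 512 = 1536 PTs (one per 2MB PDE) and 3 PDs (one per 1GB PDPTE),
 * all carved out of boot memory starting at the page-aligned value of
 * boot_alloc_end.
 */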

.global _start
_start:

    /* zero the bss section */
bss_setup:
    movq $PHYS(__bss_start), %rdi /* starting address of the bss */
    movq $PHYS(__bss_end), %rcx   /* find the length of the bss in bytes */
    subq %rdi, %rcx
    shrq $2, %rcx       /* convert to 32-bit words, since the bss is aligned anyway */
.Lzero_bss_sec:
    movl $0, (%rdi)     /* 32-bit store to match the 4-byte stride */
    addq $4, %rdi
    loop .Lzero_bss_sec

paging_setup:

    movl $MSR_EFER, %ecx
    rdmsr
    /* set NXE, since we set the XD bit in the page tables */
    /* set SCE to enable AMD-compatible syscall support */
    orl $(EFER_NXE | EFER_SCE), %eax
    wrmsr

    /* set the corresponding PML4E to map from KERNEL_BASE */
    map_kernel_base pml4, pdpt, $9

    /* set the corresponding PDPTE to map from KERNEL_BASE */
    map_kernel_base pdpt, pd, $0

    /*
     * calculate the memory size to be mapped
     * CAUTION: we have only reserved page tables to cover a 1GB mapping
     */
    leaq PHYS(mmu_initial_mappings), %rdx
    /* RAX indicates mmu_initial_mappings[0].size */
    movq 0x10(%rdx), %rax
    /* RAX indicates memsize in whole GB */
    shrq $30, %rax

    /* reload memsize into RCX for the case memsize < 1GB */
    movq 0x10(%rdx), %rcx
    /*
     * If RAX is zero, everything fits below 1GB and RCX already holds
     * memsize; otherwise map the rest with map_up_to_upper_memory and
     * clamp RCX to 1GB.
     */
    test %rax, %rax
    jz  .Lsize_smaller_than_one_gb
    map_up_to_upper_memory
    movq $ONE_GB, %rcx
.Lsize_smaller_than_one_gb:
    movq %rcx, %r9
    /* calculate the offset into the PD for mapping from KERNEL_BASE */
    movq $KERNEL_BASE, %rdx
    shrq $PD_SHIFT, %rdx
    andq $ADDR_OFFSET_MASK, %rdx
    shlq $3, %rdx
    movq $PHYS(pd), %rdi
    addq %rdx, %rdi

    /* calculate how many 2MB PDEs should be used */
    shrq $PD_SHIFT, %rcx

    /* fill in PDEs */
    fill_page_table_entry $PHYS(pt), $X86_KERNEL_PD_FLAGS

    movq %r9, %rcx
    /* calculate how many 4KB PTEs should be used */
    shrq $PAGE_DIV_SHIFT, %rcx
    movq $PHYS(pt), %rdi

    /* fill in PTEs */
    fill_page_table_entry $MEMBASE, $X86_KERNEL_PT_FLAGS

    update_mapping_attribute_of_each_section

    map_trampoline

    /* switch to the new page table */
    movq $PHYS(pml4), %rax
    movq %rax, %cr3

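    /*
     * Both mappings are now live: the trampoline identity mapping keeps the
     * currently executing physical addresses valid, and the high KERNEL_BASE
     * mapping is the one we are about to jump into.
     */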
    lgdt PHYS(_gdtr_phys)

    movq $.Lfarjump64, %rax
    jmp  *%rax

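/*
 * Far return: pushing the 64-bit code selector and the target address, then
 * executing lretq, pops RIP and CS, reloading CS while transferring control
 * to the kernel's high mapping.
 */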
.Lfarjump64:
    /* load the high kernel stack */
    movq  $(_kstack + 4096), %rsp
    pushq $CODE_64_SELECTOR
    pushq $.Lhighaddr
    lretq

.Lhighaddr:

    /* reload the gdtr with its virtual address */
    lgdt _gdtr

    /* load the null selector into all segment registers except SS */
    xorq %rax, %rax
    movw %ax, %ds
    movw %ax, %es
    movw %ax, %fs
    movw %ax, %gs

    /* set up the stack segment */
    movq $STACK_64_SELECTOR, %rax
    movw %ax, %ss

    /* unmap the low virtual addresses */
    movq $0, (pml4)

    /* set up the gs base */
    leaq per_cpu_states(%rip), %rax

    movq %rax, %rdx
    shrq $32, %rdx
    movq $MSR_GS_BASE, %rcx
    /*
     * RCX - MSR index (MSR_GS_BASE)
     * EDX - high 32 bits of the value to write
     * EAX - low 32 bits of the value to write
     * MSR(RCX)(MSR_GS_BASE) = EDX:EAX
     */
    wrmsr

    /* set up the idt */
    call setup_idt

    /* call the main module */
    call lk_main

.Lhalt:                     /* just sit around waiting for interrupts */
    hlt                     /* interrupts will unhalt the processor */
    pause
    jmp .Lhalt              /* so jump back to halt to conserve power */