Commit c65e774f authored by Kirill A. Shutemov's avatar Kirill A. Shutemov Committed by Ingo Molnar
Browse files

x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable



For boot-time switching between 4- and 5-level paging we need to be able
to fold p4d page table level at runtime. It requires variable
PGDIR_SHIFT and PTRS_PER_P4D.

The change doesn't affect the kernel image size much:

   text	   data	    bss	    dec	    hex	filename
8628091	4734304	1368064	14730459	 e0c4db	vmlinux.before
8628393	4734340	1368064	14730797	 e0c62d	vmlinux.after
Signed-off-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20180214111656.88514-7-kirill.shutemov@linux.intel.com

Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 5c7919bb
...@@ -48,6 +48,8 @@ ...@@ -48,6 +48,8 @@
#ifdef CONFIG_X86_5LEVEL #ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init = 1; unsigned int pgtable_l5_enabled __ro_after_init = 1;
unsigned int pgdir_shift __ro_after_init = 48;
unsigned int ptrs_per_p4d __ro_after_init = 512;
#endif #endif
extern unsigned long get_cmd_line_ptr(void); extern unsigned long get_cmd_line_ptr(void);
......
...@@ -33,6 +33,8 @@ static inline void pgtable_cache_init(void) { } ...@@ -33,6 +33,8 @@ static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { } static inline void check_pgt_cache(void) { }
void paging_init(void); void paging_init(void);
static inline int pgd_large(pgd_t pgd) { return 0; }
/* /*
* Define this if things work differently on an i386 and an i486: * Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are * it will (on an i486) warn about kernel memory accesses that are
......
...@@ -26,6 +26,9 @@ extern unsigned int pgtable_l5_enabled; ...@@ -26,6 +26,9 @@ extern unsigned int pgtable_l5_enabled;
#define pgtable_l5_enabled 0 #define pgtable_l5_enabled 0
#endif #endif
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD 0 #define SHARED_KERNEL_PMD 0
...@@ -35,16 +38,17 @@ extern unsigned int pgtable_l5_enabled; ...@@ -35,16 +38,17 @@ extern unsigned int pgtable_l5_enabled;
/* /*
* PGDIR_SHIFT determines what a top-level page table entry can map * PGDIR_SHIFT determines what a top-level page table entry can map
*/ */
#define PGDIR_SHIFT 48 #define PGDIR_SHIFT pgdir_shift
#define PTRS_PER_PGD 512 #define PTRS_PER_PGD 512
/* /*
* 4th level page in 5-level paging case * 4th level page in 5-level paging case
*/ */
#define P4D_SHIFT 39 #define P4D_SHIFT 39
#define PTRS_PER_P4D 512 #define MAX_PTRS_PER_P4D 512
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define PTRS_PER_P4D ptrs_per_p4d
#define P4D_MASK (~(P4D_SIZE - 1)) #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE - 1))
#define MAX_POSSIBLE_PHYSMEM_BITS 52 #define MAX_POSSIBLE_PHYSMEM_BITS 52
...@@ -53,8 +57,9 @@ extern unsigned int pgtable_l5_enabled; ...@@ -53,8 +57,9 @@ extern unsigned int pgtable_l5_enabled;
/* /*
* PGDIR_SHIFT determines what a top-level page table entry can map * PGDIR_SHIFT determines what a top-level page table entry can map
*/ */
#define PGDIR_SHIFT 39 #define PGDIR_SHIFT 39
#define PTRS_PER_PGD 512 #define PTRS_PER_PGD 512
#define MAX_PTRS_PER_P4D 1
#endif /* CONFIG_X86_5LEVEL */ #endif /* CONFIG_X86_5LEVEL */
......
...@@ -1082,19 +1082,7 @@ void arch_unmap_kpfn(unsigned long pfn) ...@@ -1082,19 +1082,7 @@ void arch_unmap_kpfn(unsigned long pfn)
* a legal address. * a legal address.
*/ */
/*
* Build time check to see if we have a spare virtual bit. Don't want
* to leave this until run time because most developers don't have a
* system that can exercise this code path. This will only become a
* problem if/when we move beyond 5-level page tables.
*
* Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
*/
#if PGDIR_SHIFT + 9 < 63
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
#else
#error "no unused virtual bit available"
#endif
if (set_memory_np(decoy_addr, 1)) if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
...@@ -2328,6 +2316,12 @@ static __init int mcheck_init_device(void) ...@@ -2328,6 +2316,12 @@ static __init int mcheck_init_device(void)
{ {
int err; int err;
/*
* Check if we have a spare virtual bit. This will only become
* a problem if/when we move beyond 5-level page tables.
*/
MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
if (!mce_available(&boot_cpu_data)) { if (!mce_available(&boot_cpu_data)) {
err = -EIO; err = -EIO;
goto err_out; goto err_out;
......
...@@ -42,6 +42,10 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); ...@@ -42,6 +42,10 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL #ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init = 1; unsigned int pgtable_l5_enabled __ro_after_init = 1;
EXPORT_SYMBOL(pgtable_l5_enabled); EXPORT_SYMBOL(pgtable_l5_enabled);
unsigned int pgdir_shift __ro_after_init = 48;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 512;
EXPORT_SYMBOL(ptrs_per_p4d);
#endif #endif
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
...@@ -336,7 +340,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) ...@@ -336,7 +340,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0); BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK))); (__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
......
...@@ -428,14 +428,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, ...@@ -428,14 +428,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
#define p4d_none(a) pud_none(__pud(p4d_val(a))) #define p4d_none(a) pud_none(__pud(p4d_val(a)))
#endif #endif
#if PTRS_PER_P4D > 1
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
{ {
int i; int i;
p4d_t *start, *p4d_start; p4d_t *start, *p4d_start;
pgprotval_t prot; pgprotval_t prot;
if (PTRS_PER_P4D == 1)
return walk_pud_level(m, st, __p4d(pgd_val(addr)), P);
p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_P4D; i++) { for (i = 0; i < PTRS_PER_P4D; i++) {
...@@ -455,11 +456,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, ...@@ -455,11 +456,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
} }
} }
#else #define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p) #define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
#endif
static inline bool is_hypervisor_range(int idx) static inline bool is_hypervisor_range(int idx)
{ {
......
...@@ -143,7 +143,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) ...@@ -143,7 +143,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
* With folded p4d, pgd_none() is always false, we need to * With folded p4d, pgd_none() is always false, we need to
* handle synchonization on p4d level. * handle synchonization on p4d level.
*/ */
BUILD_BUG_ON(pgd_none(*pgd_ref)); MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, addr); p4d_ref = p4d_offset(pgd_ref, addr);
if (p4d_none(*p4d_ref)) if (p4d_none(*p4d_ref))
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
extern struct range pfn_mapped[E820_MAX_ENTRIES]; extern struct range pfn_mapped[E820_MAX_ENTRIES];
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static __init void *early_alloc(size_t size, int nid, bool panic) static __init void *early_alloc(size_t size, int nid, bool panic)
{ {
......
...@@ -257,8 +257,8 @@ void efi_sync_low_kernel_mappings(void) ...@@ -257,8 +257,8 @@ void efi_sync_low_kernel_mappings(void)
* only span a single PGD entry and that the entry also maps * only span a single PGD entry and that the entry also maps
* other important kernel regions. * other important kernel regions.
*/ */
BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
(EFI_VA_END & PGDIR_MASK)); (EFI_VA_END & PGDIR_MASK));
pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#define P4D_SHIFT PGDIR_SHIFT #define P4D_SHIFT PGDIR_SHIFT
#define P4D_SIZE PGDIR_SIZE #define P4D_SIZE PGDIR_SIZE
#define P4D_MASK PGDIR_MASK #define P4D_MASK PGDIR_MASK
#define MAX_PTRS_PER_P4D 1
#define PTRS_PER_P4D 1 #define PTRS_PER_P4D 1
#define p4d_t pgd_t #define p4d_t pgd_t
......
...@@ -8,10 +8,11 @@ ...@@ -8,10 +8,11 @@
typedef struct { pgd_t pgd; } p4d_t; typedef struct { pgd_t pgd; } p4d_t;
#define P4D_SHIFT PGDIR_SHIFT #define P4D_SHIFT PGDIR_SHIFT
#define PTRS_PER_P4D 1 #define MAX_PTRS_PER_P4D 1
#define P4D_SIZE (1UL << P4D_SHIFT) #define PTRS_PER_P4D 1
#define P4D_MASK (~(P4D_SIZE-1)) #define P4D_SIZE (1UL << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1))
/* /*
* The "pgd_xxx()" functions here are trivial for a folded two-level * The "pgd_xxx()" functions here are trivial for a folded two-level
......
...@@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; ...@@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE];
extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
extern pud_t kasan_zero_pud[PTRS_PER_PUD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD];
extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];
void kasan_populate_zero_shadow(const void *shadow_start, void kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end); const void *shadow_end);
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
#if CONFIG_PGTABLE_LEVELS > 4 #if CONFIG_PGTABLE_LEVELS > 4
p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
#endif #endif
#if CONFIG_PGTABLE_LEVELS > 3 #if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment