瀏覽代碼

x86/mm: Define virtual memory map for 5-level paging

The first part of the memory map (up to the %esp fixup stacks) simply scales
the existing map for 4-level paging by 9 bits (a factor of 2^9 = 512) -- the
number of address bits covered by the additional page table level.

The rest of the map is unchanged.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170330080731.65421-4-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Kirill A. Shutemov 8 年之前
父節點
當前提交
4c7c44837b

+ 30 - 3
Documentation/x86/x86_64/mm.txt

@@ -4,7 +4,7 @@
 Virtual memory map with 4 level page tables:
 Virtual memory map with 4 level page tables:
 
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
-hole caused by [48:63] sign extension
+hole caused by [47:63] sign extension
 ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
 ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
 ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
 ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
 ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
 ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
@@ -23,12 +23,39 @@ ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
 ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
 ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 
+Virtual memory map with 5 level page tables:
+
+0000000000000000 - 00ffffffffffffff (=56 bits) user space, different per mm
+hole caused by [56:63] sign extension
+ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
+ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
+ff90000000000000 - ff91ffffffffffff (=49 bits) hole
+ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
+ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
+... unused hole ...
+ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+... unused hole ...
+ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+... unused hole ...
+ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
+... unused hole ...
+ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
+ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+
+The architecture defines a 64-bit virtual address. Implementations can support
+less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
+through to the most-significant implemented bit are set to either all ones
+or all zeros. This causes a hole between user space and kernel addresses.
+
 The direct mapping covers all memory in the system up to the highest
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
 memory address (this means in some cases it can also include PCI memory
 holes).
 holes).
 
 
-vmalloc space is lazily synchronized into the different PML4 pages of
-the processes using the page fault handler, with init_level4_pgt as
+vmalloc space is lazily synchronized into the different PML4/PML5 pages of
+the processes using the page fault handler, with init_top_pgt as
 reference.
 reference.
 
 
 Current X86-64 implementations support up to 46 bits of address space (64 TB),
 Current X86-64 implementations support up to 46 bits of address space (64 TB),

+ 1 - 0
arch/x86/Kconfig

@@ -291,6 +291,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 config KASAN_SHADOW_OFFSET
 config KASAN_SHADOW_OFFSET
 	hex
 	hex
 	depends on KASAN
 	depends on KASAN
+	default 0xdff8000000000000 if X86_5LEVEL
 	default 0xdffffc0000000000
 	default 0xdffffc0000000000
 
 
 config HAVE_INTEL_TXT
 config HAVE_INTEL_TXT

+ 6 - 3
arch/x86/include/asm/kasan.h

@@ -11,9 +11,12 @@
  * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
  * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
  */
  */
 #define KASAN_SHADOW_START      (KASAN_SHADOW_OFFSET + \
 #define KASAN_SHADOW_START      (KASAN_SHADOW_OFFSET + \
-					(0xffff800000000000ULL >> 3))
-/* 47 bits for kernel address -> (47 - 3) bits for shadow */
-#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1ULL << (47 - 3)))
+					((-1UL << __VIRTUAL_MASK_SHIFT) >> 3))
+/*
+ * 47 bits for kernel address -> (47 - 3) bits for shadow
+ * 56 bits for kernel address -> (56 - 3) bits for shadow
+ */
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3)))
 
 
 #ifndef __ASSEMBLY__
 #ifndef __ASSEMBLY__
 
 

+ 10 - 0
arch/x86/include/asm/page_64_types.h

@@ -36,7 +36,12 @@
  * hypervisor to fit.  Choosing 16 slots here is arbitrary, but it's
  * hypervisor to fit.  Choosing 16 slots here is arbitrary, but it's
  * what Xen requires.
  * what Xen requires.
  */
  */
+#ifdef CONFIG_X86_5LEVEL
+#define __PAGE_OFFSET_BASE      _AC(0xff10000000000000, UL)
+#else
 #define __PAGE_OFFSET_BASE      _AC(0xffff880000000000, UL)
 #define __PAGE_OFFSET_BASE      _AC(0xffff880000000000, UL)
+#endif
+
 #ifdef CONFIG_RANDOMIZE_MEMORY
 #ifdef CONFIG_RANDOMIZE_MEMORY
 #define __PAGE_OFFSET           page_offset_base
 #define __PAGE_OFFSET           page_offset_base
 #else
 #else
@@ -46,8 +51,13 @@
 #define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
 #define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
 
 
 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+#ifdef CONFIG_X86_5LEVEL
+#define __PHYSICAL_MASK_SHIFT	52
+#define __VIRTUAL_MASK_SHIFT	56
+#else
 #define __PHYSICAL_MASK_SHIFT	46
 #define __PHYSICAL_MASK_SHIFT	46
 #define __VIRTUAL_MASK_SHIFT	47
 #define __VIRTUAL_MASK_SHIFT	47
+#endif
 
 
 /*
 /*
  * Kernel image size is limited to 1GiB due to the fixmap living in the
  * Kernel image size is limited to 1GiB due to the fixmap living in the

+ 6 - 0
arch/x86/include/asm/pgtable_64_types.h

@@ -56,9 +56,15 @@ typedef struct { pteval_t pte; } pte_t;
 
 
 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
 #define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#ifdef CONFIG_X86_5LEVEL
+#define VMALLOC_SIZE_TB _AC(16384, UL)
+#define __VMALLOC_BASE	_AC(0xff92000000000000, UL)
+#define __VMEMMAP_BASE	_AC(0xffd4000000000000, UL)
+#else
 #define VMALLOC_SIZE_TB	_AC(32, UL)
 #define VMALLOC_SIZE_TB	_AC(32, UL)
 #define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
 #define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
 #define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
 #define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
+#endif
 #ifdef CONFIG_RANDOMIZE_MEMORY
 #ifdef CONFIG_RANDOMIZE_MEMORY
 #define VMALLOC_START	vmalloc_base
 #define VMALLOC_START	vmalloc_base
 #define VMEMMAP_START	vmemmap_base
 #define VMEMMAP_START	vmemmap_base

+ 7 - 2
arch/x86/include/asm/sparsemem.h

@@ -26,8 +26,13 @@
 # endif
 # endif
 #else /* CONFIG_X86_32 */
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
 # define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS	44
-# define MAX_PHYSMEM_BITS	46
+# ifdef CONFIG_X86_5LEVEL
+#  define MAX_PHYSADDR_BITS	52
+#  define MAX_PHYSMEM_BITS	52
+# else
+#  define MAX_PHYSADDR_BITS	44
+#  define MAX_PHYSMEM_BITS	46
+# endif
 #endif
 #endif
 
 
 #endif /* CONFIG_SPARSEMEM */
 #endif /* CONFIG_SPARSEMEM */