12 年之前 · 61d0669775
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -890,6 +890,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
				 	edd=		[EDD]
			
 
				 			Format: {"off" | "on" | "skip[mbr]"}
			
 
				 
			
 
				+	efi=		[EFI]
			
 
				+			Format: { "old_map" }
			
 
				+			old_map [X86-64]: switch to the old ioremap-based EFI
			
 
				+			runtime services mapping. 32-bit still uses this one by
			
 
				+			default.
			
 
				+
			
 
				 	efi_no_storage_paranoia [EFI; X86]
			
 
				 			Using this parameter you can use more than 50% of
			
 
				 			your efi variable storage. Use this parameter only if
			
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -28,4 +28,11 @@ reference.
 
				 Current X86-64 implementations only support 40 bits of address space,
			
 
				 but we support up to 46 bits. This expands into MBZ space in the page tables.
			
 
				 
			
 
				+->trampoline_pgd:
			
 
				+
			
 
				+We map EFI runtime services in the aforementioned PGD in the virtual
			
 
				+range of 64GB (arbitrarily set, can be raised if needed):
			
 
				+
			
 
				+0xffffffef00000000 - 0xffffffff00000000
			
 
				+
			
 
				 -Andi Kleen, Jul 2004
			
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,6 +1,24 @@
 
				 #ifndef _ASM_X86_EFI_H
			
 
				 #define _ASM_X86_EFI_H
			
 
				 
			
 
				+/*
			
 
				+ * We map the EFI regions needed for runtime services non-contiguously,
			
 
				+ * with preserved alignment on virtual addresses starting from -4G down
			
 
				+ * for a total max space of 64G. This way, we provide for stable runtime
			
 
				+ * services addresses across kernels so that a kexec'd kernel can still
			
 
				+ * use them.
			
 
				+ *
			
 
				+ * This is the main reason why we're doing stable VA mappings for RT
			
 
				+ * services.
			
 
				+ *
			
 
				+ * This flag is used in conjunction with a chicken bit called
			
 
				+ * "efi=old_map" which can be used as a fallback to the old runtime
			
 
				+ * services mapping method in case there's some b0rkage with a
			
 
				+ * particular EFI implementation (haha, it is hard to hold up the
			
 
				+ * sarcasm here...).
			
 
				+ */
			
 
				+#define EFI_OLD_MEMMAP		EFI_ARCH_1
			
 
				+
			
 
				 #ifdef CONFIG_X86_32
			
 
				 
			
 
				 #define EFI_LOADER_SIGNATURE	"EL32"
			
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 
				 	efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
			
 
				 		  (u64)(a4), (u64)(a5), (u64)(a6))
			
 
				 
			
 
				+#define _efi_call_virtX(x, f, ...)					\
			
 
				+({									\
			
 
				+	efi_status_t __s;						\
			
 
				+									\
			
 
				+	efi_sync_low_kernel_mappings();					\
			
 
				+	preempt_disable();						\
			
 
				+	__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__);	\
			
 
				+	preempt_enable();						\
			
 
				+	__s;								\
			
 
				+})
			
 
				+
			
 
				 #define efi_call_virt0(f)				\
			
 
				-	efi_call0((efi.systab->runtime->f))
			
 
				-#define efi_call_virt1(f, a1)					\
			
 
				-	efi_call1((efi.systab->runtime->f), (u64)(a1))
			
 
				-#define efi_call_virt2(f, a1, a2)					\
			
 
				-	efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
			
 
				-#define efi_call_virt3(f, a1, a2, a3)					\
			
 
				-	efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
			
 
				-		  (u64)(a3))
			
 
				-#define efi_call_virt4(f, a1, a2, a3, a4)				\
			
 
				-	efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
			
 
				-		  (u64)(a3), (u64)(a4))
			
 
				-#define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
			
 
				-	efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
			
 
				-		  (u64)(a3), (u64)(a4), (u64)(a5))
			
 
				-#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
			
 
				-	efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
			
 
				-		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
			
 
				+	_efi_call_virtX(0, f)
			
 
				+#define efi_call_virt1(f, a1)				\
			
 
				+	_efi_call_virtX(1, f, (u64)(a1))
			
 
				+#define efi_call_virt2(f, a1, a2)			\
			
 
				+	_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
			
 
				+#define efi_call_virt3(f, a1, a2, a3)			\
			
 
				+	_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
			
 
				+#define efi_call_virt4(f, a1, a2, a3, a4)		\
			
 
				+	_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
			
 
				+#define efi_call_virt5(f, a1, a2, a3, a4, a5)		\
			
 
				+	_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
			
 
				+#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
			
 
				+	_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
			
 
				 
			
 
				 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
			
 
				 				 u32 type, u64 attribute);
			
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 
				 
			
 
				 extern int add_efi_memmap;
			
 
				 extern unsigned long x86_efi_facility;
			
 
				+extern struct efi_scratch efi_scratch;
			
 
				 extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
			
 
				 extern int efi_memblock_x86_reserve_range(void);
			
 
				 extern void efi_call_phys_prelog(void);
			
 
				 extern void efi_call_phys_epilog(void);
			
 
				 extern void efi_unmap_memmap(void);
			
 
				 extern void efi_memory_uc(u64 addr, unsigned long size);
			
 
				+extern void __init efi_map_region(efi_memory_desc_t *md);
			
 
				+extern void efi_sync_low_kernel_mappings(void);
			
 
				+extern void efi_setup_page_tables(void);
			
 
				+extern void __init old_map_region(efi_memory_desc_t *md);
			
 
				 
			
 
				 #ifdef CONFIG_EFI
			
 
				 
			
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
 
				  */
			
 
				 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
			
 
				 extern phys_addr_t slow_virt_to_phys(void *__address);
			
 
				-
			
 
				+extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
			
 
				+				   unsigned numpages, unsigned long page_flags);
			
 
				 #endif	/* !__ASSEMBLY__ */
			
 
				 
			
 
				 #endif /* _ASM_X86_PGTABLE_DEFS_H */
			
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -30,6 +30,7 @@
 
				  */
			
 
				 struct cpa_data {
			
 
				 	unsigned long	*vaddr;
			
 
				+	pgd_t		*pgd;
			
 
				 	pgprot_t	mask_set;
			
 
				 	pgprot_t	mask_clr;
			
 
				 	int		numpages;
			
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 
				 	return prot;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Lookup the page table entry for a virtual address. Return a pointer
			
 
				- * to the entry and the level of the mapping.
			
 
				- *
			
 
				- * Note: We return pud and pmd either when the entry is marked large
			
 
				- * or when the present bit is not set. Otherwise we would return a
			
 
				- * pointer to a nonexisting mapping.
			
 
				- */
			
 
				-pte_t *lookup_address(unsigned long address, unsigned int *level)
			
 
				+static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
			
 
				+				      unsigned int *level)
			
 
				 {
			
 
				-	pgd_t *pgd = pgd_offset_k(address);
			
 
				 	pud_t *pud;
			
 
				 	pmd_t *pmd;
			
 
				 
			
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
 
				 
			
 
				 	return pte_offset_kernel(pmd, address);
			
 
				 }
			
 
				+
			
 
				+/*
			
 
				+ * Lookup the page table entry for a virtual address. Return a pointer
			
 
				+ * to the entry and the level of the mapping.
			
 
				+ *
			
 
				+ * Note: We return pud and pmd either when the entry is marked large
			
 
				+ * or when the present bit is not set. Otherwise we would return a
			
 
				+ * pointer to a nonexisting mapping.
			
 
				+ */
			
 
				+pte_t *lookup_address(unsigned long address, unsigned int *level)
			
 
				+{
			
 
				+        return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
			
 
				+}
			
 
				 EXPORT_SYMBOL_GPL(lookup_address);
			
 
				 
			
 
				+static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
			
 
				+				  unsigned int *level)
			
 
				+{
			
 
				+        if (cpa->pgd)
			
 
				+		return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
			
 
				+					       address, level);
			
 
				+
			
 
				+        return lookup_address(address, level);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * This is necessary because __pa() does not work on some
			
 
				  * kinds of memory, like vmalloc() or the alloc_remap()
			
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 
				 	 * Check for races, another CPU might have split this page
			
 
				 	 * up already:
			
 
				 	 */
			
 
				-	tmp = lookup_address(address, &level);
			
 
				+	tmp = _lookup_address_cpa(cpa, address, &level);
			
 
				 	if (tmp != kpte)
			
 
				 		goto out_unlock;
			
 
				 
			
@@ -543,7 +559,8 @@ out_unlock:
 
				 }
			
 
				 
			
 
				 static int
			
 
				-__split_large_page(pte_t *kpte, unsigned long address, struct page *base)
			
 
				+__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
			
 
				+		   struct page *base)
			
 
				 {
			
 
				 	pte_t *pbase = (pte_t *)page_address(base);
			
 
				 	unsigned long pfn, pfninc = 1;
			
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 
				 	 * Check for races, another CPU might have split this page
			
 
				 	 * up for us already:
			
 
				 	 */
			
 
				-	tmp = lookup_address(address, &level);
			
 
				+	tmp = _lookup_address_cpa(cpa, address, &level);
			
 
				 	if (tmp != kpte) {
			
 
				 		spin_unlock(&pgd_lock);
			
 
				 		return 1;
			
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int split_large_page(pte_t *kpte, unsigned long address)
			
 
				+static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
			
 
				+			    unsigned long address)
			
 
				 {
			
 
				 	struct page *base;
			
 
				 
			
@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 
				 	if (!base)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				-	if (__split_large_page(kpte, address, base))
			
 
				+	if (__split_large_page(cpa, kpte, address, base))
			
 
				 		__free_page(base);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static bool try_to_free_pte_page(pte_t *pte)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < PTRS_PER_PTE; i++)
			
 
				+		if (!pte_none(pte[i]))
			
 
				+			return false;
			
 
				+
			
 
				+	free_page((unsigned long)pte);
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static bool try_to_free_pmd_page(pmd_t *pmd)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < PTRS_PER_PMD; i++)
			
 
				+		if (!pmd_none(pmd[i]))
			
 
				+			return false;
			
 
				+
			
 
				+	free_page((unsigned long)pmd);
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
			
 
				+{
			
 
				+	pte_t *pte = pte_offset_kernel(pmd, start);
			
 
				+
			
 
				+	while (start < end) {
			
 
				+		set_pte(pte, __pte(0));
			
 
				+
			
 
				+		start += PAGE_SIZE;
			
 
				+		pte++;
			
 
				+	}
			
 
				+
			
 
				+	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
			
 
				+		pmd_clear(pmd);
			
 
				+		return true;
			
 
				+	}
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
			
 
				+			      unsigned long start, unsigned long end)
			
 
				+{
			
 
				+	if (unmap_pte_range(pmd, start, end))
			
 
				+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
			
 
				+			pud_clear(pud);
			
 
				+}
			
 
				+
			
 
				+static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
			
 
				+{
			
 
				+	pmd_t *pmd = pmd_offset(pud, start);
			
 
				+
			
 
				+	/*
			
 
				+	 * Not on a 2MB page boundary?
			
 
				+	 */
			
 
				+	if (start & (PMD_SIZE - 1)) {
			
 
				+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
			
 
				+		unsigned long pre_end = min_t(unsigned long, end, next_page);
			
 
				+
			
 
				+		__unmap_pmd_range(pud, pmd, start, pre_end);
			
 
				+
			
 
				+		start = pre_end;
			
 
				+		pmd++;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Try to unmap in 2M chunks.
			
 
				+	 */
			
 
				+	while (end - start >= PMD_SIZE) {
			
 
				+		if (pmd_large(*pmd))
			
 
				+			pmd_clear(pmd);
			
 
				+		else
			
 
				+			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
			
 
				+
			
 
				+		start += PMD_SIZE;
			
 
				+		pmd++;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * 4K leftovers?
			
 
				+	 */
			
 
				+	if (start < end)
			
 
				+		return __unmap_pmd_range(pud, pmd, start, end);
			
 
				+
			
 
				+	/*
			
 
				+	 * Try again to free the PMD page if we haven't succeeded above.
			
 
				+	 */
			
 
				+	if (!pud_none(*pud))
			
 
				+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
			
 
				+			pud_clear(pud);
			
 
				+}
			
 
				+
			
 
				+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
			
 
				+{
			
 
				+	pud_t *pud = pud_offset(pgd, start);
			
 
				+
			
 
				+	/*
			
 
				+	 * Not on a GB page boundary?
			
 
				+	 */
			
 
				+	if (start & (PUD_SIZE - 1)) {
			
 
				+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
			
 
				+		unsigned long pre_end	= min_t(unsigned long, end, next_page);
			
 
				+
			
 
				+		unmap_pmd_range(pud, start, pre_end);
			
 
				+
			
 
				+		start = pre_end;
			
 
				+		pud++;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Try to unmap in 1G chunks?
			
 
				+	 */
			
 
				+	while (end - start >= PUD_SIZE) {
			
 
				+
			
 
				+		if (pud_large(*pud))
			
 
				+			pud_clear(pud);
			
 
				+		else
			
 
				+			unmap_pmd_range(pud, start, start + PUD_SIZE);
			
 
				+
			
 
				+		start += PUD_SIZE;
			
 
				+		pud++;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * 2M leftovers?
			
 
				+	 */
			
 
				+	if (start < end)
			
 
				+		unmap_pmd_range(pud, start, end);
			
 
				+
			
 
				+	/*
			
 
				+	 * No need to try to free the PUD page because we'll free it in
			
 
				+	 * populate_pgd's error path
			
 
				+	 */
			
 
				+}
			
 
				+
			
 
				+static int alloc_pte_page(pmd_t *pmd)
			
 
				+{
			
 
				+	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
			
 
				+	if (!pte)
			
 
				+		return -1;
			
 
				+
			
 
				+	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int alloc_pmd_page(pud_t *pud)
			
 
				+{
			
 
				+	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
			
 
				+	if (!pmd)
			
 
				+		return -1;
			
 
				+
			
 
				+	set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void populate_pte(struct cpa_data *cpa,
			
 
				+			 unsigned long start, unsigned long end,
			
 
				+			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
			
 
				+{
			
 
				+	pte_t *pte;
			
 
				+
			
 
				+	pte = pte_offset_kernel(pmd, start);
			
 
				+
			
 
				+	while (num_pages-- && start < end) {
			
 
				+
			
 
				+		/* deal with the NX bit */
			
 
				+		if (!(pgprot_val(pgprot) & _PAGE_NX))
			
 
				+			cpa->pfn &= ~_PAGE_NX;
			
 
				+
			
 
				+		set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
			
 
				+
			
 
				+		start	 += PAGE_SIZE;
			
 
				+		cpa->pfn += PAGE_SIZE;
			
 
				+		pte++;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static int populate_pmd(struct cpa_data *cpa,
			
 
				+			unsigned long start, unsigned long end,
			
 
				+			unsigned num_pages, pud_t *pud, pgprot_t pgprot)
			
 
				+{
			
 
				+	unsigned int cur_pages = 0;
			
 
				+	pmd_t *pmd;
			
 
				+
			
 
				+	/*
			
 
				+	 * Not on a 2M boundary?
			
 
				+	 */
			
 
				+	if (start & (PMD_SIZE - 1)) {
			
 
				+		unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
			
 
				+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
			
 
				+
			
 
				+		pre_end   = min_t(unsigned long, pre_end, next_page);
			
 
				+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
			
 
				+		cur_pages = min_t(unsigned int, num_pages, cur_pages);
			
 
				+
			
 
				+		/*
			
 
				+		 * Need a PTE page?
			
 
				+		 */
			
 
				+		pmd = pmd_offset(pud, start);
			
 
				+		if (pmd_none(*pmd))
			
 
				+			if (alloc_pte_page(pmd))
			
 
				+				return -1;
			
 
				+
			
 
				+		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
			
 
				+
			
 
				+		start = pre_end;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * We mapped them all?
			
 
				+	 */
			
 
				+	if (num_pages == cur_pages)
			
 
				+		return cur_pages;
			
 
				+
			
 
				+	while (end - start >= PMD_SIZE) {
			
 
				+
			
 
				+		/*
			
 
				+		 * We cannot use a 1G page so allocate a PMD page if needed.
			
 
				+		 */
			
 
				+		if (pud_none(*pud))
			
 
				+			if (alloc_pmd_page(pud))
			
 
				+				return -1;
			
 
				+
			
 
				+		pmd = pmd_offset(pud, start);
			
 
				+
			
 
				+		set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
			
 
				+
			
 
				+		start	  += PMD_SIZE;
			
 
				+		cpa->pfn  += PMD_SIZE;
			
 
				+		cur_pages += PMD_SIZE >> PAGE_SHIFT;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Map trailing 4K pages.
			
 
				+	 */
			
 
				+	if (start < end) {
			
 
				+		pmd = pmd_offset(pud, start);
			
 
				+		if (pmd_none(*pmd))
			
 
				+			if (alloc_pte_page(pmd))
			
 
				+				return -1;
			
 
				+
			
 
				+		populate_pte(cpa, start, end, num_pages - cur_pages,
			
 
				+			     pmd, pgprot);
			
 
				+	}
			
 
				+	return num_pages;
			
 
				+}
			
 
				+
			
 
				+static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
			
 
				+			pgprot_t pgprot)
			
 
				+{
			
 
				+	pud_t *pud;
			
 
				+	unsigned long end;
			
 
				+	int cur_pages = 0;
			
 
				+
			
 
				+	end = start + (cpa->numpages << PAGE_SHIFT);
			
 
				+
			
 
				+	/*
			
 
				+	 * Not on a GB page boundary? => map everything up to it with
			
 
				+	 * smaller pages.
			
 
				+	 */
			
 
				+	if (start & (PUD_SIZE - 1)) {
			
 
				+		unsigned long pre_end;
			
 
				+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
			
 
				+
			
 
				+		pre_end   = min_t(unsigned long, end, next_page);
			
 
				+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
			
 
				+		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
			
 
				+
			
 
				+		pud = pud_offset(pgd, start);
			
 
				+
			
 
				+		/*
			
 
				+		 * Need a PMD page?
			
 
				+		 */
			
 
				+		if (pud_none(*pud))
			
 
				+			if (alloc_pmd_page(pud))
			
 
				+				return -1;
			
 
				+
			
 
				+		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
			
 
				+					 pud, pgprot);
			
 
				+		if (cur_pages < 0)
			
 
				+			return cur_pages;
			
 
				+
			
 
				+		start = pre_end;
			
 
				+	}
			
 
				+
			
 
				+	/* We mapped them all? */
			
 
				+	if (cpa->numpages == cur_pages)
			
 
				+		return cur_pages;
			
 
				+
			
 
				+	pud = pud_offset(pgd, start);
			
 
				+
			
 
				+	/*
			
 
				+	 * Map everything starting from the GB boundary, possibly with 1G pages
			
 
				+	 */
			
 
				+	while (end - start >= PUD_SIZE) {
			
 
				+		set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
			
 
				+
			
 
				+		start	  += PUD_SIZE;
			
 
				+		cpa->pfn  += PUD_SIZE;
			
 
				+		cur_pages += PUD_SIZE >> PAGE_SHIFT;
			
 
				+		pud++;
			
 
				+	}
			
 
				+
			
 
				+	/* Map trailing leftover */
			
 
				+	if (start < end) {
			
 
				+		int tmp;
			
 
				+
			
 
				+		pud = pud_offset(pgd, start);
			
 
				+		if (pud_none(*pud))
			
 
				+			if (alloc_pmd_page(pud))
			
 
				+				return -1;
			
 
				+
			
 
				+		tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
			
 
				+				   pud, pgprot);
			
 
				+		if (tmp < 0)
			
 
				+			return cur_pages;
			
 
				+
			
 
				+		cur_pages += tmp;
			
 
				+	}
			
 
				+	return cur_pages;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Restrictions for kernel page table do not necessarily apply when mapping in
			
 
				+ * an alternate PGD.
			
 
				+ */
			
 
				+static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
			
 
				+{
			
 
				+	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
			
 
				+	bool allocd_pgd = false;
			
 
				+	pgd_t *pgd_entry;
			
 
				+	pud_t *pud = NULL;	/* shut up gcc */
			
 
				+	int ret;
			
 
				+
			
 
				+	pgd_entry = cpa->pgd + pgd_index(addr);
			
 
				+
			
 
				+	/*
			
 
				+	 * Allocate a PUD page and hand it down for mapping.
			
 
				+	 */
			
 
				+	if (pgd_none(*pgd_entry)) {
			
 
				+		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
			
 
				+		if (!pud)
			
 
				+			return -1;
			
 
				+
			
 
				+		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
			
 
				+		allocd_pgd = true;
			
 
				+	}
			
 
				+
			
 
				+	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
			
 
				+	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
			
 
				+
			
 
				+	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
			
 
				+	if (ret < 0) {
			
 
				+		unmap_pud_range(pgd_entry, addr,
			
 
				+				addr + (cpa->numpages << PAGE_SHIFT));
			
 
				+
			
 
				+		if (allocd_pgd) {
			
 
				+			/*
			
 
				+			 * If I allocated this PUD page, I can just as well
			
 
				+			 * free it in this error path.
			
 
				+			 */
			
 
				+			pgd_clear(pgd_entry);
			
 
				+			free_page((unsigned long)pud);
			
 
				+		}
			
 
				+		return ret;
			
 
				+	}
			
 
				+	cpa->numpages = ret;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
			
 
				 			       int primary)
			
 
				 {
			
 
				+	if (cpa->pgd)
			
 
				+		return populate_pgd(cpa, vaddr);
			
 
				+
			
 
				 	/*
			
 
				 	 * Ignore all non primary paths.
			
 
				 	 */
			
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 
				 	else
			
 
				 		address = *cpa->vaddr;
			
 
				 repeat:
			
 
				-	kpte = lookup_address(address, &level);
			
 
				+	kpte = _lookup_address_cpa(cpa, address, &level);
			
 
				 	if (!kpte)
			
 
				 		return __cpa_process_fault(cpa, address, primary);
			
 
				 
			
@@ -761,7 +1154,7 @@ repeat:
 
				 	/*
			
 
				 	 * We have to split the large page:
			
 
				 	 */
			
 
				-	err = split_large_page(kpte, address);
			
 
				+	err = split_large_page(cpa, kpte, address);
			
 
				 	if (!err) {
			
 
				 		/*
			
 
				 	 	 * Do a global flush tlb after splitting the large page
			
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
				 	int ret, cache, checkalias;
			
 
				 	unsigned long baddr = 0;
			
 
				 
			
 
				+	memset(&cpa, 0, sizeof(cpa));
			
 
				+
			
 
				 	/*
			
 
				 	 * Check, if we are requested to change a not supported
			
 
				 	 * feature:
			
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
 
				 {
			
 
				 	unsigned long tempaddr = (unsigned long) page_address(page);
			
 
				 	struct cpa_data cpa = { .vaddr = &tempaddr,
			
 
				+				.pgd = NULL,
			
 
				 				.numpages = numpages,
			
 
				 				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
			
 
				 				.mask_clr = __pgprot(0),
			
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
 
				 {
			
 
				 	unsigned long tempaddr = (unsigned long) page_address(page);
			
 
				 	struct cpa_data cpa = { .vaddr = &tempaddr,
			
 
				+				.pgd = NULL,
			
 
				 				.numpages = numpages,
			
 
				 				.mask_set = __pgprot(0),
			
 
				 				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
			
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)
 
				 
			
 
				 #endif /* CONFIG_DEBUG_PAGEALLOC */
			
 
				 
			
 
				+int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
			
 
				+			    unsigned numpages, unsigned long page_flags)
			
 
				+{
			
 
				+	int retval = -EINVAL;
			
 
				+
			
 
				+	struct cpa_data cpa = {
			
 
				+		.vaddr = &address,
			
 
				+		.pfn = pfn,
			
 
				+		.pgd = pgd,
			
 
				+		.numpages = numpages,
			
 
				+		.mask_set = __pgprot(0),
			
 
				+		.mask_clr = __pgprot(0),
			
 
				+		.flags = 0,
			
 
				+	};
			
 
				+
			
 
				+	if (!(__supported_pte_mask & _PAGE_NX))
			
 
				+		goto out;
			
 
				+
			
 
				+	if (!(page_flags & _PAGE_NX))
			
 
				+		cpa.mask_clr = __pgprot(_PAGE_NX);
			
 
				+
			
 
				+	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
			
 
				+
			
 
				+	retval = __change_page_attr_set_clr(&cpa, 0);
			
 
				+	__flush_tlb_all();
			
 
				+
			
 
				+out:
			
 
				+	return retval;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * The testcases use internal knowledge of the implementation that shouldn't
			
 
				  * be exposed to the rest of the kernel. Include these directly here.
			
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -12,6 +12,8 @@
 
				  *	Bibo Mao <bibo.mao@intel.com>
			
 
				  *	Chandramouli Narayanan <mouli@linux.intel.com>
			
 
				  *	Huang Ying <ying.huang@intel.com>
			
 
				+ * Copyright (C) 2013 SuSE Labs
			
 
				+ *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
			
 
				  *
			
 
				  * Copied from efi_32.c to eliminate the duplicated code between EFI
			
 
				  * 32/64 support code. --ying 2007-10-26
			
@@ -51,7 +53,7 @@
 
				 #include <asm/x86_init.h>
			
 
				 #include <asm/rtc.h>
			
 
				 
			
 
				-#define EFI_DEBUG	1
			
 
				+#define EFI_DEBUG
			
 
				 
			
 
				 #define EFI_MIN_RESERVE 5120
			
 
				 
			
@@ -398,9 +400,9 @@ int __init efi_memblock_x86_reserve_range(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-#if EFI_DEBUG
			
 
				 static void __init print_efi_memmap(void)
			
 
				 {
			
 
				+#ifdef EFI_DEBUG
			
 
				 	efi_memory_desc_t *md;
			
 
				 	void *p;
			
 
				 	int i;
			
@@ -415,8 +417,8 @@ static void __init print_efi_memmap(void)
 
				 			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
			
 
				 			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
			
 
				 	}
			
 
				-}
			
 
				 #endif  /*  EFI_DEBUG  */
			
 
				+}
			
 
				 
			
 
				 void __init efi_reserve_boot_services(void)
			
 
				 {
			
@@ -696,10 +698,7 @@ void __init efi_init(void)
 
				 		x86_platform.set_wallclock = efi_set_rtc_mmss;
			
 
				 	}
			
 
				 #endif
			
 
				-
			
 
				-#if EFI_DEBUG
			
 
				 	print_efi_memmap();
			
 
				-#endif
			
 
				 }
			
 
				 
			
 
				 void __init efi_late_init(void)
			
@@ -748,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size)
 
				 	set_memory_uc(addr, npages);
			
 
				 }
			
 
				 
			
 
				+void __init old_map_region(efi_memory_desc_t *md)
			
 
				+{
			
 
				+	u64 start_pfn, end_pfn, end;
			
 
				+	unsigned long size;
			
 
				+	void *va;
			
 
				+
			
 
				+	start_pfn = PFN_DOWN(md->phys_addr);
			
 
				+	size	  = md->num_pages << PAGE_SHIFT;
			
 
				+	end	  = md->phys_addr + size;
			
 
				+	end_pfn   = PFN_UP(end);
			
 
				+
			
 
				+	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
			
 
				+		va = __va(md->phys_addr);
			
 
				+
			
 
				+		if (!(md->attribute & EFI_MEMORY_WB))
			
 
				+			efi_memory_uc((u64)(unsigned long)va, size);
			
 
				+	} else
			
 
				+		va = efi_ioremap(md->phys_addr, size,
			
 
				+				 md->type, md->attribute);
			
 
				+
			
 
				+	md->virt_addr = (u64) (unsigned long) va;
			
 
				+	if (!va)
			
 
				+		pr_err("ioremap of 0x%llX failed!\n",
			
 
				+		       (unsigned long long)md->phys_addr);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * This function will switch the EFI runtime services to virtual mode.
			
 
				- * Essentially, look through the EFI memmap and map every region that
			
 
				- * has the runtime attribute bit set in its memory descriptor and update
			
 
				- * that memory descriptor with the virtual address obtained from ioremap().
			
 
				- * This enables the runtime services to be called without having to
			
 
				+ * Essentially, we look through the EFI memmap and map every region that
			
 
				+ * has the runtime attribute bit set in its memory descriptor into the
			
 
				+ * ->trampoline_pgd page table using a top-down VA allocation scheme.
			
 
				+ *
			
 
				+ * The old method which used to update that memory descriptor with the
			
 
				+ * virtual address obtained from ioremap() is still supported when the
			
 
				+ * kernel is booted with efi=old_map on its command line. Same old
			
 
				+ * method enabled the runtime services to be called without having to
			
 
				  * thunk back into physical mode for every invocation.
			
 
				+ *
			
 
				+ * The new method does a pagetable switch in a preemption-safe manner
			
 
				+ * so that we're in a different address space when calling a runtime
			
 
				+ * function. For function arguments passing we do copy the PGDs of the
			
 
				+ * kernel page table into ->trampoline_pgd prior to each call.
			
 
				  */
			
 
				 void __init efi_enter_virtual_mode(void)
			
 
				 {
			
 
				 	efi_memory_desc_t *md, *prev_md = NULL;
			
 
				-	efi_status_t status;
			
 
				+	void *p, *new_memmap = NULL;
			
 
				 	unsigned long size;
			
 
				-	u64 end, systab, start_pfn, end_pfn;
			
 
				-	void *p, *va, *new_memmap = NULL;
			
 
				+	efi_status_t status;
			
 
				+	u64 end, systab;
			
 
				 	int count = 0;
			
 
				 
			
 
				 	efi.systab = NULL;
			
@@ -771,7 +805,6 @@ void __init efi_enter_virtual_mode(void)
 
				 	 * We don't do virtual mode, since we don't do runtime services, on
			
 
				 	 * non-native EFI
			
 
				 	 */
			
 
				-
			
 
				 	if (!efi_is_native()) {
			
 
				 		efi_unmap_memmap();
			
 
				 		return;
			
@@ -802,6 +835,7 @@ void __init efi_enter_virtual_mode(void)
 
				 			continue;
			
 
				 		}
			
 
				 		prev_md = md;
			
 
				+
			
 
				 	}
			
 
				 
			
 
				 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
			
@@ -814,36 +848,24 @@ void __init efi_enter_virtual_mode(void)
 
				 				continue;
			
 
				 		}
			
 
				 
			
 
				+		efi_map_region(md);
			
 
				+
			
 
				 		size = md->num_pages << EFI_PAGE_SHIFT;
			
 
				 		end = md->phys_addr + size;
			
 
				 
			
 
				-		start_pfn = PFN_DOWN(md->phys_addr);
			
 
				-		end_pfn = PFN_UP(end);
			
 
				-		if (pfn_range_is_mapped(start_pfn, end_pfn)) {
			
 
				-			va = __va(md->phys_addr);
			
 
				-
			
 
				-			if (!(md->attribute & EFI_MEMORY_WB))
			
 
				-				efi_memory_uc((u64)(unsigned long)va, size);
			
 
				-		} else
			
 
				-			va = efi_ioremap(md->phys_addr, size,
			
 
				-					 md->type, md->attribute);
			
 
				-
			
 
				-		md->virt_addr = (u64) (unsigned long) va;
			
 
				-
			
 
				-		if (!va) {
			
 
				-			pr_err("ioremap of 0x%llX failed!\n",
			
 
				-			       (unsigned long long)md->phys_addr);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				 		systab = (u64) (unsigned long) efi_phys.systab;
			
 
				 		if (md->phys_addr <= systab && systab < end) {
			
 
				 			systab += md->virt_addr - md->phys_addr;
			
 
				+
			
 
				 			efi.systab = (efi_system_table_t *) (unsigned long) systab;
			
 
				 		}
			
 
				+
			
 
				 		new_memmap = krealloc(new_memmap,
			
 
				 				      (count + 1) * memmap.desc_size,
			
 
				 				      GFP_KERNEL);
			
 
				+		if (!new_memmap)
			
 
				+			goto err_out;
			
 
				+
			
 
				 		memcpy(new_memmap + (count * memmap.desc_size), md,
			
 
				 		       memmap.desc_size);
			
 
				 		count++;
			
@@ -851,6 +873,9 @@ void __init efi_enter_virtual_mode(void)
 
				 
			
 
				 	BUG_ON(!efi.systab);
			
 
				 
			
 
				+	efi_setup_page_tables();
			
 
				+	efi_sync_low_kernel_mappings();
			
 
				+
			
 
				 	status = phys_efi_set_virtual_address_map(
			
 
				 		memmap.desc_size * count,
			
 
				 		memmap.desc_size,
			
@@ -883,7 +908,8 @@ void __init efi_enter_virtual_mode(void)
 
				 	efi.query_variable_info = virt_efi_query_variable_info;
			
 
				 	efi.update_capsule = virt_efi_update_capsule;
			
 
				 	efi.query_capsule_caps = virt_efi_query_capsule_caps;
			
 
				-	if (__supported_pte_mask & _PAGE_NX)
			
 
				+
			
 
				+	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
			
 
				 		runtime_code_page_mkexec();
			
 
				 
			
 
				 	kfree(new_memmap);
			
@@ -894,6 +920,11 @@ void __init efi_enter_virtual_mode(void)
 
				 			 EFI_VARIABLE_BOOTSERVICE_ACCESS |
			
 
				 			 EFI_VARIABLE_RUNTIME_ACCESS,
			
 
				 			 0, NULL);
			
 
				+
			
 
				+	return;
			
 
				+
			
 
				+ err_out:
			
 
				+	pr_err("Error reallocating memory, EFI runtime non-functional!\n");
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1013,3 +1044,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 
				 	return EFI_SUCCESS;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(efi_query_variable_store);
			
 
				+
			
 
				+static int __init parse_efi_cmdline(char *str)
			
 
				+{
			
 
				+	if (*str == '=')
			
 
				+		str++;
			
 
				+
			
 
				+	if (!strncmp(str, "old_map", 7))
			
 
				+		set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+early_param("efi", parse_efi_cmdline);
			
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -37,9 +37,16 @@
 
				  * claim EFI runtime service handler exclusively and to duplicate a memory in
			
 
				  * low memory space say 0 - 3G.
			
 
				  */
			
 
				-
			
 
				 static unsigned long efi_rt_eflags;
			
 
				 
			
 
				+void efi_sync_low_kernel_mappings(void) {}
			
 
				+void efi_setup_page_tables(void) {}
			
 
				+
			
 
				+void __init efi_map_region(efi_memory_desc_t *md)
			
 
				+{
			
 
				+	old_map_region(md);
			
 
				+}
			
 
				+
			
 
				 void efi_call_phys_prelog(void)
			
 
				 {
			
 
				 	struct desc_ptr gdt_descr;
			
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,10 +38,28 @@
 
				 #include <asm/efi.h>
			
 
				 #include <asm/cacheflush.h>
			
 
				 #include <asm/fixmap.h>
			
 
				+#include <asm/realmode.h>
			
 
				 
			
 
				 static pgd_t *save_pgd __initdata;
			
 
				 static unsigned long efi_flags __initdata;
			
 
				 
			
 
				+/*
			
 
				+ * We allocate runtime services regions top-down, starting from -4G, i.e.
			
 
				+ * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
			
 
				+ */
			
 
				+static u64 efi_va	= -4 * (1UL << 30);
			
 
				+#define EFI_VA_END	(-68 * (1UL << 30))
			
 
				+
			
 
				+/*
			
 
				+ * Scratch space used for switching the pagetable in the EFI stub
			
 
				+ */
			
 
				+struct efi_scratch {
			
 
				+	u64 r15;
			
 
				+	u64 prev_cr3;
			
 
				+	pgd_t *efi_pgt;
			
 
				+	bool use_pgd;
			
 
				+};
			
 
				+
			
 
				 static void __init early_code_mapping_set_exec(int executable)
			
 
				 {
			
 
				 	efi_memory_desc_t *md;
			
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void)
 
				 	int pgd;
			
 
				 	int n_pgds;
			
 
				 
			
 
				+	if (!efi_enabled(EFI_OLD_MEMMAP))
			
 
				+		return;
			
 
				+
			
 
				 	early_code_mapping_set_exec(1);
			
 
				 	local_irq_save(efi_flags);
			
 
				 
			
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void)
 
				 	 */
			
 
				 	int pgd;
			
 
				 	int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
			
 
				+
			
 
				+	if (!efi_enabled(EFI_OLD_MEMMAP))
			
 
				+		return;
			
 
				+
			
 
				 	for (pgd = 0; pgd < n_pgds; pgd++)
			
 
				 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
			
 
				 	kfree(save_pgd);
			
@@ -94,6 +119,90 @@ void __init efi_call_phys_epilog(void)
 
				 	early_code_mapping_set_exec(0);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Add low kernel mappings for passing arguments to EFI functions.
			
 
				+ */
			
 
				+void efi_sync_low_kernel_mappings(void)
			
 
				+{
			
 
				+	unsigned num_pgds;
			
 
				+	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
			
 
				+
			
 
				+	if (efi_enabled(EFI_OLD_MEMMAP))
			
 
				+		return;
			
 
				+
			
 
				+	num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
			
 
				+
			
 
				+	memcpy(pgd + pgd_index(PAGE_OFFSET),
			
 
				+		init_mm.pgd + pgd_index(PAGE_OFFSET),
			
 
				+		sizeof(pgd_t) * num_pgds);
			
 
				+}
			
 
				+
			
 
				+void efi_setup_page_tables(void)
			
 
				+{
			
 
				+	efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
			
 
				+
			
 
				+	if (!efi_enabled(EFI_OLD_MEMMAP))
			
 
				+		efi_scratch.use_pgd = true;
			
 
				+}
			
 
				+
			
 
				+static void __init __map_region(efi_memory_desc_t *md, u64 va)
			
 
				+{
			
 
				+	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
			
 
				+	unsigned long pf = 0, size;
			
 
				+	u64 end;
			
 
				+
			
 
				+	if (!(md->attribute & EFI_MEMORY_WB))
			
 
				+		pf |= _PAGE_PCD;
			
 
				+
			
 
				+	size = md->num_pages << PAGE_SHIFT;
			
 
				+	end  = va + size;
			
 
				+
			
 
				+	if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
			
 
				+		pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
			
 
				+			   md->phys_addr, va);
			
 
				+}
			
 
				+
			
 
				+void __init efi_map_region(efi_memory_desc_t *md)
			
 
				+{
			
 
				+	unsigned long size = md->num_pages << PAGE_SHIFT;
			
 
				+	u64 pa = md->phys_addr;
			
 
				+
			
 
				+	if (efi_enabled(EFI_OLD_MEMMAP))
			
 
				+		return old_map_region(md);
			
 
				+
			
 
				+	/*
			
 
				+	 * Make sure the 1:1 mappings are present as a catch-all for b0rked
			
 
				+	 * firmware which doesn't update all internal pointers after switching
			
 
				+	 * to virtual mode and would otherwise crap on us.
			
 
				+	 */
			
 
				+	__map_region(md, md->phys_addr);
			
 
				+
			
 
				+	efi_va -= size;
			
 
				+
			
 
				+	/* Is PA 2M-aligned? */
			
 
				+	if (!(pa & (PMD_SIZE - 1))) {
			
 
				+		efi_va &= PMD_MASK;
			
 
				+	} else {
			
 
				+		u64 pa_offset = pa & (PMD_SIZE - 1);
			
 
				+		u64 prev_va = efi_va;
			
 
				+
			
 
				+		/* get us the same offset within this 2M page */
			
 
				+		efi_va = (efi_va & PMD_MASK) + pa_offset;
			
 
				+
			
 
				+		if (efi_va > prev_va)
			
 
				+			efi_va -= PMD_SIZE;
			
 
				+	}
			
 
				+
			
 
				+	if (efi_va < EFI_VA_END) {
			
 
				+		pr_warn(FW_WARN "VA address range overflow!\n");
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	/* Do the VA map */
			
 
				+	__map_region(md, efi_va);
			
 
				+	md->virt_addr = efi_va;
			
 
				+}
			
 
				+
			
 
				 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
			
 
				 				 u32 type, u64 attribute)
			
 
				 {
			
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -34,10 +34,47 @@
 
				 	mov %rsi, %cr0;			\
			
 
				 	mov (%rsp), %rsp
			
 
				 
			
 
				+	/* stolen from gcc */
			
 
				+	.macro FLUSH_TLB_ALL
			
 
				+	movq %r15, efi_scratch(%rip)
			
 
				+	movq %r14, efi_scratch+8(%rip)
			
 
				+	movq %cr4, %r15
			
 
				+	movq %r15, %r14
			
 
				+	andb $0x7f, %r14b
			
 
				+	movq %r14, %cr4
			
 
				+	movq %r15, %cr4
			
 
				+	movq efi_scratch+8(%rip), %r14
			
 
				+	movq efi_scratch(%rip), %r15
			
 
				+	.endm
			
 
				+
			
 
				+	.macro SWITCH_PGT
			
 
				+	cmpb $0, efi_scratch+24(%rip)
			
 
				+	je 1f
			
 
				+	movq %r15, efi_scratch(%rip)		# r15
			
 
				+	# save previous CR3
			
 
				+	movq %cr3, %r15
			
 
				+	movq %r15, efi_scratch+8(%rip)		# prev_cr3
			
 
				+	movq efi_scratch+16(%rip), %r15		# EFI pgt
			
 
				+	movq %r15, %cr3
			
 
				+	1:
			
 
				+	.endm
			
 
				+
			
 
				+	.macro RESTORE_PGT
			
 
				+	cmpb $0, efi_scratch+24(%rip)
			
 
				+	je 2f
			
 
				+	movq efi_scratch+8(%rip), %r15
			
 
				+	movq %r15, %cr3
			
 
				+	movq efi_scratch(%rip), %r15
			
 
				+	FLUSH_TLB_ALL
			
 
				+	2:
			
 
				+	.endm
			
 
				+
			
 
				 ENTRY(efi_call0)
			
 
				 	SAVE_XMM
			
 
				 	subq $32, %rsp
			
 
				+	SWITCH_PGT
			
 
				 	call *%rdi
			
 
				+	RESTORE_PGT
			
 
				 	addq $32, %rsp
			
 
				 	RESTORE_XMM
			
 
				 	ret
			
@@ -47,7 +84,9 @@ ENTRY(efi_call1)
 
				 	SAVE_XMM
			
 
				 	subq $32, %rsp
			
 
				 	mov  %rsi, %rcx
			
 
				+	SWITCH_PGT
			
 
				 	call *%rdi
			
 
				+	RESTORE_PGT
			
 
				 	addq $32, %rsp
			
 
				 	RESTORE_XMM
			
 
				 	ret
			
@@ -57,7 +96,9 @@ ENTRY(efi_call2)
 
				 	SAVE_XMM
			
 
				 	subq $32, %rsp
			
 
				 	mov  %rsi, %rcx
			
 
				+	SWITCH_PGT
			
 
				 	call *%rdi
			
 
				+	RESTORE_PGT
			
 
				 	addq $32, %rsp
			
 
				 	RESTORE_XMM
			
 
				 	ret
			
@@ -68,7 +109,9 @@ ENTRY(efi_call3)
 
				 	subq $32, %rsp
			
 
				 	mov  %rcx, %r8
			
 
				 	mov  %rsi, %rcx
			
 
				+	SWITCH_PGT
			
 
				 	call *%rdi
			
 
				+	RESTORE_PGT
			
 
				 	addq $32, %rsp
			
 
				 	RESTORE_XMM
			
 
				 	ret
			
@@ -80,7 +123,9 @@ ENTRY(efi_call4)
 
				 	mov %r8, %r9
			
 
				 	mov %rcx, %r8
			
 
				 	mov %rsi, %rcx
			
 
				+	SWITCH_PGT
			
 
				 	call *%rdi
			
 
				+	RESTORE_PGT
			
 
				 	addq $32, %rsp
			
 
				 	RESTORE_XMM
			
 
				 	ret
			
@@ -93,7 +138,9 @@ ENTRY(efi_call5)
 
				 	mov %r8, %r9
			
 
				 	mov %rcx, %r8
			
 
				 	mov %rsi, %rcx
			
 
				+	SWITCH_PGT
			
 
				 	call *%rdi
			
 
				+	RESTORE_PGT
			
 
				 	addq $48, %rsp
			
 
				 	RESTORE_XMM
			
 
				 	ret
			
@@ -109,8 +156,15 @@ ENTRY(efi_call6)
 
				 	mov %r8, %r9
			
 
				 	mov %rcx, %r8
			
 
				 	mov %rsi, %rcx
			
 
				+	SWITCH_PGT
			
 
				 	call *%rdi
			
 
				+	RESTORE_PGT
			
 
				 	addq $48, %rsp
			
 
				 	RESTORE_XMM
			
 
				 	ret
			
 
				 ENDPROC(efi_call6)
			
 
				+
			
 
				+	.data
			
 
				+ENTRY(efi_scratch)
			
 
				+	.fill 3,8,0
			
 
				+	.byte 0
			
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -653,6 +653,7 @@ extern int __init efi_setup_pcdp_console(char *);
 
				 #define EFI_RUNTIME_SERVICES	3	/* Can we use runtime services? */
			
 
				 #define EFI_MEMMAP		4	/* Can we use EFI memory map? */
			
 
				 #define EFI_64BIT		5	/* Is the firmware 64-bit? */
			
 
				+#define EFI_ARCH_1		6	/* First arch-specific bit */
			
 
				 
			
 
				 #ifdef CONFIG_EFI
			
 
				 # ifdef CONFIG_X86