Merge branch 'for-next/52-bit-pa' into for-next/core

* for-next/52-bit-pa:
  arm64: enable 52-bit physical address support
  arm64: allow ID map to be extended to 52 bits
  arm64: handle 52-bit physical addresses in page table entries
  arm64: don't open code page table entry creation
  arm64: head.S: handle 52-bit PAs in PTEs in early page table setup
  arm64: handle 52-bit addresses in TTBR
  arm64: limit PA size to supported range
  arm64: add kconfig symbol to configure physical address size
Catalin Marinas, 7 years ago
parent
commit
1f911c3a11

+ 7 - 0
arch/arm/include/asm/kvm_mmu.h

@@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void)
 	return false;
 }
 
+static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
+{
+	return PTRS_PER_PGD;
+}
+
 static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
 				       pgd_t *hyp_pgd,
 				       pgd_t *merged_hyp_pgd,
@@ -221,6 +226,8 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return 8;
 }
 
+#define kvm_phys_to_vttbr(addr)		(addr)
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */

+ 29 - 0
arch/arm64/Kconfig

@@ -639,6 +639,35 @@ config ARM64_VA_BITS
 	default 47 if ARM64_VA_BITS_47
 	default 48 if ARM64_VA_BITS_48
 
+choice
+	prompt "Physical address space size"
+	default ARM64_PA_BITS_48
+	help
+	  Choose the maximum physical address range that the kernel will
+	  support.
+
+config ARM64_PA_BITS_48
+	bool "48-bit"
+
+config ARM64_PA_BITS_52
+	bool "52-bit (ARMv8.2)"
+	depends on ARM64_64K_PAGES
+	depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
+	help
+	  Enable support for a 52-bit physical address space, introduced as
+	  part of the ARMv8.2-LPA extension.
+
+	  With this enabled, the kernel will also continue to work on CPUs that
+	  do not support ARMv8.2-LPA, but with some added memory overhead (and
+	  minor performance overhead).
+
+endchoice
+
+config ARM64_PA_BITS
+	int
+	default 48 if ARM64_PA_BITS_48
+	default 52 if ARM64_PA_BITS_52
+
 config CPU_BIG_ENDIAN
        bool "Build big-endian kernel"
        help
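
The 52-bit option's dependency on 64K pages comes from where the extra address bits have to live: with a 64K granule the output address in a page-table entry stops at bit 16, leaving descriptor bits [15:12] free to carry PA bits [51:48] (ARMv8.2-LPA), whereas with 4K pages the output address already reaches down to bit 12. A minimal compile-time sketch of that bit budget (the EX_ names are illustrative, not from the series):

/*
 * Sketch: with 64K pages (PAGE_SHIFT == 16) the low and high halves of the
 * descriptor address mask cannot overlap, which is what makes 52-bit PAs
 * depend on ARM64_64K_PAGES.
 */
#include <assert.h>

#define EX_PAGE_SHIFT	16					/* 64K pages */
#define EX_ADDR_LOW	(((1ULL << (48 - EX_PAGE_SHIFT)) - 1) << EX_PAGE_SHIFT)
#define EX_ADDR_HIGH	(0xfULL << 12)				/* carries PA[51:48] */

static_assert((EX_ADDR_LOW & EX_ADDR_HIGH) == 0,
	      "descriptor bits [15:12] are only free with 64K pages");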

+ 34 - 2
arch/arm64/include/asm/assembler.h

@@ -343,10 +343,26 @@ alternative_endif
  * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
  */
 	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
 	ldr_l	\tmpreg, idmap_t0sz
 	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
+	.endm
+
+/*
+ * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
+ * ID_AA64MMFR0_EL1.PARange value
+ *
+ *	tcr:		register with the TCR_ELx value to be updated
+ *	pos:		PARange bitfield position
+ *	tmp{0,1}:	temporary registers
+ */
+	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
+	mrs	\tmp0, ID_AA64MMFR0_EL1
+	// Narrow PARange to fit the PS field in TCR_ELx
+	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
+	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
+	cmp	\tmp0, \tmp1
+	csel	\tmp0, \tmp1, \tmp0, hi
+	bfi	\tcr, \tmp0, \pos, #3
 	.endm
 
 /*
@@ -489,4 +505,20 @@ alternative_else_nop_endif
 #endif
 	.endm
 
+/*
+ * Arrange a physical address in a TTBR register, taking care of 52-bit
+ * addresses.
+ *
+ * 	phys:	physical address, preserved
+ * 	ttbr:	returns the TTBR value
+ */
+	.macro	phys_to_ttbr, phys, ttbr
+#ifdef CONFIG_ARM64_PA_BITS_52
+	orr	\ttbr, \phys, \phys, lsr #46
+	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52
+#else
+	mov	\ttbr, \phys
+#endif
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
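
tcr_compute_pa_size clamps the CPU-reported PARange to the largest value the kernel was built for, so a 48-bit kernel never programs an IPS/PS value it cannot handle, and a 52-bit kernel still boots on pre-LPA CPUs. A rough C rendering of the same clamp, purely illustrative (the real implementation is the assembly macro above):

#include <stdint.h>

/*
 * Illustrative C equivalent of tcr_compute_pa_size: 'mmfr0' is the value of
 * ID_AA64MMFR0_EL1, 'pos' the (I)PS field position in TCR_ELx, 'max' the
 * configured ID_AA64MMFR0_PARANGE_MAX.
 */
static inline uint64_t ex_tcr_compute_pa_size(uint64_t tcr, uint64_t mmfr0,
					      unsigned int pos, uint64_t max)
{
	uint64_t parange = mmfr0 & 0x7;		/* PARange, narrowed to 3 bits (ubfx) */

	if (parange > max)			/* cmp + csel ..., hi */
		parange = max;
	return (tcr & ~(UINT64_C(0x7) << pos)) | (parange << pos);	/* bfi */
}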

+ 18 - 3
arch/arm64/include/asm/kvm_mmu.h

@@ -273,15 +273,26 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
 static inline bool __kvm_cpu_uses_extended_idmap(void)
 {
-	return __cpu_uses_extended_idmap();
+	return __cpu_uses_extended_idmap_level();
 }
 
+static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
+{
+	return idmap_ptrs_per_pgd;
+}
+
+/*
+ * Can't use pgd_populate here, because the extended idmap adds an extra level
+ * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended
+ * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4.
+ */
 static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
 				       pgd_t *hyp_pgd,
 				       pgd_t *merged_hyp_pgd,
 				       unsigned long hyp_idmap_start)
 {
 	int idmap_idx;
+	u64 pgd_addr;
 
 	/*
 	 * Use the first entry to access the HYP mappings. It is
@@ -289,7 +300,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
 	 * extended idmap.
 	 */
 	VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
-	merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE);
+	pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd));
+	merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE);
 
 	/*
 	 * Create another extended level entry that points to the boot HYP map,
@@ -299,7 +311,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
 	 */
 	idmap_idx = hyp_idmap_start >> VA_BITS;
 	VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
-	merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
+	pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd));
+	merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE);
 }
 
 static inline unsigned int kvm_get_vmid_bits(void)
@@ -309,5 +322,7 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
+#define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */

+ 11 - 1
arch/arm64/include/asm/mmu_context.h

@@ -49,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
  */
 static inline void cpu_set_reserved_ttbr0(void)
 {
-	unsigned long ttbr = __pa_symbol(empty_zero_page);
+	unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page));
 
 	write_sysreg(ttbr, ttbr0_el1);
 	isb();
@@ -68,6 +68,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
  * physical memory, in which case it will be smaller.
  */
 extern u64 idmap_t0sz;
+extern u64 idmap_ptrs_per_pgd;
 
 static inline bool __cpu_uses_extended_idmap(void)
 {
@@ -75,6 +76,15 @@ static inline bool __cpu_uses_extended_idmap(void)
 		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
 }
 
+/*
+ * True if the extended ID map requires an extra level of translation table
+ * to be configured.
+ */
+static inline bool __cpu_uses_extended_idmap_level(void)
+{
+	return ARM64_HW_PGTABLE_LEVELS((64 - idmap_t0sz)) > CONFIG_PGTABLE_LEVELS;
+}
+
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
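
__cpu_uses_extended_idmap_level() only reports true when lowering T0SZ actually adds a translation level. With ARM64_HW_PGTABLE_LEVELS(va) == DIV_ROUND_UP(va - PAGE_SHIFT, PAGE_SHIFT - 3), a 52-bit ID map still fits in three levels with 64K pages, which is why the series widens the top-level table (idmap_ptrs_per_pgd) in that case instead of always adding a level. A worked sketch of the arithmetic (the EX_ helper is illustrative):

/* Sketch of the level arithmetic behind __cpu_uses_extended_idmap_level(). */
#define EX_HW_PGTABLE_LEVELS(va_bits, page_shift) \
	(((va_bits) - 4) / ((page_shift) - 3))	/* == DIV_ROUND_UP(va - ps, ps - 3) */

/* 4K pages, VA_BITS = 39: three levels; a 48-bit ID map needs a fourth. */
_Static_assert(EX_HW_PGTABLE_LEVELS(39, 12) == 3 &&
	       EX_HW_PGTABLE_LEVELS(48, 12) == 4, "extra level needed");
/* 64K pages, VA_BITS = 48: three levels; a 52-bit ID map still fits in three. */
_Static_assert(EX_HW_PGTABLE_LEVELS(48, 16) == 3 &&
	       EX_HW_PGTABLE_LEVELS(52, 16) == 3, "no extra level needed");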

+ 3 - 3
arch/arm64/include/asm/pgalloc.h

@@ -44,7 +44,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
 {
-	set_pud(pud, __pud(pmd | prot));
+	set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -73,7 +73,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 
 static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
 {
-	set_pgd(pgdp, __pgd(pud | prot));
+	set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
 }
 
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
@@ -129,7 +129,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
 				  pmdval_t prot)
 {
-	set_pmd(pmdp, __pmd(pte | prot));
+	set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
 }
 
 /*

+ 24 - 1
arch/arm64/include/asm/pgtable-hwdef.h

@@ -16,6 +16,8 @@
 #ifndef __ASM_PGTABLE_HWDEF_H
 #define __ASM_PGTABLE_HWDEF_H
 
+#include <asm/memory.h>
+
 /*
  * Number of page-table levels required to address 'va_bits' wide
  * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
@@ -166,6 +168,14 @@
 #define PTE_UXN			(_AT(pteval_t, 1) << 54)	/* User XN */
 #define PTE_HYP_XN		(_AT(pteval_t, 1) << 54)	/* HYP XN */
 
+#define PTE_ADDR_LOW		(((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define PTE_ADDR_HIGH		(_AT(pteval_t, 0xf) << 12)
+#define PTE_ADDR_MASK		(PTE_ADDR_LOW | PTE_ADDR_HIGH)
+#else
+#define PTE_ADDR_MASK		PTE_ADDR_LOW
+#endif
+
 /*
  * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
  */
@@ -196,7 +206,7 @@
 /*
  * Highest possible physical address supported.
  */
-#define PHYS_MASK_SHIFT		(48)
+#define PHYS_MASK_SHIFT		(CONFIG_ARM64_PA_BITS)
 #define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)
 
 /*
@@ -272,10 +282,23 @@
 #define TCR_TG1_4K		(UL(2) << TCR_TG1_SHIFT)
 #define TCR_TG1_64K		(UL(3) << TCR_TG1_SHIFT)
 
+#define TCR_IPS_SHIFT		32
+#define TCR_IPS_MASK		(UL(7) << TCR_IPS_SHIFT)
 #define TCR_A1			(UL(1) << 22)
 #define TCR_ASID16		(UL(1) << 36)
 #define TCR_TBI0		(UL(1) << 37)
 #define TCR_HA			(UL(1) << 39)
 #define TCR_HD			(UL(1) << 40)
 
+/*
+ * TTBR.
+ */
+#ifdef CONFIG_ARM64_PA_BITS_52
+/*
+ * This should be GENMASK_ULL(47, 2).
+ * TTBR_ELx[1] is RES0 in this configuration.
+ */
+#define TTBR_BADDR_MASK_52	(((UL(1) << 46) - 1) << 2)
+#endif
+
 #endif
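
The comment above notes that TTBR_BADDR_MASK_52 "should be GENMASK_ULL(47, 2)"; it is open-coded, presumably because this header is also pulled into assembly, where GENMASK_ULL() is not usable. A quick compile-time check that the open-coded form covers exactly bits [47:2] (EX_ names are illustrative):

/* Sketch: the open-coded TTBR_BADDR_MASK_52 above equals GENMASK_ULL(47, 2). */
#define EX_GENMASK_ULL(h, l)	((~0ULL << (l)) & (~0ULL >> (63 - (h))))
#define EX_TTBR_BADDR_MASK_52	(((1ULL << 46) - 1) << 2)

_Static_assert(EX_TTBR_BADDR_MASK_52 == EX_GENMASK_ULL(47, 2),
	       "TTBR_BADDR_MASK_52 covers bits [47:2]");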

+ 44 - 11
arch/arm64/include/asm/pgtable.h

@@ -57,9 +57,22 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
 
-#define pte_pfn(pte)		((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
+/*
+ * Macros to convert between a physical address and its placement in a
+ * page table entry, taking care of 52-bit addresses.
+ */
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define __pte_to_phys(pte)	\
+	((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36))
+#define __phys_to_pte_val(phys)	(((phys) | ((phys) >> 36)) & PTE_ADDR_MASK)
+#else
+#define __pte_to_phys(pte)	(pte_val(pte) & PTE_ADDR_MASK)
+#define __phys_to_pte_val(phys)	(phys)
+#endif
 
-#define pfn_pte(pfn,prot)	(__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pte_pfn(pte)		(__pte_to_phys(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn,prot)	\
+	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
 #define pte_none(pte)		(!pte_val(pte))
 #define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0))
@@ -284,6 +297,11 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
 
 #define __HAVE_ARCH_PTE_SPECIAL
 
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+	return __pte(pgd_val(pgd));
+}
+
 static inline pte_t pud_pte(pud_t pud)
 {
 	return __pte(pud_val(pud));
@@ -349,15 +367,24 @@ static inline int pmd_protnone(pmd_t pmd)
 
 #define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
 
-#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
-#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define __pmd_to_phys(pmd)	__pte_to_phys(pmd_pte(pmd))
+#define __phys_to_pmd_val(phys)	__phys_to_pte_val(phys)
+#define pmd_pfn(pmd)		((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	__pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
 #define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
 
 #define pud_write(pud)		pte_write(pud_pte(pud))
-#define pud_pfn(pud)		(((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+
+#define __pud_to_phys(pud)	__pte_to_phys(pud_pte(pud))
+#define __phys_to_pud_val(phys)	__phys_to_pte_val(phys)
+#define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
+#define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
 #define set_pmd_at(mm, addr, pmdp, pmd)	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
 
+#define __pgd_to_phys(pgd)	__pte_to_phys(pgd_pte(pgd))
+#define __phys_to_pgd_val(phys)	__phys_to_pte_val(phys)
+
 #define __pgprot_modify(prot,mask,bits) \
 	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
 
@@ -408,7 +435,7 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 {
-	return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
+	return __pmd_to_phys(pmd);
 }
 
 /* Find an entry in the third-level page table. */
@@ -426,7 +453,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 #define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
 #define pte_clear_fixmap()		clear_fixmap(FIX_PTE)
 
-#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd)))
 
 /* use ONLY for statically allocated translation tables */
 #define pte_offset_kimg(dir,addr)	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
@@ -459,7 +486,7 @@ static inline void pud_clear(pud_t *pudp)
 
 static inline phys_addr_t pud_page_paddr(pud_t pud)
 {
-	return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
+	return __pud_to_phys(pud);
 }
 
 /* Find an entry in the second-level page table. */
@@ -472,7 +499,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 #define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
 #define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
 
-#define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
+#define pud_page(pud)		pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))
 
 /* use ONLY for statically allocated translation tables */
 #define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
@@ -511,7 +538,7 @@ static inline void pgd_clear(pgd_t *pgdp)
 
 static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 {
-	return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
+	return __pgd_to_phys(pgd);
 }
 
 /* Find an entry in the first-level page table. */
@@ -524,7 +551,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 #define pud_set_fixmap_offset(pgd, addr)	pud_set_fixmap(pud_offset_phys(pgd, addr))
 #define pud_clear_fixmap()		clear_fixmap(FIX_PUD)
 
-#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
+#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
 
 /* use ONLY for statically allocated translation tables */
 #define pud_offset_kimg(dir,addr)	((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
@@ -734,6 +761,12 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define kc_vaddr_to_offset(v)	((v) & ~VA_START)
 #define kc_offset_to_vaddr(o)	((o) | VA_START)
 
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define phys_to_ttbr(addr)	(((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
+#else
+#define phys_to_ttbr(addr)	(addr)
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
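
__phys_to_pte_val() folds PA bits [51:48] into descriptor bits [15:12] (the shift by 36), while phys_to_ttbr() folds them into TTBR bits [5:2] (the shift by 46); each is the exact inverse of the corresponding *_to_phys direction for suitably aligned addresses. A self-contained round-trip sketch using illustrative copies of the masks and macros above (64K pages assumed; EX_/ex_ names are not from the series):

#include <stdint.h>
#include <stdio.h>

/* Illustrative copies of the 52-bit conversions defined above. */
#define EX_PTE_ADDR_LOW		((((uint64_t)1 << (48 - 16)) - 1) << 16)
#define EX_PTE_ADDR_HIGH	((uint64_t)0xf << 12)
#define EX_PTE_ADDR_MASK	(EX_PTE_ADDR_LOW | EX_PTE_ADDR_HIGH)
#define EX_TTBR_BADDR_MASK_52	((((uint64_t)1 << 46) - 1) << 2)

#define ex_phys_to_pte(phys)	(((phys) | ((phys) >> 36)) & EX_PTE_ADDR_MASK)
#define ex_pte_to_phys(pte)	(((pte) & EX_PTE_ADDR_LOW) | (((pte) & EX_PTE_ADDR_HIGH) << 36))
#define ex_phys_to_ttbr(phys)	(((phys) | ((phys) >> 46)) & EX_TTBR_BADDR_MASK_52)

int main(void)
{
	uint64_t pa = 0x000f123456780000ULL;	/* a 52-bit, 64K-aligned PA */

	/* PA[47:16] stays in place, PA[51:48] lands in PTE bits [15:12]. */
	printf("pte bits:      %#llx\n", (unsigned long long)ex_phys_to_pte(pa));
	printf("round trip ok: %d\n", ex_pte_to_phys(ex_phys_to_pte(pa)) == pa);
	/* For TTBR, PA[51:48] lands in bits [5:2] instead. */
	printf("ttbr bits:     %#llx\n", (unsigned long long)ex_phys_to_ttbr(pa));
	return 0;
}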

+ 1 - 1
arch/arm64/include/asm/sparsemem.h

@@ -17,7 +17,7 @@
 #define __ASM_SPARSEMEM_H
 
 #ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS	48
+#define MAX_PHYSMEM_BITS	CONFIG_ARM64_PA_BITS
 #define SECTION_SIZE_BITS	30
 #endif
 

+ 8 - 0
arch/arm64/include/asm/sysreg.h

@@ -471,6 +471,14 @@
 #define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0
 #define ID_AA64MMFR0_TGRAN16_NI		0x0
 #define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1
+#define ID_AA64MMFR0_PARANGE_48		0x5
+#define ID_AA64MMFR0_PARANGE_52		0x6
+
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define ID_AA64MMFR0_PARANGE_MAX	ID_AA64MMFR0_PARANGE_52
+#else
+#define ID_AA64MMFR0_PARANGE_MAX	ID_AA64MMFR0_PARANGE_48
+#endif
 
 /* id_aa64mmfr1 */
 #define ID_AA64MMFR1_PAN_SHIFT		20

+ 80 - 42
arch/arm64/kernel/head.S

@@ -147,6 +147,26 @@ preserve_boot_args:
 	b	__inval_dcache_area		// tail call
 ENDPROC(preserve_boot_args)
 
+/*
+ * Macro to arrange a physical address in a page table entry, taking care of
+ * 52-bit addresses.
+ *
+ * Preserves:	phys
+ * Returns:	pte
+ */
+	.macro	phys_to_pte, phys, pte
+#ifdef CONFIG_ARM64_PA_BITS_52
+	/*
+	 * We assume \phys is 64K aligned and this is guaranteed by only
+	 * supporting this configuration with 64K pages.
+	 */
+	orr	\pte, \phys, \phys, lsr #36
+	and	\pte, \pte, #PTE_ADDR_MASK
+#else
+	mov	\pte, \phys
+#endif
+	.endm
+
 /*
  * Macro to create a table entry to the next page.
  *
@@ -156,14 +176,16 @@ ENDPROC(preserve_boot_args)
  *	ptrs:	#imm pointers per table page
  *
  * Preserves:	virt
- * Corrupts:	tmp1, tmp2
+ * Corrupts:	ptrs, tmp1, tmp2
  * Returns:	tbl -> next level table page address
  */
 	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
-	lsr	\tmp1, \virt, #\shift
-	and	\tmp1, \tmp1, #\ptrs - 1	// table index
-	add	\tmp2, \tbl, #PAGE_SIZE
+	add	\tmp1, \tbl, #PAGE_SIZE
+	phys_to_pte \tmp1, \tmp2
 	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type
+	lsr	\tmp1, \virt, #\shift
+	sub	\ptrs, \ptrs, #1
+	and	\tmp1, \tmp1, \ptrs		// table index
 	str	\tmp2, [\tbl, \tmp1, lsl #3]
 	add	\tbl, \tbl, #PAGE_SIZE		// next level table page
 	.endm
@@ -173,15 +195,17 @@ ENDPROC(preserve_boot_args)
  * block entry in the next level (tbl) for the given virtual address.
  *
  * Preserves:	tbl, next, virt
- * Corrupts:	tmp1, tmp2
+ * Corrupts:	ptrs_per_pgd, tmp1, tmp2
  */
-	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
-	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
+	.macro	create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2
+	create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
 #if SWAPPER_PGTABLE_LEVELS > 3
-	create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
+	mov	\ptrs_per_pgd, PTRS_PER_PUD
+	create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
 #endif
 #if SWAPPER_PGTABLE_LEVELS > 2
-	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+	mov	\ptrs_per_pgd, PTRS_PER_PTE
+	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
 #endif
 	.endm
 
@@ -190,16 +214,17 @@ ENDPROC(preserve_boot_args)
  * virtual range (inclusive).
  *
  * Preserves:	tbl, flags
- * Corrupts:	phys, start, end, pstate
+ * Corrupts:	phys, start, end, tmp, pstate
  */
-	.macro	create_block_map, tbl, flags, phys, start, end
-	lsr	\phys, \phys, #SWAPPER_BLOCK_SHIFT
+	.macro	create_block_map, tbl, flags, phys, start, end, tmp
 	lsr	\start, \start, #SWAPPER_BLOCK_SHIFT
 	and	\start, \start, #PTRS_PER_PTE - 1	// table index
-	orr	\phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT	// table entry
+	bic	\phys, \phys, #SWAPPER_BLOCK_SIZE - 1
 	lsr	\end, \end, #SWAPPER_BLOCK_SHIFT
 	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
-9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
+9999:	phys_to_pte \phys, \tmp
+	orr	\tmp, \tmp, \flags			// table entry
+	str	\tmp, [\tbl, \start, lsl #3]		// store the entry
 	add	\start, \start, #1			// next entry
 	add	\phys, \phys, #SWAPPER_BLOCK_SIZE		// next block
 	cmp	\start, \end
@@ -244,26 +269,13 @@ __create_page_tables:
 	adrp	x0, idmap_pg_dir
 	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start)
 
-#ifndef CONFIG_ARM64_VA_BITS_48
-#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
-#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))
-
-	/*
-	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
-	 * created that covers system RAM if that is located sufficiently high
-	 * in the physical address space. So for the ID map, use an extended
-	 * virtual range in that case, by configuring an additional translation
-	 * level.
-	 * First, we have to verify our assumption that the current value of
-	 * VA_BITS was chosen such that all translation levels are fully
-	 * utilised, and that lowering T0SZ will always result in an additional
-	 * translation level to be configured.
-	 */
-#if VA_BITS != EXTRA_SHIFT
-#error "Mismatch between VA_BITS and page size/number of translation levels"
-#endif
-
 	/*
+	 * VA_BITS may be too small to allow for an ID mapping to be created
+	 * that covers system RAM if that is located sufficiently high in the
+	 * physical address space. So for the ID map, use an extended virtual
+	 * range in that case, and configure an additional translation level
+	 * if needed.
+	 *
 	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
 	 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
 	 * this number conveniently equals the number of leading zeroes in
@@ -272,21 +284,44 @@ __create_page_tables:
 	adrp	x5, __idmap_text_end
 	clz	x5, x5
 	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough?
-	b.ge	1f			// .. then skip additional level
+	b.ge	1f			// .. then skip VA range extension
 
 	adr_l	x6, idmap_t0sz
 	str	x5, [x6]
 	dmb	sy
 	dc	ivac, x6		// Invalidate potentially stale cache line
 
-	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
-1:
+#if (VA_BITS < 48)
+#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
+
+	/*
+	 * If VA_BITS < 48, we have to configure an additional table level.
+	 * First, we have to verify our assumption that the current value of
+	 * VA_BITS was chosen such that all translation levels are fully
+	 * utilised, and that lowering T0SZ will always result in an additional
+	 * translation level to be configured.
+	 */
+#if VA_BITS != EXTRA_SHIFT
+#error "Mismatch between VA_BITS and page size/number of translation levels"
 #endif
 
-	create_pgd_entry x0, x3, x5, x6
+	mov	x4, EXTRA_PTRS
+	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
+#else
+	/*
+	 * If VA_BITS == 48, we don't have to configure an additional
+	 * translation level, but the top-level table has more entries.
+	 */
+	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
+	str_l	x4, idmap_ptrs_per_pgd, x5
+#endif
+1:
+	ldr_l	x4, idmap_ptrs_per_pgd
+	create_pgd_entry x0, x3, x4, x5, x6
 	mov	x5, x3				// __pa(__idmap_text_start)
 	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)
-	create_block_map x0, x7, x3, x5, x6
+	create_block_map x0, x7, x3, x5, x6, x4
 
 	/*
 	 * Map the kernel image (starting with PHYS_OFFSET).
@@ -294,12 +329,13 @@ __create_page_tables:
 	adrp	x0, swapper_pg_dir
 	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
-	create_pgd_entry x0, x5, x3, x6
+	mov	x4, PTRS_PER_PGD
+	create_pgd_entry x0, x5, x4, x3, x6
 	adrp	x6, _end			// runtime __pa(_end)
 	adrp	x3, _text			// runtime __pa(_text)
 	sub	x6, x6, x3			// _end - _text
 	add	x6, x6, x5			// runtime __va(_end)
-	create_block_map x0, x7, x3, x5, x6
+	create_block_map x0, x7, x3, x5, x6, x4
 
 	/*
 	 * Since the page tables have been populated with non-cacheable
@@ -679,8 +715,10 @@ ENTRY(__enable_mmu)
 	update_early_cpu_boot_status 0, x1, x2
 	adrp	x1, idmap_pg_dir
 	adrp	x2, swapper_pg_dir
-	msr	ttbr0_el1, x1			// load TTBR0
-	msr	ttbr1_el1, x2			// load TTBR1
+	phys_to_ttbr x1, x3
+	phys_to_ttbr x2, x4
+	msr	ttbr0_el1, x3			// load TTBR0
+	msr	ttbr1_el1, x4			// load TTBR1
 	isb
 	msr	sctlr_el1, x0
 	isb
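
The reworked ID-map setup in head.S now distinguishes two cases: for VA_BITS < 48 an extra level is created whose table holds EXTRA_PTRS entries, while for VA_BITS == 48 the existing top level is simply widened to 1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) entries via idmap_ptrs_per_pgd. Worked numbers for two 64K-page configurations, derived from the definitions rather than taken from the patch:

/* Sketch: concrete EXTRA_PTRS / idmap_ptrs_per_pgd values, PHYS_MASK_SHIFT = 52. */

/* 64K pages, VA_BITS = 42 (two levels, PGDIR_SHIFT = 16 + 13 = 29): */
#define EX1_EXTRA_SHIFT	(29 + 16 - 3)			/* == 42 == VA_BITS          */
#define EX1_EXTRA_PTRS	(1 << (52 - EX1_EXTRA_SHIFT))	/* 1024-entry extra level    */

/* 64K pages, VA_BITS = 48 (three levels, PGDIR_SHIFT = 42, PTRS_PER_PGD = 64): */
#define EX2_IDMAP_PTRS	(1 << (52 - 42))		/* top level widened to 1024 */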

+ 7 - 5
arch/arm64/kernel/hibernate-asm.S

@@ -33,12 +33,14 @@
  * Even switching to our copied tables will cause a changed output address at
  * each stage of the walk.
  */
-.macro break_before_make_ttbr_switch zero_page, page_table
-	msr	ttbr1_el1, \zero_page
+.macro break_before_make_ttbr_switch zero_page, page_table, tmp
+	phys_to_ttbr \zero_page, \tmp
+	msr	ttbr1_el1, \tmp
 	isb
 	tlbi	vmalle1
 	dsb	nsh
-	msr	ttbr1_el1, \page_table
+	phys_to_ttbr \page_table, \tmp
+	msr	ttbr1_el1, \tmp
 	isb
 .endm
 
@@ -78,7 +80,7 @@ ENTRY(swsusp_arch_suspend_exit)
 	 * We execute from ttbr0, change ttbr1 to our copied linear map tables
 	 * with a break-before-make via the zero page
 	 */
-	break_before_make_ttbr_switch	x5, x0
+	break_before_make_ttbr_switch	x5, x0, x6
 
 	mov	x21, x1
 	mov	x30, x2
@@ -109,7 +111,7 @@ ENTRY(swsusp_arch_suspend_exit)
 	dsb	ish		/* wait for PoU cleaning to finish */
 
 	/* switch to the restored kernels page tables */
-	break_before_make_ttbr_switch	x25, x21
+	break_before_make_ttbr_switch	x25, x21, x6
 
 	ic	ialluis
 	dsb	ish

+ 2 - 3
arch/arm64/kernel/hibernate.c

@@ -247,8 +247,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
 	}
 
 	pte = pte_offset_kernel(pmd, dst_addr);
-	set_pte(pte, __pte(virt_to_phys((void *)dst) |
-			 pgprot_val(PAGE_KERNEL_EXEC)));
+	set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
 
 	/*
 	 * Load our new page tables. A strict BBM approach requires that we
@@ -264,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
 	 */
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
-	write_sysreg(virt_to_phys(pgd), ttbr0_el1);
+	write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
 	isb();
 
 	*phys_dst_addr = virt_to_phys((void *)dst);

+ 12 - 14
arch/arm64/kvm/hyp-init.S

@@ -63,7 +63,8 @@ __do_hyp_init:
 	cmp	x0, #HVC_STUB_HCALL_NR
 	b.lo	__kvm_handle_stub_hvc
 
-	msr	ttbr0_el2, x0
+	phys_to_ttbr x0, x4
+	msr	ttbr0_el2, x4
 
 	mrs	x4, tcr_el1
 	ldr	x5, =TCR_EL2_MASK
@@ -71,30 +72,27 @@ __do_hyp_init:
 	mov	x5, #TCR_EL2_RES1
 	orr	x4, x4, x5
 
-#ifndef CONFIG_ARM64_VA_BITS_48
 	/*
-	 * If we are running with VA_BITS < 48, we may be running with an extra
-	 * level of translation in the ID map. This is only the case if system
-	 * RAM is out of range for the currently configured page size and number
-	 * of translation levels, in which case we will also need the extra
-	 * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
+	 * The ID map may be configured to use an extended virtual address
+	 * range. This is only the case if system RAM is out of range for the
+	 * currently configured page size and VA_BITS, in which case we will
+	 * also need the extended virtual range for the HYP ID map, or we won't
+	 * be able to enable the EL2 MMU.
 	 *
 	 * However, at EL2, there is only one TTBR register, and we can't switch
 	 * between translation tables *and* update TCR_EL2.T0SZ at the same
-	 * time. Bottom line: we need the extra level in *both* our translation
-	 * tables.
+	 * time. Bottom line: we need to use the extended range with *both* our
+	 * translation tables.
 	 *
 	 * So use the same T0SZ value we use for the ID map.
 	 */
 	ldr_l	x5, idmap_t0sz
 	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
-#endif
+
 	/*
-	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
-	 * TCR_EL2.
+	 * Set the PS bits in TCR_EL2.
 	 */
-	mrs	x5, ID_AA64MMFR0_EL1
-	bfi	x4, x5, #16, #3
+	tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6
 
 	msr	tcr_el2, x4
 

+ 2 - 0
arch/arm64/kvm/hyp/s2-setup.c

@@ -32,6 +32,8 @@ u32 __hyp_text __init_stage2_translation(void)
 	 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
 	 */
 	parange = read_sysreg(id_aa64mmfr0_el1) & 7;
+	if (parange > ID_AA64MMFR0_PARANGE_MAX)
+		parange = ID_AA64MMFR0_PARANGE_MAX;
 	val |= parange << 16;
 
 	/* Compute the actual PARange... */

+ 10 - 5
arch/arm64/mm/mmu.c

@@ -50,6 +50,7 @@
 #define NO_CONT_MAPPINGS	BIT(1)
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
 
 u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);
@@ -601,8 +602,8 @@ static void __init map_kernel(pgd_t *pgd)
 		 * entry instead.
 		 */
 		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
-			__pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE));
+		pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+			     lm_alias(bm_pmd));
 		pud_clear_fixmap();
 	} else {
 		BUG();
@@ -717,7 +718,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 			if (!p)
 				return -ENOMEM;
 
-			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
+			pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
 		} else
 			vmemmap_verify((pte_t *)pmd, node, addr, next);
 	} while (addr = next, addr != end);
@@ -910,15 +911,19 @@ int __init arch_ioremap_pmd_supported(void)
 
 int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
 {
+	pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
+					pgprot_val(mk_sect_prot(prot)));
 	BUG_ON(phys & ~PUD_MASK);
-	set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 
 int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
 {
+	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
+					pgprot_val(mk_sect_prot(prot)));
 	BUG_ON(phys & ~PMD_MASK);
-	set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 

+ 8 - 0
arch/arm64/mm/pgd.c

@@ -49,6 +49,14 @@ void __init pgd_cache_init(void)
 	if (PGD_SIZE == PAGE_SIZE)
 		return;
 
+#ifdef CONFIG_ARM64_PA_BITS_52
+	/*
+	 * With 52-bit physical addresses, the architecture requires the
+	 * top-level table to be aligned to at least 64 bytes.
+	 */
+	BUILD_BUG_ON(PGD_SIZE < 64);
+#endif
+
 	/*
 	 * Naturally aligned pgds required by the architecture.
 	 */

+ 8 - 7
arch/arm64/mm/proc.S

@@ -143,7 +143,8 @@ ENTRY(cpu_do_switch_mm)
 	bfi	x2, x1, #48, #16		// set the ASID
 	msr	ttbr1_el1, x2			// in TTBR1 (since TCR.A1 is set)
 	isb
-	msr	ttbr0_el1, x0			// now update TTBR0
+	phys_to_ttbr x0, x2
+	msr	ttbr0_el1, x2			// now update TTBR0
 	isb
 	post_ttbr_update_workaround
 	ret
@@ -160,14 +161,16 @@ ENTRY(idmap_cpu_replace_ttbr1)
 	save_and_disable_daif flags=x2
 
 	adrp	x1, empty_zero_page
-	msr	ttbr1_el1, x1
+	phys_to_ttbr x1, x3
+	msr	ttbr1_el1, x3
 	isb
 
 	tlbi	vmalle1
 	dsb	nsh
 	isb
 
-	msr	ttbr1_el1, x0
+	phys_to_ttbr x0, x3
+	msr	ttbr1_el1, x3
 	isb
 
 	restore_daif x2
@@ -230,11 +233,9 @@ ENTRY(__cpu_setup)
 	tcr_set_idmap_t0sz	x10, x9
 
 	/*
-	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
-	 * TCR_EL1.
+	 * Set the IPS bits in TCR_EL1.
 	 */
-	mrs	x9, ID_AA64MMFR0_EL1
-	bfi	x10, x9, #32, #3
+	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
 #ifdef CONFIG_ARM64_HW_AFDBM
 	/*
 	 * Hardware update of the Access and Dirty bits.

+ 1 - 1
virt/kvm/arm/arm.c

@@ -509,7 +509,7 @@ static void update_vttbr(struct kvm *kvm)
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
-	kvm->arch.vttbr = pgd_phys | vmid;
+	kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
 }

+ 8 - 2
virt/kvm/arm/mmu.c

@@ -629,14 +629,20 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	unsigned long addr, next;
+	unsigned long addr, next, ptrs_per_pgd = PTRS_PER_PGD;
 	int err = 0;
 
+	/*
+	 * If it's not the hyp_pgd, fall back to the kvm idmap layout.
+	 */
+	if (pgdp != hyp_pgd)
+		ptrs_per_pgd = __kvm_idmap_ptrs_per_pgd();
+
 	mutex_lock(&kvm_hyp_pgd_mutex);
 	addr = start & PAGE_MASK;
 	end = PAGE_ALIGN(end);
 	do {
-		pgd = pgdp + pgd_index(addr);
+		pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
 
 		if (pgd_none(*pgd)) {
 			pud = pud_alloc_one(NULL, addr);