ARC: mm: PAE40 support

This is the first working implementation of 40-bit physical address
extension on ARCv2.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Vineet Gupta, 10 years ago
Parent commit: 5a364c2a17

+ 15 - 0
arch/arc/Kconfig

@@ -453,6 +453,21 @@ config HIGHMEM
 	  kernel. Enable this to potentially allow access to rest of 2G and PAE
 	  in future
 
+config ARC_HAS_PAE40
+	bool "Support for the 40-bit Physical Address Extension"
+	default n
+	depends on ISA_ARCV2
+	select HIGHMEM
+	help
+	  Enable access to physical memory beyond 4G; only supported on
+	  ARC cores with 40-bit Physical Addressing support
+
+config ARCH_PHYS_ADDR_T_64BIT
+	def_bool ARC_HAS_PAE40
+
+config ARCH_DMA_ADDR_T_64BIT
+	bool
+
 config ARC_CURR_IN_REG
 	bool "Dedicate Register r25 for current_task pointer"
 	default y
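
Note on the Kconfig plumbing: ARC_HAS_PAE40 drives ARCH_PHYS_ADDR_T_64BIT,
which in turn feeds the generic PHYS_ADDR_T_64BIT option used by
include/linux/types.h to pick the width of phys_addr_t. A minimal sketch of
that generic consumer (paraphrased for illustration, not part of this patch):

    #ifdef CONFIG_PHYS_ADDR_T_64BIT
    typedef u64 phys_addr_t;	/* wide enough for 40-bit physical addresses */
    #else
    typedef u32 phys_addr_t;	/* plain 32-bit physical addressing */
    #endif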

+ 2 - 0
arch/arc/include/asm/cache.h

@@ -65,6 +65,7 @@ extern int ioc_exists;
 #if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
 #define ARC_REG_IC_PTAG		0x1E
 #endif
+#define ARC_REG_IC_PTAG_HI	0x1F
 
 /* Bit val in IC_CTRL */
 #define IC_CTRL_CACHE_DISABLE   0x1
@@ -77,6 +78,7 @@ extern int ioc_exists;
 #define ARC_REG_DC_FLSH		0x4B
 #define ARC_REG_DC_FLDL		0x4C
 #define ARC_REG_DC_PTAG		0x5C
+#define ARC_REG_DC_PTAG_HI	0x5F
 
 /* Bit val in DC_CTRL */
 #define DC_CTRL_INV_MODE_FLUSH  0x40

+ 7 - 0
arch/arc/include/asm/mmu.h

@@ -24,6 +24,7 @@
 #if (CONFIG_ARC_MMU_VER < 4)
 #define ARC_REG_TLBPD0		0x405
 #define ARC_REG_TLBPD1		0x406
+#define ARC_REG_TLBPD1HI	0	/* Dummy: allows code sharing with ARC700 */
 #define ARC_REG_TLBINDEX	0x407
 #define ARC_REG_TLBCOMMAND	0x408
 #define ARC_REG_PID		0x409
@@ -31,6 +32,7 @@
 #else
 #define ARC_REG_TLBPD0		0x460
 #define ARC_REG_TLBPD1		0x461
+#define ARC_REG_TLBPD1HI	0x463
 #define ARC_REG_TLBINDEX	0x464
 #define ARC_REG_TLBCOMMAND	0x465
 #define ARC_REG_PID		0x468
@@ -83,6 +85,11 @@ void arc_mmu_init(void);
 extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
 void read_decode_mmu_bcr(void);
 
+static inline int is_pae40_enabled(void)
+{
+	return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif
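
Note: is_pae40_enabled() is a compile-time predicate — IS_ENABLED() expands to
a constant 0 or 1 — so every `if (is_pae40_enabled())` branch added by this
patch is dead-code-eliminated from non-PAE40 kernels. A hypothetical caller,
as a sketch of the pattern (the dummy ARC_REG_TLBPD1HI value of 0 above is
what keeps such shared code compiling on ARC700 as well):

    static inline void demo_clear_pd1(void)
    {
    	write_aux_reg(ARC_REG_TLBPD1, 0);
    	if (is_pae40_enabled())			/* folds to if (0) / if (1) */
    		write_aux_reg(ARC_REG_TLBPD1HI, 0);	/* dropped when =n */
    }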

+ 4 - 0
arch/arc/include/asm/page.h

@@ -56,7 +56,11 @@ typedef struct {
 
 #else /* !STRICT_MM_TYPECHECKS */
 
+#ifdef CONFIG_ARC_HAS_PAE40
+typedef unsigned long long pte_t;
+#else
 typedef unsigned long pte_t;
+#endif
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
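
With PAE40 a PTE must carry paddr[39:32], hence the 64-bit pte_t; pgd_t stays
a native word, since page-table pointers themselves remain below 4G. An
illustrative layout of the wide PTE (values assumed: the default 8 KB pages,
i.e. PAGE_SHIFT == 13):

    /*
     *  63        40 39       32 31            13 12           0
     * +------------+-----------+----------------+--------------+
     * |  (unused)  | pa[39:32] |    pa[31:13]   | hw/sw flags  |
     * +------------+-----------+----------------+--------------+
     */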
 

+ 3 - 3
arch/arc/include/asm/pgalloc.h

@@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
 
 static inline int __get_order_pgd(void)
 {
-	return get_order(PTRS_PER_PGD * 4);
+	return get_order(PTRS_PER_PGD * sizeof(pgd_t));
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline int __get_order_pte(void)
 {
-	return get_order(PTRS_PER_PTE * 4);
+	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -110,7 +110,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
 	if (!pte_pg)
 		return 0;
-	memzero((void *)pte_pg, PTRS_PER_PTE * 4);
+	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
 	page = virt_to_page(pte_pg);
 	if (!pgtable_page_ctor(page)) {
 		__free_page(page);
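
The hard-coded "* 4" assumed 4-byte table entries; with 8-byte PTEs under
PAE40 it would size and zero only half of each PTE table. Worked numbers,
with assumed values (8 KB pages, PTRS_PER_PTE == 256):

    256 entries * 4 bytes = 1 KB   (old math: memzero clears only 1 KB)
    256 entries * 8 bytes = 2 KB   (PAE40 reality: the upper half would
                                    otherwise be left as stale heap garbage)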

+ 7 - 1
arch/arc/include/asm/pgtable.h

@@ -134,7 +134,12 @@
 /* Masks for actual TLB "PD"s */
 #define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+#else
 #define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
+#endif
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
@@ -272,7 +277,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 
 #define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
 #define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn, prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pfn_pte(pfn, prot)	(__pte(((pte_t)(pfn) << PAGE_SHIFT) | \
+				 pgprot_val(prot)))
 #define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
 /*
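
The new (pte_t) cast in pfn_pte() is load-bearing: without it the shift is
done in 32-bit unsigned long arithmetic and paddr bits 32..39 are silently
dropped. A standalone userspace sketch of the failure mode (PAGE_SHIFT == 13
assumed):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint32_t pfn = 0x80000;			/* page frame at exactly 4 GB */
    	uint64_t wrong = pfn << 13;		/* 32-bit shift wraps to 0 */
    	uint64_t right = (uint64_t)pfn << 13;	/* 0x100000000 as intended */

    	printf("wrong=%#llx right=%#llx\n",
    	       (unsigned long long)wrong, (unsigned long long)right);
    	return 0;
    }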

+ 40 - 4
arch/arc/mm/cache.c

@@ -253,6 +253,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+/*
+ * For ARC700 MMUv3 I-cache and D-cache flushes
+ * Also reused for HS38 aliasing I-cache configuration
+ */
 static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op)
@@ -289,6 +293,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	if (full_page)
 		write_aux_reg(aux_tag, paddr);
 
+	/*
+	 * This is technically for MMU v4, using the MMU v3 programming model
+	 * Special work for HS38 aliasing I-cache configuration with PAE40
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and needs to be written before the lower 32 bits)
+	 * Note that PTAG_HI is hoisted outside the line loop
+	 */
+	if (is_pae40_enabled() && op == OP_INV_IC)
+		write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+
 	while (num_lines-- > 0) {
 		if (!full_page) {
 			write_aux_reg(aux_tag, paddr);
@@ -301,11 +315,17 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 }
 
 /*
- * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
- * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
+ * Here's how cache ops are implemented
+ *
+ *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
+ *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
+ *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
+ *    respectively, similar to MMU v3 programming model, hence
+ *    __cache_line_loop_v3() is used)
  *
- * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
- * specified in PTAG (similar to MMU v3)
+ * If PAE40 is enabled, independent of aliasing considerations, the higher
+ * bits need to be written into PTAG_HI
  */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -335,6 +355,22 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 
 	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 
+	/*
+	 * For HS38 PAE40 configuration
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and needs to be written before the lower 32 bits)
+	 */
+	if (is_pae40_enabled()) {
+		if (cacheop == OP_INV_IC)
+			/*
+			 * Non aliasing I-cache in HS38,
+			 * aliasing I-cache handled in __cache_line_loop_v3()
+			 */
+			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+		else
+			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+	}
+
 	while (num_lines-- > 0) {
 		write_aux_reg(aux_cmd, paddr);
 		paddr += L1_CACHE_BYTES;
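
The PTAG_HI writes above are hoisted outside the line loops because
paddr[39:32] cannot change across the lines covered by one call. A sketch of
the per-op ordering the comments describe, for a single D-cache line
(ARC_REG_DC_IVDL being the line-invalidate command register from cache.h):
the high bits must land before the 32-bit write that actually triggers the
operation.

    write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);  /* paddr[39:32] first */
    write_aux_reg(ARC_REG_DC_IVDL, (u32)paddr);           /* kicks off the op   */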

+ 22 - 5
arch/arc/mm/tlb.c

@@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
@@ -182,7 +186,7 @@ static void utlb_invalidate(void)
 
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	unsigned int idx;
 
@@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	write_aux_reg(ARC_REG_TLBPD0, pd0);
 	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
 }
 
@@ -249,6 +257,10 @@ noinline void local_flush_tlb_all(void)
 
 	/* Load PD0 and PD1 with template for a Blank Entry */
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 
 	for (entry = 0; entry < num_tlb; entry++) {
@@ -503,7 +515,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
 	unsigned long flags;
 	unsigned int asid_or_sasid, rwx;
-	unsigned long pd0, pd1;
+	unsigned long pd0;
+	pte_t pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -785,10 +798,11 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n",
+		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
 		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
 		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-		       p_mmu->u_dtlb, p_mmu->u_itlb);
+		       p_mmu->u_dtlb, p_mmu->u_itlb,
+		       IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
 
 	return buf;
 }
@@ -821,6 +835,9 @@ void arc_mmu_init(void)
 		panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
 		      (unsigned long)TO_MB(HPAGE_PMD_SIZE));
 
+	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
+		panic("Hardware doesn't support PAE40\n");
+
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
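
Note on the tlb_entry_insert() change above: with pd1 now a pte_t (64-bit
under PAE40), the wide value is split across the two PD1 registers. A worked
example with an assumed mapping above the 4 GB line:

    /* pte maps paddr 0x12_3456_8000 (8 KB page, beyond 4 GB) */
    pd1 = 0x1234568000ULL | prot;		/* 64-bit pte_t */
    ARC_REG_TLBPD1   <= (u32)pd1;		/* pa[31:13] | Kr Kw Kx ... | C */
    ARC_REG_TLBPD1HI <= (u64)pd1 >> 32;		/* 0x12 = pa[39:32] */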
 

+ 10 - 1
arch/arc/mm/tlbex.S

@@ -223,12 +223,16 @@ ex_saved_reg1:
 	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
 	; (3) z = (pgtbl + y * 4)
 
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_SIZE_LOG	3	/* 8 == 2 ^ 3 */
+#else
 #define PTE_SIZE_LOG	2	/* 4 == 2 ^ 2 */
+#endif
 
 	; multiply in step (3) above avoided by shifting lesser in step (1)
 	lsr     r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
 	and     r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
-	ld.aw   r0, [r1, r0]		; r0: PTE
+	ld.aw   r0, [r1, r0]            ; r0: PTE (lower word only for PAE40)
 					; r1: PTE ptr
 
 2:
@@ -247,6 +251,7 @@ ex_saved_reg1:
 ;-----------------------------------------------------------------
 ; Convert Linux PTE entry into TLB entry
 ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+;    (for PAE40, a two-word PTE is programmed as a three-word TLB Entry [PD0:PD1:PD1HI])
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
@@ -259,6 +264,10 @@ ex_saved_reg1:
 	or  r3, r3, r2
 
 	sr  r3, [ARC_REG_TLBPD1]    	; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
+#ifdef	CONFIG_ARC_HAS_PAE40
+	ld	r3, [r1, 4]		; paddr[39..32]
+	sr	r3, [ARC_REG_TLBPD1HI]
+#endif
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
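
For reference, a C rendering of the fast-path lookup above (a sketch; vaddr
and pgtbl stand in for r2 and r1): bumping PTE_SIZE_LOG from 2 to 3 folds the
doubled sizeof(pte_t) into the same shift-and-mask, and the PAE40 high word
is then a plain second load at offset 4, as CONV_PTE_TO_TLB does with
`ld r3, [r1, 4]`.

    static u32 *demo_pte_lookup(unsigned long vaddr, unsigned long pgtbl)
    {
    	unsigned long off = (vaddr >> (PAGE_SHIFT - PTE_SIZE_LOG)) &
    			    ((PTRS_PER_PTE - 1) << PTE_SIZE_LOG);
    	return (u32 *)(pgtbl + off);	/* ld.aw also writes this back to r1;
    					 * [0] = flags + pa[31:13] -> PD0/PD1,
    					 * [1] = pa[39:32]         -> PD1HI  */
    }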