
ARC: mm: PAE40 support

This is the first working implementation of 40-bit physical address
extension on ARCv2.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
commit 5a364c2a17
Author: Vineet Gupta
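At a glance, PAE40 widens pte_t to 64 bits and splits the physical address across the existing low word (TLBPD1, cache PTAG) and a new *_HI auxiliary register per unit. A minimal standalone sketch (illustrative values, not from the patch) of the split that the rest of the diff programs:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t paddr = 0x1234567890ULL;	/* example 40-bit physical address */

	uint32_t lo = (uint32_t)paddr;		/* bits [31:0]  -> TLBPD1 / *_PTAG    */
	uint32_t hi = (uint32_t)(paddr >> 32);	/* bits [39:32] -> TLBPD1HI / *_PTAG_HI */

	printf("lo=0x%08x hi=0x%02x\n", lo, hi);
	return 0;
}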

+ 15 - 0
arch/arc/Kconfig

@@ -453,6 +453,21 @@ config HIGHMEM
 	  kernel. Enable this to potentially allow access to rest of 2G and PAE
 	  in future
 
+config ARC_HAS_PAE40
+	bool "Support for the 40-bit Physical Address Extension"
+	default n
+	depends on ISA_ARCV2
+	select HIGHMEM
+	help
+	  Enable access to physical memory beyond 4G, only supported on
+	  ARC cores with 40 bit Physical Addressing support
+
+config ARCH_PHYS_ADDR_T_64BIT
+	def_bool ARC_HAS_PAE40
+
+config ARCH_DMA_ADDR_T_64BIT
+	bool
+
 config ARC_CURR_IN_REG
 	bool "Dedicate Register r25 for current_task pointer"
 	default y
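As a usage sketch, the dependency chain above (depends on ISA_ARCV2, select HIGHMEM, def_bool ARC_HAS_PAE40) means a PAE40 build's .config ends up with something like the following fragment (assumed for illustration, not part of the patch):

CONFIG_ISA_ARCV2=y
CONFIG_ARC_HAS_PAE40=y
# pulled in automatically via "select HIGHMEM" and "def_bool ARC_HAS_PAE40"
CONFIG_HIGHMEM=y
CONFIG_ARCH_PHYS_ADDR_T_64BIT=y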

+ 2 - 0
arch/arc/include/asm/cache.h

@@ -65,6 +65,7 @@ extern int ioc_exists;
 #if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
 #define ARC_REG_IC_PTAG		0x1E
 #endif
+#define ARC_REG_IC_PTAG_HI	0x1F
 
 /* Bit val in IC_CTRL */
 #define IC_CTRL_CACHE_DISABLE   0x1
@@ -77,6 +78,7 @@ extern int ioc_exists;
 #define ARC_REG_DC_FLSH		0x4B
 #define ARC_REG_DC_FLDL		0x4C
 #define ARC_REG_DC_PTAG		0x5C
+#define ARC_REG_DC_PTAG_HI	0x5F
 
 /* Bit val in DC_CTRL */
 #define DC_CTRL_INV_MODE_FLUSH  0x40

+ 7 - 0
arch/arc/include/asm/mmu.h

@@ -24,6 +24,7 @@
 #if (CONFIG_ARC_MMU_VER < 4)
 #define ARC_REG_TLBPD0		0x405
 #define ARC_REG_TLBPD1		0x406
+#define ARC_REG_TLBPD1HI	0	/* Dummy: allows code sharing with ARC700 */
 #define ARC_REG_TLBINDEX	0x407
 #define ARC_REG_TLBCOMMAND	0x408
 #define ARC_REG_PID		0x409
@@ -31,6 +32,7 @@
 #else
 #define ARC_REG_TLBPD0		0x460
 #define ARC_REG_TLBPD1		0x461
+#define ARC_REG_TLBPD1HI	0x463
 #define ARC_REG_TLBINDEX	0x464
 #define ARC_REG_TLBCOMMAND	0x465
 #define ARC_REG_PID		0x468
@@ -83,6 +85,11 @@ void arc_mmu_init(void);
 extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
 void read_decode_mmu_bcr(void);
 
+static inline int is_pae40_enabled(void)
+{
+	return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif
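The dummy ARC_REG_TLBPD1HI of 0 on ARC700 exists purely so shared C code compiles on both ISAs: is_pae40_enabled() returns the compile-time constant IS_ENABLED(CONFIG_ARC_HAS_PAE40), so guarded writes are dead-code-eliminated when the option is off and register 0 is never touched at runtime. A self-contained model of that pattern (stubbed write_aux_reg, illustrative only):

#include <stdint.h>
#include <stdio.h>

#define CONFIG_ARC_HAS_PAE40	1	/* flip to 0 to model an ARC700 build */

static void write_aux_reg(uint32_t reg, uint32_t val)	/* stub for the real aux-reg write */
{
	printf("sr 0x%03x <- 0x%x\n", reg, val);
}

static inline int is_pae40_enabled(void)
{
	return CONFIG_ARC_HAS_PAE40;	/* models IS_ENABLED(): a compile-time constant */
}

int main(void)
{
	write_aux_reg(0x461, 0);		/* ARC_REG_TLBPD1 */
	if (is_pae40_enabled())			/* branch folds away when the constant is 0 */
		write_aux_reg(0x463, 0);	/* ARC_REG_TLBPD1HI */
	return 0;
}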

+ 4 - 0
arch/arc/include/asm/page.h

@@ -56,7 +56,11 @@ typedef struct {
 
 #else /* !STRICT_MM_TYPECHECKS */
 
+#ifdef CONFIG_ARC_HAS_PAE40
+typedef unsigned long long pte_t;
+#else
 typedef unsigned long pte_t;
+#endif
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
 

+ 3 - 3
arch/arc/include/asm/pgalloc.h

@@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
 
 static inline int __get_order_pgd(void)
 {
-	return get_order(PTRS_PER_PGD * 4);
+	return get_order(PTRS_PER_PGD * sizeof(pgd_t));
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline int __get_order_pte(void)
 {
-	return get_order(PTRS_PER_PTE * 4);
+	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -110,7 +110,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
 	if (!pte_pg)
 		return 0;
-	memzero((void *)pte_pg, PTRS_PER_PTE * 4);
+	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
 	page = virt_to_page(pte_pg);
 	if (!pgtable_page_ctor(page)) {
 		__free_page(page);
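The three sizeof() substitutions above are not cosmetic: under PAE40, pte_t grows from 4 to 8 bytes, so the hardcoded "* 4" would under-allocate and under-clear PTE tables. A quick standalone check of the size math (PTRS_PER_PTE value assumed for illustration):

#include <stdio.h>

int main(void)
{
	unsigned long ptrs_per_pte = 256;	/* illustrative value, not taken from the patch */

	printf("hardcoded 4 : %lu bytes\n", ptrs_per_pte * 4);	/* old: wrong once pte_t is 8 bytes */
	printf("sizeof(u64) : %lu bytes\n",
	       ptrs_per_pte * sizeof(unsigned long long));	/* new: tracks the real entry size */
	return 0;
}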

+ 7 - 1
arch/arc/include/asm/pgtable.h

@@ -134,7 +134,12 @@
 /* Masks for actual TLB "PD"s */
 #define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+#else
 #define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
+#endif
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
@@ -272,7 +277,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 
 #define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
 #define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn, prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pfn_pte(pfn, prot)	(__pte(((pte_t)(pfn) << PAGE_SHIFT) | \
+				 pgprot_val(prot)))
 #define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
 /*
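The added (pte_t) cast in pfn_pte() is the load-bearing fix: without it, (pfn) << PAGE_SHIFT is evaluated in 32-bit arithmetic on a 32-bit kernel, silently dropping any bits above 2^32. A standalone demonstration (PAGE_SHIFT of 13 assumed, matching ARC's default 8K pages):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 13

int main(void)
{
	uint32_t pfn = 0x800000;	/* a frame whose physical address lies above 4 GB */

	uint32_t bad  = pfn << PAGE_SHIFT;		/* old behaviour: truncates to 0 */
	uint64_t good = (uint64_t)pfn << PAGE_SHIFT;	/* widen first, as pfn_pte now does */

	printf("old: 0x%08x\nnew: 0x%010llx\n", bad, (unsigned long long)good);
	return 0;
}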

+ 40 - 4
arch/arc/mm/cache.c

@@ -253,6 +253,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+/*
+ * For ARC700 MMUv3 I-cache and D-cache flushes
+ * Also reused for HS38 aliasing I-cache configuration
+ */
 static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op)
@@ -289,6 +293,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	if (full_page)
 		write_aux_reg(aux_tag, paddr);
 
+	/*
+	 * This is technically for MMU v4, using the MMU v3 programming model
+	 * Special work for HS38 aliasing I-cache configuration with PAE40
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and needs to be written before the lower 32 bits)
+	 * Note that PTAG_HI is hoisted outside the line loop
+	 */
+	if (is_pae40_enabled() && op == OP_INV_IC)
+		write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+
 	while (num_lines-- > 0) {
 		if (!full_page) {
 			write_aux_reg(aux_tag, paddr);
@@ -301,11 +315,17 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 }
 
 /*
- * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
- * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
+ * Here's how cache ops are implemented
+ *
+ *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
+ *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
+ *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
+ *    respectively, similar to MMU v3 programming model, hence
+ *    __cache_line_loop_v3() is used)
  *
- * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
- * specified in PTAG (similar to MMU v3)
+ * If PAE40 is enabled, independent of aliasing considerations, the higher bits
+ * need to be written into PTAG_HI
  */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -335,6 +355,22 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 
 	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 
+	/*
+	 * For HS38 PAE40 configuration
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and needs to be written before the lower 32 bits)
+	 */
+	if (is_pae40_enabled()) {
+		if (cacheop == OP_INV_IC)
+			/*
+			 * Non aliasing I-cache in HS38,
+			 * aliasing I-cache handled in __cache_line_loop_v3()
+			 */
+			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+		else
+			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+	}
+
 	while (num_lines-- > 0) {
 		write_aux_reg(aux_cmd, paddr);
 		paddr += L1_CACHE_BYTES;
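Note how both loops hoist the single PTAG_HI write ahead of the per-line writes, honoring the "upper bits first" ordering while paying for it only once per call. A condensed standalone model of the v4 flow (stub register writes, names and values illustrative):

#include <stdint.h>
#include <stdio.h>

#define L1_CACHE_BYTES 64

static void write_aux_reg(const char *reg, uint32_t val)	/* stub */
{
	printf("%s <- 0x%08x\n", reg, val);
}

int main(void)
{
	uint64_t paddr = 0x1234500000ULL;	/* 40-bit, cache-line aligned */
	int num_lines = 3;

	/* upper 8 bits once, before any low-word write */
	write_aux_reg("DC_PTAG_HI", (uint32_t)(paddr >> 32));

	while (num_lines-- > 0) {
		write_aux_reg("DC_IVDL", (uint32_t)paddr);	/* low 32 bits per line */
		paddr += L1_CACHE_BYTES;
	}
	return 0;
}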

+ 22 - 5
arch/arc/mm/tlb.c

@@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
@@ -182,7 +186,7 @@ static void utlb_invalidate(void)
 
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	unsigned int idx;
 
@@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	write_aux_reg(ARC_REG_TLBPD0, pd0);
 	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
 }
 
@@ -249,6 +257,10 @@ noinline void local_flush_tlb_all(void)
 
 	/* Load PD0 and PD1 with template for a Blank Entry */
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 
 	for (entry = 0; entry < num_tlb; entry++) {
@@ -503,7 +515,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
 	unsigned long flags;
 	unsigned int asid_or_sasid, rwx;
-	unsigned long pd0, pd1;
+	unsigned long pd0;
+	pte_t pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -785,10 +798,11 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n",
+		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
 		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
 		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-		       p_mmu->u_dtlb, p_mmu->u_itlb);
+		       p_mmu->u_dtlb, p_mmu->u_itlb,
+		       IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
 
 	return buf;
 }
@@ -821,6 +835,9 @@ void arc_mmu_init(void)
 		panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
 		      (unsigned long)TO_MB(HPAGE_PMD_SIZE));
 
+	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
+		panic("Hardware doesn't support PAE40\n");
+
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
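Widening tlb_entry_insert()'s pd1 parameter to pte_t is what lets the MMUv4 path carry paddr[39:32] out to hardware; a minimal model of the resulting write sequence (stubbed registers, command step noted in a comment):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pte_t;	/* the PAE40 pte_t per the page.h change */

static void write_aux_reg(const char *reg, uint32_t val)	/* stub */
{
	printf("%-8s <- 0x%08x\n", reg, val);
}

static void tlb_entry_insert(uint32_t pd0, pte_t pd1)
{
	write_aux_reg("TLBPD0", pd0);
	write_aux_reg("TLBPD1", (uint32_t)pd1);			/* paddr[31:13] | perms | C */
	write_aux_reg("TLBPD1HI", (uint32_t)(pd1 >> 32));	/* paddr[39:32] */
	/* followed by the TLBInsertEntry command, as in the patch */
}

int main(void)
{
	tlb_entry_insert(0x2000, 0x12abcde000ULL);	/* illustrative values */
	return 0;
}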

+ 10 - 1
arch/arc/mm/tlbex.S

@@ -223,12 +223,16 @@ ex_saved_reg1:
 	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
 	; (3) z = (pgtbl + y * 4)
 
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_SIZE_LOG	3	/* 8 == 2 ^ 3 */
+#else
 #define PTE_SIZE_LOG	2	/* 4 == 2 ^ 2 */
 #define PTE_SIZE_LOG	2	/* 4 == 2 ^ 2 */
+#endif
 
 	; multiply in step (3) above avoided by shifting lesser in step (1)
 	lsr     r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
 	and     r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
-	ld.aw   r0, [r1, r0]		; r0: PTE
+	ld.aw   r0, [r1, r0]            ; r0: PTE (lower word only for PAE40)
 					; r1: PTE ptr
 
 2:
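The PTE_SIZE_LOG trick above avoids a multiply: rather than computing the table index and then scaling by sizeof(pte_t), the code shifts right by less (PAGE_SHIFT - PTE_SIZE_LOG) and masks with the index mask pre-shifted left, yielding the byte offset directly. A C model of the equivalence (8K pages and a 256-entry table assumed for illustration):

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT	13
#define PTRS_PER_PTE	256
#define PTE_SIZE_LOG	3	/* 8-byte PTEs under PAE40 */

int main(void)
{
	uint32_t vaddr = 0x5678abcd;

	/* naive: index, then scale by entry size */
	uint32_t naive = ((vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) << PTE_SIZE_LOG;

	/* tlbex.S: shift less, mask with a pre-shifted mask */
	uint32_t fast = (vaddr >> (PAGE_SHIFT - PTE_SIZE_LOG)) &
			((PTRS_PER_PTE - 1) << PTE_SIZE_LOG);

	assert(naive == fast);	/* identical byte offset into the PTE table */
	return 0;
}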
@@ -247,6 +251,7 @@ ex_saved_reg1:
 ;-----------------------------------------------------------------
 ; Convert Linux PTE entry into TLB entry
 ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+;    (for PAE40, a two-word PTE is programmed as a three-word TLB Entry [PD0:PD1:PD1HI])
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
@@ -259,6 +264,10 @@ ex_saved_reg1:
 	or  r3, r3, r2
 
 	sr  r3, [ARC_REG_TLBPD1]    	; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
+#ifdef	CONFIG_ARC_HAS_PAE40
+	ld	r3, [r1, 4]		; paddr[39..32]
+	sr	r3, [ARC_REG_TLBPD1HI]
+#endif
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
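The ld r3, [r1, 4] added above relies on the upper half of the 64-bit pte_t sitting at byte offset 4 from the PTE pointer, which holds on a little-endian build (the configuration this sketch assumes; a big-endian layout would differ). A quick standalone check of that assumption:

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint64_t pte = 0x12abcde000ULL;	/* 64-bit PAE40 PTE, illustrative */
	uint32_t w[2];

	memcpy(w, &pte, sizeof(pte));

	/* little-endian: word 0 feeds TLBPD1, word at offset 4 is paddr[39:32] */
	assert(w[0] == 0xabcde000u);
	assert(w[1] == 0x12u);
	return 0;
}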