@@ -39,6 +39,7 @@

/* Bits in the SLB VSID word */
#define SLB_VSID_SHIFT 12
+#define SLB_VSID_SHIFT_256M SLB_VSID_SHIFT
#define SLB_VSID_SHIFT_1T 24
#define SLB_VSID_SSIZE_SHIFT 62
#define SLB_VSID_B ASM_CONST(0xc000000000000000)
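The new SLB_VSID_SHIFT_256M alias lets callers pick the VSID shift by segment size instead of hard-coding 12. As a rough illustration (not part of the patch), this is how the shifts and SLB_VSID_SSIZE_SHIFT combine into an SLB VSID dword, along the lines of mk_vsid_data() in arch/powerpc/mm/slb.c:

static inline unsigned long example_vsid_data(unsigned long vsid, int ssize,
					      unsigned long flags)
{
	/* 256M and 1T segments place the VSID at different offsets */
	int shift = (ssize == MMU_SEGSIZE_256M) ? SLB_VSID_SHIFT_256M
						: SLB_VSID_SHIFT_1T;

	/* the segment-size selector occupies the top bits (SLB_VSID_B mask) */
	return (vsid << shift) |
	       ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT) | flags;
}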
@@ -521,9 +522,19 @@ extern void slb_set_size(u16 size);
 * because of the modulo operation in vsid scramble.
 */

+/*
+ * The maximum VA we support as of now is 68 bits. We want a 19-bit
+ * context ID.
+ * Restrictions:
+ * The GPU cannot access beyond 128TB (a 47-bit effective address),
+ * and we cannot do more than a 20-bit PID.
+ * For p4 and p5, which can only do a 65-bit VA, we restrict our
+ * CONTEXT_BITS to 16 bits (i.e. we can only have 2^16 PIDs at the
+ * same time).
+ */
+#define VA_BITS 68
#define CONTEXT_BITS 19
-#define ESID_BITS 18
-#define ESID_BITS_1T 6
+#define ESID_BITS (VA_BITS - (SID_SHIFT + CONTEXT_BITS))
+#define ESID_BITS_1T (VA_BITS - (SID_SHIFT_1T + CONTEXT_BITS))

#define ESID_BITS_MASK ((1 << ESID_BITS) - 1)
#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1)
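With these definitions, ESID_BITS = 68 - (28 + 19) = 21 and ESID_BITS_1T = 68 - (40 + 19) = 9 (up from the hard-coded 18 and 6), taking SID_SHIFT = 28 and SID_SHIFT_1T = 40 as defined elsewhere in this header. The 65-bit restriction in the comment falls out of the same arithmetic: 65 - (28 + 21) = 16 context bits, which is what the MAX_USER_CONTEXT_65BIT_VA definition later in this patch encodes. A compile-time sanity check, purely illustrative and not part of the patch:

_Static_assert(68 - (28 + 19) == 21, "ESID_BITS under a 68-bit VA");
_Static_assert(68 - (40 + 19) == 9, "ESID_BITS_1T under a 68-bit VA");
/* 65-bit-VA parts are limited to 2^16 contexts, minus the reserved ones */
_Static_assert((1UL << (65 - (28 + 21))) - 2 == 65534,
	       "usable user contexts on a 65-bit VA");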
@@ -533,7 +544,7 @@ extern void slb_set_size(u16 size);
 * The proto-VSID space has 2^(CONTEXT_BITS + ESID_BITS) - 1 segments
 * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
 * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB).
+ * context maps 2^49 bytes (512TB).
 *
 * We also need to avoid the last segment of the last context, because that
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
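The per-context figure is just ESID_BITS plus SID_SHIFT: a context spans 2^21 segments of 2^28 bytes each, so 2^(21+28) = 2^49 bytes = 512TB, where the old 2^(18+28) = 2^46 gave 64TB. An illustrative check for a 64-bit build (not part of the patch):

_Static_assert((1UL << 21) * (1UL << 28) == 1UL << 49, "512TB per context");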
@@ -545,54 +556,46 @@ extern void slb_set_size(u16 size);
/* Would be nice to use KERNEL_REGION_ID here */
#define KERNEL_REGION_CONTEXT_OFFSET (0xc - 1)

+/*
+ * For platforms that support only a 65-bit VA we limit the context bits.
+ */
+#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
+
/*
 * This should be computed such that protovsid * vsid_multiplier
- * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
+ * doesn't overflow 64 bits. The vsid_multiplier should also be
+ * co-prime to vsid_modulus. We also need to make sure that the number
+ * of bits in the multiplied result (dividend) is less than twice the
+ * number of protovsid bits for our modulus optimization to work.
+ *
+ * The table below shows the current values used.
+ * |-------+------------+----------------------+------------+---------------------|
+ * |       | Prime Bits | proto VSID_BITS_65VA | Total Bits | 2 * proto VSID_BITS |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 1T    |         24 |                   25 |         49 |                  50 |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 256MB |         24 |                   37 |         61 |                  74 |
+ * |-------+------------+----------------------+------------+---------------------|
+ *
+ * |-------+------------+----------------------+------------+---------------------|
+ * |       | Prime Bits | proto VSID_BITS_68VA | Total Bits | 2 * proto VSID_BITS |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 1T    |         24 |                   28 |         52 |                  56 |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 256MB |         24 |                   40 |         64 |                  80 |
+ * |-------+------------+----------------------+------------+---------------------|
+ *
 */
#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */
-#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS)
-#define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1)
+#define VSID_BITS_256M (VA_BITS - SID_SHIFT)
+#define VSID_BITS_65_256M (65 - SID_SHIFT)

#define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */
-#define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T)
-#define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1)
-
+#define VSID_BITS_1T (VA_BITS - SID_SHIFT_1T)
+#define VSID_BITS_65_1T (65 - SID_SHIFT_1T)

#define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT))

-/*
- * This macro generates asm code to compute the VSID scramble
- * function. Used in slb_allocate() and do_stab_bolted. The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- *	rt = register containing the proto-VSID and into which the
- *		VSID will be stored
- *	rx = scratch register (clobbered)
- *
- *	- rt and rx must be different registers
- *	- The answer will end up in the low VSID_BITS bits of rt. The higher
- *	  bits may contain other garbage, so you may need to mask the
- *	  result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, size)				\
-	lis	rx,VSID_MULTIPLIER_##size@h;			\
-	ori	rx,rx,VSID_MULTIPLIER_##size@l;			\
-	mulld	rt,rt,rx;	/* rt = rt * MULTIPLIER */	\
-								\
-	srdi	rx,rt,VSID_BITS_##size;				\
-	clrldi	rt,rt,(64-VSID_BITS_##size);			\
-	add	rt,rt,rx;	/* add high and low bits */	\
-	/* NOTE: explanation based on VSID_BITS_##size = 36	\
-	 * Now, r3 == VSID (mod 2^36-1), and lies between 0 and	\
-	 * 2^36-1+2^28-1. That in particular means that if r3 >=	\
-	 * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has	\
-	 * the bit clear, r3 already has the answer we want, if it	\
-	 * doesn't, the answer is the low 36 bits of r3+1. So in all	\
-	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
-	addi	rx,rt,1;					\
-	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
-	add	rt,rt,rx
-
/* 4 bits per slice and we have one slice per 1TB */
#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)

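The co-primality requirement above is what makes the scramble a permutation of the non-zero proto-VSIDs, so distinct contexts/ESIDs never collide on a VSID. A user-space sketch (not part of the patch) demonstrating this with a small 8-bit analogue of the 24-bit prime used here:

#include <assert.h>

/* the same fold as the kernel's vsid_scramble(), in miniature */
static unsigned long scramble(unsigned long protovsid,
			      unsigned long multiplier, int bits)
{
	unsigned long modulus = (1UL << bits) - 1;
	unsigned long v = protovsid * multiplier;

	v = (v >> bits) + (v & modulus);
	return (v + ((v + 1) >> bits)) & modulus;
}

int main(void)
{
	char seen[255] = { 0 };
	unsigned long p;

	/* 13 is co-prime to 2^8 - 1 = 255: protovsids 1..254 map 1:1 */
	for (p = 1; p < 255; p++) {
		unsigned long v = scramble(p, 13, 8);

		assert(v != 0 && !seen[v]);
		seen[v] = 1;
	}
	return 0;
}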
@@ -640,7 +643,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#define vsid_scramble(protovsid, size) \
	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))

-#else /* 1 */
+/* simplified form avoiding mod operation */
#define vsid_scramble(protovsid, size) \
	({ \
		unsigned long x; \
@@ -648,6 +651,21 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
	})
+
+#else /* 1 */
+static inline unsigned long vsid_scramble(unsigned long protovsid,
+				unsigned long vsid_multiplier, int vsid_bits)
+{
+	unsigned long vsid;
+	unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);
+	/*
+	 * We have the same multiplier for both 256M and 1T segments now.
+	 */
+	vsid = protovsid * vsid_multiplier;
+	vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
+	return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
+}
+
#endif /* 1 */

/* Returns the segment size indicator for a user address */
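The fold in the new vsid_scramble() is the C version of the old ASM_VSID_SCRAMBLE logic: because 2^vsid_bits == 1 (mod 2^vsid_bits - 1), adding the high and low halves preserves the remainder, and one carry fixup finishes the reduction. This only works because the dividend stays under 2 * vsid_bits bits, per the table earlier. A user-space sketch (not part of the patch) checking the fold against a plain modulo for the 1T/68-bit parameters:

#include <assert.h>

static unsigned long fold_mod(unsigned long x, int bits)
{
	unsigned long modulus = (1UL << bits) - 1;

	/* add high and low halves: preserves x mod (2^bits - 1) */
	x = (x >> bits) + (x & modulus);
	/* carry fixup; also folds the value 2^bits - 1 back to 0 */
	return (x + ((x + 1) >> bits)) & modulus;
}

int main(void)
{
	const unsigned long multiplier = 12538073UL;	/* the 24-bit prime */
	const int bits = 28;		/* VSID_BITS_1T under a 68-bit VA */
	unsigned long proto;

	for (proto = 1; proto < (1UL << bits) - 1; proto += 12345) {
		unsigned long x = proto * multiplier;

		assert(fold_mod(x, bits) == x % ((1UL << bits) - 1));
	}
	return 0;
}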
@@ -662,17 +680,30 @@ static inline int user_segment_size(unsigned long addr)
static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
				     int ssize)
{
+	unsigned long va_bits = VA_BITS;
+	unsigned long vsid_bits;
+	unsigned long protovsid;
+
	/*
	 * Bad address. We return VSID 0 for that
	 */
	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
		return 0;

-	if (ssize == MMU_SEGSIZE_256M)
-		return vsid_scramble((context << ESID_BITS)
-				     | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M);
-	return vsid_scramble((context << ESID_BITS_1T)
-			     | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T);
+	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 65;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		vsid_bits = va_bits - SID_SHIFT;
+		protovsid = (context << ESID_BITS) |
+			    ((ea >> SID_SHIFT) & ESID_BITS_MASK);
+		return vsid_scramble(protovsid, VSID_MULTIPLIER_256M, vsid_bits);
+	}
+	/* 1T segment */
+	vsid_bits = va_bits - SID_SHIFT_1T;
+	protovsid = (context << ESID_BITS_1T) |
+		    ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
+	return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
}
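With the feature check folded in, get_vsid() picks the proto-VSID width at runtime: machines without MMU_FTR_68_BIT_VA keep scrambling in the narrower 65-bit space, while 68-bit-VA machines use the full one. A usage sketch (not part of the patch) showing how a caller might combine a context id with a user effective address:

static inline unsigned long example_user_vsid(unsigned long context,
					      unsigned long ea)
{
	/* user_segment_size() is declared earlier in this header */
	int ssize = user_segment_size(ea);

	return get_vsid(context, ea, ssize);
}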

/*