7 years ago · 54be0b9c7c
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -78,8 +78,6 @@ void kernel_bad_stack(struct pt_regs *regs);
 
				 void system_reset_exception(struct pt_regs *regs);
			
 
				 void machine_check_exception(struct pt_regs *regs);
			
 
				 void emulation_assist_interrupt(struct pt_regs *regs);
			
 
				-long do_slb_fault(struct pt_regs *regs, unsigned long ea);
			
 
				-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err);
			
 
				 
			
 
				 /* signals, syscalls and interrupts */
			
 
				 long sys_swapcontext(struct ucontext __user *old_ctx,
			
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -487,8 +487,6 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 
				 extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
			
 
				 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
			
 
				 
			
 
				-extern void hash__setup_new_exec(void);
			
 
				-
			
 
				 #ifdef CONFIG_PPC_PSERIES
			
 
				 void hpte_init_pseries(void);
			
 
				 #else
			
@@ -503,7 +501,6 @@ struct slb_entry {
 
				 };
			
 
				 
			
 
				 extern void slb_initialize(void);
			
 
				-extern void core_flush_all_slbs(struct mm_struct *mm);
			
 
				 extern void slb_flush_and_rebolt(void);
			
 
				 void slb_flush_all_realmode(void);
			
 
				 void __slb_restore_bolted_realmode(void);
			
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -60,6 +60,14 @@
 
				  */
			
 
				 #define MAX_MCE_DEPTH	4
			
 
				 
			
 
				+/*
			
 
				+ * EX_LR is only used in EXSLB, and only where it does not overlap with EX_DAR.
			
 
				+ * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
			
 
				+ * in the save area so it's not necessary to overlap them. Could be used
			
 
				+ * for future savings though if another 4 byte register was to be saved.
			
 
				+ */
			
 
				+#define EX_LR		EX_DAR
			
 
				+
			
 
				 /*
			
 
				  * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
			
 
				  * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
			
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -113,10 +113,7 @@ struct paca_struct {
 
				  				 * on the linear mapping */
			
 
				 	/* SLB related definitions */
			
 
				 	u16 vmalloc_sllp;
			
 
				-	u8 slb_cache_ptr;
			
 
				-	u8 stab_rr;			/* stab/slb round-robin counter */
			
 
				-	u32 slb_used_bitmap;		/* Bitmaps for first 32 SLB entries. */
			
 
				-	u32 slb_kern_bitmap;
			
 
				+	u16 slb_cache_ptr;
			
 
				 	u32 slb_cache[SLB_CACHE_ENTRIES];
			
 
				 #endif /* CONFIG_PPC_BOOK3S_64 */
			
 
				 
			
@@ -146,11 +143,24 @@ struct paca_struct {
 
				 	struct tlb_core_data tcd;
			
 
				 #endif /* CONFIG_PPC_BOOK3E */
			
 
				 
			
 
				+#ifdef CONFIG_PPC_BOOK3S
			
 
				+	mm_context_id_t mm_ctx_id;
			
 
				+#ifdef CONFIG_PPC_MM_SLICES
			
 
				+	unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
			
 
				+	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
			
 
				+	unsigned long mm_ctx_slb_addr_limit;
			
 
				+#else
			
 
				+	u16 mm_ctx_user_psize;
			
 
				+	u16 mm_ctx_sllp;
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				 	/*
			
 
				 	 * then miscellaneous read-write fields
			
 
				 	 */
			
 
				 	struct task_struct *__current;	/* Pointer to current */
			
 
				 	u64 kstack;			/* Saved Kernel stack addr */
			
 
				+	u64 stab_rr;			/* stab/slb round-robin counter */
			
 
				 	u64 saved_r1;			/* r1 save for RTAS calls or PM or EE=0 */
			
 
				 	u64 saved_msr;			/* MSR saved here by enter_rtas */
			
 
				 	u16 trap_save;			/* Used when bad stack is encountered */
			
@@ -248,6 +258,7 @@ struct paca_struct {
 
				 #endif /* CONFIG_PPC_BOOK3S_64 */
			
 
				 } ____cacheline_aligned;
			
 
				 
			
 
				+extern void copy_mm_to_paca(struct mm_struct *mm);
			
 
				 extern struct paca_struct **paca_ptrs;
			
 
				 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
			
 
				 extern void setup_paca(struct paca_struct *new_paca);
			
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -273,7 +273,6 @@ struct thread_struct {
 
				 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
			
 
				 	struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
			
 
				 	unsigned long	trap_nr;	/* last trap # on this thread */
			
 
				-	u8 load_slb;			/* Ages out SLB preload cache entries */
			
 
				 	u8 load_fp;
			
 
				 #ifdef CONFIG_ALTIVEC
			
 
				 	u8 load_vec;
			
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -32,7 +32,6 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 
				 			   unsigned long len, unsigned int psize);
			
 
				 
			
 
				 void slice_init_new_context_exec(struct mm_struct *mm);
			
 
				-void slice_setup_new_exec(void);
			
 
				 
			
 
				 #endif /* __ASSEMBLY__ */
			
 
				 
			
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -29,7 +29,6 @@
 
				 #include <asm/page.h>
			
 
				 #include <asm/accounting.h>
			
 
				 
			
 
				-#define SLB_PRELOAD_NR	16U
			
 
				 /*
			
 
				  * low level task data.
			
 
				  */
			
@@ -45,10 +44,6 @@ struct thread_info {
 
				 #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
			
 
				 	struct cpu_accounting_data accounting;
			
 
				 #endif
			
 
				-	u8 slb_preload_nr;
			
 
				-	u8 slb_preload_tail;
			
 
				-	u32 slb_preload_esid[SLB_PRELOAD_NR];
			
 
				-
			
 
				 	/* low level flags - has atomic operations done on it */
			
 
				 	unsigned long	flags ____cacheline_aligned_in_smp;
			
 
				 };
			
@@ -77,12 +72,6 @@ static inline struct thread_info *current_thread_info(void)
 
				 }
			
 
				 
			
 
				 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
			
 
				-
			
 
				-#ifdef CONFIG_PPC_BOOK3S_64
			
 
				-void arch_setup_new_exec(void);
			
 
				-#define arch_setup_new_exec arch_setup_new_exec
			
 
				-#endif
			
 
				-
			
 
				 #endif /* __ASSEMBLY__ */
			
 
				 
			
 
				 /*
			
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -173,6 +173,7 @@ int main(void)
 
				 	OFFSET(PACAKSAVE, paca_struct, kstack);
			
 
				 	OFFSET(PACACURRENT, paca_struct, __current);
			
 
				 	OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
			
 
				+	OFFSET(PACASTABRR, paca_struct, stab_rr);
			
 
				 	OFFSET(PACAR1, paca_struct, saved_r1);
			
 
				 	OFFSET(PACATOC, paca_struct, kernel_toc);
			
 
				 	OFFSET(PACAKBASE, paca_struct, kernelbase);
			
@@ -180,6 +181,15 @@ int main(void)
 
				 	OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
			
 
				 	OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
			
 
				 	OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
			
 
				+#ifdef CONFIG_PPC_BOOK3S
			
 
				+	OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
			
 
				+#ifdef CONFIG_PPC_MM_SLICES
			
 
				+	OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
			
 
				+	OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
			
 
				+	OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
			
 
				+	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
			
 
				+#endif /* CONFIG_PPC_MM_SLICES */
			
 
				+#endif
			
 
				 
			
 
				 #ifdef CONFIG_PPC_BOOK3E
			
 
				 	OFFSET(PACAPGD, paca_struct, pgd);
			
@@ -202,7 +212,6 @@ int main(void)
 
				 #ifdef CONFIG_PPC_BOOK3S_64
			
 
				 	OFFSET(PACASLBCACHE, paca_struct, slb_cache);
			
 
				 	OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
			
 
				-	OFFSET(PACASTABRR, paca_struct, stab_rr);
			
 
				 	OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
			
 
				 #ifdef CONFIG_PPC_MM_SLICES
			
 
				 	OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
			
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -596,36 +596,28 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
				 
			
 
				 
			
 
				 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
			
 
				-EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
			
 
				+	SET_SCRATCH0(r13)
			
 
				+	EXCEPTION_PROLOG_0(PACA_EXSLB)
			
 
				+	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
			
 
				+	mr	r12,r3	/* save r3 */
			
 
				+	mfspr	r3,SPRN_DAR
			
 
				+	mfspr	r11,SPRN_SRR1
			
 
				+	crset	4*cr6+eq
			
 
				+	BRANCH_TO_COMMON(r10, slb_miss_common)
			
 
				 EXC_REAL_END(data_access_slb, 0x380, 0x80)
			
 
				 
			
 
				 EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
			
 
				-EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
			
 
				+	SET_SCRATCH0(r13)
			
 
				+	EXCEPTION_PROLOG_0(PACA_EXSLB)
			
 
				+	EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
			
 
				+	mr	r12,r3	/* save r3 */
			
 
				+	mfspr	r3,SPRN_DAR
			
 
				+	mfspr	r11,SPRN_SRR1
			
 
				+	crset	4*cr6+eq
			
 
				+	BRANCH_TO_COMMON(r10, slb_miss_common)
			
 
				 EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
			
 
				-
			
 
				 TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
			
 
				 
			
 
				-EXC_COMMON_BEGIN(data_access_slb_common)
			
 
				-	mfspr	r10,SPRN_DAR
			
 
				-	std	r10,PACA_EXSLB+EX_DAR(r13)
			
 
				-	EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
			
 
				-	ld	r4,PACA_EXSLB+EX_DAR(r13)
			
 
				-	std	r4,_DAR(r1)
			
 
				-	addi	r3,r1,STACK_FRAME_OVERHEAD
			
 
				-	bl	do_slb_fault
			
 
				-	cmpdi	r3,0
			
 
				-	bne-	1f
			
 
				-	b	fast_exception_return
			
 
				-1:	/* Error case */
			
 
				-	std	r3,RESULT(r1)
			
 
				-	bl	save_nvgprs
			
 
				-	RECONCILE_IRQ_STATE(r10, r11)
			
 
				-	ld	r4,_DAR(r1)
			
 
				-	ld	r5,RESULT(r1)
			
 
				-	addi	r3,r1,STACK_FRAME_OVERHEAD
			
 
				-	bl	do_bad_slb_fault
			
 
				-	b	ret_from_except
			
 
				-
			
 
				 
			
 
				 EXC_REAL(instruction_access, 0x400, 0x80)
			
 
				 EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
			
@@ -648,34 +640,160 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
				 
			
 
				 
			
 
				 EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
			
 
				-EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480);
			
 
				+	SET_SCRATCH0(r13)
			
 
				+	EXCEPTION_PROLOG_0(PACA_EXSLB)
			
 
				+	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
			
 
				+	mr	r12,r3	/* save r3 */
			
 
				+	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
			
 
				+	mfspr	r11,SPRN_SRR1
			
 
				+	crclr	4*cr6+eq
			
 
				+	BRANCH_TO_COMMON(r10, slb_miss_common)
			
 
				 EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
			
 
				 
			
 
				 EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
			
 
				-EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480);
			
 
				+	SET_SCRATCH0(r13)
			
 
				+	EXCEPTION_PROLOG_0(PACA_EXSLB)
			
 
				+	EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
			
 
				+	mr	r12,r3	/* save r3 */
			
 
				+	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
			
 
				+	mfspr	r11,SPRN_SRR1
			
 
				+	crclr	4*cr6+eq
			
 
				+	BRANCH_TO_COMMON(r10, slb_miss_common)
			
 
				 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
			
 
				-
			
 
				 TRAMP_KVM(PACA_EXSLB, 0x480)
			
 
				 
			
 
				-EXC_COMMON_BEGIN(instruction_access_slb_common)
			
 
				-	EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
			
 
				-	ld	r4,_NIP(r1)
			
 
				-	addi	r3,r1,STACK_FRAME_OVERHEAD
			
 
				-	bl	do_slb_fault
			
 
				-	cmpdi	r3,0
			
 
				-	bne-	1f
			
 
				-	b	fast_exception_return
			
 
				-1:	/* Error case */
			
 
				-	std	r3,RESULT(r1)
			
 
				+
			
 
				+/*
			
 
				+ * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
			
 
				+ * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
			
 
				+ */
			
 
				+EXC_COMMON_BEGIN(slb_miss_common)
			
 
				+	/*
			
 
				+	 * r13 points to the PACA, r9 contains the saved CR,
			
 
				+	 * r12 contains the saved r3,
			
 
				+	 * r11 contains the saved SRR1, SRR0 is still ready for return
			
 
				+	 * r3 has the faulting address
			
 
				+	 * r9 - r13 are saved in paca->exslb.
			
 
				+ 	 * cr6.eq is set for a D-SLB miss, clear for an I-SLB miss
			
 
				+	 * We assume we aren't going to take any exceptions during this
			
 
				+	 * procedure.
			
 
				+	 */
			
 
				+	mflr	r10
			
 
				+	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
			
 
				+	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */
			
 
				+
			
 
				+	andi.	r9,r11,MSR_PR	// Check for exception from userspace
			
 
				+	cmpdi	cr4,r9,MSR_PR	// And save the result in CR4 for later
			
 
				+
			
 
				+	/*
			
 
				+	 * Test MSR_RI before calling slb_allocate, because the
			
 
				+	 * MSR in r11 gets clobbered. However we still want to allocate
			
 
				+	 * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
			
 
				+	 * recursive SLB faults. So use cr5 for this, which is preserved.
			
 
				+	 */
			
 
				+	andi.	r11,r11,MSR_RI	/* check for unrecoverable exception */
			
 
				+	cmpdi	cr5,r11,MSR_RI
			
 
				+
			
 
				+	crset	4*cr0+eq
			
 
				+#ifdef CONFIG_PPC_BOOK3S_64
			
 
				+BEGIN_MMU_FTR_SECTION
			
 
				+	bl	slb_allocate
			
 
				+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
			
 
				+#endif
			
 
				+
			
 
				+	ld	r10,PACA_EXSLB+EX_LR(r13)
			
 
				+	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
			
 
				+	mtlr	r10
			
 
				+
			
 
				+	/*
			
 
				+	 * Large address, check whether we have to allocate new contexts.
			
 
				+	 */
			
 
				+	beq-	8f
			
 
				+
			
 
				+	bne-	cr5,2f		/* if unrecoverable exception, oops */
			
 
				+
			
 
				+	/* All done -- return from exception. */
			
 
				+
			
 
				+	bne	cr4,1f		/* returning to kernel */
			
 
				+
			
 
				+	mtcrf	0x80,r9
			
 
				+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
			
 
				+	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
			
 
				+	mtcrf	0x02,r9		/* I/D indication is in cr6 */
			
 
				+	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
			
 
				+
			
 
				+	RESTORE_CTR(r9, PACA_EXSLB)
			
 
				+	RESTORE_PPR_PACA(PACA_EXSLB, r9)
			
 
				+	mr	r3,r12
			
 
				+	ld	r9,PACA_EXSLB+EX_R9(r13)
			
 
				+	ld	r10,PACA_EXSLB+EX_R10(r13)
			
 
				+	ld	r11,PACA_EXSLB+EX_R11(r13)
			
 
				+	ld	r12,PACA_EXSLB+EX_R12(r13)
			
 
				+	ld	r13,PACA_EXSLB+EX_R13(r13)
			
 
				+	RFI_TO_USER
			
 
				+	b	.	/* prevent speculative execution */
			
 
				+1:
			
 
				+	mtcrf	0x80,r9
			
 
				+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
			
 
				+	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
			
 
				+	mtcrf	0x02,r9		/* I/D indication is in cr6 */
			
 
				+	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
			
 
				+
			
 
				+	RESTORE_CTR(r9, PACA_EXSLB)
			
 
				+	RESTORE_PPR_PACA(PACA_EXSLB, r9)
			
 
				+	mr	r3,r12
			
 
				+	ld	r9,PACA_EXSLB+EX_R9(r13)
			
 
				+	ld	r10,PACA_EXSLB+EX_R10(r13)
			
 
				+	ld	r11,PACA_EXSLB+EX_R11(r13)
			
 
				+	ld	r12,PACA_EXSLB+EX_R12(r13)
			
 
				+	ld	r13,PACA_EXSLB+EX_R13(r13)
			
 
				+	RFI_TO_KERNEL
			
 
				+	b	.	/* prevent speculative execution */
			
 
				+
			
 
				+
			
 
				+2:	std     r3,PACA_EXSLB+EX_DAR(r13)
			
 
				+	mr	r3,r12
			
 
				+	mfspr	r11,SPRN_SRR0
			
 
				+	mfspr	r12,SPRN_SRR1
			
 
				+	LOAD_HANDLER(r10,unrecov_slb)
			
 
				+	mtspr	SPRN_SRR0,r10
			
 
				+	ld	r10,PACAKMSR(r13)
			
 
				+	mtspr	SPRN_SRR1,r10
			
 
				+	RFI_TO_KERNEL
			
 
				+	b	.
			
 
				+
			
 
				+8:	std     r3,PACA_EXSLB+EX_DAR(r13)
			
 
				+	mr	r3,r12
			
 
				+	mfspr	r11,SPRN_SRR0
			
 
				+	mfspr	r12,SPRN_SRR1
			
 
				+	LOAD_HANDLER(r10, large_addr_slb)
			
 
				+	mtspr	SPRN_SRR0,r10
			
 
				+	ld	r10,PACAKMSR(r13)
			
 
				+	mtspr	SPRN_SRR1,r10
			
 
				+	RFI_TO_KERNEL
			
 
				+	b	.
			
 
				+
			
 
				+EXC_COMMON_BEGIN(unrecov_slb)
			
 
				+	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
			
 
				+	RECONCILE_IRQ_STATE(r10, r11)
			
 
				 	bl	save_nvgprs
			
 
				+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
			
 
				+	bl	unrecoverable_exception
			
 
				+	b	1b
			
 
				+
			
 
				+EXC_COMMON_BEGIN(large_addr_slb)
			
 
				+	EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
			
 
				 	RECONCILE_IRQ_STATE(r10, r11)
			
 
				-	ld	r4,_NIP(r1)
			
 
				-	ld	r5,RESULT(r1)
			
 
				-	addi	r3,r1,STACK_FRAME_OVERHEAD
			
 
				-	bl	do_bad_slb_fault
			
 
				+	ld	r3, PACA_EXSLB+EX_DAR(r13)
			
 
				+	std	r3, _DAR(r1)
			
 
				+	beq	cr6, 2f
			
 
				+	li	r10, 0x481		/* fix trap number for I-SLB miss */
			
 
				+	std	r10, _TRAP(r1)
			
 
				+2:	bl	save_nvgprs
			
 
				+	addi	r3, r1, STACK_FRAME_OVERHEAD
			
 
				+	bl	slb_miss_large_addr
			
 
				 	b	ret_from_except
			
 
				 
			
 
				-
			
 
				 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
			
 
				 	.globl hardware_interrupt_hv;
			
 
				 hardware_interrupt_hv:
			
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -258,3 +258,25 @@ void __init free_unused_pacas(void)
 
				 	printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
			
 
				 			paca_ptrs_size + paca_struct_size, nr_cpu_ids);
			
 
				 }
			
 
				+
			
 
				+void copy_mm_to_paca(struct mm_struct *mm)
			
 
				+{
			
 
				+#ifdef CONFIG_PPC_BOOK3S
			
 
				+	mm_context_t *context = &mm->context;
			
 
				+
			
 
				+	get_paca()->mm_ctx_id = context->id;
			
 
				+#ifdef CONFIG_PPC_MM_SLICES
			
 
				+	VM_BUG_ON(!mm->context.slb_addr_limit);
			
 
				+	get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
			
 
				+	memcpy(&get_paca()->mm_ctx_low_slices_psize,
			
 
				+	       &context->low_slices_psize, sizeof(context->low_slices_psize));
			
 
				+	memcpy(&get_paca()->mm_ctx_high_slices_psize,
			
 
				+	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
			
 
				+#else /* CONFIG_PPC_MM_SLICES */
			
 
				+	get_paca()->mm_ctx_user_psize = context->user_psize;
			
 
				+	get_paca()->mm_ctx_sllp = context->sllp;
			
 
				+#endif
			
 
				+#else /* !CONFIG_PPC_BOOK3S */
			
 
				+	return;
			
 
				+#endif
			
 
				+}
			
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1482,15 +1482,6 @@ void flush_thread(void)
 
				 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
			
 
				 }
			
 
				 
			
 
				-#ifdef CONFIG_PPC_BOOK3S_64
			
 
				-void arch_setup_new_exec(void)
			
 
				-{
			
 
				-	if (radix_enabled())
			
 
				-		return;
			
 
				-	hash__setup_new_exec();
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 int set_thread_uses_vas(void)
			
 
				 {
			
 
				 #ifdef CONFIG_PPC_BOOK3S_64
			
@@ -1719,8 +1710,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-void preload_new_slb_context(unsigned long start, unsigned long sp);
			
 
				-
			
 
				 /*
			
 
				  * Set up a thread for executing a new program
			
 
				  */
			
@@ -1728,10 +1717,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 
				 {
			
 
				 #ifdef CONFIG_PPC64
			
 
				 	unsigned long load_addr = regs->gpr[2];	/* saved by ELF_PLAT_INIT */
			
 
				-
			
 
				-#ifdef CONFIG_PPC_BOOK3S_64
			
 
				-	preload_new_slb_context(start, sp);
			
 
				-#endif
			
 
				 #endif
			
 
				 
			
 
				 	/*
			
@@ -1822,7 +1807,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 
				 #ifdef CONFIG_VSX
			
 
				 	current->thread.used_vsr = 0;
			
 
				 #endif
			
 
				-	current->thread.load_slb = 0;
			
 
				 	current->thread.load_fp = 0;
			
 
				 	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
			
 
				 	current->thread.fp_save_area = NULL;
			
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 
				 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(BITS)e.o
			
 
				 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
			
 
				 obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
			
 
				-obj-$(CONFIG_PPC_BOOK3S_64)	+= pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
			
 
				+obj-$(CONFIG_PPC_BOOK3S_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
			
 
				 obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o tlb-radix.o
			
 
				 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
			
 
				 obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(BITS).o
			
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1088,16 +1088,16 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_PPC_MM_SLICES
			
 
				-static unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
			
 
				+static unsigned int get_paca_psize(unsigned long addr)
			
 
				 {
			
 
				 	unsigned char *psizes;
			
 
				 	unsigned long index, mask_index;
			
 
				 
			
 
				 	if (addr < SLICE_LOW_TOP) {
			
 
				-		psizes = mm->context.low_slices_psize;
			
 
				+		psizes = get_paca()->mm_ctx_low_slices_psize;
			
 
				 		index = GET_LOW_SLICE_INDEX(addr);
			
 
				 	} else {
			
 
				-		psizes = mm->context.high_slices_psize;
			
 
				+		psizes = get_paca()->mm_ctx_high_slices_psize;
			
 
				 		index = GET_HIGH_SLICE_INDEX(addr);
			
 
				 	}
			
 
				 	mask_index = index & 0x1;
			
@@ -1105,9 +1105,9 @@ static unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
 
				 }
			
 
				 
			
 
				 #else
			
 
				-unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
			
 
				+unsigned int get_paca_psize(unsigned long addr)
			
 
				 {
			
 
				-	return mm->context.user_psize;
			
 
				+	return get_paca()->mm_ctx_user_psize;
			
 
				 }
			
 
				 #endif
			
 
				 
			
@@ -1118,11 +1118,15 @@ unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
 
				 #ifdef CONFIG_PPC_64K_PAGES
			
 
				 void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
			
 
				 {
			
 
				-	if (get_psize(mm, addr) == MMU_PAGE_4K)
			
 
				+	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
			
 
				 		return;
			
 
				 	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
			
 
				 	copro_flush_all_slbs(mm);
			
 
				-	core_flush_all_slbs(mm);
			
 
				+	if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
			
 
				+
			
 
				+		copy_mm_to_paca(mm);
			
 
				+		slb_flush_and_rebolt();
			
 
				+	}
			
 
				 }
			
 
				 #endif /* CONFIG_PPC_64K_PAGES */
			
 
				 
			
@@ -1187,6 +1191,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
 
				 		trap, vsid, ssize, psize, lpsize, pte);
			
 
				 }
			
 
				 
			
 
				+static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
			
 
				+			     int psize, bool user_region)
			
 
				+{
			
 
				+	if (user_region) {
			
 
				+		if (psize != get_paca_psize(ea)) {
			
 
				+			copy_mm_to_paca(mm);
			
 
				+			slb_flush_and_rebolt();
			
 
				+		}
			
 
				+	} else if (get_paca()->vmalloc_sllp !=
			
 
				+		   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
			
 
				+		get_paca()->vmalloc_sllp =
			
 
				+			mmu_psize_defs[mmu_vmalloc_psize].sllp;
			
 
				+		slb_vmalloc_update();
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 /* Result code is:
			
 
				  *  0 - handled
			
 
				  *  1 - normal page fault
			
@@ -1219,7 +1239,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 
				 			rc = 1;
			
 
				 			goto bail;
			
 
				 		}
			
 
				-		psize = get_psize(mm, ea);
			
 
				+		psize = get_slice_psize(mm, ea);
			
 
				 		ssize = user_segment_size(ea);
			
 
				 		vsid = get_user_vsid(&mm->context, ea, ssize);
			
 
				 		break;
			
@@ -1307,6 +1327,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 
				 			WARN_ON(1);
			
 
				 		}
			
 
				 #endif
			
 
				+		if (current->mm == mm)
			
 
				+			check_paca_psize(ea, mm, psize, user_region);
			
 
				+
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
@@ -1341,14 +1364,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 
				 			       "to 4kB pages because of "
			
 
				 			       "non-cacheable mapping\n");
			
 
				 			psize = mmu_vmalloc_psize = MMU_PAGE_4K;
			
 
				-			slb_vmalloc_update();
			
 
				 			copro_flush_all_slbs(mm);
			
 
				-			core_flush_all_slbs(mm);
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 #endif /* CONFIG_PPC_64K_PAGES */
			
 
				 
			
 
				+	if (current->mm == mm)
			
 
				+		check_paca_psize(ea, mm, psize, user_region);
			
 
				+
			
 
				 #ifdef CONFIG_PPC_64K_PAGES
			
 
				 	if (psize == MMU_PAGE_64K)
			
 
				 		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
			
@@ -1436,7 +1460,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 
				 #ifdef CONFIG_PPC_MM_SLICES
			
 
				 static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
			
 
				 {
			
 
				-	int psize = get_psize(mm, ea);
			
 
				+	int psize = get_slice_psize(mm, ea);
			
 
				 
			
 
				 	/* We only prefault standard pages for now */
			
 
				 	if (unlikely(psize != mm->context.user_psize))
			
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -54,7 +54,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
				 		 * MMU context id, which is then moved to SPRN_PID.
			
 
				 		 *
			
 
				 		 * For the hash MMU it is either the first load from slb_cache
			
 
				-		 * in switch_slb(), and/or load of MMU context id.
			
 
				+		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
			
 
				+		 * copy_mm_to_paca().
			
 
				 		 *
			
 
				 		 * On the other side, the barrier is in mm/tlb-radix.c for
			
 
				 		 * radix which orders earlier stores to clear the PTEs vs
			
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -53,8 +53,6 @@ int hash__alloc_context_id(void)
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(hash__alloc_context_id);
			
 
				 
			
 
				-void slb_setup_new_exec(void);
			
 
				-
			
 
				 static int hash__init_new_context(struct mm_struct *mm)
			
 
				 {
			
 
				 	int index;
			
@@ -86,13 +84,6 @@ static int hash__init_new_context(struct mm_struct *mm)
 
				 	return index;
			
 
				 }
			
 
				 
			
 
				-void hash__setup_new_exec(void)
			
 
				-{
			
 
				-	slice_setup_new_exec();
			
 
				-
			
 
				-	slb_setup_new_exec();
			
 
				-}
			
 
				-
			
 
				 static int radix__init_new_context(struct mm_struct *mm)
			
 
				 {
			
 
				 	unsigned long rts_field;
			
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,7 +14,6 @@
 
				  *      2 of the License, or (at your option) any later version.
			
 
				  */
			
 
				 
			
 
				-#include <asm/asm-prototypes.h>
			
 
				 #include <asm/pgtable.h>
			
 
				 #include <asm/mmu.h>
			
 
				 #include <asm/mmu_context.h>
			
@@ -34,7 +33,7 @@ enum slb_index {
 
				 	KSTACK_INDEX	= 1, /* Kernel stack map */
			
 
				 };
			
 
				 
			
 
				-static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
			
 
				+extern void slb_allocate(unsigned long ea);
			
 
				 
			
 
				 #define slb_esid_mask(ssize)	\
			
 
				 	(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
			
@@ -45,17 +44,11 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
 
				 	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
			
 
				 }
			
 
				 
			
 
				-static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
			
 
				-					 unsigned long flags)
			
 
				-{
			
 
				-	return (vsid << slb_vsid_shift(ssize)) | flags |
			
 
				-		((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
			
 
				-}
			
 
				-
			
 
				 static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
			
 
				 					 unsigned long flags)
			
 
				 {
			
 
				-	return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
			
 
				+	return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
			
 
				+		((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
			
 
				 }
			
 
				 
			
 
				 static inline void slb_shadow_update(unsigned long ea, int ssize,
			
@@ -122,9 +115,6 @@ void slb_restore_bolted_realmode(void)
 
				 {
			
 
				 	__slb_restore_bolted_realmode();
			
 
				 	get_paca()->slb_cache_ptr = 0;
			
 
				-
			
 
				-	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
			
 
				-	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -132,6 +122,9 @@ void slb_restore_bolted_realmode(void)
 
				  */
			
 
				 void slb_flush_all_realmode(void)
			
 
				 {
			
 
				+	/*
			
 
				+	 * This flushes all SLB entries including 0, so it must be realmode.
			
 
				+	 */
			
 
				 	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
			
 
				 }
			
 
				 
			
@@ -177,9 +170,6 @@ void slb_flush_and_rebolt(void)
 
				 		     : "memory");
			
 
				 
			
 
				 	get_paca()->slb_cache_ptr = 0;
			
 
				-
			
 
				-	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
			
 
				-	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
			
 
				 }
			
 
				 
			
 
				 void slb_save_contents(struct slb_entry *slb_ptr)
			
@@ -212,7 +202,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr)
 
				 		return;
			
 
				 
			
 
				 	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
			
 
				-	pr_err("Last SLB entry inserted at slot %u\n", get_paca()->stab_rr);
			
 
				+	pr_err("Last SLB entry inserted at slot %lld\n", get_paca()->stab_rr);
			
 
				 
			
 
				 	for (i = 0; i < mmu_slb_size; i++) {
			
 
				 		e = slb_ptr->esid;
			
@@ -257,119 +247,41 @@ void slb_vmalloc_update(void)
 
				 	slb_flush_and_rebolt();
			
 
				 }
			
 
				 
			
 
				-static bool preload_hit(struct thread_info *ti, unsigned long esid)
			
 
				-{
			
 
				-	u8 i;
			
 
				-
			
 
				-	for (i = 0; i < ti->slb_preload_nr; i++) {
			
 
				-		u8 idx;
			
 
				-
			
 
				-		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
			
 
				-		if (esid == ti->slb_preload_esid[idx])
			
 
				-			return true;
			
 
				-	}
			
 
				-	return false;
			
 
				-}
			
 
				-
			
 
				-static bool preload_add(struct thread_info *ti, unsigned long ea)
			
 
				-{
			
 
				-	unsigned long esid;
			
 
				-	u8 idx;
			
 
				-
			
 
				-	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
			
 
				-		/* EAs are stored >> 28 so 256MB segments don't need clearing */
			
 
				-		if (ea & ESID_MASK_1T)
			
 
				-			ea &= ESID_MASK_1T;
			
 
				-	}
			
 
				-
			
 
				-	esid = ea >> SID_SHIFT;
			
 
				-
			
 
				-	if (preload_hit(ti, esid))
			
 
				-		return false;
			
 
				-
			
 
				-	idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
			
 
				-	ti->slb_preload_esid[idx] = esid;
			
 
				-	if (ti->slb_preload_nr == SLB_PRELOAD_NR)
			
 
				-		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
			
 
				-	else
			
 
				-		ti->slb_preload_nr++;
			
 
				-
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void preload_age(struct thread_info *ti)
			
 
				-{
			
 
				-	if (!ti->slb_preload_nr)
			
 
				-		return;
			
 
				-	ti->slb_preload_nr--;
			
 
				-	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
			
 
				-}
			
 
				-
			
 
				-void slb_setup_new_exec(void)
			
 
				+/* Helper function to compare esids.  There are four cases to handle.
			
 
				+ * 1. The system is not 1T segment size capable.  Use the GET_ESID compare.
			
 
				+ * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
			
 
				+ * 3. The system is 1T capable, only one of the two addresses is > 1T.  This is not a match.
			
 
				+ * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
			
 
				+ */
			
 
				+static inline int esids_match(unsigned long addr1, unsigned long addr2)
			
 
				 {
			
 
				-	struct thread_info *ti = current_thread_info();
			
 
				-	struct mm_struct *mm = current->mm;
			
 
				-	unsigned long exec = 0x10000000;
			
 
				+	int esid_1t_count;
			
 
				 
			
 
				-	/*
			
 
				-	 * We have no good place to clear the slb preload cache on exec,
			
 
				-	 * flush_thread is about the earliest arch hook but that happens
			
 
				-	 * after we switch to the mm and have aleady preloaded the SLBEs.
			
 
				-	 *
			
 
				-	 * For the most part that's probably okay to use entries from the
			
 
				-	 * previous exec, they will age out if unused. It may turn out to
			
 
				-	 * be an advantage to clear the cache before switching to it,
			
 
				-	 * however.
			
 
				-	 */
			
 
				-
			
 
				-	/*
			
 
				-	 * preload some userspace segments into the SLB.
			
 
				-	 * Almost all 32 and 64bit PowerPC executables are linked at
			
 
				-	 * 0x10000000 so it makes sense to preload this segment.
			
 
				-	 */
			
 
				-	if (!is_kernel_addr(exec)) {
			
 
				-		if (preload_add(ti, exec))
			
 
				-			slb_allocate_user(mm, exec);
			
 
				-	}
			
 
				-
			
 
				-	/* Libraries and mmaps. */
			
 
				-	if (!is_kernel_addr(mm->mmap_base)) {
			
 
				-		if (preload_add(ti, mm->mmap_base))
			
 
				-			slb_allocate_user(mm, mm->mmap_base);
			
 
				-	}
			
 
				-}
			
 
				+	/* System is not 1T segment size capable. */
			
 
				+	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
			
 
				+		return (GET_ESID(addr1) == GET_ESID(addr2));
			
 
				 
			
 
				-void preload_new_slb_context(unsigned long start, unsigned long sp)
			
 
				-{
			
 
				-	struct thread_info *ti = current_thread_info();
			
 
				-	struct mm_struct *mm = current->mm;
			
 
				-	unsigned long heap = mm->start_brk;
			
 
				+	esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
			
 
				+				((addr2 >> SID_SHIFT_1T) != 0));
			
 
				 
			
 
				-	/* Userspace entry address. */
			
 
				-	if (!is_kernel_addr(start)) {
			
 
				-		if (preload_add(ti, start))
			
 
				-			slb_allocate_user(mm, start);
			
 
				-	}
			
 
				+	/* both addresses are < 1T */
			
 
				+	if (esid_1t_count == 0)
			
 
				+		return (GET_ESID(addr1) == GET_ESID(addr2));
			
 
				 
			
 
				-	/* Top of stack, grows down. */
			
 
				-	if (!is_kernel_addr(sp)) {
			
 
				-		if (preload_add(ti, sp))
			
 
				-			slb_allocate_user(mm, sp);
			
 
				-	}
			
 
				+	/* One address < 1T, the other > 1T.  Not a match */
			
 
				+	if (esid_1t_count == 1)
			
 
				+		return 0;
			
 
				 
			
 
				-	/* Bottom of heap, grows up. */
			
 
				-	if (heap && !is_kernel_addr(heap)) {
			
 
				-		if (preload_add(ti, heap))
			
 
				-			slb_allocate_user(mm, heap);
			
 
				-	}
			
 
				+	/* Both addresses are > 1T. */
			
 
				+	return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
			
 
				 }
			
 
				 
			
 
				-
			
 
				 /* Flush all user entries from the segment table of the current processor. */
			
 
				 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
			
 
				 {
			
 
				-	struct thread_info *ti = task_thread_info(tsk);
			
 
				-	u8 i;
			
 
				+	unsigned long pc = KSTK_EIP(tsk);
			
 
				+	unsigned long stack = KSTK_ESP(tsk);
			
 
				+	unsigned long exec_base;
			
 
				 
			
 
				 	/*
			
 
				 	 * We need interrupts hard-disabled here, not just soft-disabled,
			
@@ -392,6 +304,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 
				 		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
			
 
				 		    offset <= SLB_CACHE_ENTRIES) {
			
 
				 			unsigned long slbie_data = 0;
			
 
				+			int i;
			
 
				 
			
 
				 			asm volatile("isync" : : : "memory");
			
 
				 			for (i = 0; i < offset; i++) {
			
@@ -422,60 +335,67 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 
				 				     "isync"
			
 
				 				     :: "r"(ksp_vsid_data),
			
 
				 					"r"(ksp_esid_data));
			
 
				-
			
 
				-			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
			
 
				 		}
			
 
				 
			
 
				 		get_paca()->slb_cache_ptr = 0;
			
 
				 	}
			
 
				-	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
			
 
				+
			
 
				+	copy_mm_to_paca(mm);
			
 
				 
			
 
				 	/*
			
 
				-	 * We gradually age out SLBs after a number of context switches to
			
 
				-	 * reduce reload overhead of unused entries (like we do with FP/VEC
			
 
				-	 * reload). Each time we wrap 256 switches, take an entry out of the
			
 
				-	 * SLB preload cache.
			
 
				+	 * preload some userspace segments into the SLB.
			
 
				+	 * Almost all 32 and 64bit PowerPC executables are linked at
			
 
				+	 * 0x10000000 so it makes sense to preload this segment.
			
 
				 	 */
			
 
				-	tsk->thread.load_slb++;
			
 
				-	if (!tsk->thread.load_slb) {
			
 
				-		unsigned long pc = KSTK_EIP(tsk);
			
 
				+	exec_base = 0x10000000;
			
 
				 
			
 
				-		preload_age(ti);
			
 
				-		preload_add(ti, pc);
			
 
				-	}
			
 
				+	if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
			
 
				+	    is_kernel_addr(exec_base))
			
 
				+		return;
			
 
				 
			
 
				-	for (i = 0; i < ti->slb_preload_nr; i++) {
			
 
				-		unsigned long ea;
			
 
				-		u8 idx;
			
 
				+	slb_allocate(pc);
			
 
				 
			
 
				-		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
			
 
				-		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
			
 
				+	if (!esids_match(pc, stack))
			
 
				+		slb_allocate(stack);
			
 
				 
			
 
				-		slb_allocate_user(mm, ea);
			
 
				-	}
			
 
				+	if (!esids_match(pc, exec_base) &&
			
 
				+	    !esids_match(stack, exec_base))
			
 
				+		slb_allocate(exec_base);
			
 
				 }
			
 
				 
			
 
				-void slb_set_size(u16 size)
			
 
				+static inline void patch_slb_encoding(unsigned int *insn_addr,
			
 
				+				      unsigned int immed)
			
 
				 {
			
 
				-	mmu_slb_size = size;
			
 
				+
			
 
				+	/*
			
 
				+	 * This function patches either an li or a cmpldi instruction with
			
 
				+	 * a new immediate value. This relies on the fact that both li
			
 
				+	 * (which is actually addi) and cmpldi both take a 16-bit immediate
			
 
				+	 * value, and it is situated in the same location in the instruction,
			
 
				+	 * ie. bits 16-31 (Big endian bit order) or the lower 16 bits.
			
 
				+	 * The signedness of the immediate operand differs between the two
			
 
				+	 * instructions however this code is only ever patching a small value,
			
 
				+	 * much less than 1 << 15, so we can get away with it.
			
 
				+	 * To patch the value we read the existing instruction, clear the
			
 
				+	 * immediate value, and or in our new value, then write the instruction
			
 
				+	 * back.
			
 
				+	 */
			
 
				+	unsigned int insn = (*insn_addr & 0xffff0000) | immed;
			
 
				+	patch_instruction(insn_addr, insn);
			
 
				 }
			
 
				 
			
 
				-static void cpu_flush_slb(void *parm)
			
 
				-{
			
 
				-	struct mm_struct *mm = parm;
			
 
				-	unsigned long flags;
			
 
				+extern u32 slb_miss_kernel_load_linear[];
			
 
				+extern u32 slb_miss_kernel_load_io[];
			
 
				+extern u32 slb_compare_rr_to_size[];
			
 
				+extern u32 slb_miss_kernel_load_vmemmap[];
			
 
				 
			
 
				-	if (mm != current->active_mm)
			
 
				+void slb_set_size(u16 size)
			
 
				+{
			
 
				+	if (mmu_slb_size == size)
			
 
				 		return;
			
 
				 
			
 
				-	local_irq_save(flags);
			
 
				-	slb_flush_and_rebolt();
			
 
				-	local_irq_restore(flags);
			
 
				-}
			
 
				-
			
 
				-void core_flush_all_slbs(struct mm_struct *mm)
			
 
				-{
			
 
				-	on_each_cpu(cpu_flush_slb, mm, 1);
			
 
				+	mmu_slb_size = size;
			
 
				+	patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
			
 
				 }
			
 
				 
			
 
				 void slb_initialize(void)
			
@@ -497,16 +417,24 @@ void slb_initialize(void)
 
				 #endif
			
 
				 	if (!slb_encoding_inited) {
			
 
				 		slb_encoding_inited = 1;
			
 
				+		patch_slb_encoding(slb_miss_kernel_load_linear,
			
 
				+				   SLB_VSID_KERNEL | linear_llp);
			
 
				+		patch_slb_encoding(slb_miss_kernel_load_io,
			
 
				+				   SLB_VSID_KERNEL | io_llp);
			
 
				+		patch_slb_encoding(slb_compare_rr_to_size,
			
 
				+				   mmu_slb_size);
			
 
				+
			
 
				 		pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
			
 
				 		pr_devel("SLB: io      LLP = %04lx\n", io_llp);
			
 
				+
			
 
				 #ifdef CONFIG_SPARSEMEM_VMEMMAP
			
 
				+		patch_slb_encoding(slb_miss_kernel_load_vmemmap,
			
 
				+				   SLB_VSID_KERNEL | vmemmap_llp);
			
 
				 		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
			
 
				 #endif
			
 
				 	}
			
 
				 
			
 
				 	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
			
 
				-	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
			
 
				-	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
			
 
				 
			
 
				 	lflags = SLB_VSID_KERNEL | linear_llp;
			
 
				 
			
@@ -530,13 +458,52 @@ void slb_initialize(void)
 
				 	asm volatile("isync":::"memory");
			
 
				 }
			
 
				 
			
 
				-static void slb_cache_update(unsigned long esid_data)
			
 
				+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
			
 
				+			     int bpsize, int ssize)
			
 
				 {
			
 
				+	unsigned long flags, vsid_data, esid_data;
			
 
				+	enum slb_index index;
			
 
				 	int slb_cache_index;
			
 
				 
			
 
				 	if (cpu_has_feature(CPU_FTR_ARCH_300))
			
 
				 		return; /* ISAv3.0B and later does not use slb_cache */
			
 
				 
			
 
				+	/*
			
 
				+	 * We are irq disabled, hence should be safe to access PACA.
			
 
				+	 */
			
 
				+	VM_WARN_ON(!irqs_disabled());
			
 
				+
			
 
				+	/*
			
 
				+	 * We can't take a PMU exception in the following code, so hard
			
 
				+	 * disable interrupts.
			
 
				+	 */
			
 
				+	hard_irq_disable();
			
 
				+
			
 
				+	index = get_paca()->stab_rr;
			
 
				+
			
 
				+	/*
			
 
				+	 * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
			
 
				+	 */
			
 
				+	if (index < (mmu_slb_size - 1))
			
 
				+		index++;
			
 
				+	else
			
 
				+		index = SLB_NUM_BOLTED;
			
 
				+
			
 
				+	get_paca()->stab_rr = index;
			
 
				+
			
 
				+	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
			
 
				+	vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
			
 
				+		    ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
			
 
				+	esid_data = mk_esid_data(ea, ssize, index);
			
 
				+
			
 
				+	/*
			
 
				+	 * No need for an isync before or after this slbmte. The exception
			
 
				+	 * we enter with and the rfid we exit with are context synchronizing.
			
 
				+	 * Also we only handle user segments here.
			
 
				+	 */
			
 
				+	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
			
 
				+		     : "memory");
			
 
				+
			
 
				 	/*
			
 
				 	 * Now update slb cache entries
			
 
				 	 */
			
@@ -558,196 +525,58 @@ static void slb_cache_update(unsigned long esid_data)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static enum slb_index alloc_slb_index(bool kernel)
			
 
				+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
			
 
				 {
			
 
				-	enum slb_index index;
			
 
				-
			
 
				-	/*
			
 
				-	 * The allocation bitmaps can become out of synch with the SLB
			
 
				-	 * when the _switch code does slbie when bolting a new stack
			
 
				-	 * segment and it must not be anywhere else in the SLB. This leaves
			
 
				-	 * a kernel allocated entry that is unused in the SLB. With very
			
 
				-	 * large systems or small segment sizes, the bitmaps could slowly
			
 
				-	 * fill with these entries. They will eventually be cleared out
			
 
				-	 * by the round robin allocator in that case, so it's probably not
			
 
				-	 * worth accounting for.
			
 
				-	 */
			
 
				+	struct mm_struct *mm = current->mm;
			
 
				+	unsigned long vsid;
			
 
				+	int bpsize;
			
 
				 
			
 
				 	/*
			
 
				-	 * SLBs beyond 32 entries are allocated with stab_rr only
			
 
				-	 * POWER7/8/9 have 32 SLB entries, this could be expanded if a
			
 
				-	 * future CPU has more.
			
 
				+	 * We are always above 1TB, hence use high user segment size.
			
 
				 	 */
			
 
				-	if (get_paca()->slb_used_bitmap != U32_MAX) {
			
 
				-		index = ffz(get_paca()->slb_used_bitmap);
			
 
				-		get_paca()->slb_used_bitmap |= 1U << index;
			
 
				-		if (kernel)
			
 
				-			get_paca()->slb_kern_bitmap |= 1U << index;
			
 
				-	} else {
			
 
				-		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
			
 
				-		index = get_paca()->stab_rr;
			
 
				-		if (index < (mmu_slb_size - 1))
			
 
				-			index++;
			
 
				-		else
			
 
				-			index = SLB_NUM_BOLTED;
			
 
				-		get_paca()->stab_rr = index;
			
 
				-		if (index < 32) {
			
 
				-			if (kernel)
			
 
				-				get_paca()->slb_kern_bitmap |= 1U << index;
			
 
				-			else
			
 
				-				get_paca()->slb_kern_bitmap &= ~(1U << index);
			
 
				-		}
			
 
				-	}
			
 
				-	BUG_ON(index < SLB_NUM_BOLTED);
			
 
				-
			
 
				-	return index;
			
 
				+	vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
			
 
				+	bpsize = get_slice_psize(mm, ea);
			
 
				+	insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
			
 
				 }
			
 
				 
			
 
				-static long slb_insert_entry(unsigned long ea, unsigned long context,
			
 
				-				unsigned long flags, int ssize, bool kernel)
			
 
				+void slb_miss_large_addr(struct pt_regs *regs)
			
 
				 {
			
 
				-	unsigned long vsid;
			
 
				-	unsigned long vsid_data, esid_data;
			
 
				-	enum slb_index index;
			
 
				-
			
 
				-	vsid = get_vsid(context, ea, ssize);
			
 
				-	if (!vsid)
			
 
				-		return -EFAULT;
			
 
				+	enum ctx_state prev_state = exception_enter();
			
 
				+	unsigned long ea = regs->dar;
			
 
				+	int context;
			
 
				 
			
 
				-	index = alloc_slb_index(kernel);
			
 
				-
			
 
				-	vsid_data = __mk_vsid_data(vsid, ssize, flags);
			
 
				-	esid_data = mk_esid_data(ea, ssize, index);
			
 
				+	if (REGION_ID(ea) != USER_REGION_ID)
			
 
				+		goto slb_bad_addr;
			
 
				 
			
 
				 	/*
			
 
				-	 * No need for an isync before or after this slbmte. The exception
			
 
				-	 * we enter with and the rfid we exit with are context synchronizing.
			
 
				-	 * Also we only handle user segments here.
			
 
				+	 * Are we beyond what the page table layout supports?
			
 
				 	 */
			
 
				-	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
			
 
				+	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
			
 
				+		goto slb_bad_addr;
			
 
				 
			
 
				-	if (!kernel)
			
 
				-		slb_cache_update(esid_data);
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static long slb_allocate_kernel(unsigned long ea, unsigned long id)
			
 
				-{
			
 
				-	unsigned long context;
			
 
				-	unsigned long flags;
			
 
				-	int ssize;
			
 
				-
			
 
				-	if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	if (id == KERNEL_REGION_ID) {
			
 
				-		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
			
 
				-#ifdef CONFIG_SPARSEMEM_VMEMMAP
			
 
				-	} else if (id == VMEMMAP_REGION_ID) {
			
 
				-		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
			
 
				-#endif
			
 
				-	} else if (id == VMALLOC_REGION_ID) {
			
 
				-		if (ea < H_VMALLOC_END)
			
 
				-			flags = get_paca()->vmalloc_sllp;
			
 
				-		else
			
 
				-			flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
			
 
				-	} else {
			
 
				-		return -EFAULT;
			
 
				-	}
			
 
				-
			
 
				-	ssize = MMU_SEGSIZE_1T;
			
 
				-	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
			
 
				-		ssize = MMU_SEGSIZE_256M;
			
 
				-
			
 
				-	context = id - KERNEL_REGION_CONTEXT_OFFSET;
			
 
				-
			
 
				-	return slb_insert_entry(ea, context, flags, ssize, true);
			
 
				-}
			
 
				-
			
 
				-static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
			
 
				-{
			
 
				-	unsigned long context;
			
 
				-	unsigned long flags;
			
 
				-	int bpsize;
			
 
				-	int ssize;
			
 
				+	/* Lower address should have been handled by asm code */
			
 
				+	if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
			
 
				+		goto slb_bad_addr;
			
 
				 
			
 
				 	/*
			
 
				 	 * consider this as bad access if we take a SLB miss
			
 
				 	 * on an address above addr limit.
			
 
				 	 */
			
 
				-	if (ea >= mm->context.slb_addr_limit)
			
 
				-		return -EFAULT;
			
 
				+	if (ea >= current->mm->context.slb_addr_limit)
			
 
				+		goto slb_bad_addr;
			
 
				 
			
 
				-	context = get_ea_context(&mm->context, ea);
			
 
				+	context = get_ea_context(&current->mm->context, ea);
			
 
				 	if (!context)
			
 
				-		return -EFAULT;
			
 
				-
			
 
				-	if (unlikely(ea >= H_PGTABLE_RANGE)) {
			
 
				-		WARN_ON(1);
			
 
				-		return -EFAULT;
			
 
				-	}
			
 
				-
			
 
				-	ssize = user_segment_size(ea);
			
 
				-
			
 
				-	bpsize = get_slice_psize(mm, ea);
			
 
				-	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
			
 
				-
			
 
				-	return slb_insert_entry(ea, context, flags, ssize, false);
			
 
				-}
			
 
				-
			
 
				-long do_slb_fault(struct pt_regs *regs, unsigned long ea)
			
 
				-{
			
 
				-	unsigned long id = REGION_ID(ea);
			
 
				-
			
 
				-	/* IRQs are not reconciled here, so can't check irqs_disabled */
			
 
				-	VM_WARN_ON(mfmsr() & MSR_EE);
			
 
				-
			
 
				-	if (unlikely(!(regs->msr & MSR_RI)))
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	/*
			
 
				-	 * SLB kernel faults must be very careful not to touch anything
			
 
				-	 * that is not bolted. E.g., PACA and global variables are okay,
			
 
				-	 * mm->context stuff is not.
			
 
				-	 *
			
 
				-	 * SLB user faults can access all of kernel memory, but must be
			
 
				-	 * careful not to touch things like IRQ state because it is not
			
 
				-	 * "reconciled" here. The difficulty is that we must use
			
 
				-	 * fast_exception_return to return from kernel SLB faults without
			
 
				-	 * looking at possible non-bolted memory. We could test user vs
			
 
				-	 * kernel faults in the interrupt handler asm and do a full fault,
			
 
				-	 * reconcile, ret_from_except for user faults which would make them
			
 
				-	 * first class kernel code. But for performance it's probably nicer
			
 
				-	 * if they go via fast_exception_return too.
			
 
				-	 */
			
 
				-	if (id >= KERNEL_REGION_ID) {
			
 
				-		return slb_allocate_kernel(ea, id);
			
 
				-	} else {
			
 
				-		struct mm_struct *mm = current->mm;
			
 
				-		long err;
			
 
				-
			
 
				-		if (unlikely(!mm))
			
 
				-			return -EFAULT;
			
 
				+		goto slb_bad_addr;
			
 
				 
			
 
				-		err = slb_allocate_user(mm, ea);
			
 
				-		if (!err)
			
 
				-			preload_add(current_thread_info(), ea);
			
 
				-
			
 
				-		return err;
			
 
				-	}
			
 
				-}
			
 
				+	handle_multi_context_slb_miss(context, ea);
			
 
				+	exception_exit(prev_state);
			
 
				+	return;
			
 
				 
			
 
				-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
			
 
				-{
			
 
				-	if (err == -EFAULT) {
			
 
				-		if (user_mode(regs))
			
 
				-			_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
			
 
				-		else
			
 
				-			bad_page_fault(regs, ea, SIGSEGV);
			
 
				-	} else if (err == -EINVAL) {
			
 
				-		unrecoverable_exception(regs);
			
 
				-	} else {
			
 
				-		BUG();
			
 
				-	}
			
 
				+slb_bad_addr:
			
 
				+	if (user_mode(regs))
			
 
				+		_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
			
 
				+	else
			
 
				+		bad_page_fault(regs, ea, SIGSEGV);
			
 
				+	exception_exit(prev_state);
			
 
				 }
			
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -0,0 +1,335 @@
 
				+/*
			
 
				+ * Low-level SLB routines
			
 
				+ *
			
 
				+ * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
			
 
				+ *
			
 
				+ * Based on earlier C version:
			
 
				+ * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
			
 
				+ *    Copyright (c) 2001 Dave Engebretsen
			
 
				+ * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
			
 
				+ *
			
 
				+ *  This program is free software; you can redistribute it and/or
			
 
				+ *  modify it under the terms of the GNU General Public License
			
 
				+ *  as published by the Free Software Foundation; either version
			
 
				+ *  2 of the License, or (at your option) any later version.
			
 
				+ */
			
 
				+
			
 
				+#include <asm/processor.h>
			
 
				+#include <asm/ppc_asm.h>
			
 
				+#include <asm/asm-offsets.h>
			
 
				+#include <asm/cputable.h>
			
 
				+#include <asm/page.h>
			
 
				+#include <asm/mmu.h>
			
 
				+#include <asm/pgtable.h>
			
 
				+#include <asm/firmware.h>
			
 
				+#include <asm/feature-fixups.h>
			
 
				+
			
 
				+/*
			
 
				+ * This macro generates asm code to compute the VSID scramble
			
 
				+ * function.  Used in slb_allocate() and do_stab_bolted.  The function
			
 
				+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
			
 
				+ *
			
 
				+ *	rt = register containing the proto-VSID and into which the
			
 
				+ *		VSID will be stored
			
 
				+ *	rx = scratch register (clobbered)
			
 
				+ *	rf = flags
			
 
				+ *
			
 
				+ *	- rt and rx must be different registers
			
 
				+ *	- The answer will end up in the low VSID_BITS bits of rt.  The higher
			
 
				+ *	  bits may contain other garbage, so you may need to mask the
			
 
				+ *	  result.
			
 
				+ */
			
 
				+#define ASM_VSID_SCRAMBLE(rt, rx, rf, size)				\
			
 
				+	lis	rx,VSID_MULTIPLIER_##size@h;				\
			
 
				+	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
			
 
				+	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
			
 
				+/*									\
			
 
				+ * powermac get slb fault before feature fixup, so make 65 bit part     \
			
 
				+ * the default part of feature fixup					\
			
 
				+ */									\
			
 
				+BEGIN_MMU_FTR_SECTION							\
			
 
				+	srdi	rx,rt,VSID_BITS_65_##size;				\
			
 
				+	clrldi	rt,rt,(64-VSID_BITS_65_##size);				\
			
 
				+	add	rt,rt,rx;						\
			
 
				+	addi	rx,rt,1;						\
			
 
				+	srdi	rx,rx,VSID_BITS_65_##size;				\
			
 
				+	add	rt,rt,rx;						\
			
 
				+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
			
 
				+MMU_FTR_SECTION_ELSE							\
			
 
				+	srdi	rx,rt,VSID_BITS_##size;					\
			
 
				+	clrldi	rt,rt,(64-VSID_BITS_##size);				\
			
 
				+	add	rt,rt,rx;		/* add high and low bits */	\
			
 
				+	addi	rx,rt,1;						\
			
 
				+	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
			
 
				+	add	rt,rt,rx;						\
			
 
				+	rldimi	rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
			
 
				+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
			
 
				+
			
 
				+
			
 
				+/* void slb_allocate(unsigned long ea);
			
 
				+ *
			
 
				+ * Create an SLB entry for the given EA (user or kernel).
			
 
				+ * 	r3 = faulting address, r13 = PACA
			
 
				+ *	r9, r10, r11 are clobbered by this function
			
 
				+ *	r3 is preserved.
			
 
				+ * No other registers are examined or changed.
			
 
				+ */
			
 
				+_GLOBAL(slb_allocate)
			
 
				+	/*
			
 
				+	 * Check if the address falls within the range of the first context, or
			
 
				+	 * if we may need to handle multi context. For the first context we
			
 
				+	 * allocate the slb entry via the fast path below. For large address we
			
 
				+	 * branch out to C-code and see if additional contexts have been
			
 
				+	 * allocated.
			
 
				+	 * The test here is:
			
 
				+	 *   (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
			
 
				+	 */
			
 
				+	rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
			
 
				+	bne-	8f
			
 
				+
			
 
				+	srdi	r9,r3,60		/* get region */
			
 
				+	srdi	r10,r3,SID_SHIFT	/* get esid */
			
 
				+	cmpldi	cr7,r9,0xc		/* cmp PAGE_OFFSET for later use */
			
 
				+
			
 
				+	/* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
			
 
				+	blt	cr7,0f			/* user or kernel? */
			
 
				+
			
 
				+	/* Check if hitting the linear mapping or some other kernel space
			
 
				+	*/
			
 
				+	bne	cr7,1f
			
 
				+
			
 
				+	/* Linear mapping encoding bits, the "li" instruction below will
			
 
				+	 * be patched by the kernel at boot
			
 
				+	 */
			
 
				+.globl slb_miss_kernel_load_linear
			
 
				+slb_miss_kernel_load_linear:
			
 
				+	li	r11,0
			
 
				+	/*
			
 
				+	 * context = (ea >> 60) - (0xc - 1)
			
 
				+	 * r9 = region id.
			
 
				+	 */
			
 
				+	subi	r9,r9,KERNEL_REGION_CONTEXT_OFFSET
			
 
				+
			
 
				+BEGIN_FTR_SECTION
			
 
				+	b	.Lslb_finish_load
			
 
				+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
			
 
				+	b	.Lslb_finish_load_1T
			
 
				+
			
 
				+1:
			
 
				+#ifdef CONFIG_SPARSEMEM_VMEMMAP
			
 
				+	cmpldi	cr0,r9,0xf
			
 
				+	bne	1f
			
 
				+/* Check virtual memmap region. To be patched at kernel boot */
			
 
				+.globl slb_miss_kernel_load_vmemmap
			
 
				+slb_miss_kernel_load_vmemmap:
			
 
				+	li	r11,0
			
 
				+	b	6f
			
 
				+1:
			
 
				+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
			
 
				+
			
 
				+	/*
			
 
				+	 * r10 contains the ESID, which is the original faulting EA shifted
			
 
				+	 * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28)
			
 
				+	 * which is 0xd00038000. That can't be used as an immediate, even if we
			
 
				+	 * ignored the 0xd, so we have to load it into a register, and we only
			
 
				+	 * have one register free. So we must load all of (H_VMALLOC_END >> 28)
			
 
				+	 * into a register and compare ESID against that.
			
 
				+	 */
			
 
				+	lis	r11,(H_VMALLOC_END >> 32)@h	// r11 = 0xffffffffd0000000
			
 
				+	ori	r11,r11,(H_VMALLOC_END >> 32)@l	// r11 = 0xffffffffd0003800
			
 
				+	// Rotate left 4, then mask with 0xffffffff0
			
 
				+	rldic	r11,r11,4,28			// r11 = 0xd00038000
			
 
				+	cmpld	r10,r11				// if r10 >= r11
			
 
				+	bge	5f				//   goto io_mapping
			
 
				+
			
 
				+	/*
			
 
				+	 * vmalloc mapping gets the encoding from the PACA as the mapping
			
 
				+	 * can be demoted from 64K -> 4K dynamically on some machines.
			
 
				+	 */
			
 
				+	lhz	r11,PACAVMALLOCSLLP(r13)
			
 
				+	b	6f
			
 
				+5:
			
 
				+	/* IO mapping */
			
 
				+.globl slb_miss_kernel_load_io
			
 
				+slb_miss_kernel_load_io:
			
 
				+	li	r11,0
			
 
				+6:
			
 
				+	/*
			
 
				+	 * context = (ea >> 60) - (0xc - 1)
			
 
				+	 * r9 = region id.
			
 
				+	 */
			
 
				+	subi	r9,r9,KERNEL_REGION_CONTEXT_OFFSET
			
 
				+
			
 
				+BEGIN_FTR_SECTION
			
 
				+	b	.Lslb_finish_load
			
 
				+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
			
 
				+	b	.Lslb_finish_load_1T
			
 
				+
			
 
				+0:	/*
			
 
				+	 * For userspace addresses, make sure this is region 0.
			
 
				+	 */
			
 
				+	cmpdi	r9, 0
			
 
				+	bne-	8f
			
 
				+        /*
			
 
				+         * For user space, make sure we are within the allowed limit
			
 
				+	 */
			
 
				+	ld	r11,PACA_SLB_ADDR_LIMIT(r13)
			
 
				+	cmpld	r3,r11
			
 
				+	bge-	8f
			
 
				+
			
 
				+	/* when using slices, we extract the psize off the slice bitmaps
			
 
				+	 * and then we need to get the sllp encoding off the mmu_psize_defs
			
 
				+	 * array.
			
 
				+	 *
			
 
				+	 * XXX This is a bit inefficient especially for the normal case,
			
 
				+	 * so we should try to implement a fast path for the standard page
			
 
				+	 * size using the old sllp value so we avoid the array. We cannot
			
 
				+	 * really do dynamic patching unfortunately as processes might flip
			
 
				+	 * between 4k and 64k standard page size
			
 
				+	 */
			
 
				+#ifdef CONFIG_PPC_MM_SLICES
			
 
				+	/* r10 has the esid */
			
 
				+	cmpldi	r10,16
			
 
				+	/* below SLICE_LOW_TOP */
			
 
				+	blt	5f
			
 
				+	/*
			
 
				+	 * Handle hpsizes,
			
 
				+	 * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
			
 
				+	 */
			
 
				+	srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
			
 
				+	addi	r9,r11,PACAHIGHSLICEPSIZE
			
 
				+	lbzx	r9,r13,r9		/* r9 is hpsizes[r11] */
			
 
				+	/* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
			
 
				+	rldicl	r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
			
 
				+	b	6f
			
 
				+
			
 
				+5:
			
 
				+	/*
			
 
				+	 * Handle lpsizes
			
 
				+	 * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
			
 
				+	 */
			
 
				+	srdi    r11,r10,1 /* index */
			
 
				+	addi	r9,r11,PACALOWSLICESPSIZE
			
 
				+	lbzx	r9,r13,r9		/* r9 is lpsizes[r11] */
			
 
				+	rldicl	r11,r10,0,63		/* r11 = r10 & 0x1 */
			
 
				+6:
			
 
				+	sldi	r11,r11,2  /* index * 4 */
			
 
				+	/* Extract the psize and multiply to get an array offset */
			
 
				+	srd	r9,r9,r11
			
 
				+	andi.	r9,r9,0xf
			
 
				+	mulli	r9,r9,MMUPSIZEDEFSIZE
			
 
				+
			
 
				+	/* Now get to the array and obtain the sllp
			
 
				+	 */
			
 
				+	ld	r11,PACATOC(r13)
			
 
				+	ld	r11,mmu_psize_defs@got(r11)
			
 
				+	add	r11,r11,r9
			
 
				+	ld	r11,MMUPSIZESLLP(r11)
			
 
				+	ori	r11,r11,SLB_VSID_USER
			
 
				+#else
			
 
				+	/* paca context sllp already contains the SLB_VSID_USER bits */
			
 
				+	lhz	r11,PACACONTEXTSLLP(r13)
			
 
				+#endif /* CONFIG_PPC_MM_SLICES */
			
 
				+
			
 
				+	ld	r9,PACACONTEXTID(r13)
			
 
				+BEGIN_FTR_SECTION
			
 
				+	cmpldi	r10,0x1000
			
 
				+	bge	.Lslb_finish_load_1T
			
 
				+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
			
 
				+	b	.Lslb_finish_load
			
 
				+
			
 
				+8:	/* invalid EA - return an error indication */
			
 
				+	crset	4*cr0+eq		/* indicate failure */
			
 
				+	blr
			
 
				+
			
 
				+/*
			
 
				+ * Finish loading of an SLB entry and return
			
 
				+ *
			
 
				+ * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
			
 
				+ */
			
 
				+.Lslb_finish_load:
			
 
				+	rldimi  r10,r9,ESID_BITS,0
			
 
				+	ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
			
 
				+	/* r3 = EA, r11 = VSID data */
			
 
				+	/*
			
 
				+	 * Find a slot, round robin. Previously we tried to find a
			
 
				+	 * free slot first but that took too long. Unfortunately we
			
 
				+	 * don't have any LRU information to help us choose a slot.
			
 
				+	 */
			
 
				+
			
 
				+	mr	r9,r3
			
 
				+
			
 
				+	/* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
			
 
				+7:	ld	r10,PACASTABRR(r13)
			
 
				+	addi	r10,r10,1
			
 
				+	/* This gets soft patched on boot. */
			
 
				+.globl slb_compare_rr_to_size
			
 
				+slb_compare_rr_to_size:
			
 
				+	cmpldi	r10,0
			
 
				+
			
 
				+	blt+	4f
			
 
				+	li	r10,SLB_NUM_BOLTED
			
 
				+
			
 
				+4:
			
 
				+	std	r10,PACASTABRR(r13)
			
 
				+
			
 
				+3:
			
 
				+	rldimi	r9,r10,0,36		/* r9  = EA[0:35] | entry */
			
 
				+	oris	r10,r9,SLB_ESID_V@h	/* r10 = r9 | SLB_ESID_V */
			
 
				+
			
 
				+	/* r10 = ESID data, r11 = VSID data */
			
 
				+
			
 
				+	/*
			
 
				+	 * No need for an isync before or after this slbmte. The exception
			
 
				+	 * we enter with and the rfid we exit with are context synchronizing.
			
 
				+	 */
			
 
				+	slbmte	r11,r10
			
 
				+
			
 
				+	/* we're done for kernel addresses */
			
 
				+	crclr	4*cr0+eq		/* set result to "success" */
			
 
				+	bgelr	cr7
			
 
				+
			
 
				+	/* Update the slb cache */
			
 
				+	lhz	r9,PACASLBCACHEPTR(r13)	/* offset = paca->slb_cache_ptr */
			
 
				+	cmpldi	r9,SLB_CACHE_ENTRIES
			
 
				+	bge	1f
			
 
				+
			
 
				+	/* still room in the slb cache */
			
 
				+	sldi	r11,r9,2		/* r11 = offset * sizeof(u32) */
			
 
				+	srdi    r10,r10,28		/* get the 36 bits of the ESID */
			
 
				+	add	r11,r11,r13		/* r11 = (u32 *)paca + offset */
			
 
				+	stw	r10,PACASLBCACHE(r11)	/* paca->slb_cache[offset] = esid */
			
 
				+	addi	r9,r9,1			/* offset++ */
			
 
				+	b	2f
			
 
				+1:					/* offset >= SLB_CACHE_ENTRIES */
			
 
				+	li	r9,SLB_CACHE_ENTRIES+1
			
 
				+2:
			
 
				+	sth	r9,PACASLBCACHEPTR(r13)	/* paca->slb_cache_ptr = offset */
			
 
				+	crclr	4*cr0+eq		/* set result to "success" */
			
 
				+	blr
			
 
				+
			
 
				+/*
			
 
				+ * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
			
 
				+ *
			
 
				+ * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
			
 
				+ */
			
 
				+.Lslb_finish_load_1T:
			
 
				+	srdi	r10,r10,(SID_SHIFT_1T - SID_SHIFT)	/* get 1T ESID */
			
 
				+	rldimi  r10,r9,ESID_BITS_1T,0
			
 
				+	ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
			
 
				+
			
 
				+	li	r10,MMU_SEGSIZE_1T
			
 
				+	rldimi	r11,r10,SLB_VSID_SSIZE_SHIFT,0	/* insert segment size */
			
 
				+
			
 
				+	/* r3 = EA, r11 = VSID data */
			
 
				+	clrrdi	r9,r3,SID_SHIFT_1T	/* clear out non-ESID bits */
			
 
				+	b	7b
			
 
				+
			
 
				+
			
 
				+_ASM_NOKPROBE_SYMBOL(slb_allocate)
			
 
				+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
			
 
				+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
			
 
				+_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
			
 
				+#ifdef CONFIG_SPARSEMEM_VMEMMAP
			
 
				+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap)
			
 
				+#endif
			
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -207,6 +207,23 @@ static bool slice_check_range_fits(struct mm_struct *mm,
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				+static void slice_flush_segments(void *parm)	/* on_each_cpu() callback; parm is the mm being updated */
			
 
				+{
			
 
				+#ifdef CONFIG_PPC64	/* the whole body compiles away on other configurations */
			
 
				+	struct mm_struct *mm = parm;
			
 
				+	unsigned long flags;
			
 
				+
			
 
				+	if (mm != current->active_mm)	/* this CPU is not running the target mm: nothing to do */
			
 
				+		return;
			
 
				+
			
 
				+	copy_mm_to_paca(current->active_mm);	/* presumably refreshes this CPU's paca copy of the mm context -- confirm */
			
 
				+
			
 
				+	local_irq_save(flags);
			
 
				+	slb_flush_and_rebolt();	/* called with local interrupts disabled */
			
 
				+	local_irq_restore(flags);
			
 
				+#endif /* CONFIG_PPC64 */
			
 
				+}
			
 
				+
			
 
				 static void slice_convert(struct mm_struct *mm,
			
 
				 				const struct slice_mask *mask, int psize)
			
 
				 {
			
@@ -272,9 +289,6 @@ static void slice_convert(struct mm_struct *mm,
 
				 	spin_unlock_irqrestore(&slice_convert_lock, flags);
			
 
				 
			
 
				 	copro_flush_all_slbs(mm);
			
 
				-#ifdef CONFIG_PPC64
			
 
				-	core_flush_all_slbs(mm);
			
 
				-#endif
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -488,9 +502,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 
				 		 * be already initialised beyond the old address limit.
			
 
				 		 */
			
 
				 		mm->context.slb_addr_limit = high_limit;
			
 
				-#ifdef CONFIG_PPC64
			
 
				-		core_flush_all_slbs(mm);
			
 
				-#endif
			
 
				+
			
 
				+		on_each_cpu(slice_flush_segments, mm, 1);
			
 
				 	}
			
 
				 
			
 
				 	/* Sanity checks */
			
@@ -652,10 +665,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 
				 		(SLICE_NUM_HIGH &&
			
 
				 		 !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
			
 
				 		slice_convert(mm, &potential_mask, psize);
			
 
				-#ifdef CONFIG_PPC64
			
 
				 		if (psize > MMU_PAGE_BASE)
			
 
				-			core_flush_all_slbs(mm);
			
 
				-#endif
			
 
				+			on_each_cpu(slice_flush_segments, mm, 1);
			
 
				 	}
			
 
				 	return newaddr;
			
 
				 
			
@@ -746,20 +757,6 @@ void slice_init_new_context_exec(struct mm_struct *mm)
 
				 		bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
			
 
				 }
			
 
				 
			
 
				-#ifdef CONFIG_PPC_BOOK3S_64
			
 
				-void slice_setup_new_exec(void)
			
 
				-{
			
 
				-	struct mm_struct *mm = current->mm;
			
 
				-
			
 
				-	slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);
			
 
				-
			
 
				-	if (!is_32bit_task())
			
 
				-		return;
			
 
				-
			
 
				-	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
			
 
				 			   unsigned long len, unsigned int psize)
			
 
				 {
			
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2394,9 +2394,7 @@ static void dump_one_paca(int cpu)
 
				 			}
			
 
				 		}
			
 
				 		DUMP(p, vmalloc_sllp, "%#-*x");
			
 
				-		DUMP(p, stab_rr, "%#-*x");
			
 
				-		DUMP(p, slb_used_bitmap, "%#-*x");
			
 
				-		DUMP(p, slb_kern_bitmap, "%#-*x");
			
 
				+		DUMP(p, stab_rr, "%#-*llx");
			
 
				 
			
 
				 		if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
			
 
				 			DUMP(p, slb_cache_ptr, "%#-*x");