
Merge tag 'powerpc-4.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "A handful of fixes, mostly for new code:

   - some reworking of the new STRICT_KERNEL_RWX support to make sure we
     also remove executable permission from __init memory before it's
     freed.

   - a fix to some recent optimisations to the hypercall entry where we
     were clobbering r12; this was breaking nested guests (PR KVM).

   - a fix for the recent patch to opal_configure_cores(). This could
     break booting on bare metal Power8 boxes if the kernel was built
     without CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG.

   - ... and finally a workaround for spurious PMU interrupts on Power9
     DD2.

  Thanks to: Nicholas Piggin, Anton Blanchard, Balbir Singh"

* tag 'powerpc-4.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/mm: Mark __init memory no-execute when STRICT_KERNEL_RWX=y
  powerpc/mm/hash: Refactor hash__mark_rodata_ro()
  powerpc/mm/radix: Refactor radix__mark_rodata_ro()
  powerpc/64s: Fix hypercall entry clobbering r12 input
  powerpc/perf: Avoid spurious PMU interrupts after idle
  powerpc/powernv: Fix boot on Power8 bare metal due to opal_configure_cores()
Linus Torvalds, 8 years ago
commit 10fc95547f

+ 1 - 0
arch/powerpc/include/asm/book3s/64/hash.h

@@ -91,6 +91,7 @@ static inline int hash__pgd_bad(pgd_t pgd)
 }
 #ifdef CONFIG_STRICT_KERNEL_RWX
 extern void hash__mark_rodata_ro(void);
+extern void hash__mark_initmem_nx(void);
 #endif
 
 extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,

+ 1 - 0
arch/powerpc/include/asm/book3s/64/pgtable.h

@@ -1192,5 +1192,6 @@ static inline const int pud_pfn(pud_t pud)
 	BUILD_BUG();
 	return 0;
 }
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */

+ 1 - 0
arch/powerpc/include/asm/book3s/64/radix.h

@@ -118,6 +118,7 @@
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 extern void radix__mark_rodata_ro(void);
+extern void radix__mark_initmem_nx(void);
 #endif
 
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,

+ 7 - 0
arch/powerpc/include/asm/pgtable.h

@@ -80,6 +80,13 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_initmem_nx(void);
+#else
+static inline void mark_initmem_nx(void) { }
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
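
The empty static inline in the !STRICT_KERNEL_RWX branch is what lets
generic code (free_initmem() in mem.c below) call mark_initmem_nx()
unconditionally: with the option off, the call compiles to nothing. A
minimal sketch of the same stub pattern, using hypothetical names that
are not part of the patch:

	#ifdef CONFIG_SOME_FEATURE
	void some_feature_hook(void);			/* real version elsewhere */
	#else
	static inline void some_feature_hook(void) { }	/* compiles away */
	#endif

	void caller(void)
	{
		some_feature_hook();	/* no #ifdef needed at the call site */
	}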

+ 14 - 14
arch/powerpc/kernel/exceptions-64s.S

@@ -824,7 +824,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  * r3 volatile parameter and return value for status
  * r4-r10 volatile input and output value
  * r11 volatile hypercall number and output value
- * r12 volatile
+ * r12 volatile input and output value
  * r13-r31 nonvolatile
  * LR nonvolatile
  * CTR volatile
@@ -834,25 +834,26 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  * Other registers nonvolatile
  *
  * The intersection of volatile registers that don't contain possible
- * inputs is: r12, cr0, xer, ctr. We may use these as scratch regs
- * upon entry without saving.
+ * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
+ * without saving, though xer is not a good idea to use, as hardware may
+ * interpret some bits so it may be costly to change them.
  */
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 	/*
 	 * There is a little bit of juggling to get syscall and hcall
-	 * working well. Save r10 in ctr to be restored in case it is a
-	 * hcall.
+	 * working well. Save r13 in ctr to avoid using SPRG scratch
+	 * register.
 	 *
 	 * Userspace syscalls have already saved the PPR, hcalls must save
 	 * it before setting HMT_MEDIUM.
 	 */
 #define SYSCALL_KVMTEST							\
-	mr	r12,r13;						\
+	mtctr	r13;							\
 	GET_PACA(r13);							\
-	mtctr	r10;							\
+	std	r10,PACA_EXGEN+EX_R10(r13);				\
 	KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
 	HMT_MEDIUM;							\
-	mr	r9,r12;							\
+	mfctr	r9;
 
 #else
 #define SYSCALL_KVMTEST							\
@@ -935,8 +936,8 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
 	 * This is a hcall, so register convention is as above, with these
 	 * differences:
 	 * r13 = PACA
-	 * r12 = orig r13
-	 * ctr = orig r10
+	 * ctr = orig r13
+	 * orig r10 saved in PACA
 	 */
 TRAMP_KVM_BEGIN(do_kvm_0xc00)
 	 /*
@@ -944,14 +945,13 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
 	  * HMT_MEDIUM. That allows the KVM code to save that value into the
 	  * guest state (it is the guest's PPR value).
 	  */
-	OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR)
+	OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
 	HMT_MEDIUM
-	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR)
+	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
 	mfctr	r10
-	SET_SCRATCH0(r12)
+	SET_SCRATCH0(r10)
 	std	r9,PACA_EXGEN+EX_R9(r13)
 	mfcr	r9
-	std	r10,PACA_EXGEN+EX_R10(r13)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
 #endif
 

+ 14 - 1
arch/powerpc/kernel/idle_book3s.S

@@ -30,6 +30,7 @@
  * Use unused space in the interrupt stack to save and restore
  * registers for winkle support.
  */
+#define _MMCR0	GPR0
 #define _SDR1	GPR3
 #define _PTCR	GPR3
 #define _RPR	GPR4
@@ -272,6 +273,14 @@ power_enter_stop:
 	b 	pnv_wakeup_noloss
 
 .Lhandle_esl_ec_set:
+	/*
+	 * POWER9 DD2 can incorrectly set PMAO when waking up after a
+	 * state-loss idle. Saving and restoring MMCR0 over idle is a
+	 * workaround.
+	 */
+	mfspr	r4,SPRN_MMCR0
+	std	r4,_MMCR0(r1)
+
 /*
  * Check if the requested state is a deep idle state.
  */
@@ -450,10 +459,14 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 pnv_restore_hyp_resource_arch300:
 	/*
 	 * Workaround for POWER9, if we lost resources, the ERAT
-	 * might have been mixed up and needs flushing.
+	 * might have been mixed up and needs flushing. We also need
+	 * to reload MMCR0 (see comment above).
 	 */
 	blt	cr3,1f
 	PPC_INVALIDATE_ERAT
+	ld	r1,PACAR1(r13)
+	ld	r4,_MMCR0(r1)
+	mtspr	SPRN_MMCR0,r4
 1:
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
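
In C terms the workaround amounts to the following rough sketch, using
the kernel's mfspr/mtspr accessors; the real code is the assembly
above, and _MMCR0 is a slot in the stack save area rather than a
variable:

	/* Sketch only: preserve MMCR0 across a state-loss idle state. */
	unsigned long mmcr0_save;

	void idle_enter_sketch(void)
	{
		mmcr0_save = mfspr(SPRN_MMCR0);	/* save before stop */
		/* ... enter the idle state ... */
	}

	void idle_wakeup_sketch(void)
	{
		/* Writing MMCR0 back clears any PMAO bit that POWER9
		 * DD2 may have set spuriously across the wakeup. */
		mtspr(SPRN_MMCR0, mmcr0_save);
	}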

+ 1 - 0
arch/powerpc/mm/mem.c

@@ -402,6 +402,7 @@ void __init mem_init(void)
 void free_initmem(void)
 {
 	ppc_md.progress = ppc_printk_progress;
+	mark_initmem_nx();
 	free_initmem_default(POISON_FREE_INITMEM);
 }
 

+ 31 - 13
arch/powerpc/mm/pgtable-hash64.c

@@ -425,33 +425,51 @@ int hash__has_transparent_hugepage(void)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void hash__mark_rodata_ro(void)
+static bool hash__change_memory_range(unsigned long start, unsigned long end,
+				      unsigned long newpp)
 {
-	unsigned long start = (unsigned long)_stext;
-	unsigned long end = (unsigned long)__init_begin;
 	unsigned long idx;
 	unsigned int step, shift;
-	unsigned long newpp = PP_RXXX;
 
 	shift = mmu_psize_defs[mmu_linear_psize].shift;
 	step = 1 << shift;
 
-	start = ((start + step - 1) >> shift) << shift;
-	end = (end >> shift) << shift;
+	start = ALIGN_DOWN(start, step);
+	end = ALIGN(end, step); // aligns up
 
-	pr_devel("marking ro start %lx, end %lx, step %x\n",
-			start, end, step);
+	if (start >= end)
+		return false;
 
-	if (start == end) {
-		pr_warn("could not set rodata ro, relocate the start"
-			" of the kernel to a 0x%x boundary\n", step);
-		return;
-	}
+	pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
+		 start, end, newpp, step);
 
 	for (idx = start; idx < end; idx += step)
 		/* Not sure if we can do much with the return value */
 		mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
 							mmu_kernel_ssize);
 
+	return true;
+}
+
+void hash__mark_rodata_ro(void)
+{
+	unsigned long start, end;
+
+	start = (unsigned long)_stext;
+	end = (unsigned long)__init_begin;
+
+	WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
+}
+
+void hash__mark_initmem_nx(void)
+{
+	unsigned long start, end, pp;
+
+	start = (unsigned long)__init_begin;
+	end = (unsigned long)__init_end;
+
+	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL));
+
+	WARN_ON(!hash__change_memory_range(start, end, pp));
 }
 #endif
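
Note the behaviour change hidden in the refactor: the old code rounded
start up and end down, shrinking the range inward (which is why it
could hit the start == end warning), while ALIGN_DOWN()/ALIGN() round
outward so the whole region is always covered. For a power-of-two step
the kernel macros reduce to this bit arithmetic (a sketch, assuming
the standard definitions):

	start = start & ~(step - 1);		/* ALIGN_DOWN: round down */
	end = (end + step - 1) & ~(step - 1);	/* ALIGN: round up */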

+ 23 - 5
arch/powerpc/mm/pgtable-radix.c

@@ -112,10 +112,9 @@ set_the_pte:
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void radix__mark_rodata_ro(void)
+void radix__change_memory_range(unsigned long start, unsigned long end,
+				unsigned long clear)
 {
-	unsigned long start = (unsigned long)_stext;
-	unsigned long end = (unsigned long)__init_begin;
 	unsigned long idx;
 	pgd_t *pgdp;
 	pud_t *pudp;
@@ -125,7 +124,8 @@ void radix__mark_rodata_ro(void)
 	start = ALIGN_DOWN(start, PAGE_SIZE);
 	end = PAGE_ALIGN(end); // aligns up
 
-	pr_devel("marking ro start %lx, end %lx\n", start, end);
+	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
+		 start, end, clear);
 
 	for (idx = start; idx < end; idx += PAGE_SIZE) {
 		pgdp = pgd_offset_k(idx);
@@ -147,11 +147,29 @@ void radix__mark_rodata_ro(void)
 		if (!ptep)
 			continue;
 update_the_pte:
-		radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0);
+		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
 	}
 
 	radix__flush_tlb_kernel_range(start, end);
 }
+
+void radix__mark_rodata_ro(void)
+{
+	unsigned long start, end;
+
+	start = (unsigned long)_stext;
+	end = (unsigned long)__init_begin;
+
+	radix__change_memory_range(start, end, _PAGE_WRITE);
+}
+
+void radix__mark_initmem_nx(void)
+{
+	unsigned long start = (unsigned long)__init_begin;
+	unsigned long end = (unsigned long)__init_end;
+
+	radix__change_memory_range(start, end, _PAGE_EXEC);
+}
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
 static inline void __meminit print_mapping(unsigned long start,
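
Both users now share radix__change_memory_range() and differ only in
which permission bit they clear from each PTE. Schematically (a
summary of the call sites above, not code from the patch):

	pte &= ~_PAGE_WRITE;	/* mark_rodata_ro: text/rodata lose write */
	pte &= ~_PAGE_EXEC;	/* mark_initmem_nx: __init pages lose exec */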

+ 8 - 0
arch/powerpc/mm/pgtable_64.c

@@ -505,4 +505,12 @@ void mark_rodata_ro(void)
 	else
 		hash__mark_rodata_ro();
 }
+
+void mark_initmem_nx(void)
+{
+	if (radix_enabled())
+		radix__mark_initmem_nx();
+	else
+		hash__mark_initmem_nx();
+}
 #endif

+ 1 - 1
arch/powerpc/platforms/powernv/opal.c

@@ -78,7 +78,7 @@ void opal_configure_cores(void)
 	 *  ie. Host hash  supports  hash guests
 	 *      Host radix supports  hash/radix guests
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+	if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
 		reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
 		if (early_radix_enabled())
 			reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
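
The fix looks trivial but the distinction matters: cpu_has_feature()
may be backed by a jump label (static key) that is only patched later
in boot, so calling it this early can return the wrong answer. With
CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG=y the too-early use is detected
and a fallback path taken, which is why only non-debug builds broke.
early_cpu_has_feature() skips the static key and tests the feature
mask directly, roughly like this sketch (not the kernel's exact
implementation):

	/* Sketch: plain bitmask test, safe before jump labels are set up. */
	static inline bool early_cpu_has_feature_sketch(unsigned long feature)
	{
		return !!(cur_cpu_spec->cpu_features & feature);
	}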