Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull more arm64 updates from Catalin Marinas:

 - Silence module allocation failures when CONFIG_ARM*_MODULE_PLTS is
   enabled. This requires a check for __GFP_NOWARN in alloc_vmap_area()

 - Improve/sanitise the handling of user tagged pointers in the kernel

 - Inline asm fixes/cleanups

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: Silence first allocation with CONFIG_ARM64_MODULE_PLTS=y
  ARM: Silence first allocation with CONFIG_ARM_MODULE_PLTS=y
  mm: Silence vmap() allocation failures based on caller gfp_flags
  arm64: uaccess: suppress spurious clang warning
  arm64: atomic_lse: match asm register sizes
  arm64: armv8_deprecated: ensure extension of addr
  arm64: uaccess: ensure extension of access_ok() addr
  arm64: ensure extension of smp_store_release value
  arm64: xchg: hazard against entire exchange variable
  arm64: documentation: document tagged pointer stack constraints
  arm64: entry: improve data abort handling of tagged pointers
  arm64: hw_breakpoint: fix watchpoint matching for tagged pointers
  arm64: traps: fix userspace cache maintenance emulation on a tagged pointer
Linus Torvalds, 8 years ago (commit e47b40a235)

+ 47 - 15
Documentation/arm64/tagged-pointers.txt

@@ -11,24 +11,56 @@ in AArch64 Linux.
 The kernel configures the translation tables so that translations made
 via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
 the virtual address ignored by the translation hardware. This frees up
-this byte for application use, with the following caveats:
+this byte for application use.
 
 
-	(1) The kernel requires that all user addresses passed to EL1
-	    are tagged with tag 0x00. This means that any syscall
-	    parameters containing user virtual addresses *must* have
-	    their top byte cleared before trapping to the kernel.
 
 
-	(2) Non-zero tags are not preserved when delivering signals.
-	    This means that signal handlers in applications making use
-	    of tags cannot rely on the tag information for user virtual
-	    addresses being maintained for fields inside siginfo_t.
-	    One exception to this rule is for signals raised in response
-	    to watchpoint debug exceptions, where the tag information
-	    will be preserved.
+Passing tagged addresses to the kernel
+--------------------------------------
 
 
-	(3) Special care should be taken when using tagged pointers,
-	    since it is likely that C compilers will not hazard two
-	    virtual addresses differing only in the upper byte.
+All interpretation of userspace memory addresses by the kernel assumes
+an address tag of 0x00.
+
+This includes, but is not limited to, addresses found in:
+
+ - pointer arguments to system calls, including pointers in structures
+   passed to system calls,
+
+ - the stack pointer (sp), e.g. when interpreting it to deliver a
+   signal,
+
+ - the frame pointer (x29) and frame records, e.g. when interpreting
+   them to generate a backtrace or call graph.
+
+Using non-zero address tags in any of these locations may result in an
+error code being returned, a (fatal) signal being raised, or other modes
+of failure.
+
+For these reasons, passing non-zero address tags to the kernel via
+system calls is forbidden, and using a non-zero address tag for sp is
+strongly discouraged.
+
+Programs maintaining a frame pointer and frame records that use non-zero
+address tags may suffer impaired or inaccurate debug and profiling
+visibility.
+
+
+Preserving tags
+---------------
+
+Non-zero tags are not preserved when delivering signals. This means that
+signal handlers in applications making use of tags cannot rely on the
+tag information for user virtual addresses being maintained for fields
+inside siginfo_t. One exception to this rule is for signals raised in
+response to watchpoint debug exceptions, where the tag information will
+be preserved.
 
 
 The architecture prevents the use of a tagged PC, so the upper byte will
 be set to a sign-extension of bit 55 on exception return.
+
+
+Other considerations
+--------------------
+
+Special care should be taken when using tagged pointers, since it is
+likely that C compilers will not hazard two virtual addresses differing
+only in the upper byte.
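
To illustrate the rule above, here is a minimal user-space sketch of clearing the tag byte before a system call; the untag_pointer() helper is hypothetical and not part of any library API:

#include <stdint.h>
#include <unistd.h>

/* Hypothetical helper: mask off bits 63:56 so the pointer carries the
 * tag 0x00 that the kernel expects for syscall arguments. */
static inline void *untag_pointer(void *tagged)
{
	return (void *)((uintptr_t)tagged & ~(0xffULL << 56));
}

/* Usage sketch: strip the tag before handing a tagged buffer to write(2). */
static ssize_t write_untagged(int fd, void *tagged_buf, size_t len)
{
	return write(fd, untag_pointer(tagged_buf), len);
}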

+ 9 - 2
arch/arm/kernel/module.c

@@ -40,8 +40,15 @@
 #ifdef CONFIG_MMU
 void *module_alloc(unsigned long size)
 {
-	void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+	gfp_t gfp_mask = GFP_KERNEL;
+	void *p;
+
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
+	p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 	if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
 		return p;
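
The retained context lines show why the quiet failure is acceptable: with PLTs enabled, module_alloc() falls back to a second allocation. A condensed sketch of the resulting flow; the bounds of the fallback range are an assumption here, not taken from the hunk:

void *module_alloc_sketch(unsigned long size)
{
	gfp_t gfp_mask = GFP_KERNEL;
	void *p;

	/* First attempt in the module region; with PLTs available this is
	 * allowed to fail quietly, hence __GFP_NOWARN. */
	if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
		gfp_mask |= __GFP_NOWARN;

	p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
				 gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
				 __builtin_return_address(0));
	if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
		return p;

	/* Second attempt; assumed to cover the generic vmalloc range, where
	 * PLTs bridge the longer branch distances. */
	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
				    GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
				    NUMA_NO_NODE, __builtin_return_address(0));
}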

+ 9 - 0
arch/arm64/include/asm/asm-uaccess.h

@@ -62,4 +62,13 @@ alternative_if ARM64_ALT_PAN_NOT_UAO
 alternative_else_nop_endif
 	.endm
 
+/*
+ * Remove the address tag from a virtual address, if present.
+ */
+	.macro	clear_address_tag, dst, addr
+	tst	\addr, #(1 << 55)
+	bic	\dst, \addr, #(0xff << 56)
+	csel	\dst, \dst, \addr, eq
+	.endm
+
 #endif
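
In C terms, the macro computes roughly the following; this is an illustrative sketch, not code from the patch:

/* Only TTBR0 (user) addresses, which have bit 55 clear, carry a tag;
 * kernel addresses are passed through unchanged so fault reporting
 * still sees the original value. */
static inline unsigned long clear_address_tag_sketch(unsigned long addr)
{
	if (addr & (1UL << 55))
		return addr;			/* kernel address: untouched */
	return addr & ~(0xffUL << 56);		/* user address: drop tag byte */
}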

+ 2 - 2
arch/arm64/include/asm/atomic_lse.h

@@ -322,7 +322,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 #define ATOMIC64_FETCH_OP_AND(name, mb, cl...)				\
 static inline long atomic64_fetch_and##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
@@ -394,7 +394,7 @@ ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...)				\
 static inline long atomic64_fetch_sub##name(long i, atomic64_t *v)	\
 {									\
-	register long x0 asm ("w0") = i;				\
+	register long x0 asm ("x0") = i;				\
 	register atomic64_t *x1 asm ("x1") = v;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
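
The w0-to-x0 change matters because "w0" names only the 32-bit view of the register. A minimal stand-alone sketch of the corrected operand binding; the asm body is a placeholder, not the real LSE sequence:

static inline long bind_long_to_x0(long i)
{
	/* A 64-bit long must be bound to the X (64-bit) register view;
	 * binding it to "w0" tells the compiler only the low 32 bits
	 * of the operand are meaningful. */
	register long x0 asm ("x0") = i;

	asm volatile("" : "+r" (x0));	/* placeholder for the LSE instruction */
	return x0;
}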

+ 15 - 5
arch/arm64/include/asm/barrier.h

@@ -42,25 +42,35 @@
 #define __smp_rmb()	dmb(ishld)
 #define __smp_wmb()	dmb(ishst)
 
-#define __smp_store_release(p, v)						\
+#define __smp_store_release(p, v)					\
 do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (__force typeof(*p)) (v) }; 			\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
 	case 1:								\
 		asm volatile ("stlrb %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u8 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 2:								\
 		asm volatile ("stlrh %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u16 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 4:								\
 		asm volatile ("stlr %w1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u32 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	case 8:								\
 		asm volatile ("stlr %1, %0"				\
-				: "=Q" (*p) : "r" (v) : "memory");	\
+				: "=Q" (*p)				\
+				: "r" (*(__u64 *)__u.__c)		\
+				: "memory");				\
 		break;							\
 	}								\
 } while (0)
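
The union exists so that the asm input operand is read at exactly the width of *p, whatever the type of v happens to be. A user-space sketch of just the 8-byte case, adapted from the hunk above (arm64-only, illustrative):

#include <stdint.h>

/* 8-byte case only: v is type-punned through a union typed after *p, so
 * the "r" operand is read as a full 64-bit value and the compiler no
 * longer warns about a value/register size mismatch. */
#define store_release_64(p, v)						\
do {									\
	union { __typeof__(*(p)) __val; char __c[1]; } __u =		\
		{ .__val = (v) };					\
	asm volatile("stlr %1, %0"					\
		     : "=Q" (*(p))					\
		     : "r" (*(uint64_t *)__u.__c)			\
		     : "memory");					\
} while (0)

Usage would look like: long ready; store_release_64(&ready, 1L);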

+ 1 - 1
arch/arm64/include/asm/cmpxchg.h

@@ -46,7 +46,7 @@ static inline unsigned long __xchg_case_##name(unsigned long x,		\
 	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
 		__nops(3)						\
 	"	" #nop_lse)						\
-	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)			\
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr)	\
 	: "r" (x)							\
 	: cl);								\
 									\
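
Widening the "+Q" operand makes the compiler treat every byte of the exchange variable as read and written by the asm, not only its first byte. A stand-alone sketch of the same idea for a 4-byte exchange; it assumes an ARMv8.1/LSE-capable target and is illustrative only:

#include <stdint.h>

static inline uint32_t xchg32_sketch(uint32_t x, uint32_t *ptr)
{
	uint32_t ret;

	/* "+Q" names the full uint32_t, so all four bytes of *ptr are a
	 * hazard for the compiler, not just *(u8 *)ptr. */
	asm volatile("swpal %w2, %w0, %1"
		     : "=&r" (ret), "+Q" (*ptr)
		     : "r" (x)
		     : "memory");
	return ret;
}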

+ 7 - 6
arch/arm64/include/asm/uaccess.h

@@ -69,20 +69,21 @@ static inline void set_fs(mm_segment_t fs)
  */
 #define __range_ok(addr, size)						\
 ({									\
+	unsigned long __addr = (unsigned long __force)(addr);		\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
 	asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls"		\
 		: "=&r" (flag), "=&r" (roksum)				\
-		: "1" (addr), "Ir" (size),				\
+		: "1" (__addr), "Ir" (size),				\
 		  "r" (current_thread_info()->addr_limit)		\
 		: "cc");						\
 	flag;								\
 })
 
 /*
- * When dealing with data aborts or instruction traps we may end up with
- * a tagged userland pointer. Clear the tag to get a sane pointer to pass
- * on to access_ok(), for instance.
+ * When dealing with data aborts, watchpoints, or instruction traps we may end
+ * up with a tagged userland pointer. Clear the tag to get a sane pointer to
+ * pass on to access_ok(), for instance.
  */
 #define untagged_addr(addr)		sign_extend64(addr, 55)
 
 
@@ -230,7 +231,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__get_user_asm("ldr", "ldtr", "%",  __gu_val, (ptr),	\
+		__get_user_asm("ldr", "ldtr", "%x",  __gu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
@@ -297,7 +298,7 @@ do {									\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__put_user_asm("str", "sttr", "%", __pu_val, (ptr),	\
+		__put_user_asm("str", "sttr", "%x", __pu_val, (ptr),	\
 			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
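
For reference, sign_extend64(addr, 55) replaces bits 63:56 with copies of bit 55, so user addresses lose their tag while kernel addresses keep an all-ones top byte. A free-standing sketch of the same computation:

#include <stdint.h>

static inline uint64_t untagged_addr_sketch(uint64_t addr)
{
	/* Shift the tag byte out, then arithmetic-shift it back in as a
	 * sign extension of bit 55: 0x00 for TTBR0 (user) addresses,
	 * 0xff for TTBR1 (kernel) addresses. */
	return (uint64_t)((int64_t)(addr << 8) >> 8);
}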

+ 2 - 1
arch/arm64/kernel/armv8_deprecated.c

@@ -306,7 +306,8 @@ do {								\
 	_ASM_EXTABLE(0b, 4b)					\
 	_ASM_EXTABLE(1b, 4b)					\
 	: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2)	\
-	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT),		\
+	: "r" ((unsigned long)addr), "i" (-EAGAIN),		\
+	  "i" (-EFAULT),					\
 	  "i" (__SWP_LL_SC_LOOPS)				\
 	: "memory");						\
 	uaccess_disable();					\
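
The cast matters because the SWP emulation holds the user address in a 32-bit variable; passed unmodified as an "r" operand that is then used as a 64-bit base register, bits 63:32 would be unspecified. A minimal sketch of the idiom; the prfm instruction is a stand-in for the real exclusive-access sequence:

#include <stdint.h>

static inline void touch_user_addr_sketch(uint32_t addr)
{
	/* The explicit widening cast makes the compiler zero-extend addr
	 * into the full X register before the asm consumes it as a base. */
	asm volatile("prfm pldl1keep, [%0]"
		     :
		     : "r" ((unsigned long)addr));
}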

+ 3 - 2
arch/arm64/kernel/entry.S

@@ -428,12 +428,13 @@ el1_da:
 	/*
 	 * Data abort handling
 	 */
-	mrs	x0, far_el1
+	mrs	x3, far_el1
 	enable_dbg
 	// re-enable interrupts if they were enabled in the aborted context
 	tbnz	x23, #7, 1f			// PSR_I_BIT
 	enable_irq
 1:
+	clear_address_tag x0, x3
 	mov	x2, sp				// struct pt_regs
 	bl	do_mem_abort
 
 
@@ -594,7 +595,7 @@ el0_da:
 	// enable interrupts before calling the main handler
 	enable_dbg_and_irq
 	ct_user_exit
-	bic	x0, x26, #(0xff << 56)
+	clear_address_tag x0, x26
 	mov	x1, x25
 	mov	x2, sp
 	bl	do_mem_abort

+ 3 - 0
arch/arm64/kernel/hw_breakpoint.c

@@ -36,6 +36,7 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
+#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
@@ -721,6 +722,8 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 	u64 wp_low, wp_high;
 	u32 lens, lene;
 
+	addr = untagged_addr(addr);
+
 	lens = __ffs(ctrl->len);
 	lene = __fls(ctrl->len);
 
 

+ 6 - 1
arch/arm64/kernel/module.c

@@ -32,11 +32,16 @@
 
 
 void *module_alloc(unsigned long size)
 {
+	gfp_t gfp_mask = GFP_KERNEL;
 	void *p;
 
+	/* Silence the initial allocation */
+	if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+		gfp_mask |= __GFP_NOWARN;
+
 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
 				module_alloc_base + MODULES_VSIZE,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				gfp_mask, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
 	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&

+ 2 - 2
arch/arm64/kernel/traps.c

@@ -443,7 +443,7 @@ int cpu_enable_cache_maint_trap(void *__unused)
 }
 
 #define __user_cache_maint(insn, address, res)			\
-	if (untagged_addr(address) >= user_addr_max()) {	\
+	if (address >= user_addr_max()) {			\
 		res = -EFAULT;					\
 	} else {						\
 		uaccess_ttbr0_enable();				\
@@ -469,7 +469,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
 	int ret = 0;
 
-	address = pt_regs_read_reg(regs, rt);
+	address = untagged_addr(pt_regs_read_reg(regs, rt));
 
 	switch (crm) {
 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:	/* DC CVAU, gets promoted */

+ 1 - 1
mm/vmalloc.c

@@ -521,7 +521,7 @@ overflow:
 		}
 	}
 
-	if (printk_ratelimit())
+	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
 		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
 			size);
 	kfree(va);
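
With this check in place, a caller that expects and handles failure can keep the log quiet by passing __GFP_NOWARN. A sketch against the three-argument __vmalloc() of this kernel version:

#include <linux/vmalloc.h>
#include <linux/gfp.h>

static void *quiet_vmalloc(unsigned long size)
{
	/* The gfp flags propagate down to alloc_vmap_area(), which now
	 * skips the rate-limited "vmap allocation ... failed" warning. */
	return __vmalloc(size, GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL);
}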