arm64: atomics: prefetch the destination word for write prior to stxr

The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch makes use of prfm to prefetch cachelines for write prior to
ldxr/stxr loops when using the ll/sc atomic routines.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Will Deacon, 10 years ago
commit 0ea366f5e1
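PRFM PSTL1STRM is a prefetch-for-store hint targeting the L1 cache with a streaming (non-temporal) policy, so the line can already be held in a writable state by the time the stxr/stlxr executes. As a rough illustration of the resulting shape, outside the kernel's atomic_##op macro machinery and with made-up names, an LL/SC atomic add now looks something like this:

	/*
	 * Minimal standalone sketch of the pattern in this patch; names are
	 * illustrative, not the kernel's. Prefetch the line for write, then
	 * run the usual load-exclusive/store-exclusive loop.
	 */
	static inline void llsc_atomic_add(int i, int *v)
	{
		unsigned long tmp;
		int result;

		asm volatile(
		"	prfm	pstl1strm, %2\n"	/* hint: this word is about to be written */
		"1:	ldxr	%w0, %2\n"		/* load-exclusive the counter */
		"	add	%w0, %w0, %w3\n"	/* apply the operation */
		"	stxr	%w1, %w0, %2\n"		/* store-exclusive; %w1 == 0 on success */
		"	cbnz	%w1, 1b\n"		/* exclusivity lost: go round again */
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i));
	}

Because prfm is only a hint, the loop is still correct if the prefetch is dropped or the line is stolen again; the gain is a lower chance that the store-exclusive fails and forces another pass around the loop.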

+ 9 - 0
arch/arm64/include/asm/atomic_ll_sc.h

@@ -45,6 +45,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))				\
 	int result;							\
 									\
 	asm volatile("// atomic_" #op "\n"				\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%w0, %2\n"						\
 "	" #asm_op "	%w0, %w0, %w3\n"				\
 "	stxr	%w1, %w0, %2\n"						\
@@ -62,6 +63,7 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v))		\
 	int result;							\
 									\
 	asm volatile("// atomic_" #op "_return\n"			\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%w0, %2\n"						\
 "	" #asm_op "	%w0, %w0, %w3\n"				\
 "	stlxr	%w1, %w0, %2\n"						\
@@ -98,6 +100,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
 	int oldval;
 
 	asm volatile("// atomic_cmpxchg\n"
+"	prfm	pstl1strm, %2\n"
 "1:	ldxr	%w1, %2\n"
 "	eor	%w0, %w1, %w3\n"
 "	cbnz	%w0, 2f\n"
@@ -121,6 +124,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v))			\
 	unsigned long tmp;						\
 									\
 	asm volatile("// atomic64_" #op "\n"				\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%0, %2\n"						\
 "	" #asm_op "	%0, %0, %3\n"					\
 "	stxr	%w1, %0, %2\n"						\
@@ -138,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v))		\
 	unsigned long tmp;						\
 									\
 	asm volatile("// atomic64_" #op "_return\n"			\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%0, %2\n"						\
 "	" #asm_op "	%0, %0, %3\n"					\
 "	stlxr	%w1, %0, %2\n"						\
@@ -174,6 +179,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
 	unsigned long res;
 
 	asm volatile("// atomic64_cmpxchg\n"
+"	prfm	pstl1strm, %2\n"
 "1:	ldxr	%1, %2\n"
 "	eor	%0, %1, %3\n"
 "	cbnz	%w0, 2f\n"
@@ -196,6 +202,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 	unsigned long tmp;
 
 	asm volatile("// atomic64_dec_if_positive\n"
+"	prfm	pstl1strm, %2\n"
 "1:	ldxr	%0, %2\n"
 "	subs	%0, %0, #1\n"
 "	b.mi	2f\n"
@@ -220,6 +227,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 	unsigned long tmp, oldval;					\
 									\
 	asm volatile(							\
+	"	prfm	pstl1strm, %2\n"				\
 	"1:	ldxr" #sz "\t%" #w "[oldval], %[v]\n"			\
 	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
 	"	cbnz	%" #w "[tmp], 2f\n"				\
@@ -259,6 +267,7 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1,		\
 	unsigned long tmp, ret;						\
 									\
 	asm volatile("// __cmpxchg_double" #name "\n"			\
+	"	prfm	pstl1strm, %2\n"				\
 	"1:	ldxp	%0, %1, %2\n"					\
 	"	eor	%0, %0, %3\n"					\
 	"	eor	%1, %1, %4\n"					\

+ 8 - 0
arch/arm64/include/asm/cmpxchg.h

@@ -33,12 +33,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	case 1:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxrb	%w0, %2\n"
 		"	stlxrb	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
 		"	nop\n"
+		"	nop\n"
 		"	swpalb	%w3, %w0, %2\n"
 		"	nop\n"
 		"	nop")
@@ -49,12 +51,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	case 2:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxrh	%w0, %2\n"
 		"	stlxrh	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
 		"	nop\n"
+		"	nop\n"
 		"	swpalh	%w3, %w0, %2\n"
 		"	nop\n"
 		"	nop")
@@ -65,12 +69,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	case 4:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxr	%w0, %2\n"
 		"	stlxr	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
 		"	nop\n"
+		"	nop\n"
 		"	swpal	%w3, %w0, %2\n"
 		"	nop\n"
 		"	nop")
@@ -81,12 +87,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	case 8:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxr	%0, %2\n"
 		"	stlxr	%w1, %3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
 		"	nop\n"
+		"	nop\n"
 		"	swpal	%3, %0, %2\n"
 		"	nop\n"
 		"	nop")

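Each prfm added to an LL/SC block inside ARM64_LSE_ATOMIC_INSN() is balanced by an extra nop in the LSE alternative: the two instruction sequences are patched over one another by the alternatives framework, so they must occupy the same number of instructions; the LSE side simply pads with a nop rather than prefetching. Ignoring that machinery, the LL/SC half of the 4-byte exchange reduces to roughly the following standalone sketch (illustrative names, not the kernel helper itself):

	/*
	 * LL/SC-only sketch of the 32-bit xchg case above; in the kernel this
	 * sits inside ARM64_LSE_ATOMIC_INSN so an LSE-capable CPU can patch
	 * the loop into a single swpal at boot.
	 */
	static inline unsigned int llsc_xchg32(unsigned int new, volatile unsigned int *ptr)
	{
		unsigned int ret;
		unsigned long tmp;

		asm volatile(
		"	prfm	pstl1strm, %2\n"	/* warm the line for write */
		"1:	ldxr	%w0, %2\n"		/* old value -> ret */
		"	stlxr	%w1, %w3, %2\n"		/* try to store 'new' with release semantics */
		"	cbnz	%w1, 1b\n"		/* exclusivity lost: retry */
		"	dmb	ish"			/* trailing barrier, as in the kernel routine */
		: "=&r" (ret), "=&r" (tmp), "+Q" (*ptr)
		: "r" (new)
		: "memory");
		return ret;
	}
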
+ 2 - 0
arch/arm64/include/asm/futex.h

@@ -30,6 +30,7 @@
 	asm volatile(							\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,		\
 		    CONFIG_ARM64_PAN)					\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%w1, %2\n"						\
 	insn "\n"							\
 "2:	stlxr	%w3, %w0, %2\n"						\
@@ -120,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+"	prfm	pstl1strm, %2\n"
 "1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
 "	cbnz	%w3, 3f\n"

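The futex helpers follow the same pattern; the only extra complication there is that the address is a userspace pointer, so the real code is wrapped in exception-table and PAN handling that is omitted here. Stripped down to the loop itself, a cmpxchg-style variant with the new prefetch looks roughly like this (illustrative names, kernel-internal details left out):

	/*
	 * Standalone sketch of a cmpxchg-style LL/SC loop with the prefetch;
	 * the kernel's atomic_cmpxchg and futex variants add barriers,
	 * alternatives and uaccess handling on top of this shape.
	 */
	static inline unsigned int llsc_cmpxchg32(volatile unsigned int *ptr,
						  unsigned int old, unsigned int new)
	{
		unsigned int oldval;
		unsigned long tmp;

		asm volatile(
		"	prfm	pstl1strm, %2\n"	/* write hint, issued before we know if we will store */
		"1:	ldxr	%w1, %2\n"		/* oldval = *ptr (exclusive) */
		"	eor	%w0, %w1, %w3\n"	/* tmp = oldval ^ old */
		"	cbnz	%w0, 2f\n"		/* mismatch: bail out without storing */
		"	stxr	%w0, %w4, %2\n"		/* try to store 'new' */
		"	cbnz	%w0, 1b\n"		/* exclusive store failed: retry */
		"2:"
		: "=&r" (tmp), "=&r" (oldval), "+Q" (*ptr)
		: "r" (old), "r" (new)
		: "memory");
		return oldval;
	}

Since the prefetch is purely a hint, prefetching for write and then taking the early exit without storing costs little.
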
+ 2 - 0
arch/arm64/lib/bitops.S

@@ -31,6 +31,7 @@ ENTRY(	\name	)
 	eor	w0, w0, w3		// Clear low bits
 	mov	x2, #1
 	add	x1, x1, x0, lsr #3	// Get word offset
+alt_lse "	prfm	pstl1strm, [x1]",	"nop"
 	lsl	x3, x2, x3		// Create mask
 
 alt_lse	"1:	ldxr	x2, [x1]",		"\lse	x3, [x1]"
@@ -48,6 +49,7 @@ ENTRY(	\name	)
 	eor	w0, w0, w3		// Clear low bits
 	mov	x2, #1
 	add	x1, x1, x0, lsr #3	// Get word offset
+alt_lse "	prfm	pstl1strm, [x1]",	"nop"
 	lsl	x4, x2, x3		// Create mask
 
 alt_lse	"1:	ldxr	x2, [x1]",		"\lse	x4, x2, [x1]"