|
|
@@ -263,8 +263,9 @@ do { \
|
|
|
})
|
|
|
|
|
|
/*
|
|
|
- * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
|
|
|
- * full lock semantics even though they are not needed.
|
|
|
+ * percpu_xchg_op is implemented using a cmpxchg loop without a lock prefix.
|
|
|
+ * The xchg instruction is expensive due to its implied lock prefix, and the
|
|
|
+ * processor cannot prefetch cachelines if xchg is used.
|
|
|
*/
|
|
|
#define percpu_xchg_op(var, nval) \
|
|
|
({ \
|
|
|
@@ -272,25 +273,33 @@ do { \
|
|
|
typeof(var) pxo_new__ = (nval); \
|
|
|
switch (sizeof(var)) { \
|
|
|
case 1: \
|
|
|
- asm("xchgb %2, "__percpu_arg(1) \
|
|
|
+ asm("\n1:mov "__percpu_arg(1)",%%al" \
|
|
|
+ "\n\tcmpxchgb %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
: "=a" (pxo_ret__), "+m" (var) \
|
|
|
: "q" (pxo_new__) \
|
|
|
: "memory"); \
|
|
|
break; \
|
|
|
case 2: \
|
|
|
- asm("xchgw %2, "__percpu_arg(1) \
|
|
|
+ asm("\n1:mov "__percpu_arg(1)",%%ax" \
|
|
|
+ "\n\tcmpxchgw %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
: "=a" (pxo_ret__), "+m" (var) \
|
|
|
: "r" (pxo_new__) \
|
|
|
: "memory"); \
|
|
|
break; \
|
|
|
case 4: \
|
|
|
- asm("xchgl %2, "__percpu_arg(1) \
|
|
|
+ asm("\n1:mov "__percpu_arg(1)",%%eax" \
|
|
|
+ "\n\tcmpxchgl %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
: "=a" (pxo_ret__), "+m" (var) \
|
|
|
: "r" (pxo_new__) \
|
|
|
: "memory"); \
|
|
|
break; \
|
|
|
case 8: \
|
|
|
- asm("xchgq %2, "__percpu_arg(1) \
|
|
|
+ asm("\n1:mov "__percpu_arg(1)",%%rax" \
|
|
|
+ "\n\tcmpxchgq %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
: "=a" (pxo_ret__), "+m" (var) \
|
|
|
: "r" (pxo_new__) \
|
|
|
: "memory"); \
|