@@ -602,33 +602,69 @@ static __always_inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution and prefetching of modified code. */
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ *  a) Text was modified using one virtual address and is about to be executed
+ *     from the same physical page at a different virtual address.
+ *
+ *  b) Text was modified on a different CPU, may subsequently be
+ *     executed on this CPU, and you want to make sure the new version
+ *     gets executed. This generally means you're calling this in an IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ */
 static inline void sync_core(void)
 {
-	int tmp;
-
-#ifdef CONFIG_X86_32
 	/*
-	 * Do a CPUID if available, otherwise do a jump. The jump
-	 * can conveniently enough be the jump around CPUID.
+	 * There are quite a few ways to do this. IRET-to-self is nice
+	 * because it works on every CPU, at any CPL (so it's compatible
+	 * with paravirtualization), and it never exits to a hypervisor.
+	 * The only down sides are that it's a bit slow (it seems to be
+	 * a bit more than 2x slower than the fastest options) and that
+	 * it unmasks NMIs. The "push %cs" is needed because, in
+	 * paravirtual environments, __KERNEL_CS may not be a valid CS
+	 * value when we do IRET directly.
+	 *
+	 * In case NMI unmasking or performance ever becomes a problem,
+	 * the next best option appears to be MOV-to-CR2 and an
+	 * unconditional jump. That sequence also works on all CPUs,
+	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
+	 *
+	 * CPUID is the conventional way, but it's nasty: it doesn't
+	 * exist on some 486-like CPUs, and it usually exits to a
+	 * hypervisor.
+	 *
+	 * Like all of Linux's memory ordering operations, this is a
+	 * compiler barrier as well.
 	 */
-	asm volatile("cmpl %2,%1\n\t"
-		     "jl 1f\n\t"
-		     "cpuid\n"
-		     "1:"
-		     : "=a" (tmp)
-		     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_32
+	asm volatile (
+		"pushfl\n\t"
+		"pushl %%cs\n\t"
+		"pushl $1f\n\t"
+		"iret\n\t"
+		"1:"
+		: "+r" (__sp) : : "memory");
 #else
-	/*
-	 * CPUID is a barrier to speculative execution.
-	 * Prefetched instructions are automatically
-	 * invalidated when modified.
-	 */
-	asm volatile("cpuid"
-		     : "=a" (tmp)
-		     : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	unsigned int tmp;
+
+	asm volatile (
+		"mov %%ss, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq %%rsp\n\t"
+		"addq $8, (%%rsp)\n\t"
+		"pushfq\n\t"
+		"mov %%cs, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq $1f\n\t"
+		"iretq\n\t"
+		"1:"
+		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
 #endif
 }
 
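
A note on case (b) above: the expected pattern is that whoever modified the text fires an IPI and every CPU runs sync_core() before it can execute the new instructions. Below is a minimal sketch of that caller; do_sync_core_sketch() and publish_patched_text_sketch() are illustrative names, not part of this patch, and the real cross-modifying-code users add locking and ordering that is omitted here.

#include <linux/smp.h>
#include <asm/processor.h>

/* Hypothetical IPI callback: each CPU serializes so its prefetched
 * instruction stream catches up with the freshly written text. */
static void do_sync_core_sketch(void *info)
{
	sync_core();
}

static void publish_patched_text_sketch(void)
{
	/* ... the actual text modification would happen here ... */

	/*
	 * Force every CPU to serialize before it can execute the new
	 * instructions; wait=1 so we only return once all CPUs have
	 * run the callback.
	 */
	on_each_cpu(do_sync_core_sketch, NULL, 1);
}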
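
For completeness, the MOV-to-CR2 fallback mentioned in the new comment would look roughly like the sketch below. This is only an illustration of the alternative described there, not code proposed by this patch; it can only run at CPL0, which is exactly why it loses to IRET-to-self for Xen PV and lguest.

/* Sketch only: a MOV to a control register is architecturally
 * serializing, and the unconditional jump discards any stale
 * prefetched instruction bytes. CR2 is read first so the last
 * page-fault address is written back unchanged. */
static inline void sync_core_via_cr2_sketch(void)
{
	unsigned long tmp;

	asm volatile (
		"mov %%cr2, %0\n\t"
		"mov %0, %%cr2\n\t"
		"jmp 1f\n\t"
		"1:"
		: "=&r" (tmp) : : "cc", "memory");
}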