@@ -602,33 +602,69 @@ static __always_inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution and prefetching of modified code. */
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ *  a) Text was modified using one virtual address and is about to be executed
+ *     from the same physical page at a different virtual address.
+ *
+ *  b) Text was modified on a different CPU, may subsequently be
+ *     executed on this CPU, and you want to make sure the new version
+ *     gets executed. This generally means you're calling this in an IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ */
 static inline void sync_core(void)
 {
-	int tmp;
-
-#ifdef CONFIG_X86_32
 	/*
-	 * Do a CPUID if available, otherwise do a jump. The jump
-	 * can conveniently enough be the jump around CPUID.
+	 * There are quite a few ways to do this. IRET-to-self is nice
+	 * because it works on every CPU, at any CPL (so it's compatible
+	 * with paravirtualization), and it never exits to a hypervisor.
+	 * The only down sides are that it's a bit slow (it seems to be
+	 * a bit more than 2x slower than the fastest options) and that
+	 * it unmasks NMIs. The "push %cs" is needed because, in
+	 * paravirtual environments, __KERNEL_CS may not be a valid CS
+	 * value when we do IRET directly.
+	 *
+	 * In case NMI unmasking or performance ever becomes a problem,
+	 * the next best option appears to be MOV-to-CR2 and an
+	 * unconditional jump. That sequence also works on all CPUs,
+	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
+	 *
+	 * CPUID is the conventional way, but it's nasty: it doesn't
+	 * exist on some 486-like CPUs, and it usually exits to a
+	 * hypervisor.
+	 *
+	 * Like all of Linux's memory ordering operations, this is a
+	 * compiler barrier as well.
 	 */
-	asm volatile("cmpl %2,%1\n\t"
-		     "jl 1f\n\t"
-		     "cpuid\n"
-		     "1:"
-		     : "=a" (tmp)
-		     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_32
+	asm volatile (
+		"pushfl\n\t"
+		"pushl %%cs\n\t"
+		"pushl $1f\n\t"
+		"iret\n\t"
+		"1:"
+		: "+r" (__sp) : : "memory");
 #else
-	/*
-	 * CPUID is a barrier to speculative execution.
-	 * Prefetched instructions are automatically
-	 * invalidated when modified.
-	 */
-	asm volatile("cpuid"
-		     : "=a" (tmp)
-		     : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+	unsigned int tmp;
+
+	asm volatile (
+		"mov %%ss, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq %%rsp\n\t"
+		"addq $8, (%%rsp)\n\t"
+		"pushfq\n\t"
+		"mov %%cs, %0\n\t"
+		"pushq %q0\n\t"
+		"pushq $1f\n\t"
+		"iretq\n\t"
+		"1:"
+		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
 #endif
 }
 
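
A note on case (b) above: the expected pattern is that whoever modified the text fires an IPI and every CPU runs sync_core() before it can execute the new instructions. Below is a minimal sketch of that caller; do_sync_core_sketch() and publish_patched_text_sketch() are illustrative names, not part of this patch, and the real cross-modifying-code users add locking and ordering that is omitted here.

#include <linux/smp.h>
#include <asm/processor.h>

/* Hypothetical IPI callback: each CPU serializes so its prefetched
 * instruction stream catches up with the freshly written text. */
static void do_sync_core_sketch(void *info)
{
	sync_core();
}

static void publish_patched_text_sketch(void)
{
	/* ... the actual text modification would happen here ... */

	/*
	 * Force every CPU to serialize before it can execute the new
	 * instructions; wait=1 so we only return once all CPUs have
	 * run the callback.
	 */
	on_each_cpu(do_sync_core_sketch, NULL, 1);
}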
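
For completeness, the MOV-to-CR2 fallback mentioned in the new comment would look roughly like the sketch below. This is only an illustration of the alternative described there, not code proposed by this patch; it can only run at CPL0, which is exactly why it loses to IRET-to-self for Xen PV and lguest.

/* Sketch only: a MOV to a control register is architecturally
 * serializing, and the unconditional jump discards any stale
 * prefetched instruction bytes. CR2 is read first so the last
 * page-fault address is written back unchanged. */
static inline void sync_core_via_cr2_sketch(void)
{
	unsigned long tmp;

	asm volatile (
		"mov %%cr2, %0\n\t"
		"mov %0, %%cr2\n\t"
		"jmp 1f\n\t"
		"1:"
		: "=&r" (tmp) : : "cc", "memory");
}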