
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - MSR access API fixes and enhancements (Andy Lutomirski)

   - early exception handling improvements (Andy Lutomirski)

   - user-space FS/GS prctl usage fixes and improvements (Andy
     Lutomirski)

   - Remove the cpu_has_*() APIs and replace them with equivalents
     (Borislav Petkov)

   - task switch micro-optimization (Brian Gerst)

   - 32-bit entry code simplification (Denys Vlasenko)

   - enhance PAT handling in emulated CPUs (Toshi Kani)

  ... and lots of other cleanups/fixlets"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/arch_prctl/64: Restore accidentally removed put_cpu() in ARCH_SET_GS
  x86/entry/32: Remove asmlinkage_protect()
  x86/entry/32: Remove GET_THREAD_INFO() from entry code
  x86/entry, sched/x86: Don't save/restore EFLAGS on task switch
  x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs
  selftests/x86/ldt_gdt: Test set_thread_area() deletion of an active segment
  x86/tls: Synchronize segment registers in set_thread_area()
  x86/asm/64: Rename thread_struct's fs and gs to fsbase and gsbase
  x86/arch_prctl/64: Remove FSBASE/GSBASE < 4G optimization
  x86/segments/64: When load_gs_index fails, clear the base
  x86/segments/64: When loadsegment(fs, ...) fails, clear the base
  x86/asm: Make asm/alternative.h safe from assembly
  x86/asm: Stop depending on ptrace.h in alternative.h
  x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  x86/asm: Make sure verify_cpu() has a good stack
  x86/extable: Add a comment about early exception handlers
  x86/msr: Set the return value to zero when native_rdmsr_safe() fails
  x86/paravirt: Make "unsafe" MSR accesses unsafe even if PARAVIRT=y
  x86/paravirt: Add paravirt_{read,write}_msr()
  x86/msr: Carry on after a non-"safe" MSR access fails
  ...
Linus Torvalds committed 9 years ago
Parent
Commit
168f1a7163
100 changed files with 750 additions and 746 deletions
  1. Documentation/x86/pat.txt (+32, -0)
  2. arch/ia64/include/asm/iommu.h (+0, -1)
  3. arch/x86/crypto/aesni-intel_glue.c (+1, -1)
  4. arch/x86/crypto/camellia_aesni_avx2_glue.c (+4, -1)
  5. arch/x86/crypto/camellia_aesni_avx_glue.c (+3, -1)
  6. arch/x86/crypto/chacha20_glue.c (+2, -1)
  7. arch/x86/crypto/poly1305_glue.c (+3, -2)
  8. arch/x86/crypto/serpent_avx2_glue.c (+1, -1)
  9. arch/x86/crypto/serpent_sse2_glue.c (+1, -1)
  10. arch/x86/crypto/sha1_ssse3_glue.c (+1, -1)
  11. arch/x86/crypto/sha256_ssse3_glue.c (+1, -1)
  12. arch/x86/crypto/sha512_ssse3_glue.c (+1, -1)
  13. arch/x86/entry/common.c (+1, -1)
  14. arch/x86/entry/entry_32.S (+0, -7)
  15. arch/x86/entry/entry_64.S (+12, -9)
  16. arch/x86/entry/entry_64_compat.S (+21, -24)
  17. arch/x86/entry/syscalls/syscall_64.tbl (+2, -0)
  18. arch/x86/entry/vdso/vclock_gettime.c (+0, -15)
  19. arch/x86/entry/vdso/vdso-layout.lds.S (+2, -3)
  20. arch/x86/entry/vdso/vma.c (+0, -11)
  21. arch/x86/events/core.c (+1, -1)
  22. arch/x86/events/intel/uncore.c (+1, -1)
  23. arch/x86/ia32/ia32_signal.c (+1, -1)
  24. arch/x86/include/asm/alternative.h (+3, -32)
  25. arch/x86/include/asm/apic.h (+2, -2)
  26. arch/x86/include/asm/clocksource.h (+4, -5)
  27. arch/x86/include/asm/compat.h (+2, -2)
  28. arch/x86/include/asm/cpufeature.h (+0, -25)
  29. arch/x86/include/asm/cpufeatures.h (+3, -0)
  30. arch/x86/include/asm/elf.h (+3, -3)
  31. arch/x86/include/asm/hugetlb.h (+1, -1)
  32. arch/x86/include/asm/irq_work.h (+1, -1)
  33. arch/x86/include/asm/kgdb.h (+2, -0)
  34. arch/x86/include/asm/linkage.h (+0, -34)
  35. arch/x86/include/asm/msr.h (+15, -5)
  36. arch/x86/include/asm/mtrr.h (+5, -1)
  37. arch/x86/include/asm/paravirt.h (+27, -18)
  38. arch/x86/include/asm/paravirt_types.h (+10, -4)
  39. arch/x86/include/asm/pat.h (+1, -1)
  40. arch/x86/include/asm/pgtable.h (+1, -1)
  41. arch/x86/include/asm/processor.h (+9, -2)
  42. arch/x86/include/asm/segment.h (+39, -10)
  43. arch/x86/include/asm/setup.h (+1, -0)
  44. arch/x86/include/asm/switch_to.h (+1, -3)
  45. arch/x86/include/asm/text-patching.h (+40, -0)
  46. arch/x86/include/asm/thread_info.h (+1, -1)
  47. arch/x86/include/asm/tlbflush.h (+1, -1)
  48. arch/x86/include/asm/tsc.h (+1, -1)
  49. arch/x86/include/asm/uaccess.h (+1, -1)
  50. arch/x86/include/asm/xor_32.h (+1, -1)
  51. arch/x86/include/asm/xor_avx.h (+2, -2)
  52. arch/x86/kernel/acpi/boot.c (+4, -4)
  53. arch/x86/kernel/alternative.c (+1, -0)
  54. arch/x86/kernel/apic/apic.c (+16, -16)
  55. arch/x86/kernel/apic/apic_noop.c (+2, -2)
  56. arch/x86/kernel/apic/io_apic.c (+1, -1)
  57. arch/x86/kernel/apic/ipi.c (+1, -1)
  58. arch/x86/kernel/apic/vector.c (+1, -1)
  59. arch/x86/kernel/cpu/amd.c (+12, -8)
  60. arch/x86/kernel/cpu/common.c (+56, -26)
  61. arch/x86/kernel/cpu/cyrix.c (+1, -1)
  62. arch/x86/kernel/cpu/intel.c (+6, -6)
  63. arch/x86/kernel/cpu/mcheck/mce_intel.c (+1, -1)
  64. arch/x86/kernel/cpu/mcheck/therm_throt.c (+1, -1)
  65. arch/x86/kernel/cpu/mtrr/cyrix.c (+2, -2)
  66. arch/x86/kernel/cpu/mtrr/generic.c (+16, -12)
  67. arch/x86/kernel/cpu/mtrr/main.c (+12, -1)
  68. arch/x86/kernel/cpu/mtrr/mtrr.h (+1, -0)
  69. arch/x86/kernel/cpu/vmware.c (+1, -1)
  70. arch/x86/kernel/devicetree.c (+1, -1)
  71. arch/x86/kernel/fpu/bugs.c (+5, -11)
  72. arch/x86/kernel/fpu/core.c (+16, -34)
  73. arch/x86/kernel/fpu/init.c (+8, -8)
  74. arch/x86/kernel/fpu/regset.c (+14, -11)
  75. arch/x86/kernel/fpu/xstate.c (+9, -9)
  76. arch/x86/kernel/head_32.S (+49, -67)
  77. arch/x86/kernel/head_64.S (+35, -68)
  78. arch/x86/kernel/hpet.c (+0, -1)
  79. arch/x86/kernel/jump_label.c (+1, -0)
  80. arch/x86/kernel/kgdb.c (+1, -0)
  81. arch/x86/kernel/kprobes/core.c (+1, -0)
  82. arch/x86/kernel/kprobes/opt.c (+1, -0)
  83. arch/x86/kernel/kvm.c (+1, -1)
  84. arch/x86/kernel/module.c (+1, -0)
  85. arch/x86/kernel/paravirt.c (+4, -2)
  86. arch/x86/kernel/process_64.c (+113, -128)
  87. arch/x86/kernel/ptrace.c (+9, -41)
  88. arch/x86/kernel/signal.c (+3, -3)
  89. arch/x86/kernel/smpboot.c (+1, -1)
  90. arch/x86/kernel/tce_64.c (+1, -1)
  91. arch/x86/kernel/tls.c (+42, -0)
  92. arch/x86/kernel/traps.c (+1, -0)
  93. arch/x86/kernel/tsc.c (+15, -18)
  94. arch/x86/kernel/uprobes.c (+1, -1)
  95. arch/x86/kvm/cpuid.c (+1, -1)
  96. arch/x86/kvm/mmu.c (+2, -1)
  97. arch/x86/kvm/svm.c (+1, -1)
  98. arch/x86/kvm/trace.h (+1, -2)
  99. arch/x86/kvm/vmx.c (+1, -1)
  100. arch/x86/kvm/x86.c (+8, -8)

+ 32 - 0
Documentation/x86/pat.txt

@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
 "debugpat" boot parameter. With this parameter, various debug messages are
 printed to dmesg log.
 
+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ MTRR PAT   Call Sequence               PAT State  PAT MSR
+ =========================================================
+ E    E     MTRR -> PAT init            Enabled    OS
+ E    D     MTRR -> PAT init            Disabled    -
+ D    E     MTRR -> PAT disable         Disabled   BIOS
+ D    D     MTRR -> PAT disable         Disabled    -
+ -    np/E  PAT  -> PAT disable         Disabled   BIOS
+ -    np/D  PAT  -> PAT disable         Disabled    -
+ E    !P/E  MTRR -> PAT init            Disabled   BIOS
+ D    !P/E  MTRR -> PAT disable         Disabled   BIOS
+ !M   !P/E  MTRR stub -> PAT disable    Disabled   BIOS
+
+ Legend
+ ------------------------------------------------
+ E         Feature enabled in CPU
+ D	   Feature disabled/unsupported in CPU
+ np	   "nopat" boot option specified
+ !P	   CONFIG_X86_PAT option unset
+ !M	   CONFIG_MTRR option unset
+ Enabled   PAT state set to enabled
+ Disabled  PAT state set to disabled
+ OS        PAT initializes PAT MSR with OS setting
+ BIOS      PAT keeps PAT MSR with BIOS setting
+
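
The table above is ultimately about whether WC/WT mappings are available to drivers. As a minimal, hypothetical driver-side sketch (the MMIO address and size are placeholders; a real driver would take them from something like pci_resource_start(); pat_enabled() and ioremap_wc() are the existing interfaces), the fallback behaviour looks like this:

	#include <linux/io.h>
	#include <linux/printk.h>
	#include <asm/pat.h>

	/* Placeholder MMIO window, for illustration only. */
	#define DEMO_FB_BASE	0xd0000000UL
	#define DEMO_FB_SIZE	0x100000UL

	static void __iomem *demo_map_framebuffer(void)
	{
		/*
		 * When the PAT state above is "Enabled", this returns a
		 * write-combining mapping; otherwise the kernel quietly
		 * falls back to an uncached mapping, so the call is safe
		 * either way.
		 */
		if (!pat_enabled())
			pr_info("PAT disabled: WC request will degrade to UC-\n");

		return ioremap_wc(DEMO_FB_BASE, DEMO_FB_SIZE);
	}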

+ 0 - 1
arch/ia64/include/asm/iommu.h

@@ -1,7 +1,6 @@
 #ifndef _ASM_IA64_IOMMU_H
 #define _ASM_IA64_IOMMU_H 1
 
-#define cpu_has_x2apic 0
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
 

+ 1 - 1
arch/x86/crypto/aesni-intel_glue.c

@@ -1477,7 +1477,7 @@ static int __init aesni_init(void)
 	}
 	aesni_ctr_enc_tfm = aesni_ctr_enc;
 #ifdef CONFIG_AS_AVX
-	if (cpu_has_avx) {
+	if (boot_cpu_has(X86_FEATURE_AVX)) {
 		/* optimize performance of ctr mode encryption transform */
 		aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
 		pr_info("AES CTR mode by8 optimization enabled\n");

+ 4 - 1
arch/x86/crypto/camellia_aesni_avx2_glue.c

@@ -562,7 +562,10 @@ static int __init camellia_aesni_init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+	if (!boot_cpu_has(X86_FEATURE_AVX) ||
+	    !boot_cpu_has(X86_FEATURE_AVX2) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 		pr_info("AVX2 or AES-NI instructions are not detected.\n");
 		return -ENODEV;
 	}

+ 3 - 1
arch/x86/crypto/camellia_aesni_avx_glue.c

@@ -554,7 +554,9 @@ static int __init camellia_aesni_init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+	if (!boot_cpu_has(X86_FEATURE_AVX) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 		pr_info("AVX or AES-NI instructions are not detected.\n");
 		return -ENODEV;
 	}

+ 2 - 1
arch/x86/crypto/chacha20_glue.c

@@ -129,7 +129,8 @@ static int __init chacha20_simd_mod_init(void)
 		return -ENODEV;
 
 #ifdef CONFIG_AS_AVX2
-	chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+	chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+			    boot_cpu_has(X86_FEATURE_AVX2) &&
 			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 #endif
 	return crypto_register_alg(&alg);

+ 3 - 2
arch/x86/crypto/poly1305_glue.c

@@ -179,11 +179,12 @@ static struct shash_alg alg = {
 
 static int __init poly1305_simd_mod_init(void)
 {
-	if (!cpu_has_xmm2)
+	if (!boot_cpu_has(X86_FEATURE_XMM2))
 		return -ENODEV;
 
 #ifdef CONFIG_AS_AVX2
-	poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+	poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+			    boot_cpu_has(X86_FEATURE_AVX2) &&
 			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 	alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
 	if (poly1305_use_avx2)

+ 1 - 1
arch/x86/crypto/serpent_avx2_glue.c

@@ -538,7 +538,7 @@ static int __init init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_avx2 || !cpu_has_osxsave) {
+	if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 		pr_info("AVX2 instructions are not detected.\n");
 		return -ENODEV;
 	}

+ 1 - 1
arch/x86/crypto/serpent_sse2_glue.c

@@ -600,7 +600,7 @@ static struct crypto_alg serpent_algs[10] = { {
 
 static int __init serpent_sse2_init(void)
 {
-	if (!cpu_has_xmm2) {
+	if (!boot_cpu_has(X86_FEATURE_XMM2)) {
 		printk(KERN_INFO "SSE2 instructions are not detected.\n");
 		return -ENODEV;
 	}

+ 1 - 1
arch/x86/crypto/sha1_ssse3_glue.c

@@ -166,7 +166,7 @@ static struct shash_alg sha1_avx_alg = {
 static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (cpu_has_avx)
+		if (boot_cpu_has(X86_FEATURE_AVX))
 			pr_info("AVX detected but unusable.\n");
 		return false;
 	}

+ 1 - 1
arch/x86/crypto/sha256_ssse3_glue.c

@@ -201,7 +201,7 @@ static struct shash_alg sha256_avx_algs[] = { {
 static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (cpu_has_avx)
+		if (boot_cpu_has(X86_FEATURE_AVX))
 			pr_info("AVX detected but unusable.\n");
 		return false;
 	}

+ 1 - 1
arch/x86/crypto/sha512_ssse3_glue.c

@@ -151,7 +151,7 @@ asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
 static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (cpu_has_avx)
+		if (boot_cpu_has(X86_FEATURE_AVX))
 			pr_info("AVX detected but unusable.\n");
 		return false;
 	}

+ 1 - 1
arch/x86/entry/common.c

@@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
 
 long syscall_trace_enter(struct pt_regs *regs)
 {
-	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
 
 	if (phase1_result == 0)

+ 0 - 7
arch/x86/entry/entry_32.S

@@ -207,10 +207,7 @@
 ENTRY(ret_from_fork)
 	pushl	%eax
 	call	schedule_tail
-	GET_THREAD_INFO(%ebp)
 	popl	%eax
-	pushl	$0x0202				# Reset kernel eflags
-	popfl
 
 	/* When we fork, we trace the syscall return in the child, too. */
 	movl    %esp, %eax
@@ -221,10 +218,7 @@ END(ret_from_fork)
 ENTRY(ret_from_kernel_thread)
 	pushl	%eax
 	call	schedule_tail
-	GET_THREAD_INFO(%ebp)
 	popl	%eax
-	pushl	$0x0202				# Reset kernel eflags
-	popfl
 	movl	PT_EBP(%esp), %eax
 	call	*PT_EBX(%esp)
 	movl	$0, PT_EAX(%esp)
@@ -251,7 +245,6 @@ ENDPROC(ret_from_kernel_thread)
 ret_from_exception:
 	preempt_stop(CLBR_ANY)
 ret_from_intr:
-	GET_THREAD_INFO(%ebp)
 #ifdef CONFIG_VM86
 	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movb	PT_CS(%esp), %al

+ 12 - 9
arch/x86/entry/entry_64.S

@@ -372,9 +372,6 @@ END(ptregs_\func)
 ENTRY(ret_from_fork)
 	LOCK ; btr $TIF_FORK, TI_flags(%r8)
 
-	pushq	$0x0002
-	popfq					/* reset kernel eflags */
-
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
 	testb	$3, CS(%rsp)			/* from kernel_thread? */
@@ -781,19 +778,25 @@ ENTRY(native_load_gs_index)
 	pushfq
 	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
 	SWAPGS
-gs_change:
+.Lgs_change:
 	movl	%edi, %gs
-2:	mfence					/* workaround */
+2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
 	SWAPGS
 	popfq
 	ret
 END(native_load_gs_index)
 
-	_ASM_EXTABLE(gs_change, bad_gs)
+	_ASM_EXTABLE(.Lgs_change, bad_gs)
 	.section .fixup, "ax"
 	/* running with kernelgs */
 bad_gs:
 	SWAPGS					/* switch back to user gs */
+.macro ZAP_GS
+	/* This can't be a string because the preprocessor needs to see it. */
+	movl $__USER_DS, %eax
+	movl %eax, %gs
+.endm
+	ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
 	xorl	%eax, %eax
 	movl	%eax, %gs
 	jmp	2b
@@ -1019,13 +1022,13 @@ ENTRY(error_entry)
 	movl	%ecx, %eax			/* zero extend */
 	cmpq	%rax, RIP+8(%rsp)
 	je	.Lbstep_iret
-	cmpq	$gs_change, RIP+8(%rsp)
+	cmpq	$.Lgs_change, RIP+8(%rsp)
 	jne	.Lerror_entry_done
 
 	/*
-	 * hack: gs_change can fail with user gsbase.  If this happens, fix up
+	 * hack: .Lgs_change can fail with user gsbase.  If this happens, fix up
 	 * gsbase and proceed.  We'll fix up the exception and land in
-	 * gs_change's error handler with kernel gsbase.
+	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	jmp	.Lerror_entry_from_usermode_swapgs
 

+ 21 - 24
arch/x86/entry/entry_64_compat.S

@@ -72,24 +72,23 @@ ENTRY(entry_SYSENTER_compat)
 	pushfq				/* pt_regs->flags (except IF = 0) */
 	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
-	xorq    %r8,%r8
-	pushq	%r8			/* pt_regs->ip = 0 (placeholder) */
+	pushq	$0			/* pt_regs->ip = 0 (placeholder) */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-	pushq   %r8                     /* pt_regs->r12 = 0 */
-	pushq   %r8                     /* pt_regs->r13 = 0 */
-	pushq   %r8                     /* pt_regs->r14 = 0 */
-	pushq   %r8                     /* pt_regs->r15 = 0 */
+	pushq   $0			/* pt_regs->r12 = 0 */
+	pushq   $0			/* pt_regs->r13 = 0 */
+	pushq   $0			/* pt_regs->r14 = 0 */
+	pushq   $0			/* pt_regs->r15 = 0 */
 	cld
 
 	/*
@@ -205,17 +204,16 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	xorq    %r8,%r8
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-	pushq   %r8                     /* pt_regs->r12 = 0 */
-	pushq   %r8                     /* pt_regs->r13 = 0 */
-	pushq   %r8                     /* pt_regs->r14 = 0 */
-	pushq   %r8                     /* pt_regs->r15 = 0 */
+	pushq   $0			/* pt_regs->r12 = 0 */
+	pushq   $0			/* pt_regs->r13 = 0 */
+	pushq   $0			/* pt_regs->r14 = 0 */
+	pushq   $0			/* pt_regs->r15 = 0 */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -316,11 +314,10 @@ ENTRY(entry_INT80_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	xorq    %r8,%r8
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp */
 	pushq   %r12                    /* pt_regs->r12 */

+ 2 - 0
arch/x86/entry/syscalls/syscall_64.tbl

@@ -374,3 +374,5 @@
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
 545	x32	execveat		compat_sys_execveat/ptregs
+534	x32	preadv2			compat_sys_preadv2
+535	x32	pwritev2		compat_sys_pwritev2

+ 0 - 15
arch/x86/entry/vdso/vclock_gettime.c

@@ -13,7 +13,6 @@
 
 #include <uapi/linux/time.h>
 #include <asm/vgtod.h>
-#include <asm/hpet.h>
 #include <asm/vvar.h>
 #include <asm/unistd.h>
 #include <asm/msr.h>
@@ -28,16 +27,6 @@ extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
 extern time_t __vdso_time(time_t *t);
 
-#ifdef CONFIG_HPET_TIMER
-extern u8 hpet_page
-	__attribute__((visibility("hidden")));
-
-static notrace cycle_t vread_hpet(void)
-{
-	return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
-}
-#endif
-
 #ifdef CONFIG_PARAVIRT_CLOCK
 extern u8 pvclock_page
 	__attribute__((visibility("hidden")));
@@ -195,10 +184,6 @@ notrace static inline u64 vgetsns(int *mode)
 
 	if (gtod->vclock_mode == VCLOCK_TSC)
 		cycles = vread_tsc();
-#ifdef CONFIG_HPET_TIMER
-	else if (gtod->vclock_mode == VCLOCK_HPET)
-		cycles = vread_hpet();
-#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
 	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
 		cycles = vread_pvclock(mode);

+ 2 - 3
arch/x86/entry/vdso/vdso-layout.lds.S

@@ -25,7 +25,7 @@ SECTIONS
 	 * segment.
 	 */
 
-	vvar_start = . - 3 * PAGE_SIZE;
+	vvar_start = . - 2 * PAGE_SIZE;
 	vvar_page = vvar_start;
 
 	/* Place all vvars at the offsets in asm/vvar.h. */
@@ -35,8 +35,7 @@ SECTIONS
 #undef __VVAR_KERNEL_LDS
 #undef EMIT_VVAR
 
-	hpet_page = vvar_start + PAGE_SIZE;
-	pvclock_page = vvar_start + 2 * PAGE_SIZE;
+	pvclock_page = vvar_start + PAGE_SIZE;
 
 	. = SIZEOF_HEADERS;
 

+ 0 - 11
arch/x86/entry/vdso/vma.c

@@ -18,7 +18,6 @@
 #include <asm/vdso.h>
 #include <asm/vvar.h>
 #include <asm/page.h>
-#include <asm/hpet.h>
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
 
@@ -129,16 +128,6 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 	if (sym_offset == image->sym_vvar_page) {
 		ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
 				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
-	} else if (sym_offset == image->sym_hpet_page) {
-#ifdef CONFIG_HPET_TIMER
-		if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
-			ret = vm_insert_pfn_prot(
-				vma,
-				(unsigned long)vmf->virtual_address,
-				hpet_address >> PAGE_SHIFT,
-				pgprot_noncached(PAGE_READONLY));
-		}
-#endif
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
 			pvclock_pvti_cpu0_va();

+ 1 - 1
arch/x86/events/core.c

@@ -1524,7 +1524,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 
 static void __init pmu_check_apic(void)
 {
-	if (cpu_has_apic)
+	if (boot_cpu_has(X86_FEATURE_APIC))
 		return;
 
 	x86_pmu.apic = 0;

+ 1 - 1
arch/x86/events/intel/uncore.c

@@ -1400,7 +1400,7 @@ static int __init intel_uncore_init(void)
 	if (!id)
 		return -ENODEV;
 
-	if (cpu_has_hypervisor)
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return -ENODEV;
 
 	max_packages = topology_max_packages();

+ 1 - 1
arch/x86/ia32/ia32_signal.c

@@ -357,7 +357,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
 		put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
 
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
+		if (boot_cpu_has(X86_FEATURE_XSAVE))
 			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
 		else
 			put_user_ex(0, &frame->uc.uc_flags);

+ 3 - 32
arch/x86/include/asm/alternative.h

@@ -1,11 +1,12 @@
 #ifndef _ASM_X86_ALTERNATIVE_H
 #define _ASM_X86_ALTERNATIVE_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <asm/asm.h>
-#include <asm/ptrace.h>
 
 /*
  * Alternative inline assembly for SMP.
@@ -233,36 +234,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
 
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
-		    struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
-				  struct paravirt_patch_site *end)
-{}
-#define __parainstructions	NULL
-#define __parainstructions_end	NULL
-#endif
-
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
-
-/*
- * Clear and restore the kernel write-protection flag on the local CPU.
- * Allows the kernel to edit read-only pages.
- * Side-effect: any interrupt handler running between save and restore will have
- * the ability to write to read-only pages.
- *
- * Warning:
- * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
- * no thread can be preempted in the instructions being modified (no iret to an
- * invalid instruction possible) or if the instructions are changed from a
- * consistent state to another consistent state atomically.
- * On the local CPU you need to be protected again NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
- */
-extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+#endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_ALTERNATIVE_H */

+ 2 - 2
arch/x86/include/asm/apic.h

@@ -239,10 +239,10 @@ extern void __init check_x2apic(void);
 extern void x2apic_setup(void);
 static inline int x2apic_enabled(void)
 {
-	return cpu_has_x2apic && apic_is_x2apic_enabled();
+	return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled();
 }
 
-#define x2apic_supported()	(cpu_has_x2apic)
+#define x2apic_supported()	(boot_cpu_has(X86_FEATURE_X2APIC))
 #else /* !CONFIG_X86_X2APIC */
 static inline void check_x2apic(void) { }
 static inline void x2apic_setup(void) { }

+ 4 - 5
arch/x86/include/asm/clocksource.h

@@ -3,11 +3,10 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#define VCLOCK_NONE	0  /* No vDSO clock available.	*/
-#define VCLOCK_TSC	1  /* vDSO should use vread_tsc.	*/
-#define VCLOCK_HPET	2  /* vDSO should use vread_hpet.	*/
-#define VCLOCK_PVCLOCK	3 /* vDSO should use vread_pvclock. */
-#define VCLOCK_MAX	3
+#define VCLOCK_NONE	0	/* No vDSO clock available.		*/
+#define VCLOCK_TSC	1	/* vDSO should use vread_tsc.		*/
+#define VCLOCK_PVCLOCK	2	/* vDSO should use vread_pvclock.	*/
+#define VCLOCK_MAX	2
 
 struct arch_clocksource_data {
 	int vclock_mode;

+ 2 - 2
arch/x86/include/asm/compat.h

@@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 	return (void __user *)round_down(sp - len, 16);
 }
 
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
 	if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
 
 static inline bool in_compat_syscall(void)
 {
-	return is_ia32_task() || is_x32_task();
+	return in_ia32_syscall() || in_x32_syscall();
 }
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
 

+ 0 - 25
arch/x86/include/asm/cpufeature.h

@@ -119,31 +119,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
-#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_pse		boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc		boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_fxsr		boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm		boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_aes		boot_cpu_has(X86_FEATURE_AES)
-#define cpu_has_avx		boot_cpu_has(X86_FEATURE_AVX)
-#define cpu_has_avx2		boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)
-#define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-#define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
-#define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaves		boot_cpu_has(X86_FEATURE_XSAVES)
-#define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
-#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
-/*
- * Do not add any more of those clumsy macros - use static_cpu_has() for
- * fast paths and boot_cpu_has() otherwise!
- */
-
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
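
The deleted wrappers are replaced at every call site by the explicit feature test, as the crypto and APIC diffs above show. A sketch of the conversion pattern, using hypothetical functions rather than code from this series: boot_cpu_has() for init and slow paths, static_cpu_has() where the test sits on a hot path:

	#include <asm/cpufeature.h>

	static int __init demo_init(void)
	{
		/* Was: if (!cpu_has_avx) ... */
		if (!boot_cpu_has(X86_FEATURE_AVX))
			return -ENODEV;
		return 0;
	}

	static bool demo_hot_path_check(void)
	{
		/* On hot paths, static_cpu_has() is patched into a static branch. */
		return static_cpu_has(X86_FEATURE_PGE);
	}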

+ 3 - 0
arch/x86/include/asm/cpufeatures.h

@@ -301,6 +301,9 @@
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_NULL_SEG	X86_BUG(9) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE	X86_BUG(10) /* SWAPGS without input dep on GS */
+
 
 #ifdef CONFIG_X86_32
 /*

+ 3 - 3
arch/x86/include/asm/elf.h

@@ -176,7 +176,7 @@ static inline void elf_common_init(struct thread_struct *t,
 	regs->si = regs->di = regs->bp = 0;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-	t->fs = t->gs = 0;
+	t->fsbase = t->gsbase = 0;
 	t->fsindex = t->gsindex = 0;
 	t->ds = t->es = ds;
 }
@@ -226,8 +226,8 @@ do {								\
 	(pr_reg)[18] = (regs)->flags;				\
 	(pr_reg)[19] = (regs)->sp;				\
 	(pr_reg)[20] = (regs)->ss;				\
-	(pr_reg)[21] = current->thread.fs;			\
-	(pr_reg)[22] = current->thread.gs;			\
+	(pr_reg)[21] = current->thread.fsbase;			\
+	(pr_reg)[22] = current->thread.gsbase;			\
 	asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;	\
 	asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;	\
 	asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;	\

+ 1 - 1
arch/x86/include/asm/hugetlb.h

@@ -4,7 +4,7 @@
 #include <asm/page.h>
 #include <asm-generic/hugetlb.h>
 
-#define hugepages_supported() cpu_has_pse
+#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,

+ 1 - 1
arch/x86/include/asm/irq_work.h

@@ -5,7 +5,7 @@
 
 static inline bool arch_irq_work_has_interrupt(void)
 {
-	return cpu_has_apic;
+	return boot_cpu_has(X86_FEATURE_APIC);
 }
 
 #endif /* _ASM_IRQ_WORK_H */

+ 2 - 0
arch/x86/include/asm/kgdb.h

@@ -6,6 +6,8 @@
  * Copyright (C) 2008 Wind River Systems, Inc.
  */
 
+#include <asm/ptrace.h>
+
 /*
  * BUFMAX defines the maximum number of characters in inbound/outbound
  * buffers at least NUMREGBYTES*2 are needed for register packets

+ 0 - 34
arch/x86/include/asm/linkage.h

@@ -8,40 +8,6 @@
 
 #ifdef CONFIG_X86_32
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
-
-/*
- * Make sure the compiler doesn't do anything stupid with the
- * arguments on the stack - they are owned by the *caller*, not
- * the callee. This just fools gcc into not spilling into them,
- * and keeps it from doing tailcall recursion and/or using the
- * stack slots for temporaries, since they are live and "used"
- * all the way to the end of the function.
- *
- * NOTE! On x86-64, all the arguments are in registers, so this
- * only matters on a 32-bit kernel.
- */
-#define asmlinkage_protect(n, ret, args...) \
-	__asmlinkage_protect##n(ret, ##args)
-#define __asmlinkage_protect_n(ret, args...) \
-	__asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)
-#define __asmlinkage_protect0(ret) \
-	__asmlinkage_protect_n(ret)
-#define __asmlinkage_protect1(ret, arg1) \
-	__asmlinkage_protect_n(ret, "m" (arg1))
-#define __asmlinkage_protect2(ret, arg1, arg2) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
-#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
-#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4))
-#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4), "m" (arg5))
-#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4), "m" (arg5), "m" (arg6))
-
 #endif /* CONFIG_X86_32 */
 
 #ifdef __ASSEMBLY__

+ 15 - 5
arch/x86/include/asm/msr.h

@@ -84,7 +84,10 @@ static inline unsigned long long native_read_msr(unsigned int msr)
 {
 	DECLARE_ARGS(val, low, high);
 
-	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+	asm volatile("1: rdmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+		     : EAX_EDX_RET(val, low, high) : "c" (msr));
 	if (msr_tracepoint_active(__tracepoint_read_msr))
 		do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
 	return EAX_EDX_VAL(val, low, high);
@@ -98,7 +101,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 	asm volatile("2: rdmsr ; xor %[err],%[err]\n"
 		     "1:\n\t"
 		     ".section .fixup,\"ax\"\n\t"
-		     "3:  mov %[fault],%[err] ; jmp 1b\n\t"
+		     "3: mov %[fault],%[err]\n\t"
+		     "xorl %%eax, %%eax\n\t"
+		     "xorl %%edx, %%edx\n\t"
+		     "jmp 1b\n\t"
 		     ".previous\n\t"
 		     _ASM_EXTABLE(2b, 3b)
 		     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
@@ -108,10 +114,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 	return EAX_EDX_VAL(val, low, high);
 }
 
-static inline void native_write_msr(unsigned int msr,
-				    unsigned low, unsigned high)
+/* Can be uninlined because referenced by paravirt */
+notrace static inline void native_write_msr(unsigned int msr,
+					    unsigned low, unsigned high)
 {
-	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+	asm volatile("1: wrmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+		     : : "c" (msr), "a"(low), "d" (high) : "memory");
 	if (msr_tracepoint_active(__tracepoint_read_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
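
Together with the paravirt changes below, this splits MSR accessors into a non-"safe" flavour that is fixed up (warning instead of oopsing) and a _safe flavour that reports the fault to the caller. A hedged usage sketch; the MSR constants are only examples and the demo function is hypothetical:

	#include <linux/printk.h>
	#include <asm/msr.h>

	static void demo_msr_access(void)
	{
		u64 val;

		/* Non-"safe" read: a faulting RDMSR is fixed up and yields 0. */
		rdmsrl(MSR_IA32_TSC, val);

		/* Safe read: the failure is returned to the caller instead. */
		if (rdmsrl_safe(MSR_PLATFORM_INFO, &val))
			pr_warn("MSR_PLATFORM_INFO is not readable on this CPU\n");
	}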

+ 5 - 1
arch/x86/include/asm/mtrr.h

@@ -24,6 +24,7 @@
 #define _ASM_X86_MTRR_H
 
 #include <uapi/asm/mtrr.h>
+#include <asm/pat.h>
 
 
 /*
@@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
 static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
 {
 }
+static inline void mtrr_bp_init(void)
+{
+	pat_disable("MTRRs disabled, skipping PAT initialization too.");
+}
 
 #define mtrr_ap_init() do {} while (0)
-#define mtrr_bp_init() do {} while (0)
 #define set_mtrr_aps_delayed_init() do {} while (0)
 #define mtrr_aps_init() do {} while (0)
 #define mtrr_bp_restore() do {} while (0)

+ 27 - 18
arch/x86/include/asm/paravirt.h

@@ -130,21 +130,31 @@ static inline void wbinvd(void)
 
 #define get_kernel_rpl()  (pv_info.kernel_rpl)
 
-static inline u64 paravirt_read_msr(unsigned msr, int *err)
+static inline u64 paravirt_read_msr(unsigned msr)
 {
-	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
+	return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr);
 }
 
-static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
+static inline void paravirt_write_msr(unsigned msr,
+				      unsigned low, unsigned high)
 {
-	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
+	return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
+}
+
+static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
+{
+	return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err);
+}
+
+static inline int paravirt_write_msr_safe(unsigned msr,
+					  unsigned low, unsigned high)
+{
+	return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high);
 }
 
-/* These should all do BUG_ON(_err), but our headers are too tangled. */
 #define rdmsr(msr, val1, val2)			\
 do {						\
-	int _err;				\
-	u64 _l = paravirt_read_msr(msr, &_err);	\
+	u64 _l = paravirt_read_msr(msr);	\
 	val1 = (u32)_l;				\
 	val2 = _l >> 32;			\
 } while (0)
@@ -156,8 +166,7 @@ do {						\
 
 #define rdmsrl(msr, val)			\
 do {						\
-	int _err;				\
-	val = paravirt_read_msr(msr, &_err);	\
+	val = paravirt_read_msr(msr);		\
 } while (0)
 
 static inline void wrmsrl(unsigned msr, u64 val)
@@ -165,23 +174,23 @@ static inline void wrmsrl(unsigned msr, u64 val)
 	wrmsr(msr, (u32)val, (u32)(val>>32));
 }
 
-#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)
+#define wrmsr_safe(msr, a, b)	paravirt_write_msr_safe(msr, a, b)
 
 /* rdmsr with exception handling */
-#define rdmsr_safe(msr, a, b)			\
-({						\
-	int _err;				\
-	u64 _l = paravirt_read_msr(msr, &_err);	\
-	(*a) = (u32)_l;				\
-	(*b) = _l >> 32;			\
-	_err;					\
+#define rdmsr_safe(msr, a, b)				\
+({							\
+	int _err;					\
+	u64 _l = paravirt_read_msr_safe(msr, &_err);	\
+	(*a) = (u32)_l;					\
+	(*b) = _l >> 32;				\
+	_err;						\
 })
 
 static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 {
 	int err;
 
-	*p = paravirt_read_msr(msr, &err);
+	*p = paravirt_read_msr_safe(msr, &err);
 	return err;
 }
 

+ 10 - 4
arch/x86/include/asm/paravirt_types.h

@@ -155,10 +155,16 @@ struct pv_cpu_ops {
 	void (*cpuid)(unsigned int *eax, unsigned int *ebx,
 		      unsigned int *ecx, unsigned int *edx);
 
-	/* MSR, PMC and TSR operations.
-	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
-	u64 (*read_msr)(unsigned int msr, int *err);
-	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+	/* Unsafe MSR operations.  These will warn or panic on failure. */
+	u64 (*read_msr)(unsigned int msr);
+	void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+
+	/*
+	 * Safe MSR operations.
+	 * read sets err to 0 or -EIO.  write returns 0 or -EIO.
+	 */
+	u64 (*read_msr_safe)(unsigned int msr, int *err);
+	int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
 
 	u64 (*read_pmc)(int counter);
 

+ 1 - 1
arch/x86/include/asm/pat.h

@@ -5,8 +5,8 @@
 #include <asm/pgtable_types.h>
 
 bool pat_enabled(void);
+void pat_disable(const char *reason);
 extern void pat_init(void);
-void pat_init_cache_modes(u64);
 
 extern int reserve_memtype(u64 start, u64 end,
 		enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);

+ 1 - 1
arch/x86/include/asm/pgtable.h

@@ -183,7 +183,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
 
 static inline int has_transparent_hugepage(void)
 {
-	return cpu_has_pse;
+	return boot_cpu_has(X86_FEATURE_PSE);
 }
 
 #ifdef __HAVE_ARCH_PTE_DEVMAP

+ 9 - 2
arch/x86/include/asm/processor.h

@@ -388,9 +388,16 @@ struct thread_struct {
 	unsigned long		ip;
 #endif
 #ifdef CONFIG_X86_64
-	unsigned long		fs;
+	unsigned long		fsbase;
+	unsigned long		gsbase;
+#else
+	/*
+	 * XXX: this could presumably be unsigned short.  Alternatively,
+	 * 32-bit kernels could be taught to use fsindex instead.
+	 */
+	unsigned long fs;
+	unsigned long gs;
 #endif
-	unsigned long		gs;
 
 	/* Save middle states of ptrace breakpoints */
 	struct perf_event	*ptrace_bps[HBP_NUM];
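
The fsbase/gsbase names track what the 64-bit fields actually hold: the segment bases that user space manages through arch_prctl(), which several patches in this series fix up. A minimal user-space sketch of that interface (not kernel code; glibc has no wrapper here, so raw syscall() is used, and the wrapper name is just for this example):

	/* Plain 64-bit user program. */
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/prctl.h>		/* ARCH_GET_FS, ARCH_SET_GS, ... */

	static long xarch_prctl(int code, unsigned long addr)
	{
		return syscall(SYS_arch_prctl, code, addr);
	}

	int main(void)
	{
		unsigned long fsbase = 0;

		/* ARCH_GET_FS stores the current FS base at the given address. */
		xarch_prctl(ARCH_GET_FS, (unsigned long)&fsbase);
		printf("FS base: %#lx\n", fsbase);
		return 0;
	}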

+ 39 - 10
arch/x86/include/asm/segment.h

@@ -2,6 +2,7 @@
 #define _ASM_X86_SEGMENT_H
 
 #include <linux/const.h>
+#include <asm/alternative.h>
 
 /*
  * Constructor for a conventional segment GDT (or LDT) entry.
@@ -207,13 +208,6 @@
 #define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
 #define __PER_CPU_SEG			(GDT_ENTRY_PER_CPU*8 + 3)
 
-/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
-#define FS_TLS				0
-#define GS_TLS				1
-
-#define GS_TLS_SEL			((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL			((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
 #endif
 
 #ifndef CONFIG_PARAVIRT
@@ -249,10 +243,13 @@ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDL
 #endif
 
 /*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
+ * Load a segment. Fall back on loading the zero segment if something goes
+ * wrong.  This variant assumes that loading zero fully clears the segment.
+ * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any
+ * failure to fully clear the cached descriptor is only observable for
+ * FS and GS.
  */
-#define loadsegment(seg, value)						\
+#define __loadsegment_simple(seg, value)				\
 do {									\
 	unsigned short __val = (value);					\
 									\
@@ -269,6 +266,38 @@ do {									\
 		     : "+r" (__val) : : "memory");			\
 } while (0)
 
+#define __loadsegment_ss(value) __loadsegment_simple(ss, (value))
+#define __loadsegment_ds(value) __loadsegment_simple(ds, (value))
+#define __loadsegment_es(value) __loadsegment_simple(es, (value))
+
+#ifdef CONFIG_X86_32
+
+/*
+ * On 32-bit systems, the hidden parts of FS and GS are unobservable if
+ * the selector is NULL, so there's no funny business here.
+ */
+#define __loadsegment_fs(value) __loadsegment_simple(fs, (value))
+#define __loadsegment_gs(value) __loadsegment_simple(gs, (value))
+
+#else
+
+static inline void __loadsegment_fs(unsigned short value)
+{
+	asm volatile("						\n"
+		     "1:	movw %0, %%fs			\n"
+		     "2:					\n"
+
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
+
+		     : : "rm" (value) : "memory");
+}
+
+/* __loadsegment_gs is intentionally undefined.  Use load_gs_index instead. */
+
+#endif
+
+#define loadsegment(seg, value) __loadsegment_ ## seg (value)
+
 /*
  * Save a segment register away:
  */
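
With the per-register dispatch, loadsegment(fs, sel) on 64-bit expands to __loadsegment_fs(), whose exception fixup clears FS instead of leaving a stale selector and base behind. A hypothetical caller, loosely mirroring what the context-switch path does:

	#include <asm/segment.h>

	static void demo_reset_user_segments(void)
	{
		/*
		 * If the MOV to %fs faults (e.g. a bogus selector left in the
		 * LDT), the fixup clears FS rather than leaving stale state.
		 */
		loadsegment(fs, 0);

		/* DS/ES still go through __loadsegment_simple(). */
		loadsegment(ds, __USER_DS);
		loadsegment(es, __USER_DS);
	}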

+ 1 - 0
arch/x86/include/asm/setup.h

@@ -6,6 +6,7 @@
 #define COMMAND_LINE_SIZE 2048
 
 #include <linux/linkage.h>
+#include <asm/page_types.h>
 
 #ifdef __i386__
 

+ 1 - 3
arch/x86/include/asm/switch_to.h

@@ -39,8 +39,7 @@ do {									\
 	 */								\
 	unsigned long ebx, ecx, edx, esi, edi;				\
 									\
-	asm volatile("pushfl\n\t"		/* save    flags */	\
-		     "pushl %%ebp\n\t"		/* save    EBP   */	\
+	asm volatile("pushl %%ebp\n\t"		/* save    EBP   */	\
 		     "movl %%esp,%[prev_sp]\n\t"	/* save    ESP   */ \
 		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
 		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
@@ -49,7 +48,6 @@ do {									\
 		     "jmp __switch_to\n"	/* regparm call  */	\
 		     "1:\t"						\
 		     "popl %%ebp\n\t"		/* restore EBP   */	\
-		     "popfl\n"			/* restore flags */	\
 									\
 		     /* output parameters */				\
 		     : [prev_sp] "=m" (prev->thread.sp),		\

+ 40 - 0
arch/x86/include/asm/text-patching.h

@@ -0,0 +1,40 @@
+#ifndef _ASM_X86_TEXT_PATCHING_H
+#define _ASM_X86_TEXT_PATCHING_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+
+struct paravirt_patch_site;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch_site *start,
+		    struct paravirt_patch_site *end);
+#else
+static inline void apply_paravirt(struct paravirt_patch_site *start,
+				  struct paravirt_patch_site *end)
+{}
+#define __parainstructions	NULL
+#define __parainstructions_end	NULL
+#endif
+
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
+/*
+ * Clear and restore the kernel write-protection flag on the local CPU.
+ * Allows the kernel to edit read-only pages.
+ * Side-effect: any interrupt handler running between save and restore will have
+ * the ability to write to read-only pages.
+ *
+ * Warning:
+ * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
+ * no thread can be preempted in the instructions being modified (no iret to an
+ * invalid instruction possible) or if the instructions are changed from a
+ * consistent state to another consistent state atomically.
+ * On the local CPU you need to be protected again NMI or MCE handlers seeing an
+ * inconsistent instruction while you patch.
+ */
+extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern int poke_int3_handler(struct pt_regs *regs);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+
+#endif /* _ASM_X86_TEXT_PATCHING_H */

+ 1 - 1
arch/x86/include/asm/thread_info.h

@@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void)
 	return true;
 }
 
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
 {
 #ifdef CONFIG_X86_32
 	return true;

+ 1 - 1
arch/x86/include/asm/tlbflush.h

@@ -181,7 +181,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
 
 static inline void __flush_tlb_all(void)
 {
-	if (cpu_has_pge)
+	if (static_cpu_has(X86_FEATURE_PGE))
 		__flush_tlb_global();
 	else
 		__flush_tlb();

+ 1 - 1
arch/x86/include/asm/tsc.h

@@ -22,7 +22,7 @@ extern void disable_TSC(void);
 static inline cycles_t get_cycles(void)
 {
 #ifndef CONFIG_X86_TSC
-	if (!cpu_has_tsc)
+	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return 0;
 #endif
 

+ 1 - 1
arch/x86/include/asm/uaccess.h

@@ -118,7 +118,7 @@ struct exception_table_entry {
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
-extern int early_fixup_exception(unsigned long *ip);
+extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
 /*
  * These are the main single-value transfer routines.  They automatically

+ 1 - 1
arch/x86/include/asm/xor_32.h

@@ -550,7 +550,7 @@ static struct xor_block_template xor_block_pIII_sse = {
 #define XOR_TRY_TEMPLATES				\
 do {							\
 	AVX_XOR_SPEED;					\
-	if (cpu_has_xmm) {				\
+	if (boot_cpu_has(X86_FEATURE_XMM)) {				\
 		xor_speed(&xor_block_pIII_sse);		\
 		xor_speed(&xor_block_sse_pf64);		\
 	} else if (boot_cpu_has(X86_FEATURE_MMX)) {	\

+ 2 - 2
arch/x86/include/asm/xor_avx.h

@@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = {
 
 #define AVX_XOR_SPEED \
 do { \
-	if (cpu_has_avx && cpu_has_osxsave) \
+	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
 		xor_speed(&xor_block_avx); \
 } while (0)
 
 #define AVX_SELECT(FASTEST) \
-	(cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST)
+	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
 
 #else
 

+ 4 - 4
arch/x86/kernel/acpi/boot.c

@@ -136,7 +136,7 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
 	struct acpi_table_madt *madt = NULL;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -EINVAL;
 
 	madt = (struct acpi_table_madt *)table;
@@ -951,7 +951,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 {
 	int count;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
@@ -979,7 +979,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 	int ret;
 	struct acpi_subtable_proc madt_proc[2];
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
@@ -1125,7 +1125,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	if (acpi_disabled || acpi_noirq)
 		return -ENODEV;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*

+ 1 - 0
arch/x86/kernel/alternative.c

@@ -11,6 +11,7 @@
 #include <linux/stop_machine.h>
 #include <linux/slab.h>
 #include <linux/kdebug.h>
+#include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>

+ 16 - 16
arch/x86/kernel/apic/apic.c

@@ -607,7 +607,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 	long tapic = apic_read(APIC_TMCCT);
 	long tapic = apic_read(APIC_TMCCT);
 	unsigned long pm = acpi_pm_read_early();
 	unsigned long pm = acpi_pm_read_early();
 
 
-	if (cpu_has_tsc)
+	if (boot_cpu_has(X86_FEATURE_TSC))
 		tsc = rdtsc();
 		tsc = rdtsc();
 
 
 	switch (lapic_cal_loops++) {
 	switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
 	*delta = (long)res;
 	*delta = (long)res;
 
 
 	/* Correct the tsc counter value */
 	/* Correct the tsc counter value */
-	if (cpu_has_tsc) {
+	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		res = (((u64)(*deltatsc)) * pm_100ms);
 		res = (((u64)(*deltatsc)) * pm_100ms);
 		do_div(res, deltapm);
 		do_div(res, deltapm);
 		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
 		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@ static int __init calibrate_APIC_clock(void)
 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 		    lapic_timer_frequency);
 		    lapic_timer_frequency);
 
 
-	if (cpu_has_tsc) {
+	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 			    "%ld.%04ld MHz.\n",
 			    "%ld.%04ld MHz.\n",
 			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
 			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@ void lapic_shutdown(void)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 
 
-	if (!cpu_has_apic && !apic_from_smp_config())
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
 		return;
 		return;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
@@ -1134,7 +1134,7 @@ void __init init_bsp_APIC(void)
 	 * Don't do the setup now if we have a SMP BIOS as the
 	 * Don't do the setup now if we have a SMP BIOS as the
 	 * through-I/O-APIC virtual wire mode might be active.
 	 * through-I/O-APIC virtual wire mode might be active.
 	 */
 	 */
-	if (smp_found_config || !cpu_has_apic)
+	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
 		return;
 		return;
 
 
 	/*
 	/*
@@ -1227,7 +1227,7 @@ void setup_local_APIC(void)
 	unsigned long long tsc = 0, ntsc;
 	unsigned long long tsc = 0, ntsc;
 	long long max_loops = cpu_khz ? cpu_khz : 1000000;
 	long long max_loops = cpu_khz ? cpu_khz : 1000000;
 
 
-	if (cpu_has_tsc)
+	if (boot_cpu_has(X86_FEATURE_TSC))
 		tsc = rdtsc();
 		tsc = rdtsc();
 
 
 	if (disable_apic) {
 	if (disable_apic) {
@@ -1311,7 +1311,7 @@ void setup_local_APIC(void)
 			break;
 			break;
 		}
 		}
 		if (queued) {
 		if (queued) {
-			if (cpu_has_tsc && cpu_khz) {
+			if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
 				ntsc = rdtsc();
 				ntsc = rdtsc();
 				max_loops = (cpu_khz << 10) - (ntsc - tsc);
 				max_loops = (cpu_khz << 10) - (ntsc - tsc);
 			} else
 			} else
@@ -1445,7 +1445,7 @@ static void __x2apic_disable(void)
 {
 {
 	u64 msr;
 	u64 msr;
 
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return;
 		return;
 
 
 	rdmsrl(MSR_IA32_APICBASE, msr);
 	rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1561,7 +1561,7 @@ void __init check_x2apic(void)
 		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
 		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
 		x2apic_mode = 1;
 		x2apic_mode = 1;
 		x2apic_state = X2APIC_ON;
 		x2apic_state = X2APIC_ON;
-	} else if (!cpu_has_x2apic) {
+	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
 		x2apic_state = X2APIC_DISABLED;
 		x2apic_state = X2APIC_DISABLED;
 	}
 	}
 }
 }
@@ -1632,7 +1632,7 @@ void __init enable_IR_x2apic(void)
  */
  */
 static int __init detect_init_APIC(void)
 static int __init detect_init_APIC(void)
 {
 {
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		pr_info("No local APIC present\n");
 		pr_info("No local APIC present\n");
 		return -1;
 		return -1;
 	}
 	}
@@ -1711,14 +1711,14 @@ static int __init detect_init_APIC(void)
 		goto no_apic;
 		goto no_apic;
 	case X86_VENDOR_INTEL:
 	case X86_VENDOR_INTEL:
 		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
 		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
 			break;
 			break;
 		goto no_apic;
 		goto no_apic;
 	default:
 	default:
 		goto no_apic;
 		goto no_apic;
 	}
 	}
 
 
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		/*
 		/*
 		 * Over-ride BIOS and try to enable the local APIC only if
 		 * Over-ride BIOS and try to enable the local APIC only if
 		 * "lapic" specified.
 		 * "lapic" specified.
@@ -2233,19 +2233,19 @@ int __init APIC_init_uniprocessor(void)
 		return -1;
 		return -1;
 	}
 	}
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		disable_apic = 1;
 		disable_apic = 1;
 		pr_info("Apic disabled by BIOS\n");
 		pr_info("Apic disabled by BIOS\n");
 		return -1;
 		return -1;
 	}
 	}
 #else
 #else
-	if (!smp_found_config && !cpu_has_apic)
+	if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
 		return -1;
 		return -1;
 
 
 	/*
 	/*
 	 * Complain if the BIOS pretends there is one.
 	 * Complain if the BIOS pretends there is one.
 	 */
 	 */
-	if (!cpu_has_apic &&
+	if (!boot_cpu_has(X86_FEATURE_APIC) &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
 		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
 			boot_cpu_physical_apicid);
 			boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@ static void apic_pm_activate(void)
 static int __init init_lapic_sysfs(void)
 static int __init init_lapic_sysfs(void)
 {
 {
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-	if (cpu_has_apic)
+	if (boot_cpu_has(X86_FEATURE_APIC))
 		register_syscore_ops(&lapic_syscore_ops);
 		register_syscore_ops(&lapic_syscore_ops);
 
 
 	return 0;
 	return 0;

+ 2 - 2
arch/x86/kernel/apic/apic_noop.c

@@ -100,13 +100,13 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
 
 static u32 noop_apic_read(u32 reg)
 {
-	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 	return 0;
 }
 
 static void noop_apic_write(u32 reg, u32 v)
 {
-	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
 struct apic apic_noop = {

+ 1 - 1
arch/x86/kernel/apic/io_apic.c

@@ -1454,7 +1454,7 @@ void native_disable_io_apic(void)
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 
-	if (cpu_has_apic || apic_from_smp_config())
+	if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
 		disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
 

+ 1 - 1
arch/x86/kernel/apic/ipi.c

@@ -230,7 +230,7 @@ int safe_smp_processor_id(void)
 {
 	int apicid, cpuid;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return 0;
 
 	apicid = hard_smp_processor_id();

+ 1 - 1
arch/x86/kernel/apic/vector.c

@@ -944,7 +944,7 @@ static int __init print_ICs(void)
 	print_PIC();
 
 	/* don't print out if apic is not there */
-	if (!cpu_has_apic && !apic_from_smp_config())
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
 		return 0;
 
 	print_local_APICs(show_lapic);

+ 12 - 8
arch/x86/kernel/cpu/amd.c

@@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	 * can safely set X86_FEATURE_EXTD_APICID unconditionally for families
 	 * after 16h.
 	 */
-	if (cpu_has_apic && c->x86 > 0x16) {
-		set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
-	} else if (cpu_has_apic && c->x86 >= 0xf) {
-		/* check CPU config space for extended APIC ID */
-		unsigned int val;
-		val = read_pci_config(0, 24, 0, 0x68);
-		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+	if (boot_cpu_has(X86_FEATURE_APIC)) {
+		if (c->x86 > 0x16)
 			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+		else if (c->x86 >= 0xf) {
+			/* check CPU config space for extended APIC ID */
+			unsigned int val;
+
+			val = read_pci_config(0, 24, 0, 0x68);
+			if ((val >> 17 & 0x3) == 0x3)
+				set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+		}
 	}
 #endif
 
@@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
 	 */
 	msr_set_bit(MSR_K7_HWCR, 6);
 #endif
+	set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
 }
 
 static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -746,7 +750,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 >= 0xf)
 		set_cpu_cap(c, X86_FEATURE_K8);
 
-	if (cpu_has_xmm2) {
+	if (cpu_has(c, X86_FEATURE_XMM2)) {
 		/* MFENCE stops RDTSC speculation */
 		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
 	}

+ 56 - 26
arch/x86/kernel/cpu/common.c

@@ -430,7 +430,7 @@ void load_percpu_segment(int cpu)
 #ifdef CONFIG_X86_32
 #ifdef CONFIG_X86_32
 	loadsegment(fs, __KERNEL_PERCPU);
 	loadsegment(fs, __KERNEL_PERCPU);
 #else
 #else
-	loadsegment(gs, 0);
+	__loadsegment_simple(gs, 0);
 	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
 	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
 #endif
 #endif
 	load_stack_canary_segment();
 	load_stack_canary_segment();
@@ -866,30 +866,34 @@ static void detect_nopl(struct cpuinfo_x86 *c)
 #else
 #else
 	set_cpu_cap(c, X86_FEATURE_NOPL);
 	set_cpu_cap(c, X86_FEATURE_NOPL);
 #endif
 #endif
+}
 
 
+static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
 	/*
 	/*
-	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
-	 * systems that run Linux at CPL > 0 may or may not have the
-	 * issue, but, even if they have the issue, there's absolutely
-	 * nothing we can do about it because we can't use the real IRET
-	 * instruction.
+	 * Empirically, writing zero to a segment selector on AMD does
+	 * not clear the base, whereas writing zero to a segment
+	 * selector on Intel does clear the base.  Intel's behavior
+	 * allows slightly faster context switches in the common case
+	 * where GS is unused by the prev and next threads.
 	 *
 	 *
-	 * NB: For the time being, only 32-bit kernels support
-	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
-	 * whether to apply espfix using paravirt hooks.  If any
-	 * non-paravirt system ever shows up that does *not* have the
-	 * ESPFIX issue, we can change this.
+	 * Since neither vendor documents this anywhere that I can see,
+	 * detect it directly instead of hardcoding the choice by
+	 * vendor.
+	 *
+	 * I've designated AMD's behavior as the "bug" because it's
+	 * counterintuitive and less friendly.
 	 */
 	 */
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_PARAVIRT
-	do {
-		extern void native_iret(void);
-		if (pv_cpu_ops.iret == native_iret)
-			set_cpu_bug(c, X86_BUG_ESPFIX);
-	} while (0);
-#else
-	set_cpu_bug(c, X86_BUG_ESPFIX);
-#endif
+
+	unsigned long old_base, tmp;
+	rdmsrl(MSR_FS_BASE, old_base);
+	wrmsrl(MSR_FS_BASE, 1);
+	loadsegment(fs, 0);
+	rdmsrl(MSR_FS_BASE, tmp);
+	if (tmp != 0)
+		set_cpu_bug(c, X86_BUG_NULL_SEG);
+	wrmsrl(MSR_FS_BASE, old_base);
 #endif
 #endif
 }
 }
 
 
@@ -925,6 +929,33 @@ static void generic_identify(struct cpuinfo_x86 *c)
 	get_model_name(c); /* Default name */
 	get_model_name(c); /* Default name */
 
 
 	detect_nopl(c);
 	detect_nopl(c);
+
+	detect_null_seg_behavior(c);
+
+	/*
+	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
+	 * systems that run Linux at CPL > 0 may or may not have the
+	 * issue, but, even if they have the issue, there's absolutely
+	 * nothing we can do about it because we can't use the real IRET
+	 * instruction.
+	 *
+	 * NB: For the time being, only 32-bit kernels support
+	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
+	 * whether to apply espfix using paravirt hooks.  If any
+	 * non-paravirt system ever shows up that does *not* have the
+	 * ESPFIX issue, we can change this.
+	 */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_PARAVIRT
+	do {
+		extern void native_iret(void);
+		if (pv_cpu_ops.iret == native_iret)
+			set_cpu_bug(c, X86_BUG_ESPFIX);
+	} while (0);
+# else
+	set_cpu_bug(c, X86_BUG_ESPFIX);
+# endif
+#endif
 }
 }
 
 
 static void x86_init_cache_qos(struct cpuinfo_x86 *c)
 static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1080,12 +1111,12 @@ void enable_sep_cpu(void)
 	struct tss_struct *tss;
 	struct tss_struct *tss;
 	int cpu;
 	int cpu;
 
 
+	if (!boot_cpu_has(X86_FEATURE_SEP))
+		return;
+
 	cpu = get_cpu();
 	cpu = get_cpu();
 	tss = &per_cpu(cpu_tss, cpu);
 	tss = &per_cpu(cpu_tss, cpu);
 
 
-	if (!boot_cpu_has(X86_FEATURE_SEP))
-		goto out;
-
 	/*
 	/*
 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
 	 * see the big comment in struct x86_hw_tss's definition.
 	 * see the big comment in struct x86_hw_tss's definition.
@@ -1100,7 +1131,6 @@ void enable_sep_cpu(void)
 
 
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
 
-out:
 	put_cpu();
 	put_cpu();
 }
 }
 #endif
 #endif
@@ -1532,7 +1562,7 @@ void cpu_init(void)
 	pr_info("Initializing CPU#%d\n", cpu);
 	pr_info("Initializing CPU#%d\n", cpu);
 
 
 	if (cpu_feature_enabled(X86_FEATURE_VME) ||
 	if (cpu_feature_enabled(X86_FEATURE_VME) ||
-	    cpu_has_tsc ||
+	    boot_cpu_has(X86_FEATURE_TSC) ||
 	    boot_cpu_has(X86_FEATURE_DE))
 	    boot_cpu_has(X86_FEATURE_DE))
 		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 

+ 1 - 1
arch/x86/kernel/cpu/cyrix.c

@@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 		switch (dir0_lsn) {
 		case 0xd:  /* either a 486SLC or DLC w/o DEVID */
 			dir0_msn = 0;
-			p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
+			p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
 			break;
 
 		case 0xe:  /* a 486S A step */
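
A side note on the !!boot_cpu_has(X86_FEATURE_FPU) form above (my explanation, not part of the commit): the double negation collapses any non-zero result into exactly 1, so it is safe as an array index, whereas a raw feature-test result is only guaranteed to be zero or non-zero. A trivial, self-contained illustration:

	/* Illustrative only: normalize a truth value before indexing. */
	static const char *pick_name(const char *names[2], unsigned long feature_bits)
	{
		return names[!!feature_bits];	/* always 0 or 1 */
	}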

+ 6 - 6
arch/x86/kernel/cpu/intel.c

@@ -152,9 +152,9 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 	 *  the TLB when any changes are made to any of the page table entries.
 	 *  The operating system must reload CR3 to cause the TLB to be flushed"
 	 *
-	 * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
-	 * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
-	 * to be modified
+	 * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
+	 * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
+	 * to be modified.
 	 */
 	if (c->x86 == 5 && c->x86_model == 9) {
 		pr_info("Disabling PGE capability bit\n");
@@ -281,7 +281,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
 	 * integrated APIC (see 11AP erratum in "Pentium Processor
 	 * Specification Update").
 	 */
-	if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+	if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
 	    (c->x86_mask < 0x6 || c->x86_mask == 0xb))
 		set_cpu_bug(c, X86_BUG_11AP);
 
@@ -456,7 +456,7 @@ static void init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
-	if (cpu_has_xmm2)
+	if (cpu_has(c, X86_FEATURE_XMM2))
 		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 
 	if (boot_cpu_has(X86_FEATURE_DS)) {
@@ -468,7 +468,7 @@ static void init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_PEBS);
 	}
 
-	if (c->x86 == 6 && cpu_has_clflush &&
+	if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
 	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
 		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
 
 

+ 1 - 1
arch/x86/kernel/cpu/mcheck/mce_intel.c

@@ -84,7 +84,7 @@ static int cmci_supported(int *banks)
 	 */
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;
-	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
 		return 0;
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);

+ 1 - 1
arch/x86/kernel/cpu/mcheck/therm_throt.c

@@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
 static int intel_thermal_supported(struct cpuinfo_x86 *c)
 {
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return 0;
 	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
 		return 0;

+ 2 - 2
arch/x86/kernel/cpu/mtrr/cyrix.c

@@ -137,7 +137,7 @@ static void prepare_set(void)
 	u32 cr0;
 
 	/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
-	if (cpu_has_pge) {
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4 = __read_cr4();
 		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
@@ -170,7 +170,7 @@ static void post_set(void)
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Restore value of CR4 */
-	if (cpu_has_pge)
+	if (boot_cpu_has(X86_FEATURE_PGE))
 		__write_cr4(cr4);
 }
 
 

+ 16 - 12
arch/x86/kernel/cpu/mtrr/generic.c

@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
 		pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
 }
 
+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	prepare_set();
+
+	pat_init();
+
+	post_set();
+	local_irq_restore(flags);
+}
+
 /* Grab all of the MTRR state for this CPU into *state */
 bool __init get_mtrr_state(void)
 {
 	struct mtrr_var_range *vrs;
-	unsigned long flags;
 	unsigned lo, dummy;
 	unsigned int i;
 
@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
 
 	mtrr_state_set = 1;
 
-	/* PAT setup for BP. We need to go through sync steps here */
-	local_irq_save(flags);
-	prepare_set();
-
-	pat_init();
-
-	post_set();
-	local_irq_restore(flags);
-
 	return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
 }
 
@@ -741,7 +745,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	wbinvd();
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
-	if (cpu_has_pge) {
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4 = __read_cr4();
 		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
@@ -771,7 +775,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Restore value of CR4 */
-	if (cpu_has_pge)
+	if (boot_cpu_has(X86_FEATURE_PGE))
 		__write_cr4(cr4);
 	raw_spin_unlock(&set_atomicity_lock);
 }

+ 12 - 1
arch/x86/kernel/cpu/mtrr/main.c

@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
 			/* BIOS may override */
 			__mtrr_enabled = get_mtrr_state();
 
+			if (mtrr_enabled())
+				mtrr_bp_pat_init();
+
 			if (mtrr_cleanup(phys_addr)) {
 				changed_by_mtrr_cleanup = 1;
 				mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
 		}
 	}
 
-	if (!mtrr_enabled())
+	if (!mtrr_enabled()) {
 		pr_info("MTRR: Disabled\n");
+
+		/*
+		 * PAT initialization relies on MTRR's rendezvous handler.
+		 * Skip PAT init until the handler can initialize both
+		 * features independently.
+		 */
+		pat_disable("MTRRs disabled, skipping PAT initialization too.");
+	}
 }
 
 void mtrr_ap_init(void)
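
Read together with the mtrr/generic.c hunk above, the boot-processor flow implied by these changes is roughly the sketch below (my paraphrase of the call order, not code from the merge): PAT is only set up after the MTRR state has been read, inside the same prepare_set()/post_set() bracket, and is disabled outright when MTRRs are.

	/* Sketch of the BP-side ordering implied by mtrr_bp_init() above. */
	static void __init mtrr_bp_init_flow(void)
	{
		__mtrr_enabled = get_mtrr_state();	/* read MTRR registers first */

		if (mtrr_enabled())
			mtrr_bp_pat_init();		/* PAT inside prepare_set()/post_set() */
		else
			pat_disable("MTRRs disabled, skipping PAT initialization too.");
	}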

+ 1 - 0
arch/x86/kernel/cpu/mtrr/mtrr.h

@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 void fill_mtrr_var_range(unsigned int index,
 		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);
 
 extern void set_mtrr_ops(const struct mtrr_ops *ops);
 
 

+ 1 - 1
arch/x86/kernel/cpu/vmware.c

@@ -94,7 +94,7 @@ static void __init vmware_platform_setup(void)
  */
 static uint32_t __init vmware_platform(void)
 {
-	if (cpu_has_hypervisor) {
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
 		unsigned int eax;
 		unsigned int hyper_vendor_id[3];
 
 

+ 1 - 1
arch/x86/kernel/devicetree.c

@@ -151,7 +151,7 @@ static void __init dtb_lapic_setup(void)
 		return;
 
 	/* Did the boot loader setup the local APIC ? */
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		if (apic_force_enable(r.start))
 			return;
 	}

+ 5 - 11
arch/x86/kernel/fpu/bugs.c

@@ -21,11 +21,15 @@ static double __initdata y = 3145727.0;
  * We should really only care about bugs here
  * anyway. Not features.
  */
-static void __init check_fpu(void)
+void __init fpu__init_check_bugs(void)
 {
 	u32 cr0_saved;
 	s32 fdiv_bug;
 
+	/* kernel_fpu_begin/end() relies on patched alternative instructions. */
+	if (!boot_cpu_has(X86_FEATURE_FPU))
+		return;
+
 	/* We might have CR0::TS set already, clear it: */
 	cr0_saved = read_cr0();
 	write_cr0(cr0_saved & ~X86_CR0_TS);
@@ -59,13 +63,3 @@ static void __init check_fpu(void)
 		pr_warn("Hmm, FPU with FDIV bug\n");
 	}
 }
-
-void __init fpu__init_check_bugs(void)
-{
-	/*
-	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
-	 * alternative instructions.
-	 */
-	if (cpu_has_fpu)
-		check_fpu();
-}
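
For orientation, the probe that fpu__init_check_bugs() now runs inline is the classic Pentium FDIV test. A rough user-space analogue is sketched below; only the divisor 3145727.0 appears in the hunk header above, and the 4195835.0 operand is the well-known companion value from the published test case, so treat the pair as an assumption on my part.

	#include <stdio.h>

	/* Rough user-space analogue of the kernel's FDIV-bug probe. */
	int main(void)
	{
		volatile double x = 4195835.0, y = 3145727.0;
		double r = x - (x / y) * y;

		/* A correct FPU leaves r essentially zero; the flawed one leaves ~256. */
		printf(r > 1.0 ? "FDIV bug present\n" : "FPU looks sane\n");
		return 0;
	}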

+ 16 - 34
arch/x86/kernel/fpu/core.c

@@ -217,14 +217,14 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
 
 
 void fpstate_init(union fpregs_state *state)
 void fpstate_init(union fpregs_state *state)
 {
 {
-	if (!cpu_has_fpu) {
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		fpstate_init_soft(&state->soft);
 		fpstate_init_soft(&state->soft);
 		return;
 		return;
 	}
 	}
 
 
 	memset(state, 0, xstate_size);
 	memset(state, 0, xstate_size);
 
 
-	if (cpu_has_fxsr)
+	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
 		fpstate_init_fxstate(&state->fxsave);
 	else
 	else
 		fpstate_init_fstate(&state->fsave);
 		fpstate_init_fstate(&state->fsave);
@@ -237,7 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 	dst_fpu->last_cpu = -1;
 
 
-	if (!src_fpu->fpstate_active || !cpu_has_fpu)
+	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 		return 0;
 
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -506,33 +506,6 @@ void fpu__clear(struct fpu *fpu)
  * x87 math exception handling:
  * x87 math exception handling:
  */
  */
 
 
-static inline unsigned short get_fpu_cwd(struct fpu *fpu)
-{
-	if (cpu_has_fxsr) {
-		return fpu->state.fxsave.cwd;
-	} else {
-		return (unsigned short)fpu->state.fsave.cwd;
-	}
-}
-
-static inline unsigned short get_fpu_swd(struct fpu *fpu)
-{
-	if (cpu_has_fxsr) {
-		return fpu->state.fxsave.swd;
-	} else {
-		return (unsigned short)fpu->state.fsave.swd;
-	}
-}
-
-static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
-{
-	if (cpu_has_xmm) {
-		return fpu->state.fxsave.mxcsr;
-	} else {
-		return MXCSR_DEFAULT;
-	}
-}
-
 int fpu__exception_code(struct fpu *fpu, int trap_nr)
 int fpu__exception_code(struct fpu *fpu, int trap_nr)
 {
 {
 	int err;
 	int err;
@@ -547,10 +520,15 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
 		 * so if this combination doesn't produce any single exception,
 		 * so if this combination doesn't produce any single exception,
 		 * then we have a bad program that isn't synchronizing its FPU usage
 		 * then we have a bad program that isn't synchronizing its FPU usage
 		 * and it will suffer the consequences since we won't be able to
 		 * and it will suffer the consequences since we won't be able to
-		 * fully reproduce the context of the exception
+		 * fully reproduce the context of the exception.
 		 */
 		 */
-		cwd = get_fpu_cwd(fpu);
-		swd = get_fpu_swd(fpu);
+		if (boot_cpu_has(X86_FEATURE_FXSR)) {
+			cwd = fpu->state.fxsave.cwd;
+			swd = fpu->state.fxsave.swd;
+		} else {
+			cwd = (unsigned short)fpu->state.fsave.cwd;
+			swd = (unsigned short)fpu->state.fsave.swd;
+		}
 
 
 		err = swd & ~cwd;
 		err = swd & ~cwd;
 	} else {
 	} else {
@@ -560,7 +538,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
 		 * unmasked exception was caught we must mask the exception mask bits
 		 * unmasked exception was caught we must mask the exception mask bits
 		 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
 		 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
 		 */
 		 */
-		unsigned short mxcsr = get_fpu_mxcsr(fpu);
+		unsigned short mxcsr = MXCSR_DEFAULT;
+
+		if (boot_cpu_has(X86_FEATURE_XMM))
+			mxcsr = fpu->state.fxsave.mxcsr;
+
 		err = ~(mxcsr >> 7) & mxcsr;
 		err = ~(mxcsr >> 7) & mxcsr;
 	}
 	}
 
 

+ 8 - 8
arch/x86/kernel/fpu/init.c

@@ -29,22 +29,22 @@ static void fpu__init_cpu_generic(void)
 	unsigned long cr0;
 	unsigned long cr0;
 	unsigned long cr4_mask = 0;
 	unsigned long cr4_mask = 0;
 
 
-	if (cpu_has_fxsr)
+	if (boot_cpu_has(X86_FEATURE_FXSR))
 		cr4_mask |= X86_CR4_OSFXSR;
 		cr4_mask |= X86_CR4_OSFXSR;
-	if (cpu_has_xmm)
+	if (boot_cpu_has(X86_FEATURE_XMM))
 		cr4_mask |= X86_CR4_OSXMMEXCPT;
 		cr4_mask |= X86_CR4_OSXMMEXCPT;
 	if (cr4_mask)
 	if (cr4_mask)
 		cr4_set_bits(cr4_mask);
 		cr4_set_bits(cr4_mask);
 
 
 	cr0 = read_cr0();
 	cr0 = read_cr0();
 	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
 	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
-	if (!cpu_has_fpu)
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		cr0 |= X86_CR0_EM;
 		cr0 |= X86_CR0_EM;
 	write_cr0(cr0);
 	write_cr0(cr0);
 
 
 	/* Flush out any pending x87 state: */
 	/* Flush out any pending x87 state: */
 #ifdef CONFIG_MATH_EMULATION
 #ifdef CONFIG_MATH_EMULATION
-	if (!cpu_has_fpu)
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		fpstate_init_soft(&current->thread.fpu.state.soft);
 		fpstate_init_soft(&current->thread.fpu.state.soft);
 	else
 	else
 #endif
 #endif
@@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
 	}
 	}
 
 
 #ifndef CONFIG_MATH_EMULATION
 #ifndef CONFIG_MATH_EMULATION
-	if (!cpu_has_fpu) {
+	if (!boot_cpu_has(X86_FEATURE_FPU)) {
 		pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
 		pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
 		for (;;)
 		for (;;)
 			asm volatile("hlt");
 			asm volatile("hlt");
@@ -106,7 +106,7 @@ static void __init fpu__init_system_mxcsr(void)
 {
 {
 	unsigned int mask = 0;
 	unsigned int mask = 0;
 
 
-	if (cpu_has_fxsr) {
+	if (boot_cpu_has(X86_FEATURE_FXSR)) {
 		/* Static because GCC does not get 16-byte stack alignment right: */
 		/* Static because GCC does not get 16-byte stack alignment right: */
 		static struct fxregs_state fxregs __initdata;
 		static struct fxregs_state fxregs __initdata;
 
 
@@ -212,7 +212,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	 * fpu__init_system_xstate().
 	 * fpu__init_system_xstate().
 	 */
 	 */
 
 
-	if (!cpu_has_fpu) {
+	if (!boot_cpu_has(X86_FEATURE_FPU)) {
 		/*
 		/*
 		 * Disable xsave as we do not support it if i387
 		 * Disable xsave as we do not support it if i387
 		 * emulation is enabled.
 		 * emulation is enabled.
@@ -221,7 +221,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 		xstate_size = sizeof(struct swregs_state);
 		xstate_size = sizeof(struct swregs_state);
 	} else {
 	} else {
-		if (cpu_has_fxsr)
+		if (boot_cpu_has(X86_FEATURE_FXSR))
 			xstate_size = sizeof(struct fxregs_state);
 			xstate_size = sizeof(struct fxregs_state);
 		else
 		else
 			xstate_size = sizeof(struct fregs_state);
 			xstate_size = sizeof(struct fregs_state);

+ 14 - 11
arch/x86/kernel/fpu/regset.c

@@ -21,7 +21,10 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
 {
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 	struct fpu *target_fpu = &target->thread.fpu;
 
 
-	return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
+	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+		return regset->n;
+	else
+		return 0;
 }
 }
 
 
 int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -30,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 {
 {
 	struct fpu *fpu = &target->thread.fpu;
 	struct fpu *fpu = &target->thread.fpu;
 
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 		return -ENODEV;
 
 
 	fpu__activate_fpstate_read(fpu);
 	fpu__activate_fpstate_read(fpu);
@@ -47,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct fpu *fpu = &target->thread.fpu;
 	struct fpu *fpu = &target->thread.fpu;
 	int ret;
 	int ret;
 
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 		return -ENODEV;
 
 
 	fpu__activate_fpstate_write(fpu);
 	fpu__activate_fpstate_write(fpu);
@@ -65,7 +68,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	 * update the header bits in the xsave header, indicating the
 	 * update the header bits in the xsave header, indicating the
 	 * presence of FP and SSE state.
 	 * presence of FP and SSE state.
 	 */
 	 */
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
 		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
 
 
 	return ret;
 	return ret;
@@ -79,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	struct xregs_state *xsave;
 	struct xregs_state *xsave;
 	int ret;
 	int ret;
 
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -ENODEV;
 		return -ENODEV;
 
 
 	fpu__activate_fpstate_read(fpu);
 	fpu__activate_fpstate_read(fpu);
@@ -108,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct xregs_state *xsave;
 	struct xregs_state *xsave;
 	int ret;
 	int ret;
 
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -ENODEV;
 		return -ENODEV;
 
 
 	fpu__activate_fpstate_write(fpu);
 	fpu__activate_fpstate_write(fpu);
@@ -275,10 +278,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 
 
 	fpu__activate_fpstate_read(fpu);
 	fpu__activate_fpstate_read(fpu);
 
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 					   &fpu->state.fsave, 0,
 					   &fpu->state.fsave, 0,
 					   -1);
 					   -1);
@@ -306,10 +309,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_write(fpu);
 	fpu__activate_fpstate_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 					  &fpu->state.fsave, 0,
 					  &fpu->state.fsave, 0,
 					  -1);
 					  -1);
@@ -325,7 +328,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	 * update the header bit in the xsave header, indicating the
 	 * update the header bit in the xsave header, indicating the
 	 * presence of FP.
 	 * presence of FP.
 	 */
 	 */
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
 		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
 	return ret;
 	return ret;
 }
 }

+ 9 - 9
arch/x86/kernel/fpu/xstate.c

@@ -190,7 +190,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
  */
  */
 void fpu__init_cpu_xstate(void)
 void fpu__init_cpu_xstate(void)
 {
 {
-	if (!cpu_has_xsave || !xfeatures_mask)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
 		return;
 		return;
 
 
 	cr4_set_bits(X86_CR4_OSXSAVE);
 	cr4_set_bits(X86_CR4_OSXSAVE);
@@ -280,7 +280,7 @@ static void __init setup_xstate_comp(void)
 	xstate_comp_offsets[0] = 0;
 	xstate_comp_offsets[0] = 0;
 	xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
 	xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
 
 
-	if (!cpu_has_xsaves) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
 		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 			if (xfeature_enabled(i)) {
 			if (xfeature_enabled(i)) {
 				xstate_comp_offsets[i] = xstate_offsets[i];
 				xstate_comp_offsets[i] = xstate_offsets[i];
@@ -316,13 +316,13 @@ static void __init setup_init_fpu_buf(void)
 	WARN_ON_FPU(!on_boot_cpu);
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 	on_boot_cpu = 0;
 
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return;
 		return;
 
 
 	setup_xstate_features();
 	setup_xstate_features();
 	print_xstate_features();
 	print_xstate_features();
 
 
-	if (cpu_has_xsaves) {
+	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
 		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
 		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
 		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
 	}
 	}
@@ -417,7 +417,7 @@ static int xfeature_size(int xfeature_nr)
  */
  */
 static int using_compacted_format(void)
 static int using_compacted_format(void)
 {
 {
-	return cpu_has_xsaves;
+	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 }
 
 
 static void __xstate_dump_leaves(void)
 static void __xstate_dump_leaves(void)
@@ -549,7 +549,7 @@ static unsigned int __init calculate_xstate_size(void)
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int calculated_xstate_size;
 	unsigned int calculated_xstate_size;
 
 
-	if (!cpu_has_xsaves) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
 		/*
 		/*
 		 * - CPUID function 0DH, sub-function 0:
 		 * - CPUID function 0DH, sub-function 0:
 		 *    EBX enumerates the size (in bytes) required by
 		 *    EBX enumerates the size (in bytes) required by
@@ -630,7 +630,7 @@ void __init fpu__init_system_xstate(void)
 	WARN_ON_FPU(!on_boot_cpu);
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 	on_boot_cpu = 0;
 
 
-	if (!cpu_has_xsave) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
 		pr_info("x86/fpu: Legacy x87 FPU detected.\n");
 		pr_info("x86/fpu: Legacy x87 FPU detected.\n");
 		return;
 		return;
 	}
 	}
@@ -667,7 +667,7 @@ void __init fpu__init_system_xstate(void)
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask,
 		xfeatures_mask,
 		xstate_size,
 		xstate_size,
-		cpu_has_xsaves ? "compacted" : "standard");
+		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 }
 }
 
 
 /*
 /*
@@ -678,7 +678,7 @@ void fpu__resume_cpu(void)
 	/*
 	/*
 	 * Restore XCR0 on xsave capable CPUs:
 	 * Restore XCR0 on xsave capable CPUs:
 	 */
 	 */
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
 		xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
 }
 }
 
 

+ 49 - 67
arch/x86/kernel/head_32.S

@@ -555,62 +555,53 @@ early_idt_handler_common:
 	 */
 	 */
 	cld
 	cld
 
 
-	cmpl $2,(%esp)		# X86_TRAP_NMI
-	je .Lis_nmi		# Ignore NMI
-
-	cmpl $2,%ss:early_recursion_flag
-	je hlt_loop
 	incl %ss:early_recursion_flag
 	incl %ss:early_recursion_flag
 
 
-	push %eax		# 16(%esp)
-	push %ecx		# 12(%esp)
-	push %edx		#  8(%esp)
-	push %ds		#  4(%esp)
-	push %es		#  0(%esp)
-	movl $(__KERNEL_DS),%eax
-	movl %eax,%ds
-	movl %eax,%es
-
-	cmpl $(__KERNEL_CS),32(%esp)
-	jne 10f
+	/* The vector number is in pt_regs->gs */
 
 
-	leal 28(%esp),%eax	# Pointer to %eip
-	call early_fixup_exception
-	andl %eax,%eax
-	jnz ex_entry		/* found an exception entry */
-
-10:
-#ifdef CONFIG_PRINTK
-	xorl %eax,%eax
-	movw %ax,2(%esp)	/* clean up the segment values on some cpus */
-	movw %ax,6(%esp)
-	movw %ax,34(%esp)
-	leal  40(%esp),%eax
-	pushl %eax		/* %esp before the exception */
-	pushl %ebx
-	pushl %ebp
-	pushl %esi
-	pushl %edi
-	movl %cr2,%eax
-	pushl %eax
-	pushl (20+6*4)(%esp)	/* trapno */
-	pushl $fault_msg
-	call printk
-#endif
-	call dump_stack
-hlt_loop:
-	hlt
-	jmp hlt_loop
-
-ex_entry:
-	pop %es
-	pop %ds
-	pop %edx
-	pop %ecx
-	pop %eax
-	decl %ss:early_recursion_flag
-.Lis_nmi:
-	addl $8,%esp		/* drop vector number and error code */
+	cld
+	pushl	%fs		/* pt_regs->fs */
+	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
+	pushl	%es		/* pt_regs->es */
+	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
+	pushl	%ds		/* pt_regs->ds */
+	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
+	pushl	%eax		/* pt_regs->ax */
+	pushl	%ebp		/* pt_regs->bp */
+	pushl	%edi		/* pt_regs->di */
+	pushl	%esi		/* pt_regs->si */
+	pushl	%edx		/* pt_regs->dx */
+	pushl	%ecx		/* pt_regs->cx */
+	pushl	%ebx		/* pt_regs->bx */
+
+	/* Fix up DS and ES */
+	movl	$(__KERNEL_DS), %ecx
+	movl	%ecx, %ds
+	movl	%ecx, %es
+
+	/* Load the vector number into EDX */
+	movl	PT_GS(%esp), %edx
+
+	/* Load GS into pt_regs->gs and clear high bits */
+	movw	%gs, PT_GS(%esp)
+	movw	$0, PT_GS+2(%esp)
+
+	movl	%esp, %eax	/* args are pt_regs (EAX), trapnr (EDX) */
+	call	early_fixup_exception
+
+	popl	%ebx		/* pt_regs->bx */
+	popl	%ecx		/* pt_regs->cx */
+	popl	%edx		/* pt_regs->dx */
+	popl	%esi		/* pt_regs->si */
+	popl	%edi		/* pt_regs->di */
+	popl	%ebp		/* pt_regs->bp */
+	popl	%eax		/* pt_regs->ax */
+	popl	%ds		/* pt_regs->ds */
+	popl	%es		/* pt_regs->es */
+	popl	%fs		/* pt_regs->fs */
+	popl	%gs		/* pt_regs->gs */
+	decl	%ss:early_recursion_flag
+	addl	$4, %esp	/* pop pt_regs->orig_ax */
 	iret
 	iret
 ENDPROC(early_idt_handler_common)
 ENDPROC(early_idt_handler_common)
 
 
@@ -647,10 +638,14 @@ ignore_int:
 	popl %eax
 	popl %eax
 #endif
 #endif
 	iret
 	iret
+
+hlt_loop:
+	hlt
+	jmp hlt_loop
 ENDPROC(ignore_int)
 ENDPROC(ignore_int)
 __INITDATA
 __INITDATA
 	.align 4
 	.align 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
 	.long 0
 	.long 0
 
 
 __REFDATA
 __REFDATA
@@ -715,19 +710,6 @@ __INITRODATA
 int_msg:
 int_msg:
 	.asciz "Unknown interrupt or fault at: %p %p %p\n"
 	.asciz "Unknown interrupt or fault at: %p %p %p\n"
 
 
-fault_msg:
-/* fault info: */
-	.ascii "BUG: Int %d: CR2 %p\n"
-/* regs pushed in early_idt_handler: */
-	.ascii "     EDI %p  ESI %p  EBP %p  EBX %p\n"
-	.ascii "     ESP %p   ES %p   DS %p\n"
-	.ascii "     EDX %p  ECX %p  EAX %p\n"
-/* fault frame: */
-	.ascii "     vec %p  err %p  EIP %p   CS %p  flg %p\n"
-	.ascii "Stack: %p %p %p %p %p %p %p %p\n"
-	.ascii "       %p %p %p %p %p %p %p %p\n"
-	.asciz "       %p %p %p %p %p %p %p %p\n"
-
 #include "../../x86/xen/xen-head.S"
 #include "../../x86/xen/xen-head.S"
 
 
 /*
 /*

+ 35 - 68
arch/x86/kernel/head_64.S

@@ -20,6 +20,7 @@
 #include <asm/processor-flags.h>
 #include <asm/processor-flags.h>
 #include <asm/percpu.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
 #include <asm/nops.h>
+#include "../entry/calling.h"
 
 
 #ifdef CONFIG_PARAVIRT
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
 #include <asm/asm-offsets.h>
@@ -64,6 +65,14 @@ startup_64:
 	 * tables and then reload them.
 	 * tables and then reload them.
 	 */
 	 */
 
 
+	/*
+	 * Setup stack for verify_cpu(). "-8" because stack_start is defined
+	 * this way, see below. Our best guess is a NULL ptr for stack
+	 * termination heuristics and we don't want to break anything which
+	 * might depend on it (kgdb, ...).
+	 */
+	leaq	(__end_init_task - 8)(%rip), %rsp
+
 	/* Sanitize CPU configuration */
 	/* Sanitize CPU configuration */
 	call verify_cpu
 	call verify_cpu
 
 
@@ -350,90 +359,48 @@ early_idt_handler_common:
 	 */
 	 */
 	cld
 	cld
 
 
-	cmpl $2,(%rsp)		# X86_TRAP_NMI
-	je .Lis_nmi		# Ignore NMI
-
-	cmpl $2,early_recursion_flag(%rip)
-	jz  1f
 	incl early_recursion_flag(%rip)
 	incl early_recursion_flag(%rip)
 
 
-	pushq %rax		# 64(%rsp)
-	pushq %rcx		# 56(%rsp)
-	pushq %rdx		# 48(%rsp)
-	pushq %rsi		# 40(%rsp)
-	pushq %rdi		# 32(%rsp)
-	pushq %r8		# 24(%rsp)
-	pushq %r9		# 16(%rsp)
-	pushq %r10		#  8(%rsp)
-	pushq %r11		#  0(%rsp)
-
-	cmpl $__KERNEL_CS,96(%rsp)
-	jne 11f
-
-	cmpl $14,72(%rsp)	# Page fault?
+	/* The vector number is currently in the pt_regs->di slot. */
+	pushq %rsi				/* pt_regs->si */
+	movq 8(%rsp), %rsi			/* RSI = vector number */
+	movq %rdi, 8(%rsp)			/* pt_regs->di = RDI */
+	pushq %rdx				/* pt_regs->dx */
+	pushq %rcx				/* pt_regs->cx */
+	pushq %rax				/* pt_regs->ax */
+	pushq %r8				/* pt_regs->r8 */
+	pushq %r9				/* pt_regs->r9 */
+	pushq %r10				/* pt_regs->r10 */
+	pushq %r11				/* pt_regs->r11 */
+	pushq %rbx				/* pt_regs->bx */
+	pushq %rbp				/* pt_regs->bp */
+	pushq %r12				/* pt_regs->r12 */
+	pushq %r13				/* pt_regs->r13 */
+	pushq %r14				/* pt_regs->r14 */
+	pushq %r15				/* pt_regs->r15 */
+
+	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
 	jnz 10f
-	GET_CR2_INTO(%rdi)	# can clobber any volatile register if pv
+	GET_CR2_INTO(%rdi)	/* Can clobber any volatile register if pv */
 	call early_make_pgtable
 	call early_make_pgtable
 	andl %eax,%eax
 	andl %eax,%eax
-	jz 20f			# All good
+	jz 20f			/* All good */
 
 
 10:
 10:
-	leaq 88(%rsp),%rdi	# Pointer to %rip
+	movq %rsp,%rdi		/* RDI = pt_regs; RSI is already trapnr */
 	call early_fixup_exception
 	call early_fixup_exception
-	andl %eax,%eax
-	jnz 20f			# Found an exception entry
-
-11:
-#ifdef CONFIG_EARLY_PRINTK
-	GET_CR2_INTO(%r9)	# can clobber any volatile register if pv
-	movl 80(%rsp),%r8d	# error code
-	movl 72(%rsp),%esi	# vector number
-	movl 96(%rsp),%edx	# %cs
-	movq 88(%rsp),%rcx	# %rip
-	xorl %eax,%eax
-	leaq early_idt_msg(%rip),%rdi
-	call early_printk
-	cmpl $2,early_recursion_flag(%rip)
-	jz  1f
-	call dump_stack
-#ifdef CONFIG_KALLSYMS	
-	leaq early_idt_ripmsg(%rip),%rdi
-	movq 40(%rsp),%rsi	# %rip again
-	call __print_symbol
-#endif
-#endif /* EARLY_PRINTK */
-1:	hlt
-	jmp 1b
-
-20:	# Exception table entry found or page table generated
-	popq %r11
-	popq %r10
-	popq %r9
-	popq %r8
-	popq %rdi
-	popq %rsi
-	popq %rdx
-	popq %rcx
-	popq %rax
+
+20:
 	decl early_recursion_flag(%rip)
 	decl early_recursion_flag(%rip)
-.Lis_nmi:
-	addq $16,%rsp		# drop vector number and error code
-	INTERRUPT_RETURN
+	jmp restore_regs_and_iret
 ENDPROC(early_idt_handler_common)
 ENDPROC(early_idt_handler_common)
 
 
 	__INITDATA
 	__INITDATA
 
 
 	.balign 4
 	.balign 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
 	.long 0
 	.long 0
 
 
-#ifdef CONFIG_EARLY_PRINTK
-early_idt_msg:
-	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
-	.asciz "RIP %s\n"
-#endif /* CONFIG_EARLY_PRINTK */
-
 #define NEXT_PAGE(name) \
 #define NEXT_PAGE(name) \
 	.balign	PAGE_SIZE; \
 	.balign	PAGE_SIZE; \
 GLOBAL(name)
 GLOBAL(name)

+ 0 - 1
arch/x86/kernel/hpet.c

@@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = {
 	.mask		= HPET_MASK,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 	.resume		= hpet_resume_counter,
-	.archdata	= { .vclock_mode = VCLOCK_HPET },
 };
 
 static int hpet_clocksource_register(void)

+ 1 - 0
arch/x86/kernel/jump_label.c

@@ -13,6 +13,7 @@
 #include <linux/cpu.h>
 #include <asm/kprobes.h>
 #include <asm/alternative.h>
+#include <asm/text-patching.h>
 
 #ifdef HAVE_JUMP_LABEL
 
 

+ 1 - 0
arch/x86/kernel/kgdb.c

@@ -45,6 +45,7 @@
 #include <linux/uaccess.h>
 #include <linux/memory.h>
 
+#include <asm/text-patching.h>
 #include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/apic.h>

+ 1 - 0
arch/x86/kernel/kprobes/core.c

@@ -51,6 +51,7 @@
 #include <linux/ftrace.h>
 #include <linux/frame.h>
 
+#include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>

+ 1 - 0
arch/x86/kernel/kprobes/opt.c

@@ -29,6 +29,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
 
+#include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>

+ 1 - 1
arch/x86/kernel/kvm.c

@@ -522,7 +522,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
 	if (boot_cpu_data.cpuid_level < 0)
 		return 0;	/* So we don't blow up on old processors */
 
-	if (cpu_has_hypervisor)
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
 
 	return 0;

+ 1 - 0
arch/x86/kernel/module.c

@@ -31,6 +31,7 @@
 #include <linux/jump_label.h>
 #include <linux/random.h>
 
+#include <asm/text-patching.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>

+ 4 - 2
arch/x86/kernel/paravirt.c

@@ -339,8 +339,10 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.write_cr8 = native_write_cr8,
 #endif
 	.wbinvd = native_wbinvd,
-	.read_msr = native_read_msr_safe,
-	.write_msr = native_write_msr_safe,
+	.read_msr = native_read_msr,
+	.write_msr = native_write_msr,
+	.read_msr_safe = native_read_msr_safe,
+	.write_msr_safe = native_write_msr_safe,
 	.read_pmc = native_read_pmc,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
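
The pv_cpu_ops split above separates plain MSR accessors from the _safe ones: the plain form is for MSRs the caller knows exist, while the _safe form reports failure to the caller instead of faulting. A hedged sketch of the two call styles at a driver level, using the long-standing rdmsrl()/rdmsrl_safe() wrappers rather than the pv hooks directly; the choice of MSRs here is illustrative only.

	#include <linux/printk.h>
	#include <asm/msr.h>

	static void example_msr_access(void)
	{
		u64 val;
		int err;

		/* Plain access: the MSR is expected to exist on this CPU. */
		rdmsrl(MSR_IA32_APICBASE, val);

		/* Safe access: probe an MSR that may be absent and check the result. */
		err = rdmsrl_safe(MSR_IA32_MCG_CAP, &val);
		if (err)
			pr_debug("MSR not readable on this CPU\n");
	}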

+ 113 - 128
arch/x86/kernel/process_64.c

@@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task)
 	}
 	}
 }
 }
 
 
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
-	struct user_desc ud = {
-		.base_addr = addr,
-		.limit = 0xfffff,
-		.seg_32bit = 1,
-		.limit_in_pages = 1,
-		.useable = 1,
-	};
-	struct desc_struct *desc = t->thread.tls_array;
-	desc += tls;
-	fill_ldt(desc, &ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
-	return get_desc_base(&t->thread.tls_array[tls]);
-}
-
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 		unsigned long arg, struct task_struct *p, unsigned long tls)
 		unsigned long arg, struct task_struct *p, unsigned long tls)
 {
 {
@@ -169,9 +150,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	p->thread.io_bitmap_ptr = NULL;
 	p->thread.io_bitmap_ptr = NULL;
 
 
 	savesegment(gs, p->thread.gsindex);
 	savesegment(gs, p->thread.gsindex);
-	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
+	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
 	savesegment(fs, p->thread.fsindex);
 	savesegment(fs, p->thread.fsindex);
-	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
+	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
 	savesegment(es, p->thread.es);
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
 	savesegment(ds, p->thread.ds);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -210,7 +191,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	 */
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
 #ifdef CONFIG_IA32_EMULATION
-		if (is_ia32_task())
+		if (in_ia32_syscall())
 			err = do_set_thread_area(p, -1,
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)tls, 0);
 				(struct user_desc __user *)tls, 0);
 		else
 		else
@@ -282,7 +263,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *next_fpu = &next->fpu;
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-	unsigned fsindex, gsindex;
+	unsigned prev_fsindex, prev_gsindex;
 	fpu_switch_t fpu_switch;
 	fpu_switch_t fpu_switch;
 
 
 	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
 	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -292,8 +273,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 *
 	 *
 	 * (e.g. xen_load_tls())
 	 * (e.g. xen_load_tls())
 	 */
 	 */
-	savesegment(fs, fsindex);
-	savesegment(gs, gsindex);
+	savesegment(fs, prev_fsindex);
+	savesegment(gs, prev_gsindex);
 
 
 	/*
 	/*
 	 * Load TLS before restoring any segments so that segment loads
 	 * Load TLS before restoring any segments so that segment loads
@@ -336,66 +317,104 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Switch FS and GS.
 	 * Switch FS and GS.
 	 *
 	 *
 	 * These are even more complicated than DS and ES: they have
 	 * These are even more complicated than DS and ES: they have
-	 * 64-bit bases are that controlled by arch_prctl.  Those bases
-	 * only differ from the values in the GDT or LDT if the selector
-	 * is 0.
-	 *
-	 * Loading the segment register resets the hidden base part of
-	 * the register to 0 or the value from the GDT / LDT.  If the
-	 * next base address zero, writing 0 to the segment register is
-	 * much faster than using wrmsr to explicitly zero the base.
-	 *
-	 * The thread_struct.fs and thread_struct.gs values are 0
-	 * if the fs and gs bases respectively are not overridden
-	 * from the values implied by fsindex and gsindex.  They
-	 * are nonzero, and store the nonzero base addresses, if
-	 * the bases are overridden.
-	 *
-	 * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
-	 * be impossible.
-	 *
-	 * Therefore we need to reload the segment registers if either
-	 * the old or new selector is nonzero, and we need to override
-	 * the base address if next thread expects it to be overridden.
+	 * 64-bit bases are that controlled by arch_prctl.  The bases
+	 * don't necessarily match the selectors, as user code can do
+	 * any number of things to cause them to be inconsistent.
 	 *
 	 *
-	 * This code is unnecessarily slow in the case where the old and
-	 * new indexes are zero and the new base is nonzero -- it will
-	 * unnecessarily write 0 to the selector before writing the new
-	 * base address.
+	 * We don't promise to preserve the bases if the selectors are
+	 * nonzero.  We also don't promise to preserve the base if the
+	 * selector is zero and the base doesn't match whatever was
+	 * most recently passed to ARCH_SET_FS/GS.  (If/when the
+	 * FSGSBASE instructions are enabled, we'll need to offer
+	 * stronger guarantees.)
 	 *
-	 * Note: This all depends on arch_prctl being the only way that
-	 * user code can override the segment base.  Once wrfsbase and
-	 * wrgsbase are enabled, most of this code will need to change.
+	 * As an invariant,
+	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
+	 * impossible.
 	 */
-	if (unlikely(fsindex | next->fsindex | prev->fs)) {
+	if (next->fsindex) {
+		/* Loading a nonzero value into FS sets the index and base. */
 		loadsegment(fs, next->fsindex);
-
-		/*
-		 * If user code wrote a nonzero value to FS, then it also
-		 * cleared the overridden base address.
-		 *
-		 * XXX: if user code wrote 0 to FS and cleared the base
-		 * address itself, we won't notice and we'll incorrectly
-		 * restore the prior base address next time we reschdule
-		 * the process.
-		 */
-		if (fsindex)
-			prev->fs = 0;
+	} else {
+		if (next->fsbase) {
+			/* Next index is zero but next base is nonzero. */
+			if (prev_fsindex)
+				loadsegment(fs, 0);
+			wrmsrl(MSR_FS_BASE, next->fsbase);
+		} else {
+			/* Next base and index are both zero. */
+			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+				/*
+				 * We don't know the previous base and can't
+				 * find out without RDMSR.  Forcibly clear it.
+				 */
+				loadsegment(fs, __USER_DS);
+				loadsegment(fs, 0);
+			} else {
+				/*
+				 * If the previous index is zero and ARCH_SET_FS
+				 * didn't change the base, then the base is
+				 * also zero and we don't need to do anything.
+				 */
+				if (prev->fsbase || prev_fsindex)
+					loadsegment(fs, 0);
+			}
+		}
 	}
-	if (next->fs)
-		wrmsrl(MSR_FS_BASE, next->fs);
-	prev->fsindex = fsindex;
+	/*
+	 * Save the old state and preserve the invariant.
+	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
+	 * without RDMSR because Intel user code can zero it without telling
+	 * us and AMD user code can program any 32-bit value without telling
+	 * us.
+	 */
+	if (prev_fsindex)
+		prev->fsbase = 0;
+	prev->fsindex = prev_fsindex;
 
-	if (unlikely(gsindex | next->gsindex | prev->gs)) {
+	if (next->gsindex) {
+		/* Loading a nonzero value into GS sets the index and base. */
 		load_gs_index(next->gsindex);
-
-		/* This works (and fails) the same way as fsindex above. */
-		if (gsindex)
-			prev->gs = 0;
+	} else {
+		if (next->gsbase) {
+			/* Next index is zero but next base is nonzero. */
+			if (prev_gsindex)
+				load_gs_index(0);
+			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
+		} else {
+			/* Next base and index are both zero. */
+			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+				/*
+				 * We don't know the previous base and can't
+				 * find out without RDMSR.  Forcibly clear it.
+				 *
+				 * This contains a pointless SWAPGS pair.
+				 * Fixing it would involve an explicit check
+				 * for Xen or a new pvop.
+				 */
+				load_gs_index(__USER_DS);
+				load_gs_index(0);
+			} else {
+				/*
+				 * If the previous index is zero and ARCH_SET_GS
+				 * didn't change the base, then the base is
+				 * also zero and we don't need to do anything.
+				 */
+				if (prev->gsbase || prev_gsindex)
+					load_gs_index(0);
+			}
+		}
 	}
-	if (next->gs)
-		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
-	prev->gsindex = gsindex;
+	/*
+	 * Save the old state and preserve the invariant.
+	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
+	 * without RDMSR because Intel user code can zero it without telling
+	 * us and AMD user code can program any 32-bit value without telling
+	 * us.
+	 */
+	if (prev_gsindex)
+		prev->gsbase = 0;
+	prev->gsindex = prev_gsindex;
 
 	switch_fpu_finish(next_fpu, fpu_switch);
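The invariant documented in the new comment block can be spelled out as a debug check. The following is an illustrative sketch only, not part of this commit: check_fsgs_invariant() is a made-up helper, while WARN_ON_ONCE() and the renamed fsbase/gsbase/fsindex/gsindex fields are the real kernel symbols used above.

/* Illustrative sketch only -- not part of this commit. */
static inline void check_fsgs_invariant(const struct thread_struct *t)
{
	/*
	 * A nonzero selector means the base is defined by the GDT/LDT
	 * descriptor, so the cached MSR base must be 0; both being
	 * nonzero at the same time would violate the documented invariant.
	 */
	WARN_ON_ONCE(t->fsbase != 0 && t->fsindex != 0);
	WARN_ON_ONCE(t->gsbase != 0 && t->gsindex != 0);
}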
 
 
@@ -516,23 +535,11 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		if (addr >= TASK_SIZE_OF(task))
 			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to
-		   switch. */
-		if (addr <= 0xffffffff) {
-			set_32bit_tls(task, GS_TLS, addr);
-			if (doit) {
-				load_TLS(&task->thread, cpu);
-				load_gs_index(GS_TLS_SEL);
-			}
-			task->thread.gsindex = GS_TLS_SEL;
-			task->thread.gs = 0;
-		} else {
-			task->thread.gsindex = 0;
-			task->thread.gs = addr;
-			if (doit) {
-				load_gs_index(0);
-				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-			}
+		task->thread.gsindex = 0;
+		task->thread.gsbase = addr;
+		if (doit) {
+			load_gs_index(0);
+			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
 		}
 		put_cpu();
 		break;
@@ -542,52 +549,30 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		if (addr >= TASK_SIZE_OF(task))
 			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to
-		   switch. */
-		if (addr <= 0xffffffff) {
-			set_32bit_tls(task, FS_TLS, addr);
-			if (doit) {
-				load_TLS(&task->thread, cpu);
-				loadsegment(fs, FS_TLS_SEL);
-			}
-			task->thread.fsindex = FS_TLS_SEL;
-			task->thread.fs = 0;
-		} else {
-			task->thread.fsindex = 0;
-			task->thread.fs = addr;
-			if (doit) {
-				/* set the selector to 0 to not confuse
-				   __switch_to */
-				loadsegment(fs, 0);
-				ret = wrmsrl_safe(MSR_FS_BASE, addr);
-			}
+		task->thread.fsindex = 0;
+		task->thread.fsbase = addr;
+		if (doit) {
+			/* set the selector to 0 to not confuse __switch_to */
+			loadsegment(fs, 0);
+			ret = wrmsrl_safe(MSR_FS_BASE, addr);
 		}
 		put_cpu();
 		break;
 	case ARCH_GET_FS: {
 		unsigned long base;
-		if (task->thread.fsindex == FS_TLS_SEL)
-			base = read_32bit_tls(task, FS_TLS);
-		else if (doit)
+		if (doit)
 			rdmsrl(MSR_FS_BASE, base);
 		else
-			base = task->thread.fs;
+			base = task->thread.fsbase;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 	}
 	case ARCH_GET_GS: {
 		unsigned long base;
-		unsigned gsindex;
-		if (task->thread.gsindex == GS_TLS_SEL)
-			base = read_32bit_tls(task, GS_TLS);
-		else if (doit) {
-			savesegment(gs, gsindex);
-			if (gsindex)
-				rdmsrl(MSR_KERNEL_GS_BASE, base);
-			else
-				base = task->thread.gs;
-		} else
-			base = task->thread.gs;
+		if (doit)
+			rdmsrl(MSR_KERNEL_GS_BASE, base);
+		else
+			base = task->thread.gsbase;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 	}
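With the small-base GDT fast path removed, ARCH_SET_FS/ARCH_SET_GS always program the base MSR and leave the selector at 0, so a plain arch_prctl() round trip behaves the same for any address. A minimal user-space sketch (assuming an x86-64 Linux host; this program is illustrative and not part of the commit):

#include <asm/prctl.h>        /* ARCH_SET_FS, ARCH_GET_FS */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	unsigned long buf[16];
	unsigned long base = 0;
	unsigned short sel;

	/* Any base, below or above 4GB, now takes the MSR path. */
	syscall(SYS_arch_prctl, ARCH_SET_FS, buf);

	/* The FS selector stays 0 ... */
	asm volatile("movw %%fs, %0" : "=r" (sel));

	/* ... and ARCH_GET_FS reports the base we just set. */
	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);

	printf("fs=%#hx fsbase=%#lx buf=%p\n", sel, base, (void *)buf);
	return 0;
}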

+ 9 - 41
arch/x86/kernel/ptrace.c

@@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task,
 
 	switch (offset) {
 	case offsetof(struct user_regs_struct,fs):
-		/*
-		 * If this is setting fs as for normal 64-bit use but
-		 * setting fs_base has implicitly changed it, leave it.
-		 */
-		if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
-		     task->thread.fs != 0) ||
-		    (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
-		     task->thread.fs == 0))
-			break;
 		task->thread.fsindex = value;
 		if (task == current)
 			loadsegment(fs, task->thread.fsindex);
 		break;
 	case offsetof(struct user_regs_struct,gs):
-		/*
-		 * If this is setting gs as for normal 64-bit use but
-		 * setting gs_base has implicitly changed it, leave it.
-		 */
-		if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
-		     task->thread.gs != 0) ||
-		    (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
-		     task->thread.gs == 0))
-			break;
 		task->thread.gsindex = value;
 		if (task == current)
 			load_gs_index(task->thread.gsindex);
@@ -417,7 +399,7 @@ static int putreg(struct task_struct *child,
 		 * to set either thread.fs or thread.fsindex and the
 		 * corresponding GDT slot.
 		 */
-		if (child->thread.fs != value)
+		if (child->thread.fsbase != value)
 			return do_arch_prctl(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
@@ -426,7 +408,7 @@ static int putreg(struct task_struct *child,
 		 */
 		if (value >= TASK_SIZE_OF(child))
 			return -EIO;
-		if (child->thread.gs != value)
+		if (child->thread.gsbase != value)
 			return do_arch_prctl(child, ARCH_SET_GS, value);
 		return 0;
 #endif
@@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
 #ifdef CONFIG_X86_64
 	case offsetof(struct user_regs_struct, fs_base): {
 		/*
-		 * do_arch_prctl may have used a GDT slot instead of
-		 * the MSR.  To userland, it appears the same either
-		 * way, except the %fs segment selector might not be 0.
+		 * XXX: This will not behave as expected if called on
+		 * current or if fsindex != 0.
 		 */
-		unsigned int seg = task->thread.fsindex;
-		if (task->thread.fs != 0)
-			return task->thread.fs;
-		if (task == current)
-			asm("movl %%fs,%0" : "=r" (seg));
-		if (seg != FS_TLS_SEL)
-			return 0;
-		return get_desc_base(&task->thread.tls_array[FS_TLS]);
+		return task->thread.fsbase;
 	}
 	case offsetof(struct user_regs_struct, gs_base): {
 		/*
-		 * Exactly the same here as the %fs handling above.
+		 * XXX: This will not behave as expected if called on
+		 * current or if fsindex != 0.
 		 */
-		unsigned int seg = task->thread.gsindex;
-		if (task->thread.gs != 0)
-			return task->thread.gs;
-		if (task == current)
-			asm("movl %%gs,%0" : "=r" (seg));
-		if (seg != GS_TLS_SEL)
-			return 0;
-		return get_desc_base(&task->thread.tls_array[GS_TLS]);
+		return task->thread.gsbase;
 	}
 #endif
 	}
@@ -1266,7 +1234,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
 #ifdef CONFIG_X86_X32_ABI
-	if (!is_ia32_task())
+	if (!in_ia32_syscall())
 		return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
 #ifdef CONFIG_IA32_EMULATION
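On the ptrace side, dropping the FS_TLS_SEL/GS_TLS_SEL special cases means fs_base/gs_base pokes go straight to the saved fsbase/gsbase, reaching do_arch_prctl() only when the value actually changes. A hypothetical tracer fragment showing the user-visible interface (the ptrace request and struct user_regs_struct offsets are standard; the helper name is invented for illustration):

#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/types.h>
#include <stddef.h>

/*
 * Set a stopped tracee's GS base; a changed value is forwarded by the
 * kernel to do_arch_prctl(ARCH_SET_GS).
 */
static long poke_gs_base(pid_t pid, unsigned long base)
{
	return ptrace(PTRACE_POKEUSER, pid,
		      (void *)offsetof(struct user_regs_struct, gs_base),
		      (void *)base);
}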

+ 3 - 3
arch/x86/kernel/signal.c

@@ -390,7 +390,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 		put_user_ex(&frame->uc, &frame->puc);
 
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
+		if (boot_cpu_has(X86_FEATURE_XSAVE))
 			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
@@ -441,7 +441,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs)
 {
 	unsigned long flags;
 
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
 	else
 		flags = UC_SIGCONTEXT_SS;
@@ -761,7 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_64
-	if (is_ia32_task())
+	if (in_ia32_syscall())
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI

+ 1 - 1
arch/x86/kernel/smpboot.c

@@ -1236,7 +1236,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 * If we couldn't find a local APIC, then get out of here now!
 	 */
 	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
-	    !cpu_has_apic) {
+	    !boot_cpu_has(X86_FEATURE_APIC)) {
 		if (!disable_apic) {
 			pr_err("BIOS bug, local APIC #%d not detected!...\n",
 				boot_cpu_physical_apicid);

+ 1 - 1
arch/x86/kernel/tce_64.c

@@ -40,7 +40,7 @@
 static inline void flush_tce(void* tceaddr)
 {
 	/* a single tce can't cross a cache line */
-	if (cpu_has_clflush)
+	if (boot_cpu_has(X86_FEATURE_CLFLUSH))
 		clflush(tceaddr);
 	else
 		wbinvd();
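The cpu_has_clflush/cpu_has_apic/cpu_has_xsave conversions in the hunks above all have the same shape: the removed cpu_has_* wrappers become explicit boot_cpu_has(X86_FEATURE_*) tests against the boot CPU's capability bits. A sketch of that shape (boot_cpu_has() and the X86_FEATURE_* bits are existing kernel symbols; the surrounding function is hypothetical):

#include <asm/cpufeature.h>

static bool example_can_clflush(void)
{
	/* was: return cpu_has_clflush; */
	return boot_cpu_has(X86_FEATURE_CLFLUSH);
}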

+ 42 - 0
arch/x86/kernel/tls.c

@@ -114,6 +114,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
 		       int can_allocate)
 {
 	struct user_desc info;
+	unsigned short __maybe_unused sel, modified_sel;
 
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
@@ -141,6 +142,47 @@ int do_set_thread_area(struct task_struct *p, int idx,
 
 	set_tls_desc(p, idx, &info, 1);
 
+	/*
+	 * If DS, ES, FS, or GS points to the modified segment, forcibly
+	 * refresh it.  Only needed on x86_64 because x86_32 reloads them
+	 * on return to user mode.
+	 */
+	modified_sel = (idx << 3) | 3;
+
+	if (p == current) {
+#ifdef CONFIG_X86_64
+		savesegment(ds, sel);
+		if (sel == modified_sel)
+			loadsegment(ds, sel);
+
+		savesegment(es, sel);
+		if (sel == modified_sel)
+			loadsegment(es, sel);
+
+		savesegment(fs, sel);
+		if (sel == modified_sel)
+			loadsegment(fs, sel);
+
+		savesegment(gs, sel);
+		if (sel == modified_sel)
+			load_gs_index(sel);
+#endif
+
+#ifdef CONFIG_X86_32_LAZY_GS
+		savesegment(gs, sel);
+		if (sel == modified_sel)
+			loadsegment(gs, sel);
+#endif
+	} else {
+#ifdef CONFIG_X86_64
+		if (p->thread.fsindex == modified_sel)
+			p->thread.fsbase = info.base_addr;
+
+		if (p->thread.gsindex == modified_sel)
+			p->thread.gsbase = info.base_addr;
+#endif
+	}
+
 	return 0;
 }
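The new resynchronization matters when a live segment register points at the GDT slot that set_thread_area() just rewrote. A user-space illustration (hypothetical program, not the selftest added by this series; it assumes the TLS variables sit below 4GB, since user_desc.base_addr is a 32-bit field):

#include <asm/ldt.h>           /* struct user_desc */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int tls_a, tls_b;

int main(void)
{
	struct user_desc d = {
		.entry_number   = -1,   /* let the kernel pick a GDT slot */
		.base_addr      = (unsigned int)(unsigned long)&tls_a,
		.limit          = 0xfffff,
		.seg_32bit      = 1,
		.limit_in_pages = 1,
		.useable        = 1,
	};
	unsigned short sel;

	if (syscall(SYS_set_thread_area, &d) != 0)
		return 1;

	sel = (d.entry_number << 3) | 3;   /* GDT selector, RPL 3 */
	asm volatile("movw %0, %%fs" : : "r" (sel));

	/* Rewrite the same slot: %fs (and the cached fsbase) now follow it. */
	d.base_addr = (unsigned int)(unsigned long)&tls_b;
	syscall(SYS_set_thread_area, &d);

	printf("fs selector %#hx now tracks %p\n", sel, (void *)&tls_b);
	return 0;
}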
 
 

+ 1 - 0
arch/x86/kernel/traps.c

@@ -51,6 +51,7 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 #include <linux/atomic.h>
+#include <asm/text-patching.h>
 #include <asm/ftrace.h>
 #include <asm/traps.h>
 #include <asm/desc.h>

+ 15 - 18
arch/x86/kernel/tsc.c

@@ -36,7 +36,7 @@ static int __read_mostly tsc_unstable;
 
 /* native_sched_clock() is called before tsc_init(), so
    we must start with the TSC soft disabled to prevent
-   erroneous rdtsc usage on !cpu_has_tsc processors */
+   erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
 static int __read_mostly tsc_disabled = -1;
 
 static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -834,15 +834,15 @@ int recalibrate_cpu_khz(void)
 #ifndef CONFIG_SMP
 	unsigned long cpu_khz_old = cpu_khz;
 
-	if (cpu_has_tsc) {
-		tsc_khz = x86_platform.calibrate_tsc();
-		cpu_khz = tsc_khz;
-		cpu_data(0).loops_per_jiffy =
-			cpufreq_scale(cpu_data(0).loops_per_jiffy,
-					cpu_khz_old, cpu_khz);
-		return 0;
-	} else
+	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return -ENODEV;
+
+	tsc_khz = x86_platform.calibrate_tsc();
+	cpu_khz = tsc_khz;
+	cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
+						    cpu_khz_old, cpu_khz);
+
+	return 0;
 #else
 	return -ENODEV;
 #endif
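For reference, the cpufreq_scale() call kept in the rewritten recalibrate_cpu_khz() rescales loops_per_jiffy proportionally to the newly calibrated TSC frequency, roughly as in the illustrative helper below (the real helper lives in include/linux/cpufreq.h and uses fixed-point rounding):

/* Illustration only of what cpufreq_scale(lpj, old_khz, new_khz) computes. */
static unsigned long example_scale_lpj(unsigned long lpj,
				       unsigned int old_khz,
				       unsigned int new_khz)
{
	return (unsigned long)((unsigned long long)lpj * new_khz / old_khz);
}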
@@ -922,9 +922,6 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	struct cpufreq_freqs *freq = data;
 	unsigned long *lpj;
 
-	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
 	lpj = &boot_cpu_data.loops_per_jiffy;
 #ifdef CONFIG_SMP
 	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -954,9 +951,9 @@ static struct notifier_block time_cpufreq_notifier_block = {
 	.notifier_call  = time_cpufreq_notifier
 };
 
-static int __init cpufreq_tsc(void)
+static int __init cpufreq_register_tsc_scaling(void)
 {
-	if (!cpu_has_tsc)
+	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return 0;
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
@@ -965,7 +962,7 @@ static int __init cpufreq_tsc(void)
 	return 0;
 }
 
-core_initcall(cpufreq_tsc);
+core_initcall(cpufreq_register_tsc_scaling);
 
 #endif /* CONFIG_CPU_FREQ */
 
@@ -1081,7 +1078,7 @@ static void __init check_system_tsc_reliable(void)
  */
 int unsynchronized_tsc(void)
 {
-	if (!cpu_has_tsc || tsc_unstable)
+	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
 		return 1;
 
 #ifdef CONFIG_SMP
@@ -1205,7 +1202,7 @@ out:
 
 static int __init init_tsc_clocksource(void)
 {
-	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
 		return 0;
 
 	if (tsc_clocksource_reliable)
@@ -1242,7 +1239,7 @@ void __init tsc_init(void)
 	u64 lpj;
 	int cpu;
 
-	if (!cpu_has_tsc) {
+	if (!boot_cpu_has(X86_FEATURE_TSC)) {
 		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 		return;
 	}

+ 1 - 1
arch/x86/kernel/uprobes.c

@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
 
 static inline int sizeof_long(void)
 {
-	return is_ia32_task() ? 4 : 8;
+	return in_ia32_syscall() ? 4 : 8;
 }
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)

+ 1 - 1
arch/x86/kvm/cpuid.c

@@ -75,7 +75,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		return 0;
 
 	/* Update OSXSAVE bit */
-	if (cpu_has_xsave && best->function == 0x1) {
+	if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) {
 		best->ecx &= ~F(OSXSAVE);
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
 			best->ecx |= F(OSXSAVE);

+ 2 - 1
arch/x86/kvm/mmu.c

@@ -3844,7 +3844,8 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 					boot_cpu_data.x86_phys_bits,
 					context->shadow_root_level, false,
-					cpu_has_gbpages, true, true);
+					boot_cpu_has(X86_FEATURE_GBPAGES),
+					true, true);
 	else
 		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
 					    boot_cpu_data.x86_phys_bits,

+ 1 - 1
arch/x86/kvm/svm.c

@@ -1254,7 +1254,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
 	loadsegment(fs, svm->host.fs);
-	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
 	load_gs_index(svm->host.gs);
 #else
 #ifdef CONFIG_X86_32_LAZY_GS

+ 1 - 2
arch/x86/kvm/trace.h

@@ -809,8 +809,7 @@ TRACE_EVENT(kvm_write_tsc_offset,
 
 #define host_clocks					\
 	{VCLOCK_NONE, "none"},				\
-	{VCLOCK_TSC,  "tsc"},				\
-	{VCLOCK_HPET, "hpet"}				\
+	{VCLOCK_TSC,  "tsc"}				\
 
 TRACE_EVENT(kvm_update_master_clock,
 	TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),

+ 1 - 1
arch/x86/kvm/vmx.c

@@ -3390,7 +3390,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		}
 	}
 
-	if (cpu_has_xsaves)
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		rdmsrl(MSR_IA32_XSS, host_xss);
 
 	return 0;

+ 8 - 8
arch/x86/kvm/x86.c

@@ -2611,7 +2611,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_MAX_MCE_BANKS;
 		break;
 	case KVM_CAP_XCRS:
-		r = cpu_has_xsave;
+		r = boot_cpu_has(X86_FEATURE_XSAVE);
 		break;
 	case KVM_CAP_TSC_CONTROL:
 		r = kvm_has_tsc_control;
@@ -3094,7 +3094,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 
 	/* Set XSTATE_BV and possibly XCOMP_BV.  */
 	xsave->header.xfeatures = xstate_bv;
-	if (cpu_has_xsaves)
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
 	/*
@@ -3121,7 +3121,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 					 struct kvm_xsave *guest_xsave)
 {
-	if (cpu_has_xsave) {
+	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		memset(guest_xsave, 0, sizeof(struct kvm_xsave));
 		fill_xsave((u8 *) guest_xsave->region, vcpu);
 	} else {
@@ -3139,7 +3139,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 	u64 xstate_bv =
 		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
 
-	if (cpu_has_xsave) {
+	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		/*
 		 * Here we allow setting states that are not present in
 		 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
@@ -3160,7 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
 					struct kvm_xcrs *guest_xcrs)
 {
-	if (!cpu_has_xsave) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
 		guest_xcrs->nr_xcrs = 0;
 		return;
 	}
@@ -3176,7 +3176,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
 {
 	int i, r = 0;
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -EINVAL;
 
 	if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
@@ -5865,7 +5865,7 @@ int kvm_arch_init(void *opaque)
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
 	kvm_lapic_init();
@@ -7293,7 +7293,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 static void fx_init(struct kvm_vcpu *vcpu)
 {
 	fpstate_init(&vcpu->arch.guest_fpu.state);
-	if (cpu_has_xsaves)
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
 			host_xcr0 | XSTATE_COMPACTION_ENABLED;
 

Some files were not shown because too many files changed in this diff