
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this development cycle were:

   - a large number of call stack dumping/printing improvements: higher
     robustness, better cross-context dumping, improved output, etc.
     (Josh Poimboeuf)

   - vDSO getcpu() performance improvement for future Intel CPUs with
     the RDPID instruction (Andy Lutomirski)

   - add two new Intel AVX512 features and the CPUID support
     infrastructure for them: AVX512IFMA and AVX512VBMI (Gayatri
     Kammela, He Chen)

   - more copy-user unification (Borislav Petkov)

   - entry code assembly macro simplifications (Alexander Kuleshov)

   - vDSO checkpoint/restore (C/R) support improvements (Dmitry Safonov)

   - misc fixes and cleanups (Borislav Petkov, Paul Bolle)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits)
  scripts/decode_stacktrace.sh: Fix address line detection on x86
  x86/boot/64: Use defines for page size
  x86/dumpstack: Make stack name tags more comprehensible
  selftests/x86: Add test_vdso to test getcpu()
  x86/vdso: Use RDPID in preference to LSL when available
  x86/dumpstack: Handle NULL stack pointer in show_trace_log_lvl()
  x86/cpufeatures: Enable new AVX512 cpu features
  x86/cpuid: Provide get_scattered_cpuid_leaf()
  x86/cpuid: Cleanup cpuid_regs definitions
  x86/copy_user: Unify the code by removing the 64-bit asm _copy_*_user() variants
  x86/unwind: Ensure stack grows down
  x86/vdso: Set vDSO pointer only after success
  x86/prctl/uapi: Remove #ifdef for CHECKPOINT_RESTORE
  x86/unwind: Detect bad stack return address
  x86/dumpstack: Warn on stack recursion
  x86/unwind: Warn on bad frame pointer
  x86/decoder: Use stderr if insn sanity test fails
  x86/decoder: Use stdout if insn decoder test is successful
  mm/page_alloc: Remove kernel address exposure in free_reserved_area()
  x86/dumpstack: Remove raw stack dump
  ...
Linus Torvalds committed 5645688f9d (8 years ago)
41 changed files with 703 additions and 497 deletions
   1. Documentation/kernel-parameters.txt          +0   -3
   2. Documentation/sysctl/kernel.txt              +0   -8
   3. Documentation/x86/x86_64/boot-options.txt    +0   -4
   4. arch/x86/entry/calling.h                     +22  -11
   5. arch/x86/entry/entry_32.S                    +93  -48
   6. arch/x86/entry/entry_64.S                    +7   -9
   7. arch/x86/entry/vdso/vma.c                    +3   -7
   8. arch/x86/events/intel/pt.c                   +19  -26
   9. arch/x86/include/asm/cpufeatures.h           +3   -0
  10. arch/x86/include/asm/kdebug.h                +0   -1
  11. arch/x86/include/asm/processor.h             +14  -0
  12. arch/x86/include/asm/stacktrace.h            +1   -7
  13. arch/x86/include/asm/unwind.h                +15  -1
  14. arch/x86/include/asm/vgtod.h                 +6   -1
  15. arch/x86/include/uapi/asm/prctl.h            +3   -5
  16. arch/x86/kernel/cpu/scattered.c              +38  -19
  17. arch/x86/kernel/cpuid.c                      +0   -4
  18. arch/x86/kernel/dumpstack.c                  +28  -40
  19. arch/x86/kernel/dumpstack_32.c               +12  -44
  20. arch/x86/kernel/dumpstack_64.c               +12  -67
  21. arch/x86/kernel/fpu/xstate.c                 +2   -0
  22. arch/x86/kernel/head_32.S                    +29  -20
  23. arch/x86/kernel/head_64.S                    +26  -26
  24. arch/x86/kernel/process_32.c                 +3   -4
  25. arch/x86/kernel/process_64.c                 +9   -4
  26. arch/x86/kernel/smpboot.c                    +1   -3
  27. arch/x86/kernel/unwind_frame.c               +152 -9
  28. arch/x86/kernel/vmlinux.lds.S                +1   -1
  29. arch/x86/lib/copy_user_64.S                  +0   -47
  30. arch/x86/lib/usercopy.c                      +49  -0
  31. arch/x86/lib/usercopy_32.c                   +0   -49
  32. arch/x86/mm/fault.c                          +1   -2
  33. arch/x86/platform/uv/uv_nmi.c                +2   -2
  34. arch/x86/tools/insn_sanity.c                 +2   -1
  35. arch/x86/tools/test_get_len.c                +1   -1
  36. kernel/sysctl.c                              +0   -7
  37. mm/page_alloc.c                              +2   -2
  38. scripts/decode_stacktrace.sh                 +2   -1
  39. scripts/faddr2line                           +21  -12
  40. tools/testing/selftests/x86/Makefile         +1   -1
  41. tools/testing/selftests/x86/test_vdso.c      +123 -0

+ 0 - 3
Documentation/kernel-parameters.txt

@@ -1963,9 +1963,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			kmemcheck=2 (one-shot mode)
 			Default: 2 (one-shot mode)
 
-	kstack=N	[X86] Print N words from the kernel stack
-			in oops dumps.
-
 	kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
 			Default is 0 (don't ignore, but inject #GP)
 

+ 0 - 8
Documentation/sysctl/kernel.txt

@@ -40,7 +40,6 @@ show up in /proc/sys/kernel:
 - hung_task_warnings
 - kexec_load_disabled
 - kptr_restrict
-- kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
 - modules_disabled
@@ -395,13 +394,6 @@ When kptr_restrict is set to (2), kernel pointers printed using
 
 ==============================================================
 
-kstack_depth_to_print: (X86 only)
-
-Controls the number of words to print when dumping the raw
-kernel stack.
-
-==============================================================
-
 l2cr: (PPC only)
 
 This flag controls the L2 cache of G3 processor boards. If

+ 0 - 4
Documentation/x86/x86_64/boot-options.txt

@@ -277,10 +277,6 @@ IOMMU (input/output memory management unit)
     space might stop working. Use this option if you have devices that
     are accessed from userspace directly on some PCI host bridge.
 
-Debugging
-
-  kstack=N	Print N words from the kernel stack in oops dumps.
-
 Miscellaneous
 
 	nogbpages

+ 22 - 11
arch/x86/entry/calling.h

@@ -90,8 +90,8 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS	21*8
 
-	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
-	addq	$-(15*8+\addskip), %rsp
+	.macro ALLOC_PT_GPREGS_ON_STACK
+	addq	$-(15*8), %rsp
 	.endm
 
 	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
@@ -147,15 +147,6 @@ For 32-bit we have the following conventions - kernel is built with
 	movq 5*8+\offset(%rsp), %rbx
 	.endm
 
-	.macro ZERO_EXTRA_REGS
-	xorl	%r15d, %r15d
-	xorl	%r14d, %r14d
-	xorl	%r13d, %r13d
-	xorl	%r12d, %r12d
-	xorl	%ebp, %ebp
-	xorl	%ebx, %ebx
-	.endm
-
 	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
 	movq 6*8(%rsp), %r11
@@ -201,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with
 	.byte 0xf1
 	.endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+	.if \ptregs_offset
+		leaq \ptregs_offset(%rsp), %rbp
+	.else
+		mov %rsp, %rbp
+	.endif
+	orq	$0x1, %rbp
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 /*

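The decode side of this trick lives in the unwinder (arch/x86/kernel/unwind_frame.c, whose diff is not reproduced on this page). A minimal sketch of the decoding, assuming only the LSB convention described in the macro comment above:

	/*
	 * Sketch: recover the pt_regs pointer hidden in a frame-pointer
	 * slot.  An odd value cannot be a real, word-aligned stack
	 * address, so a set LSB marks an encoded pt_regs pointer;
	 * clearing the bit yields the real address.
	 */
	static struct pt_regs *decode_frame_pointer(unsigned long *bp)
	{
		unsigned long regs = (unsigned long)bp;

		if (!(regs & 0x1))
			return NULL;	/* ordinary frame pointer */

		return (struct pt_regs *)(regs & ~0x1UL);
	}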
+ 93 - 48
arch/x86/entry/entry_32.S

@@ -45,6 +45,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 
 	.section .entry.text, "ax"
 
@@ -175,6 +176,22 @@
 	SET_KERNEL_GS %edx
 .endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original rbp.
+ */
+.macro ENCODE_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
+	mov %esp, %ebp
+	orl $0x1, %ebp
+#endif
+.endm
+
 .macro RESTORE_INT_REGS
 	popl	%ebx
 	popl	%ecx
@@ -237,6 +254,23 @@ ENTRY(__switch_to_asm)
 	jmp	__switch_to
 END(__switch_to_asm)
 
+/*
+ * The unwinder expects the last frame on the stack to always be at the same
+ * offset from the end of the page, which allows it to validate the stack.
+ * Calling schedule_tail() directly would break that convention because its an
+ * asmlinkage function so its argument has to be pushed on the stack.  This
+ * wrapper creates a proper "end of stack" frame header before the call.
+ */
+ENTRY(schedule_tail_wrapper)
+	FRAME_BEGIN
+
+	pushl	%eax
+	call	schedule_tail
+	popl	%eax
+
+	FRAME_END
+	ret
+ENDPROC(schedule_tail_wrapper)
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -245,9 +279,7 @@ END(__switch_to_asm)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
+	call	schedule_tail_wrapper
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
@@ -307,13 +339,13 @@ END(ret_from_exception)
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
 	DISABLE_INTERRUPTS(CLBR_ANY)
-need_resched:
+.Lneed_resched:
 	cmpl	$0, PER_CPU_VAR(__preempt_count)
 	jnz	restore_all
 	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz	restore_all
 	call	preempt_schedule_irq
-	jmp	need_resched
+	jmp	.Lneed_resched
 END(resume_kernel)
 #endif
 
@@ -334,7 +366,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
  */
 ENTRY(xen_sysenter_target)
 	addl	$5*4, %esp			/* remove xen-provided frame */
-	jmp	sysenter_past_esp
+	jmp	.Lsysenter_past_esp
 #endif
 
 /*
@@ -371,7 +403,7 @@ ENTRY(xen_sysenter_target)
  */
 ENTRY(entry_SYSENTER_32)
 	movl	TSS_sysenter_sp0(%esp), %esp
-sysenter_past_esp:
+.Lsysenter_past_esp:
 	pushl	$__USER_DS		/* pt_regs->ss */
 	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
 	pushfl				/* pt_regs->flags (except IF = 0) */
@@ -504,9 +536,9 @@ ENTRY(entry_INT80_32)
 
 restore_all:
 	TRACE_IRQS_IRET
-restore_all_notrace:
+.Lrestore_all_notrace:
 #ifdef CONFIG_X86_ESPFIX32
-	ALTERNATIVE	"jmp restore_nocheck", "", X86_BUG_ESPFIX
+	ALTERNATIVE	"jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
 
 	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
 	/*
@@ -518,22 +550,23 @@ restore_all_notrace:
 	movb	PT_CS(%esp), %al
 	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
-	je ldt_ss				# returning to user-space with LDT SS
+	je .Lldt_ss				# returning to user-space with LDT SS
 #endif
-restore_nocheck:
+.Lrestore_nocheck:
 	RESTORE_REGS 4				# skip orig_eax/error_code
-irq_return:
+.Lirq_return:
 	INTERRUPT_RETURN
+
 .section .fixup, "ax"
 ENTRY(iret_exc	)
 	pushl	$0				# no error code
 	pushl	$do_iret_error
-	jmp	error_code
+	jmp	common_exception
 .previous
-	_ASM_EXTABLE(irq_return, iret_exc)
+	_ASM_EXTABLE(.Lirq_return, iret_exc)
 
 #ifdef CONFIG_X86_ESPFIX32
-ldt_ss:
+.Lldt_ss:
 /*
  * Setup and switch to ESPFIX stack
  *
@@ -562,7 +595,7 @@ ldt_ss:
 	 */
 	DISABLE_INTERRUPTS(CLBR_EAX)
 	lss	(%esp), %esp			/* switch to espfix segment */
-	jmp	restore_nocheck
+	jmp	.Lrestore_nocheck
 #endif
 ENDPROC(entry_INT80_32)
 
@@ -624,6 +657,7 @@ common_interrupt:
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	do_IRQ
@@ -635,6 +669,7 @@ ENTRY(name)				\
 	ASM_CLAC;			\
 	pushl	$~(nr);			\
 	SAVE_ALL;			\
+	ENCODE_FRAME_POINTER;		\
 	TRACE_IRQS_OFF			\
 	movl	%esp, %eax;		\
 	call	fn;			\
@@ -659,7 +694,7 @@ ENTRY(coprocessor_error)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_coprocessor_error
-	jmp	error_code
+	jmp	common_exception
 END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
@@ -673,14 +708,14 @@ ENTRY(simd_coprocessor_error)
 #else
 	pushl	$do_simd_coprocessor_error
 #endif
-	jmp	error_code
+	jmp	common_exception
 END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	pushl	$do_device_not_available
-	jmp	error_code
+	jmp	common_exception
 END(device_not_available)
 
 #ifdef CONFIG_PARAVIRT
@@ -694,59 +729,59 @@ ENTRY(overflow)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_overflow
-	jmp	error_code
+	jmp	common_exception
 END(overflow)
 
 ENTRY(bounds)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_bounds
-	jmp	error_code
+	jmp	common_exception
 END(bounds)
 
 ENTRY(invalid_op)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_invalid_op
-	jmp	error_code
+	jmp	common_exception
 END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_coprocessor_segment_overrun
-	jmp	error_code
+	jmp	common_exception
 END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	ASM_CLAC
 	pushl	$do_invalid_TSS
-	jmp	error_code
+	jmp	common_exception
 END(invalid_TSS)
 
 ENTRY(segment_not_present)
 	ASM_CLAC
 	pushl	$do_segment_not_present
-	jmp	error_code
+	jmp	common_exception
 END(segment_not_present)
 
 ENTRY(stack_segment)
 	ASM_CLAC
 	pushl	$do_stack_segment
-	jmp	error_code
+	jmp	common_exception
 END(stack_segment)
 
 ENTRY(alignment_check)
 	ASM_CLAC
 	pushl	$do_alignment_check
-	jmp	error_code
+	jmp	common_exception
 END(alignment_check)
 
 ENTRY(divide_error)
 	ASM_CLAC
 	pushl	$0				# no error code
 	pushl	$do_divide_error
-	jmp	error_code
+	jmp	common_exception
 END(divide_error)
 
 #ifdef CONFIG_X86_MCE
@@ -754,7 +789,7 @@ ENTRY(machine_check)
 	ASM_CLAC
 	pushl	$0
 	pushl	machine_check_vector
-	jmp	error_code
+	jmp	common_exception
 END(machine_check)
 #endif
 
@@ -762,13 +797,14 @@ ENTRY(spurious_interrupt_bug)
 	ASM_CLAC
 	pushl	$0
 	pushl	$do_spurious_interrupt_bug
-	jmp	error_code
+	jmp	common_exception
 END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN
 ENTRY(xen_hypervisor_callback)
 	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 
 	/*
@@ -823,6 +859,7 @@ ENTRY(xen_failsafe_callback)
 	jmp	iret_exc
 5:	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	jmp	ret_from_exception
 
 .section .fixup, "ax"
@@ -882,7 +919,7 @@ ftrace_call:
 	popl	%edx
 	popl	%ecx
 	popl	%eax
-ftrace_ret:
+.Lftrace_ret:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 .globl ftrace_graph_call
 ftrace_graph_call:
@@ -952,7 +989,7 @@ GLOBAL(ftrace_regs_call)
 	popl	%gs
 	addl	$8, %esp			/* Skip orig_ax and ip */
 	popf					/* Pop flags at end (no addl to corrupt flags) */
-	jmp	ftrace_ret
+	jmp	.Lftrace_ret
 
 	popf
 	jmp	ftrace_stub
@@ -963,7 +1000,7 @@ ENTRY(mcount)
 	jb	ftrace_stub			/* Paging not enabled yet? */
 
 	cmpl	$ftrace_stub, ftrace_trace_function
-	jnz	trace
+	jnz	.Ltrace
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	cmpl	$ftrace_stub, ftrace_graph_return
 	jnz	ftrace_graph_caller
@@ -976,7 +1013,7 @@ ftrace_stub:
 	ret
 
 	/* taken from glibc */
-trace:
+.Ltrace:
 	pushl	%eax
 	pushl	%ecx
 	pushl	%edx
@@ -1027,7 +1064,7 @@ return_to_handler:
 ENTRY(trace_page_fault)
 	ASM_CLAC
 	pushl	$trace_do_page_fault
-	jmp	error_code
+	jmp	common_exception
 END(trace_page_fault)
 #endif
 
@@ -1035,7 +1072,10 @@ ENTRY(page_fault)
 	ASM_CLAC
 	pushl	$do_page_fault
 	ALIGN
-error_code:
+	jmp common_exception
+END(page_fault)
+
+common_exception:
 	/* the function address is in %gs's slot on the stack */
 	pushl	%fs
 	pushl	%es
@@ -1047,6 +1087,7 @@ error_code:
 	pushl	%edx
 	pushl	%ecx
 	pushl	%ebx
+	ENCODE_FRAME_POINTER
 	cld
 	movl	$(__KERNEL_PERCPU), %ecx
 	movl	%ecx, %fs
@@ -1064,7 +1105,7 @@ error_code:
 	movl	%esp, %eax			# pt_regs pointer
 	call	*%edi
 	jmp	ret_from_exception
-END(page_fault)
+END(common_exception)
 
 ENTRY(debug)
 	/*
@@ -1079,6 +1120,7 @@ ENTRY(debug)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	xorl	%edx, %edx			# error code 0
 	movl	%esp, %eax			# pt_regs pointer
 
@@ -1094,11 +1136,11 @@ ENTRY(debug)
 
 .Ldebug_from_sysenter_stack:
 	/* We're on the SYSENTER stack.  Switch off. */
-	movl	%esp, %ebp
+	movl	%esp, %ebx
 	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
 	TRACE_IRQS_OFF
 	call	do_debug
-	movl	%ebp, %esp
+	movl	%ebx, %esp
 	jmp	ret_from_exception
 END(debug)
 
@@ -1116,11 +1158,12 @@ ENTRY(nmi)
 	movl	%ss, %eax
 	cmpw	$__ESPFIX_SS, %ax
 	popl	%eax
-	je	nmi_espfix_stack
+	je	.Lnmi_espfix_stack
 #endif
 
 	pushl	%eax				# pt_regs->orig_ax
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
 
@@ -1132,21 +1175,21 @@ ENTRY(nmi)
 
 	/* Not on SYSENTER stack. */
 	call	do_nmi
-	jmp	restore_all_notrace
+	jmp	.Lrestore_all_notrace
 
 .Lnmi_from_sysenter_stack:
 	/*
 	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
 	 * is using the thread stack right now, so it's safe for us to use it.
 	 */
-	movl	%esp, %ebp
+	movl	%esp, %ebx
 	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
 	call	do_nmi
-	movl	%ebp, %esp
-	jmp	restore_all_notrace
+	movl	%ebx, %esp
+	jmp	.Lrestore_all_notrace
 
 #ifdef CONFIG_X86_ESPFIX32
-nmi_espfix_stack:
+.Lnmi_espfix_stack:
 	/*
	 * create the pointer to lss back
 	 */
@@ -1159,12 +1202,13 @@ nmi_espfix_stack:
 	.endr
 	pushl	%eax
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	FIXUP_ESPFIX_STACK			# %eax == %esp
 	xorl	%edx, %edx			# zero error code
 	call	do_nmi
 	RESTORE_REGS
 	lss	12+4(%esp), %esp		# back to espfix stack
-	jmp	irq_return
+	jmp	.Lirq_return
 #endif
 END(nmi)
 
@@ -1172,6 +1216,7 @@ ENTRY(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
+	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
@@ -1181,14 +1226,14 @@ END(int3)
 
 ENTRY(general_protection)
 	pushl	$do_general_protection
-	jmp	error_code
+	jmp	common_exception
 END(general_protection)
 
 #ifdef CONFIG_KVM_GUEST
 ENTRY(async_page_fault)
 	ASM_CLAC
 	pushl	$do_async_page_fault
-	jmp	error_code
+	jmp	common_exception
 END(async_page_fault)
 #endif
 

+ 7 - 9
arch/x86/entry/entry_64.S

@@ -38,12 +38,6 @@
 #include <asm/export.h>
 #include <linux/err.h>
 
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_X86_64			(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_64BIT			0x80000000
-#define __AUDIT_ARCH_LE				0x40000000
-
 .code64
 .section .entry.text, "ax"
 
@@ -469,6 +463,7 @@ END(irq_entries_start)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 
 	testb	$3, CS(%rsp)
 	jz	1f
@@ -985,6 +980,7 @@ ENTRY(xen_failsafe_callback)
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
+	ENCODE_FRAME_POINTER
 	jmp	error_exit
 END(xen_failsafe_callback)
 
@@ -1028,6 +1024,7 @@ ENTRY(paranoid_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	movl	$1, %ebx
 	movl	$MSR_GS_BASE, %ecx
 	rdmsr
@@ -1075,6 +1072,7 @@ ENTRY(error_entry)
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
+	ENCODE_FRAME_POINTER 8
 	xorl	%ebx, %ebx
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
@@ -1257,6 +1255,7 @@ ENTRY(nmi)
 	pushq	%r13		/* pt_regs->r13 */
 	pushq	%r14		/* pt_regs->r14 */
 	pushq	%r15		/* pt_regs->r15 */
+	ENCODE_FRAME_POINTER
 
 	/*
 	 * At this point we no longer need to worry about stack damage
@@ -1270,11 +1269,10 @@ ENTRY(nmi)
 
 	/*
 	 * Return back to user mode.  We must *not* do the normal exit
-	 * work, because we don't want to enable interrupts.  Fortunately,
-	 * do_nmi doesn't modify pt_regs.
+	 * work, because we don't want to enable interrupts.
 	 */
 	SWAPGS
-	jmp	restore_c_regs_and_iret
+	jmp	restore_regs_and_iret
 
 .Lnmi_from_kernel:
 	/*

+ 3 - 7
arch/x86/entry/vdso/vma.c

@@ -161,8 +161,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
 	}
 
 	text_start = addr - image->sym_vvar_start;
-	current->mm->context.vdso = (void __user *)text_start;
-	current->mm->context.vdso_image = image;
 
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -189,14 +187,12 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		do_munmap(mm, text_start, image->size);
+	} else {
+		current->mm->context.vdso = (void __user *)text_start;
+		current->mm->context.vdso_image = image;
 	}
 
 up_fail:
-	if (ret) {
-		current->mm->context.vdso = NULL;
-		current->mm->context.vdso_image = NULL;
-	}
-
 	up_write(&mm->mmap_sem);
 	return ret;
 }

+ 19 - 26
arch/x86/events/intel/pt.c

@@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx);
 
 static struct pt_pmu pt_pmu;
 
-enum cpuid_regs {
-	CR_EAX = 0,
-	CR_ECX,
-	CR_EDX,
-	CR_EBX
-};
-
 /*
  * Capabilities of Intel PT hardware, such as number of address bits or
  * supported output schemes, are cached and exported to userspace as "caps"
@@ -64,21 +57,21 @@ static struct pt_cap_desc {
 	u8		reg;
 	u32		mask;
 } pt_caps[] = {
-	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
-	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
-	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
-	PT_CAP(ip_filtering,		0, CR_EBX, BIT(2)),
-	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
-	PT_CAP(ptwrite,			0, CR_EBX, BIT(4)),
-	PT_CAP(power_event_trace,	0, CR_EBX, BIT(5)),
-	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
-	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
-	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
-	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
-	PT_CAP(num_address_ranges,	1, CR_EAX, 0x3),
-	PT_CAP(mtc_periods,		1, CR_EAX, 0xffff0000),
-	PT_CAP(cycle_thresholds,	1, CR_EBX, 0xffff),
-	PT_CAP(psb_periods,		1, CR_EBX, 0xffff0000),
+	PT_CAP(max_subleaf,		0, CPUID_EAX, 0xffffffff),
+	PT_CAP(cr3_filtering,		0, CPUID_EBX, BIT(0)),
+	PT_CAP(psb_cyc,			0, CPUID_EBX, BIT(1)),
+	PT_CAP(ip_filtering,		0, CPUID_EBX, BIT(2)),
+	PT_CAP(mtc,			0, CPUID_EBX, BIT(3)),
+	PT_CAP(ptwrite,			0, CPUID_EBX, BIT(4)),
+	PT_CAP(power_event_trace,	0, CPUID_EBX, BIT(5)),
+	PT_CAP(topa_output,		0, CPUID_ECX, BIT(0)),
+	PT_CAP(topa_multiple_entries,	0, CPUID_ECX, BIT(1)),
+	PT_CAP(single_range_output,	0, CPUID_ECX, BIT(2)),
+	PT_CAP(payloads_lip,		0, CPUID_ECX, BIT(31)),
+	PT_CAP(num_address_ranges,	1, CPUID_EAX, 0x3),
+	PT_CAP(mtc_periods,		1, CPUID_EAX, 0xffff0000),
+	PT_CAP(cycle_thresholds,	1, CPUID_EBX, 0xffff),
+	PT_CAP(psb_periods,		1, CPUID_EBX, 0xffff0000),
 };
 
 static u32 pt_cap_get(enum pt_capabilities cap)
@@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void)
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
 		cpuid_count(20, i,
-			    &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
-			    &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
-			    &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
-			    &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
+			    &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
+			    &pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM],
+			    &pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM],
+			    &pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]);
 	}
 
 	ret = -ENOMEM;

+ 3 - 0
arch/x86/include/asm/cpufeatures.h

@@ -227,6 +227,7 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
@@ -280,8 +281,10 @@
 #define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
 #define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_RDPID	(16*32+ 22) /* RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */

+ 0 - 1
arch/x86/include/asm/kdebug.h

@@ -21,7 +21,6 @@ enum die_val {
 	DIE_NMIUNKNOWN,
 };
 
-extern void printk_address(unsigned long address);
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);

+ 14 - 0
arch/x86/include/asm/processor.h

@@ -137,6 +137,17 @@ struct cpuinfo_x86 {
 	u32			microcode;
 };
 
+struct cpuid_regs {
+	u32 eax, ebx, ecx, edx;
+};
+
+enum cpuid_regs_idx {
+	CPUID_EAX = 0,
+	CPUID_EBX,
+	CPUID_ECX,
+	CPUID_EDX,
+};
+
 #define X86_VENDOR_INTEL	0
 #define X86_VENDOR_CYRIX	1
 #define X86_VENDOR_AMD		2
@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 void print_cpu_msr(struct cpuinfo_x86 *);
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+				    unsigned int sub_leaf,
+				    enum cpuid_regs_idx reg);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
 

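The new struct cpuid_regs pairs naturally with cpuid_count(); a hypothetical caller (illustrative, not taken from this commit) reading leaf 7, subleaf 0:

	struct cpuid_regs r = { 0, 0, 0, 0 };

	/* fill all four registers of CPUID.(EAX=7,ECX=0) in one call */
	cpuid_count(0x00000007, 0, &r.eax, &r.ebx, &r.ecx, &r.edx);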
+ 1 - 7
arch/x86/include/asm/stacktrace.h

@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
 int get_stack_info(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info, unsigned long *visit_mask);
 
-void stack_type_str(enum stack_type type, const char **begin,
-		    const char **end);
+const char *stack_type_name(enum stack_type type);
 
 static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 {
@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 		addr + len > begin && addr + len <= end);
 }
 
-extern int kstack_depth_to_print;
-
 #ifdef CONFIG_X86_32
 #define STACKSLOTS_PER_LINE 8
 #else
@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			unsigned long *stack, char *log_lvl);
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			unsigned long *sp, char *log_lvl);
-
 extern unsigned int code_bytes;
 
 /* The form of the top of the frame on the stack */

+ 15 - 1
arch/x86/include/asm/unwind.h

@@ -13,6 +13,7 @@ struct unwind_state {
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
 	unsigned long *bp;
+	struct pt_regs *regs;
 #else
 	unsigned long *sp;
 #endif
@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 	if (unwind_done(state))
 		return NULL;
 
-	return state->bp + 1;
+	return state->regs ? &state->regs->ip : state->bp + 1;
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return NULL;
+
+	return state->regs;
 }
 
 #else /* !CONFIG_FRAME_POINTER */
@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
 	return NULL;
 }
 
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_FRAME_POINTER */
 
 #endif /* _ASM_X86_UNWIND_H */

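The new regs field and unwind_get_entry_regs() let a stack dumper interleave register dumps with the call trace; the real consumer is show_trace_log_lvl() in the dumpstack.c diff further down. A condensed sketch of that consuming loop:

	struct unwind_state state;
	struct pt_regs *entry_regs;

	for (unwind_start(&state, task, regs, stack);
	     !unwind_done(&state);
	     unwind_next_frame(&state)) {
		/* non-NULL only for frames carrying an encoded pt_regs */
		entry_regs = unwind_get_entry_regs(&state);
		if (entry_regs)
			__show_regs(entry_regs, 0);
	}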
+ 6 - 1
arch/x86/include/asm/vgtod.h

@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
 	 * works on all CPUs.  This is volatile so that it orders
 	 * correctly wrt barrier() and to keep gcc from cleverly
 	 * hoisting it out of the calling function.
+	 *
+	 * If RDPID is available, use it.
 	 */
-	asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+	alternative_io ("lsl %[p],%[seg]",
+			".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+			X86_FEATURE_RDPID,
+			[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
 
 	return p;
 }

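RDPID (opcode F3 0F C7 /7) reads the IA32_TSC_AUX MSR, which the kernel keeps loaded with the CPU and node numbers, so __getcpu() can skip the slower LSL segment-limit read. The instruction also works from user space; the helper below is illustrative only and will raise SIGILL on hardware without X86_FEATURE_RDPID — the selftest actually added by this series is tools/testing/selftests/x86/test_vdso.c:

	/* Illustrative user-space probe, not part of this commit. */
	static inline unsigned long rdpid(void)
	{
		unsigned long p;

		/* RDPID %rax -- same byte sequence as in __getcpu() above */
		asm volatile(".byte 0xf3, 0x0f, 0xc7, 0xf8" : "=a" (p));
		return p;
	}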
+ 3 - 5
arch/x86/include/uapi/asm/prctl.h

@@ -6,10 +6,8 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
-# define ARCH_MAP_VDSO_X32	0x2001
-# define ARCH_MAP_VDSO_32	0x2002
-# define ARCH_MAP_VDSO_64	0x2003
-#endif
+#define ARCH_MAP_VDSO_X32	0x2001
+#define ARCH_MAP_VDSO_32	0x2002
+#define ARCH_MAP_VDSO_64	0x2003
 
 #endif /* _ASM_X86_PRCTL_H */

+ 38 - 19
arch/x86/kernel/cpu/scattered.c

@@ -17,11 +17,17 @@ struct cpuid_bit {
 	u32 sub_leaf;
 };
 
-enum cpuid_regs {
-	CR_EAX = 0,
-	CR_ECX,
-	CR_EDX,
-	CR_EBX
+/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
+static const struct cpuid_bit cpuid_bits[] = {
+	{ X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
+	{ X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
+	{ X86_FEATURE_INTEL_PT,         CPUID_EBX, 25, 0x00000007, 0 },
+	{ X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
+	{ X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
+	{ X86_FEATURE_HW_PSTATE,        CPUID_EDX,  7, 0x80000007, 0 },
+	{ X86_FEATURE_CPB,              CPUID_EDX,  9, 0x80000007, 0 },
+	{ X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
+	{ 0, 0, 0, 0, 0 }
 };
 
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
@@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 	u32 regs[4];
 	const struct cpuid_bit *cb;
 
-	static const struct cpuid_bit cpuid_bits[] = {
-		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
-		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
-		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
-		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
-		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
-		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
-		{ X86_FEATURE_CPB,		CR_EDX, 9, 0x80000007, 0 },
-		{ X86_FEATURE_PROC_FEEDBACK,	CR_EDX,11, 0x80000007, 0 },
-		{ 0, 0, 0, 0, 0 }
-	};
-
 	for (cb = cpuid_bits; cb->feature; cb++) {
 
 		/* Verify that the level is valid */
@@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 		    max_level > (cb->level | 0xffff))
 			continue;
 
-		cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX],
-			    &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);
+		cpuid_count(cb->level, cb->sub_leaf, &regs[CPUID_EAX],
+			    &regs[CPUID_EBX], &regs[CPUID_ECX],
+			    &regs[CPUID_EDX]);
 
 		if (regs[cb->reg] & (1 << cb->bit))
 			set_cpu_cap(c, cb->feature);
 	}
 }
+
+u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf,
+			     enum cpuid_regs_idx reg)
+{
+	const struct cpuid_bit *cb;
+	u32 cpuid_val = 0;
+
+	for (cb = cpuid_bits; cb->feature; cb++) {
+
+		if (level > cb->level)
+			continue;
+
+		if (level < cb->level)
+			break;
+
+		if (reg == cb->reg && sub_leaf == cb->sub_leaf) {
+			if (cpu_has(&boot_cpu_data, cb->feature))
+				cpuid_val |= BIT(cb->bit);
+		}
+	}
+
+	return cpuid_val;
+}
+EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf);

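get_scattered_cpuid_leaf() reassembles a CPUID register from the scattered feature bits the boot CPU actually has; a hypothetical caller (this usage is an assumption for illustration, not part of the diff):

	/*
	 * Rebuild EDX of CPUID.(EAX=7,ECX=0) from the cpuid_bits[] table
	 * above: reports AVX512_4VNNIW (bit 2) and AVX512_4FMAPS (bit 3)
	 * if and only if the boot CPU supports them.
	 */
	u32 leaf7_edx = get_scattered_cpuid_leaf(0x00000007, 0, CPUID_EDX);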
+ 0 - 4
arch/x86/kernel/cpuid.c

@@ -46,10 +46,6 @@
 
 static struct class *cpuid_class;
 
-struct cpuid_regs {
-	u32 eax, ebx, ecx, edx;
-};
-
 static void cpuid_smp_cpuid(void *cmd_block)
 {
 	struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;

+ 28 - 40
arch/x86/kernel/dumpstack.c

@@ -22,7 +22,6 @@
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
 unsigned int code_bytes = 64;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
@@ -46,14 +45,7 @@ static void printk_stack_address(unsigned long address, int reliable,
 				 char *log_lvl)
 {
 	touch_nmi_watchdog();
-	printk("%s [<%p>] %s%pB\n",
-		log_lvl, (void *)address, reliable ? "" : "? ",
-		(void *)address);
-}
-
-void printk_address(unsigned long address)
-{
-	pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
+	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
@@ -67,6 +59,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	printk("%sCall Trace:\n", log_lvl);
 
 	unwind_start(&state, task, regs, stack);
+	stack = stack ? : get_stack_pointer(task, regs);
 
 	/*
 	 * Iterate through the stacks, starting with the current stack pointer.
@@ -82,8 +75,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	 * - softirq stack
 	 * - hardirq stack
 	 */
-	for (; stack; stack = stack_info.next_sp) {
-		const char *str_begin, *str_end;
+	for (regs = NULL; stack; stack = stack_info.next_sp) {
+		const char *stack_name;
 
 		/*
 		 * If we overflowed the task stack into a guard page, jump back
@@ -95,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (get_stack_info(stack, task, &stack_info, &visit_mask))
 			break;
 
-		stack_type_str(stack_info.type, &str_begin, &str_end);
-		if (str_begin)
-			printk("%s <%s> ", log_lvl, str_begin);
+		stack_name = stack_type_name(stack_info.type);
+		if (stack_name)
+			printk("%s <%s>\n", log_lvl, stack_name);
 
 		/*
 		 * Scan the stack, printing any text addresses we find.  At the
@@ -119,6 +112,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			if (!__kernel_text_address(addr))
 				continue;
 
+			/*
+			 * Don't print regs->ip again if it was already printed
+			 * by __show_regs() below.
+			 */
+			if (regs && stack == &regs->ip) {
+				unwind_next_frame(&state);
+				continue;
+			}
+
 			if (stack == ret_addr_p)
 				reliable = 1;
 
@@ -146,10 +148,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			 * of the addresses will just be printed as unreliable.
 			 */
 			unwind_next_frame(&state);
+
+			/* if the frame has entry regs, print them */
+			regs = unwind_get_entry_regs(&state);
+			if (regs)
+				__show_regs(regs, 0);
 		}
 
-		if (str_end)
-			printk("%s <%s> ", log_lvl, str_end);
+		if (stack_name)
+			printk("%s </%s>\n", log_lvl, stack_name);
 	}
 }
 
@@ -164,12 +171,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 	if (!sp && task == current)
 		sp = get_stack_pointer(current, NULL);
 
-	show_stack_log_lvl(task, NULL, sp, "");
+	show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-	show_stack_log_lvl(current, regs, NULL, "");
+	show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -261,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
 		sp = kernel_stack_pointer(regs);
 		savesegment(ss, ss);
 	}
-	printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
-	print_symbol("%s", regs->ip);
-	printk(" SS:ESP %04x:%08lx\n", ss, sp);
+	printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
+	       (void *)regs->ip, ss, sp);
 #else
 	/* Executive summary in case the oops scrolled away */
-	printk(KERN_ALERT "RIP ");
-	printk_address(regs->ip);
-	printk(" RSP <%016lx>\n", regs->sp);
+	printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
 #endif
 	return 0;
 }
@@ -291,22 +295,6 @@ void die(const char *str, struct pt_regs *regs, long err)
 	oops_end(flags, regs, sig);
 }
 
-static int __init kstack_setup(char *s)
-{
-	ssize_t ret;
-	unsigned long val;
-
-	if (!s)
-		return -EINVAL;
-
-	ret = kstrtoul(s, 0, &val);
-	if (ret)
-		return ret;
-	kstack_depth_to_print = val;
-	return 0;
-}
-early_param("kstack", kstack_setup);
-
 static int __init code_bytes_setup(char *s)
 {
 	ssize_t ret;

+ 12 - 44
arch/x86/kernel/dumpstack_32.c

@@ -16,18 +16,15 @@
 
 #include <asm/stacktrace.h>
 
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
 {
-	switch (type) {
-	case STACK_TYPE_IRQ:
-	case STACK_TYPE_SOFTIRQ:
-		*begin = "IRQ";
-		*end   = "EOI";
-		break;
-	default:
-		*begin = NULL;
-		*end   = NULL;
-	}
+	if (type == STACK_TYPE_IRQ)
+		return "IRQ";
+
+	if (type == STACK_TYPE_SOFTIRQ)
+		return "SOFTIRQ";
+
+	return NULL;
 }
 
 static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
@@ -109,8 +106,10 @@ recursion_check:
 	 * just break out and report an unknown stack type.
 	 */
 	if (visit_mask) {
-		if (*visit_mask & (1UL << info->type))
+		if (*visit_mask & (1UL << info->type)) {
+			printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
 			goto unknown;
+		}
 		*visit_mask |= 1UL << info->type;
 	}
 
@@ -121,36 +120,6 @@ unknown:
 	return -EINVAL;
 }
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			unsigned long *sp, char *log_lvl)
-{
-	unsigned long *stack;
-	int i;
-
-	if (!try_get_task_stack(task))
-		return;
-
-	sp = sp ? : get_stack_pointer(task, regs);
-
-	stack = sp;
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (kstack_end(stack))
-			break;
-		if ((i % STACKSLOTS_PER_LINE) == 0) {
-			if (i != 0)
-				pr_cont("\n");
-			printk("%s %08lx", log_lvl, *stack++);
-		} else
-			pr_cont(" %08lx", *stack++);
-		touch_nmi_watchdog();
-	}
-	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, log_lvl);
-
-	put_task_stack(task);
-}
-
-
 void show_regs(struct pt_regs *regs)
 {
 	int i;
@@ -168,8 +137,7 @@ void show_regs(struct pt_regs *regs)
 		unsigned char c;
 		u8 *ip;
 
-		pr_emerg("Stack:\n");
-		show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
+		show_trace_log_lvl(current, regs, NULL, KERN_EMERG);
 
 		pr_emerg("Code:");
 

+ 12 - 67
arch/x86/kernel/dumpstack_64.c

@@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
 	[DEBUG_STACK - 1]			= DEBUG_STKSZ
 };
 
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
 {
 	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
 
-	switch (type) {
-	case STACK_TYPE_IRQ:
-		*begin = "IRQ";
-		*end   = "EOI";
-		break;
-	case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
-		*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
-		*end   = "EOE";
-		break;
-	default:
-		*begin = NULL;
-		*end   = NULL;
-	}
+	if (type == STACK_TYPE_IRQ)
+		return "IRQ";
+
+	if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+		return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+
+	return NULL;
 }
 
 static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
@@ -128,8 +122,10 @@ recursion_check:
 	 * just break out and report an unknown stack type.
 	 */
 	if (visit_mask) {
-		if (*visit_mask & (1UL << info->type))
+		if (*visit_mask & (1UL << info->type)) {
+			printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
 			goto unknown;
+		}
 		*visit_mask |= 1UL << info->type;
 	}
 
@@ -140,56 +136,6 @@ unknown:
 	return -EINVAL;
 }
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			unsigned long *sp, char *log_lvl)
-{
-	unsigned long *irq_stack_end;
-	unsigned long *irq_stack;
-	unsigned long *stack;
-	int i;
-
-	if (!try_get_task_stack(task))
-		return;
-
-	irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
-	irq_stack     = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
-
-	sp = sp ? : get_stack_pointer(task, regs);
-
-	stack = sp;
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		unsigned long word;
-
-		if (stack >= irq_stack && stack <= irq_stack_end) {
-			if (stack == irq_stack_end) {
-				stack = (unsigned long *) (irq_stack_end[-1]);
-				pr_cont(" <EOI> ");
-			}
-		} else {
-		if (kstack_end(stack))
-			break;
-		}
-
-		if (probe_kernel_address(stack, word))
-			break;
-
-		if ((i % STACKSLOTS_PER_LINE) == 0) {
-			if (i != 0)
-				pr_cont("\n");
-			printk("%s %016lx", log_lvl, word);
-		} else
-			pr_cont(" %016lx", word);
-
-		stack++;
-		touch_nmi_watchdog();
-	}
-
-	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, log_lvl);
-
-	put_task_stack(task);
-}
-
 void show_regs(struct pt_regs *regs)
 {
 	int i;
@@ -207,8 +153,7 @@ void show_regs(struct pt_regs *regs)
 		unsigned char c;
 		u8 *ip;
 
-		printk(KERN_DEFAULT "Stack:\n");
-		show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
+		show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
 
 		printk(KERN_DEFAULT "Code: ");
 

+ 2 - 0
arch/x86/kernel/fpu/xstate.c

@@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_AVX);
 	setup_clear_cpu_cap(X86_FEATURE_AVX2);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
@@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);

+ 29 - 20
arch/x86/kernel/head_32.S

@@ -63,6 +63,8 @@
 #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
 #endif
 
+#define SIZEOF_PTREGS 17*4
+
 /*
  * Number of possible pages in the lowmem region.
  *
@@ -248,19 +250,19 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 #ifdef CONFIG_PARAVIRT
 	/* This is can only trip for a broken bootloader... */
 	cmpw $0x207, pa(boot_params + BP_version)
-	jb default_entry
+	jb .Ldefault_entry
 
 	/* Paravirt-compatible boot parameters.  Look to see what architecture
 		we're booting under. */
 	movl pa(boot_params + BP_hardware_subarch), %eax
 	cmpl $num_subarch_entries, %eax
-	jae bad_subarch
+	jae .Lbad_subarch
 
 	movl pa(subarch_entries)(,%eax,4), %eax
 	subl $__PAGE_OFFSET, %eax
 	jmp *%eax
 
-bad_subarch:
+.Lbad_subarch:
 WEAK(lguest_entry)
 WEAK(xen_entry)
 	/* Unknown implementation; there's really
@@ -270,14 +272,14 @@ WEAK(xen_entry)
 	__INITDATA
 
 subarch_entries:
-	.long default_entry		/* normal x86/PC */
+	.long .Ldefault_entry		/* normal x86/PC */
 	.long lguest_entry		/* lguest hypervisor */
 	.long xen_entry			/* Xen hypervisor */
-	.long default_entry		/* Moorestown MID */
+	.long .Ldefault_entry		/* Moorestown MID */
 num_subarch_entries = (. - subarch_entries) / 4
 .previous
 #else
-	jmp default_entry
+	jmp .Ldefault_entry
 #endif /* CONFIG_PARAVIRT */
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -289,7 +291,8 @@ num_subarch_entries = (. - subarch_entries) / 4
 ENTRY(start_cpu0)
 	movl initial_stack, %ecx
 	movl %ecx, %esp
-	jmp  *(initial_code)
+	call *(initial_code)
+1:	jmp 1b
 ENDPROC(start_cpu0)
 #endif
 
@@ -317,7 +320,7 @@ ENTRY(startup_32_smp)
 	call load_ucode_ap
 #endif
 
-default_entry:
+.Ldefault_entry:
 #define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
 			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
 			 X86_CR0_PG)
@@ -347,7 +350,7 @@ default_entry:
 	pushfl
 	popl %eax			# get EFLAGS
 	testl $X86_EFLAGS_ID,%eax	# did EFLAGS.ID remained set?
-	jz enable_paging		# hw disallowed setting of ID bit
+	jz .Lenable_paging		# hw disallowed setting of ID bit
 					# which means no CPUID and no CR4
 
 	xorl %eax,%eax
@@ -357,13 +360,13 @@ default_entry:
 	movl $1,%eax
 	cpuid
 	andl $~1,%edx			# Ignore CPUID.FPU
-	jz enable_paging		# No flags or only CPUID.FPU = no CR4
+	jz .Lenable_paging		# No flags or only CPUID.FPU = no CR4
 
 	movl pa(mmu_cr4_features),%eax
 	movl %eax,%cr4
 
 	testb $X86_CR4_PAE, %al		# check if PAE is enabled
-	jz enable_paging
+	jz .Lenable_paging
 
 	/* Check if extended functions are implemented */
 	movl $0x80000000, %eax
@@ -371,7 +374,7 @@ default_entry:
 	/* Value must be in the range 0x80000001 to 0x8000ffff */
 	subl $0x80000001, %eax
 	cmpl $(0x8000ffff-0x80000001), %eax
-	ja enable_paging
+	ja .Lenable_paging
 
 	/* Clear bogus XD_DISABLE bits */
 	call verify_cpu
@@ -380,7 +383,7 @@ default_entry:
 	cpuid
 	/* Execute Disable bit supported? */
 	btl $(X86_FEATURE_NX & 31), %edx
-	jnc enable_paging
+	jnc .Lenable_paging
 
 	/* Setup EFER (Extended Feature Enable Register) */
 	movl $MSR_EFER, %ecx
@@ -390,7 +393,7 @@ default_entry:
 	/* Make changes effective */
 	wrmsr
 
-enable_paging:
+.Lenable_paging:
 
 /*
  * Enable paging
@@ -419,7 +422,7 @@ enable_paging:
  */
 	movb $4,X86			# at least 486
 	cmpl $-1,X86_CPUID
-	je is486
+	je .Lis486
 
 	/* get vendor info */
 	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
@@ -430,7 +433,7 @@ enable_paging:
 	movl %ecx,X86_VENDOR_ID+8	# last 4 chars
 
 	orl %eax,%eax			# do we have processor info as well?
-	je is486
+	je .Lis486
 
 	movl $1,%eax		# Use the CPUID instruction to get CPU type
 	cpuid
@@ -444,7 +447,7 @@ enable_paging:
 	movb %cl,X86_MASK
 	movl %edx,X86_CAPABILITY
 
-is486:
+.Lis486:
 	movl $0x50022,%ecx	# set AM, WP, NE and MP
 	movl %cr0,%eax
 	andl $0x80000011,%eax	# Save PG,PE,ET
@@ -470,8 +473,9 @@ is486:
 	xorl %eax,%eax			# Clear LDT
 	lldt %ax
 
-	pushl $0		# fake return address for unwinder
-	jmp *(initial_code)
+	call *(initial_code)
+1:	jmp 1b
+ENDPROC(startup_32_smp)
 
 #include "verify_cpu.S"
 
@@ -709,7 +713,12 @@ ENTRY(initial_page_table)
 .data
 .balign 4
 ENTRY(initial_stack)
-	.long init_thread_union+THREAD_SIZE
+	/*
+	 * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+	 * unwinder reliably detect the end of the stack.
+	 */
+	.long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
+	      TOP_OF_KERNEL_STACK_PADDING;
 
 __INITRODATA
 int_msg:

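The SIZEOF_PTREGS comments above describe a convention rather than an API, so a compact illustration may help. The following userspace sketch uses hypothetical constants, and TOP_OF_KERNEL_STACK_PADDING is assumed to be zero; it shows how reserving a pt_regs-sized gap at the top of the stack gives the frame-pointer unwinder a recognizable last frame, matching the bp == regs - FRAME_HEADER_SIZE test in arch/x86/kernel/unwind_frame.c further down.

	#include <stdio.h>

	#define THREAD_SIZE		(8 * 1024)	/* assumption for illustration */
	#define SIZEOF_PTREGS		(17 * 4)	/* x86_32: 17 stack slots */
	#define FRAME_HEADER_SIZE	(2 * 4)		/* saved %ebp + return address */

	int main(void)
	{
		unsigned long stack_page = 0x10000;	/* hypothetical stack base */
		unsigned long stack_top  = stack_page + THREAD_SIZE;

		/* initial_stack leaves a pt_regs-sized gap below the stack top: */
		unsigned long initial_sp = stack_top - SIZEOF_PTREGS;

		/*
		 * task_pt_regs() can therefore be derived from the stack top, and
		 * the unwinder treats the frame sitting directly below that regs
		 * area as the last one (see is_last_task_frame() below):
		 */
		unsigned long regs    = stack_top - SIZEOF_PTREGS;
		unsigned long last_bp = regs - FRAME_HEADER_SIZE;

		printf("sp=%#lx regs=%#lx last bp=%#lx\n", initial_sp, regs, last_bp);
		return 0;
	}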
+ 26 - 26
arch/x86/kernel/head_64.S

@@ -66,13 +66,8 @@ startup_64:
 	 * tables and then reload them.
 	 */
 
-	/*
-	 * Setup stack for verify_cpu(). "-8" because initial_stack is defined
-	 * this way, see below. Our best guess is a NULL ptr for stack
-	 * termination heuristics and we don't want to break anything which
-	 * might depend on it (kgdb, ...).
-	 */
-	leaq	(__end_init_task - 8)(%rip), %rsp
+	/* Set up the stack for verify_cpu(), similar to initial_stack below */
+	leaq	(__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
 
 	/* Sanitize CPU configuration */
 	call verify_cpu
@@ -117,20 +112,20 @@ startup_64:
 	movq	%rdi, %rax
 	shrq	$PGDIR_SHIFT, %rax
 
-	leaq	(4096 + _KERNPG_TABLE)(%rbx), %rdx
+	leaq	(PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
 	movq	%rdx, 0(%rbx,%rax,8)
 	movq	%rdx, 8(%rbx,%rax,8)
 
-	addq	$4096, %rdx
+	addq	$PAGE_SIZE, %rdx
 	movq	%rdi, %rax
 	shrq	$PUD_SHIFT, %rax
 	andl	$(PTRS_PER_PUD-1), %eax
-	movq	%rdx, 4096(%rbx,%rax,8)
+	movq	%rdx, PAGE_SIZE(%rbx,%rax,8)
 	incl	%eax
 	andl	$(PTRS_PER_PUD-1), %eax
-	movq	%rdx, 4096(%rbx,%rax,8)
+	movq	%rdx, PAGE_SIZE(%rbx,%rax,8)
 
-	addq	$8192, %rbx
+	addq	$PAGE_SIZE * 2, %rbx
 	movq	%rdi, %rax
 	shrq	$PMD_SHIFT, %rdi
 	addq	$(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
@@ -265,13 +260,17 @@ ENTRY(secondary_startup_64)
 	movl	$MSR_GS_BASE,%ecx
 	movl	initial_gs(%rip),%eax
 	movl	initial_gs+4(%rip),%edx
-	wrmsr	
+	wrmsr
 
 	/* rsi is pointer to real mode structure with interesting info.
 	   pass it to C */
 	movq	%rsi, %rdi
-	
-	/* Finally jump to run C code and to be on real kernel address
+	jmp	start_cpu
+ENDPROC(secondary_startup_64)
+
+ENTRY(start_cpu)
+	/*
+	 * Jump to run C code and to be on a real kernel address.
 	 * Since we are running on identity-mapped space we have to jump
 	 * to the full 64bit address, this is only possible as indirect
 	 * jump.  In addition we need to ensure %cs is set so we make this
@@ -295,12 +294,13 @@ ENTRY(secondary_startup_64)
 	 *	REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
 	 *		address given in m16:64.
 	 */
-	movq	initial_code(%rip),%rax
-	pushq	$0		# fake return address to stop unwinder
+	call	1f		# put return address on stack for unwinder
+1:	xorq	%rbp, %rbp	# clear frame pointer
+	movq	initial_code(%rip), %rax
 	pushq	$__KERNEL_CS	# set correct cs
 	pushq	%rax		# target address in negative space
 	lretq
-ENDPROC(secondary_startup_64)
+ENDPROC(start_cpu)
 
 #include "verify_cpu.S"
 
@@ -308,15 +308,11 @@ ENDPROC(secondary_startup_64)
 /*
  * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
  * up already except stack. We just set up stack here. Then call
- * start_secondary().
+ * start_secondary() via start_cpu().
 */
 ENTRY(start_cpu0)
-	movq initial_stack(%rip),%rsp
-	movq	initial_code(%rip),%rax
-	pushq	$0		# fake return address to stop unwinder
-	pushq	$__KERNEL_CS	# set correct cs
-	pushq	%rax		# target address in negative space
-	lretq
+	movq	initial_stack(%rip), %rsp
+	jmp	start_cpu
 ENDPROC(start_cpu0)
 #endif
 
@@ -328,7 +324,11 @@ ENDPROC(start_cpu0)
 	GLOBAL(initial_gs)
 	.quad	INIT_PER_CPU_VAR(irq_stack_union)
 	GLOBAL(initial_stack)
-	.quad  init_thread_union+THREAD_SIZE-8
+	/*
+	 * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+	 * unwinder reliably detect the end of the stack.
+	 */
+	.quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
 	__FINITDATA
 
 bad_address:

+ 3 - 4
arch/x86/kernel/process_32.c

@@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all)
 		savesegment(gs, gs);
 	}
 
-	printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
-			(u16)regs->cs, regs->ip, regs->flags,
-			smp_processor_id());
-	print_symbol("EIP is at %s\n", regs->ip);
+	printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
+	printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
+		smp_processor_id());
 
 	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->ax, regs->bx, regs->cx, regs->dx);

+ 9 - 4
arch/x86/kernel/process_64.c

@@ -61,10 +61,15 @@ void __show_regs(struct pt_regs *regs, int all)
 	unsigned int fsindex, gsindex;
 	unsigned int ds, cs, es;
 
-	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
-	printk_address(regs->ip);
-	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
-			regs->sp, regs->flags);
+	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
+		(void *)regs->ip);
+	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
+		regs->sp, regs->flags);
+	if (regs->orig_ax != -1)
+		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+	else
+		pr_cont("\n");
+
 	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->ax, regs->bx, regs->cx);
 	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",

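The %pS conversions in this and the neighboring hunks replace the old printk_address()/print_symbol() helpers. As a reminder of what the specifier does, here is a minimal, hypothetical module sketch (the pS_demo names are made up); %pS prints a text address as symbol+offset/size:

	#include <linux/module.h>
	#include <linux/kernel.h>

	static int __init pS_demo_init(void)
	{
		/* prints something like "IP: pS_demo_init+0x0/0x20 [ps_demo]" */
		printk(KERN_INFO "IP: %pS\n", (void *)pS_demo_init);
		return 0;
	}

	static void __exit pS_demo_exit(void)
	{
	}

	module_init(pS_demo_init);
	module_exit(pS_demo_exit);
	MODULE_LICENSE("GPL");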
+ 1 - 3
arch/x86/kernel/smpboot.c

@@ -987,9 +987,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	int cpu0_nmi_registered = 0;
 	unsigned long timeout;
 
-	idle->thread.sp = (unsigned long) (((struct pt_regs *)
-			  (THREAD_SIZE +  task_stack_page(idle))) - 1);
-
+	idle->thread.sp = (unsigned long)task_pt_regs(idle);
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;

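The smpboot.c change swaps an open-coded expression for the task_pt_regs() helper. A standalone sketch (simplified userspace stand-ins; the real helper also subtracts TOP_OF_KERNEL_STACK_PADDING, which is zero on x86_64) shows that the two forms compute the same address:

	#include <assert.h>

	#define THREAD_SIZE (16 * 1024)			/* assumption */
	struct pt_regs { unsigned long slot[21]; };	/* assumed x86_64 layout */

	static char stack[THREAD_SIZE];			/* task_stack_page(idle) */

	int main(void)
	{
		struct pt_regs *open_coded =
			((struct pt_regs *)((char *)stack + THREAD_SIZE)) - 1;
		struct pt_regs *helper = (struct pt_regs *)
			(stack + THREAD_SIZE - sizeof(struct pt_regs));

		assert(open_coded == helper);	/* same address, clearer intent */
		return 0;
	}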
+ 152 - 9
arch/x86/kernel/unwind_frame.c

@@ -14,13 +14,55 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 	if (unwind_done(state))
 		return 0;
 
+	if (state->regs && user_mode(state->regs))
+		return 0;
+
 	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
 				     addr_p);
 
-	return __kernel_text_address(addr) ? addr : 0;
+	if (!__kernel_text_address(addr)) {
+		printk_deferred_once(KERN_WARNING
+			"WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
+			(void *)addr, addr_p, state->task->comm,
+			state->task->pid);
+		return 0;
+	}
+
+	return addr;
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+static size_t regs_size(struct pt_regs *regs)
+{
+	/* x86_32 regs from kernel mode are two words shorter: */
+	if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
+		return sizeof(*regs) - 2*sizeof(long);
+
+	return sizeof(*regs);
+}
+
+static bool is_last_task_frame(struct unwind_state *state)
+{
+	unsigned long bp = (unsigned long)state->bp;
+	unsigned long regs = (unsigned long)task_pt_regs(state->task);
+
+	return bp == regs - FRAME_HEADER_SIZE;
+}
+
+/*
+ * This determines if the frame pointer actually contains an encoded pointer to
+ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
+ */
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+	unsigned long regs = (unsigned long)bp;
+
+	if (!(regs & 0x1))
+		return NULL;
+
+	return (struct pt_regs *)(regs & ~0x1);
+}
+
 static bool update_stack_state(struct unwind_state *state, void *addr,
 			       size_t len)
 {
@@ -43,26 +85,117 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 
 bool unwind_next_frame(struct unwind_state *state)
 {
-	unsigned long *next_bp;
+	struct pt_regs *regs;
+	unsigned long *next_bp, *next_frame;
+	size_t next_len;
+	enum stack_type prev_type = state->stack_info.type;
 
 	if (unwind_done(state))
 		return false;
 
-	next_bp = (unsigned long *)*state->bp;
+	/* have we reached the end? */
+	if (state->regs && user_mode(state->regs))
+		goto the_end;
+
+	if (is_last_task_frame(state)) {
+		regs = task_pt_regs(state->task);
+
+		/*
+		 * kthreads (other than the boot CPU's idle thread) have some
+		 * partial regs at the end of their stack which were placed
+		 * there by copy_thread_tls().  But the regs don't have any
+		 * useful information, so we can skip them.
+		 *
+		 * This user_mode() check is slightly broader than a PF_KTHREAD
+		 * check because it also catches the awkward situation where a
+		 * newly forked kthread transitions into a user task by calling
+		 * do_execve(), which eventually clears PF_KTHREAD.
+		 */
+		if (!user_mode(regs))
+			goto the_end;
+
+		/*
+		 * We're almost at the end, but not quite: there's still the
+		 * syscall regs frame.  Entry code doesn't encode the regs
+		 * pointer for syscalls, so we have to set it manually.
+		 */
+		state->regs = regs;
+		state->bp = NULL;
+		return true;
+	}
+
+	/* get the next frame pointer */
+	if (state->regs)
+		next_bp = (unsigned long *)state->regs->bp;
+	else
+		next_bp = (unsigned long *)*state->bp;
+
+	/* is the next frame pointer an encoded pointer to pt_regs? */
+	regs = decode_frame_pointer(next_bp);
+	if (regs) {
+		next_frame = (unsigned long *)regs;
+		next_len = sizeof(*regs);
+	} else {
+		next_frame = next_bp;
+		next_len = FRAME_HEADER_SIZE;
+	}
 
 	/* make sure the next frame's data is accessible */
-	if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
-		return false;
+	if (!update_stack_state(state, next_frame, next_len)) {
+		/*
+		 * Don't warn on bad regs->bp.  An interrupt in entry code
+		 * might cause a false positive warning.
+		 */
+		if (state->regs)
+			goto the_end;
+
+		goto bad_address;
+	}
+
+	/* Make sure it only unwinds up and doesn't overlap the last frame: */
+	if (state->stack_info.type == prev_type) {
+		if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
+			goto bad_address;
+
+		if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
+			goto bad_address;
+	}
 
 	/* move to the next frame */
-	state->bp = next_bp;
+	if (regs) {
+		state->regs = regs;
+		state->bp = NULL;
+	} else {
+		state->bp = next_bp;
+		state->regs = NULL;
+	}
+
 	return true;
+
+bad_address:
+	if (state->regs) {
+		printk_deferred_once(KERN_WARNING
+			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
+			state->regs, state->task->comm,
+			state->task->pid, next_frame);
+	} else {
+		printk_deferred_once(KERN_WARNING
+			"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
			state->bp, state->task->comm,
+			state->task->pid, next_frame);
+	}
+the_end:
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		    struct pt_regs *regs, unsigned long *first_frame)
 {
+	unsigned long *bp, *frame;
+	size_t len;
+
 	memset(state, 0, sizeof(*state));
 	state->task = task;
 
@@ -73,12 +206,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	}
 
 	/* set up the starting stack frame */
-	state->bp = get_frame_pointer(task, regs);
+	bp = get_frame_pointer(task, regs);
+	regs = decode_frame_pointer(bp);
+	if (regs) {
+		state->regs = regs;
+		frame = (unsigned long *)regs;
+		len = sizeof(*regs);
+	} else {
+		state->bp = bp;
+		frame = bp;
+		len = FRAME_HEADER_SIZE;
+	}
 
 	/* initialize stack info and make sure the frame data is accessible */
-	get_stack_info(state->bp, state->task, &state->stack_info,
+	get_stack_info(frame, state->task, &state->stack_info,
 		       &state->stack_mask);
-	update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+	update_stack_state(state, frame, len);
 
 	/*
 	 * The caller can provide the address of the first frame directly

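decode_frame_pointer() above relies on the entry code tagging the low bit of an otherwise word-aligned pointer (see ENCODE_FRAME_POINTER). A self-contained sketch of that round trip, with a hypothetical encode helper standing in for the entry-code macro and a minimal stand-in pt_regs:

	#include <assert.h>
	#include <stdint.h>

	struct pt_regs { unsigned long bp, ip; };	/* assumed minimal layout */

	/* Mirrors the ENCODE_FRAME_POINTER idea: set bit 0 of the regs pointer. */
	static unsigned long *encode_frame_pointer(struct pt_regs *regs)
	{
		return (unsigned long *)((uintptr_t)regs | 0x1);
	}

	static struct pt_regs *decode_frame_pointer(unsigned long *bp)
	{
		uintptr_t regs = (uintptr_t)bp;

		if (!(regs & 0x1))
			return NULL;			/* ordinary frame pointer */

		return (struct pt_regs *)(regs & ~(uintptr_t)0x1);
	}

	int main(void)
	{
		struct pt_regs regs = { 0, 0 };
		unsigned long *bp = encode_frame_pointer(&regs);

		assert(decode_frame_pointer(bp) == &regs);
		assert(decode_frame_pointer((unsigned long *)&regs) == NULL);
		return 0;
	}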
+ 1 - 1
arch/x86/kernel/vmlinux.lds.S

@@ -91,10 +91,10 @@ SECTIONS
 	/* Text and read-only data */
 	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
 		_text = .;
+		_stext = .;
 		/* bootstrapping code */
 		HEAD_TEXT
 		. = ALIGN(8);
-		_stext = .;
 		TEXT_TEXT
 		SCHED_TEXT
 		CPUIDLE_TEXT

+ 0 - 47
arch/x86/lib/copy_user_64.S

@@ -16,53 +16,6 @@
 #include <asm/smap.h>
 #include <asm/export.h>
 
-/* Standard copy_to_user with segment limit checking */
-ENTRY(_copy_to_user)
-	mov PER_CPU_VAR(current_task), %rax
-	movq %rdi,%rcx
-	addq %rdx,%rcx
-	jc bad_to_user
-	cmpq TASK_addr_limit(%rax),%rcx
-	ja bad_to_user
-	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
-		      "jmp copy_user_generic_string",		\
-		      X86_FEATURE_REP_GOOD,			\
-		      "jmp copy_user_enhanced_fast_string",	\
-		      X86_FEATURE_ERMS
-ENDPROC(_copy_to_user)
-EXPORT_SYMBOL(_copy_to_user)
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(_copy_from_user)
-	mov PER_CPU_VAR(current_task), %rax
-	movq %rsi,%rcx
-	addq %rdx,%rcx
-	jc bad_from_user
-	cmpq TASK_addr_limit(%rax),%rcx
-	ja bad_from_user
-	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
-		      "jmp copy_user_generic_string",		\
-		      X86_FEATURE_REP_GOOD,			\
-		      "jmp copy_user_enhanced_fast_string",	\
-		      X86_FEATURE_ERMS
-ENDPROC(_copy_from_user)
-EXPORT_SYMBOL(_copy_from_user)
-
-
-	.section .fixup,"ax"
-	/* must zero dest */
-ENTRY(bad_from_user)
-bad_from_user:
-	movl %edx,%ecx
-	xorl %eax,%eax
-	rep
-	stosb
-bad_to_user:
-	movl %edx,%eax
-	ret
-ENDPROC(bad_from_user)
-	.previous
-
 /*
  * copy_user_generic_unrolled - memory copy with exception handling.
  * This version is for CPUs like P4 that don't have efficient micro

+ 49 - 0
arch/x86/lib/usercopy.c

@@ -34,3 +34,52 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		n = __copy_to_user(to, from, n);
+	return n;
+}
+EXPORT_SYMBOL(_copy_to_user);
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		n = __copy_from_user(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+EXPORT_SYMBOL(_copy_from_user);

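For context, a typical caller of the unified helpers looks like the hypothetical character-device write handler below (demo_write and demo_buf are made-up names). A nonzero return from copy_from_user() is the count of bytes left uncopied, which callers conventionally turn into -EFAULT:

	#include <linux/fs.h>
	#include <linux/uaccess.h>

	#define DEMO_BUF_SIZE 64
	static char demo_buf[DEMO_BUF_SIZE];

	static ssize_t demo_write(struct file *file, const char __user *ubuf,
				  size_t count, loff_t *ppos)
	{
		if (count > DEMO_BUF_SIZE)
			count = DEMO_BUF_SIZE;

		/* access_ok() and the zero-padding fallback happen inside: */
		if (copy_from_user(demo_buf, ubuf, count))
			return -EFAULT;

		return count;
	}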
+ 0 - 49
arch/x86/lib/usercopy_32.c

@@ -640,52 +640,3 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
 	return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
-
-/**
- * copy_to_user: - Copy a block of data into user space.
- * @to:   Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * Copy data from kernel space to user space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
-{
-	if (access_ok(VERIFY_WRITE, to, n))
-		n = __copy_to_user(to, from, n);
-	return n;
-}
-EXPORT_SYMBOL(_copy_to_user);
-
-/**
- * copy_from_user: - Copy a block of data from user space.
- * @to:   Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * Copy data from user space to kernel space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
-{
-	if (access_ok(VERIFY_READ, from, n))
-		n = __copy_from_user(to, from, n);
-	else
-		memset(to, 0, n);
-	return n;
-}
-EXPORT_SYMBOL(_copy_from_user);

+ 1 - 2
arch/x86/mm/fault.c

@@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		printk(KERN_CONT "paging request");
 
 	printk(KERN_CONT " at %p\n", (void *) address);
-	printk(KERN_ALERT "IP:");
-	printk_address(regs->ip);
+	printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
 
 	dump_pagetable(address);
 }

+ 2 - 2
arch/x86/platform/uv/uv_nmi.c

@@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void)
 /* Dump Instruction Pointer info */
 static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
 {
-	pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
-	printk_address(regs->ip);
+	pr_info("UV: %4d %6d %-32.32s %pS",
+		cpu, current->pid, current->comm, (void *)regs->ip);
 }
 
 /*

+ 2 - 1
arch/x86/tools/insn_sanity.c

@@ -269,7 +269,8 @@ int main(int argc, char **argv)
 		insns++;
 	}
 
-	fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
+	fprintf((errors) ? stderr : stdout,
+		"%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
 		prog,
 		(errors) ? "Failure" : "Success",
 		insns,

+ 1 - 1
arch/x86/tools/test_get_len.c

@@ -167,7 +167,7 @@ int main(int argc, char **argv)
 		fprintf(stderr, "Warning: decoded and checked %d"
 			" instructions with %d warnings\n", insns, warnings);
 	else
-		fprintf(stderr, "Succeed: decoded and checked %d"
+		fprintf(stdout, "Success: decoded and checked %d"
 			" instructions\n", insns);
 	return 0;
 }

+ 0 - 7
kernel/sysctl.c

@@ -982,13 +982,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "kstack_depth_to_print",
-		.data		= &kstack_depth_to_print,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "io_delay_type",
 		.data		= &io_delay_type,

+ 2 - 2
mm/page_alloc.c

@@ -6399,8 +6399,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
 	}
 
 	if (pages && s)
-		pr_info("Freeing %s memory: %ldK (%p - %p)\n",
-			s, pages << (PAGE_SHIFT - 10), start, end);
+		pr_info("Freeing %s memory: %ldK\n",
+			s, pages << (PAGE_SHIFT - 10));
 
 	return pages;
 }

+ 2 - 1
scripts/decode_stacktrace.sh

@@ -139,7 +139,8 @@ handle_line() {
 
 while read line; do
 	# Let's see if we have an address in the line
-	if [[ $line =~ \[\<([^]]+)\>\]  ]]; then
+	if [[ $line =~ \[\<([^]]+)\>\] ]] ||
+	   [[ $line =~ [^+\ ]+\+0x[0-9a-f]+/0x[0-9a-f]+ ]]; then
 		# Translate address to line numbers
 		handle_line "$line"
 	# Is it a code line?

+ 21 - 12
scripts/faddr2line

@@ -105,9 +105,18 @@ __faddr2line() {
 	# In rare cases there might be duplicates.
 	while read symbol; do
 		local fields=($symbol)
-		local sym_base=0x${fields[1]}
-		local sym_size=${fields[2]}
-		local sym_type=${fields[3]}
+		local sym_base=0x${fields[0]}
+		local sym_type=${fields[1]}
+		local sym_end=0x${fields[3]}
+
+		# calculate the size
+		local sym_size=$(($sym_end - $sym_base))
+		if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then
+			warn "bad symbol size: base: $sym_base end: $sym_end"
+			DONE=1
+			return
+		fi
+		sym_size=0x$(printf %x $sym_size)
 
 		# calculate the address
 		local addr=$(($sym_base + $offset))
@@ -116,26 +125,26 @@ __faddr2line() {
 			DONE=1
 			return
 		fi
-		local hexaddr=0x$(printf %x $addr)
+		addr=0x$(printf %x $addr)
 
 		# weed out non-function symbols
-		if [[ $sym_type != "FUNC" ]]; then
+		if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then
 			[[ $print_warnings = 1 ]] &&
-				echo "skipping $func address at $hexaddr due to non-function symbol"
+				echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'"
 			continue
 		fi
 
 		# if the user provided a size, make sure it matches the symbol's size
 		if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
 			[[ $print_warnings = 1 ]] &&
-				echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)"
+				echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)"
 			continue;
 		fi
 
 		# make sure the provided offset is within the symbol's range
 		if [[ $offset -gt $sym_size ]]; then
 			[[ $print_warnings = 1 ]] &&
-				echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)"
+				echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)"
 			continue
 		fi
 
@@ -143,12 +152,12 @@ __faddr2line() {
 		[[ $FIRST = 0 ]] && echo
 		FIRST=0
 
-		local hexsize=0x$(printf %x $sym_size)
-		echo "$func+$offset/$hexsize:"
-		addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;"
+		# pass real address to addr2line
+		echo "$func+$offset/$sym_size:"
+		addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
 		DONE=1
 
-	done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}')
+	done < <(nm -n $objfile | awk -v fn=$func '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, $1 }')
 }
 
 [[ $# -lt 2 ]] && usage
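The new nm -n pipeline works because nm -n sorts symbols by address, so a symbol's size can be taken as the distance to the next symbol and a reported offset checked against it. A small C sketch of that arithmetic, with made-up addresses:

	#include <stdio.h>

	struct sym { const char *name; unsigned long start; };

	int main(void)
	{
		/* hypothetical symbol table, already sorted as "nm -n" emits it */
		struct sym syms[] = {
			{ "foo", 0x1000 }, { "bar", 0x1080 }, { "baz", 0x1100 },
		};
		unsigned long offset = 0x30;	/* as parsed from "foo+0x30/0x80" */
		int n = sizeof(syms) / sizeof(syms[0]);

		for (int i = 0; i + 1 < n; i++) {
			unsigned long size = syms[i + 1].start - syms[i].start;

			printf("%s: base %#lx size %#lx -> offset %#lx is %s\n",
			       syms[i].name, syms[i].start, size, offset,
			       offset <= size ? "in range" : "out of range");
		}
		return 0;
	}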

+ 1 - 1
tools/testing/selftests/x86/Makefile

@@ -6,7 +6,7 @@ include ../lib.mk
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
 			check_initial_reg_state sigreturn ldt_gdt iopl \
-			protection_keys
+			protection_keys test_vdso
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer

+ 123 - 0
tools/testing/selftests/x86/test_vdso.c

@@ -0,0 +1,123 @@
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+int nerrs = 0;
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+void fill_function_pointers()
+{
+	void *vdso = dlopen("linux-vdso.so.1",
+			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-gate.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("[WARN]\tfailed to find vDSO\n");
+		return;
+	}
+
+	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	if (!vdso_getcpu)
+		printf("Warning: failed to find getcpu in vDSO\n");
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+		       void* cache)
+{
+	return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static void test_getcpu(void)
+{
+	printf("[RUN]\tTesting getcpu...\n");
+
+	for (int cpu = 0; ; cpu++) {
+		cpu_set_t cpuset;
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu, &cpuset);
+		if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+			return;
+
+		unsigned cpu_sys, cpu_vdso, cpu_vsys,
+			node_sys, node_vdso, node_vsys;
+		long ret_sys, ret_vdso = 1, ret_vsys = 1;
+		unsigned node;
+
+		ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+		if (vdso_getcpu)
+			ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+		if (vgetcpu)
+			ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+		if (!ret_sys)
+			node = node_sys;
+		else if (!ret_vdso)
+			node = node_vdso;
+		else if (!ret_vsys)
+			node = node_vsys;
+
+		bool ok = true;
+		if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+			ok = false;
+		if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+			ok = false;
+		if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+			ok = false;
+
+		printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+		if (!ret_sys)
+			printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+		if (!ret_vdso)
+			printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+		if (!ret_vsys)
+			printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+			       node_vsys);
+		printf("\n");
+
+		if (!ok)
+			nerrs++;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	fill_function_pointers();
+
+	test_getcpu();
+
+	return nerrs ? 1 : 0;
+}