
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Overlapping changes all over.

The mini-qdisc bits were a little bit tricky, however.

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller, 7 years ago
commit c02b3741eb
100 files changed, 1775 insertions(+), 298 deletions(-)
    1. +16  -0    Documentation/ABI/testing/sysfs-devices-system-cpu
    2. +48  -10   Documentation/admin-guide/kernel-parameters.txt
    3. +2   -2    Documentation/filesystems/nilfs2.txt
    4. +15  -8    Documentation/kbuild/kconfig-language.txt
    5. +1   -1    Documentation/usb/gadget-testing.txt
    6. +186 -0    Documentation/x86/pti.txt
    7. +2   -2    MAINTAINERS
    8. +24  -21   Makefile
    9. +1   -1    arch/ia64/kernel/time.c
   10. +6   -0    arch/powerpc/include/asm/exception-64e.h
   11. +55  -2    arch/powerpc/include/asm/exception-64s.h
   12. +13  -0    arch/powerpc/include/asm/feature-fixups.h
   13. +17  -0    arch/powerpc/include/asm/hvcall.h
   14. +10  -0    arch/powerpc/include/asm/paca.h
   15. +14  -0    arch/powerpc/include/asm/plpar_wrappers.h
   16. +13  -0    arch/powerpc/include/asm/setup.h
   17. +5   -0    arch/powerpc/kernel/asm-offsets.c
   18. +36  -8    arch/powerpc/kernel/entry_64.S
   19. +124 -13   arch/powerpc/kernel/exceptions-64s.S
   20. +101 -0    arch/powerpc/kernel/setup_64.c
   21. +9   -0    arch/powerpc/kernel/vmlinux.lds.S
   22. +1   -0    arch/powerpc/kvm/book3s_64_mmu.c
   23. +61  -29   arch/powerpc/kvm/book3s_64_mmu_hv.c
   24. +4   -5    arch/powerpc/kvm/book3s_hv_rmhandlers.S
   25. +2   -0    arch/powerpc/kvm/book3s_pr.c
   26. +5   -2    arch/powerpc/kvm/book3s_rmhandlers.S
   27. +2   -2    arch/powerpc/kvm/book3s_segment.S
   28. +41  -0    arch/powerpc/lib/feature-fixups.c
   29. +49  -0    arch/powerpc/platforms/powernv/setup.c
   30. +18  -3    arch/powerpc/platforms/pseries/dlpar.c
   31. +2   -0    arch/powerpc/platforms/pseries/pseries.h
   32. +2   -1    arch/powerpc/platforms/pseries/ras.c
   33. +35  -0    arch/powerpc/platforms/pseries/setup.c
   34. +14  -1    arch/x86/Kconfig
   35. +8   -0    arch/x86/Makefile
   36. +3   -2    arch/x86/crypto/aesni-intel_asm.S
   37. +2   -1    arch/x86/crypto/camellia-aesni-avx-asm_64.S
   38. +2   -1    arch/x86/crypto/camellia-aesni-avx2-asm_64.S
   39. +2   -1    arch/x86/crypto/crc32c-pcl-intel-asm_64.S
   40. +19  -17   arch/x86/entry/calling.h
   41. +3   -2    arch/x86/entry/entry_32.S
   42. +9   -3    arch/x86/entry/entry_64.S
   43. +18  -0    arch/x86/events/intel/bts.c
   44. +25  -0    arch/x86/include/asm/asm-prototypes.h
   45. +4   -0    arch/x86/include/asm/cpufeatures.h
   46. +10  -8    arch/x86/include/asm/mshyperv.h
   47. +3   -0    arch/x86/include/asm/msr-index.h
   48. +214 -0    arch/x86/include/asm/nospec-branch.h
   49. +1   -0    arch/x86/include/asm/pci_x86.h
   50. +1   -1    arch/x86/include/asm/processor-flags.h
   51. +3   -3    arch/x86/include/asm/tlbflush.h
   52. +3   -2    arch/x86/include/asm/xen/hypercall.h
   53. +5   -2    arch/x86/kernel/alternative.c
   54. +26  -2    arch/x86/kernel/cpu/amd.c
   55. +185 -0    arch/x86/kernel/cpu/bugs.c
   56. +3   -0    arch/x86/kernel/cpu/common.c
   57. +11  -2    arch/x86/kernel/cpu/microcode/intel.c
   58. +4   -2    arch/x86/kernel/ftrace_32.S
   59. +4   -4    arch/x86/kernel/ftrace_64.S
   60. +5   -4    arch/x86/kernel/irq_32.c
   61. +11  -0    arch/x86/kernel/tboot.c
   62. +12  -7    arch/x86/kvm/mmu.c
   63. +5   -8    arch/x86/kvm/svm.c
   64. +15  -6    arch/x86/kvm/vmx.c
   65. +1   -0    arch/x86/lib/Makefile
   66. +4   -3    arch/x86/lib/checksum_32.S
   67. +48  -0    arch/x86/lib/retpoline.S
   68. +6   -26   arch/x86/mm/pti.c
   69. +5   -0    arch/x86/pci/common.c
   70. +18  -11   arch/x86/pci/fixup.c
   71. +2   -0    arch/x86/platform/efi/efi_64.c
   72. +1   -1    arch/x86/platform/intel-mid/device_libs/platform_bt.c
   73. +3   -5    arch/x86/xen/mmu_pv.c
   74. +1   -1    arch/x86/xen/xen-ops.h
   75. +12  -0    crypto/algapi.c
   76. +3   -0    drivers/base/Kconfig
   77. +48  -0    drivers/base/cpu.c
   78. +2   -2    drivers/gpu/drm/i915/gvt/cmd_parser.c
   79. +4   -1    drivers/gpu/drm/i915/gvt/gtt.c
   80. +1   -1    drivers/gpu/drm/i915/i915_gem.c
   81. +2   -0    drivers/gpu/drm/i915/i915_reg.h
   82. +5   -0    drivers/gpu/drm/i915/intel_engine_cs.c
   83. +3   -0    drivers/gpu/drm/i915/intel_lrc.c
   84. +1   -0    drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
   85. +3   -0    drivers/gpu/drm/tegra/sor.c
   86. +0   -3    drivers/gpu/drm/vc4/vc4_irq.c
   87. +3   -0    drivers/gpu/drm/vc4/vc4_v3d.c
   88. +2   -0    drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
   89. +0   -6    drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
   90. +1   -1    drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
   91. +12  -29   drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
   92. +2   -2    drivers/infiniband/hw/hfi1/file_ops.c
   93. +7   -4    drivers/infiniband/hw/mlx5/main.c
   94. +3   -4    drivers/infiniband/hw/mlx5/qp.c
   95. +1   -0    drivers/infiniband/ulp/isert/ib_isert.c
   96. +3   -0    drivers/mmc/host/renesas_sdhi_core.c
   97. +4   -2    drivers/mmc/host/s3cmci.c
   98. +3   -1    drivers/mux/core.c
   99. +4   -0    drivers/net/ethernet/cirrus/cs89x0.c
  100. +7   -6    drivers/net/ethernet/ibm/ibmvnic.c

+ 16 - 0
Documentation/ABI/testing/sysfs-devices-system-cpu

@@ -375,3 +375,19 @@ Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	information about CPUs heterogeneity.

 		cpu_capacity: capacity of cpu#.
+
+What:		/sys/devices/system/cpu/vulnerabilities
+		/sys/devices/system/cpu/vulnerabilities/meltdown
+		/sys/devices/system/cpu/vulnerabilities/spectre_v1
+		/sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date:		January 2018
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Information about CPU vulnerabilities
+
+		The files are named after the code names of CPU
+		vulnerabilities. The output of those files reflects the
+		state of the CPUs in the system. Possible output values:
+
+		"Not affected"	  CPU is not affected by the vulnerability
+		"Vulnerable"	  CPU is affected and no mitigation in effect
+		"Mitigation: $M"  CPU is affected and mitigation $M is in effect

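The ABI entry above is self-describing; as a quick illustration of consuming these files from userspace (a minimal sketch, not part of this merge — the paths come straight from the ABI text, and files may simply be absent on older kernels):

	/* Print the reported state of each CPU vulnerability file. */
	#include <stdio.h>

	int main(void)
	{
		const char *files[] = {
			"/sys/devices/system/cpu/vulnerabilities/meltdown",
			"/sys/devices/system/cpu/vulnerabilities/spectre_v1",
			"/sys/devices/system/cpu/vulnerabilities/spectre_v2",
		};
		char line[256];

		for (int i = 0; i < 3; i++) {
			FILE *f = fopen(files[i], "r");

			if (!f)
				continue;	/* absent on older kernels */
			if (fgets(line, sizeof(line), f))
				printf("%s: %s", files[i], line);
			fclose(f);
		}
		return 0;
	}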
+ 48 - 10
Documentation/admin-guide/kernel-parameters.txt

@@ -713,9 +713,6 @@
 			It will be ignored when crashkernel=X,high is not used
 			or memory reserved is below 4G.

-	crossrelease_fullstack
-			[KNL] Allow to record full stack trace in cross-release
-
 	cryptomgr.notests
                         [KNL] Disable crypto self-tests

@@ -2626,6 +2623,11 @@
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.

+	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
+			(indirect branch prediction) vulnerability. System may
+			allow data leaks with this option, which is equivalent
+			to spectre_v2=off.
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
@@ -2712,8 +2714,6 @@
 			steal time is computed, but won't influence scheduler
 			behaviour

-	nopti		[X86-64] Disable kernel page table isolation
-
 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.

 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
@@ -3100,6 +3100,12 @@
 		pcie_scan_all	Scan all possible PCIe devices.  Otherwise we
 				only look for one device below a PCIe downstream
 				port.
+		big_root_window	Try to add a big 64bit memory window to the PCIe
+				root complex on AMD CPUs. Some GFX hardware
+				can resize a BAR to allow access to all VRAM.
+				Adding the window is slightly risky (it may
+				conflict with unreported devices), so this
+				taints the kernel.
 
 
 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
 			Management.
@@ -3288,11 +3294,20 @@
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.

-	pti=		[X86_64]
-			Control user/kernel address space isolation:
-			on - enable
-			off - disable
-			auto - default setting
+	pti=		[X86_64] Control Page Table Isolation of user and
+			kernel address spaces.  Disabling this feature
+			removes hardening, but improves performance of
+			system calls and interrupts.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable to issues that PTI mitigates
+
+			Not specifying this option is equivalent to pti=auto.
+
+	nopti		[X86_64]
+			Equivalent to pti=off
 
 
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
@@ -3943,6 +3958,29 @@
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/laptops/sonypi.txt

+	spectre_v2=	[X86] Control mitigation of Spectre variant 2
+			(indirect branch speculation) vulnerability.
+
+			on   - unconditionally enable
+			off  - unconditionally disable
+			auto - kernel detects whether your CPU model is
+			       vulnerable
+
+			Selecting 'on' will, and 'auto' may, choose a
+			mitigation method at run time according to the
+			CPU, the available microcode, the setting of the
+			CONFIG_RETPOLINE configuration option, and the
+			compiler with which the kernel was built.
+
+			Specific mitigations can also be selected manually:
+
+			retpoline	  - replace indirect branches
+			retpoline,generic - google's original retpoline
+			retpoline,amd     - AMD-specific minimal thunk
+
+			Not specifying this option is equivalent to
+			spectre_v2=auto.
+
 	spia_io_base=	[HW,MTD]
 	spia_fio_base=
 	spia_pedr=

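For orientation, options like spectre_v2= above are consumed very early in boot; the x86 parser added by this merge lives in arch/x86/kernel/cpu/bugs.c (see the diffstat). The sketch below only illustrates the kernel's generic early_param() pattern for hooking such an option — the handler name and messages are invented, not the actual code:

	/* Illustrative early boot-parameter hook; not the real parser. */
	static int __init sketch_spectre_v2_cmdline(char *str)
	{
		if (!str)
			return -EINVAL;

		if (!strcmp(str, "off"))
			pr_info("spectre_v2: mitigation disabled\n");
		else if (!strcmp(str, "on"))
			pr_info("spectre_v2: mitigation forced on\n");
		/* "auto" and the retpoline variants would be handled here */

		return 0;
	}
	early_param("spectre_v2", sketch_spectre_v2_cmdline);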
+ 2 - 2
Documentation/filesystems/nilfs2.txt

@@ -25,8 +25,8 @@ available from the following download page.  At least "mkfs.nilfs2",
 cleaner or garbage collector) are required.  Details on the tools are
 described in the man pages included in the package.

-Project web page:    http://nilfs.sourceforge.net/
-Download page:       http://nilfs.sourceforge.net/en/download.html
+Project web page:    https://nilfs.sourceforge.io/
+Download page:       https://nilfs.sourceforge.io/en/download.html
 List info:           http://vger.kernel.org/vger-lists.html#linux-nilfs

 Caveats

+ 15 - 8
Documentation/kbuild/kconfig-language.txt

@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
 <expr> ::= <symbol>                             (1)
 <expr> ::= <symbol>                             (1)
            <symbol> '=' <symbol>                (2)
            <symbol> '=' <symbol>                (2)
            <symbol> '!=' <symbol>               (3)
            <symbol> '!=' <symbol>               (3)
-           '(' <expr> ')'                       (4)
-           '!' <expr>                           (5)
-           <expr> '&&' <expr>                   (6)
-           <expr> '||' <expr>                   (7)
+           <symbol1> '<' <symbol2>              (4)
+           <symbol1> '>' <symbol2>              (4)
+           <symbol1> '<=' <symbol2>             (4)
+           <symbol1> '>=' <symbol2>             (4)
+           '(' <expr> ')'                       (5)
+           '!' <expr>                           (6)
+           <expr> '&&' <expr>                   (7)
+           <expr> '||' <expr>                   (8)
 
 
 Expressions are listed in decreasing order of precedence.

@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
     otherwise 'n'.
 (3) If the values of both symbols are equal, it returns 'n',
     otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
+    or greater-or-equal than value of <symbol2>, it returns 'y',
+    otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
 
 
 An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
 respectively for calculations). A menu entry becomes visible when its

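The tristate arithmetic in (6)-(8) maps directly onto min/max over 0/1/2; a small C model of the rules stated above (illustration only):

	/* Tristate evaluation: n=0, m=1, y=2.
	 * '!' is 2 - x, '&&' is min(), '||' is max().
	 */
	enum tristate { N = 0, M = 1, Y = 2 };

	static enum tristate t_not(enum tristate a) { return Y - a; }

	static enum tristate t_and(enum tristate a, enum tristate b)
	{
		return a < b ? a : b;	/* min */
	}

	static enum tristate t_or(enum tristate a, enum tristate b)
	{
		return a > b ? a : b;	/* max */
	}

So, for example, FOO && !BAR with FOO=y and BAR=m evaluates to min(2, 2-1) = 1, i.e. 'm'.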
+ 1 - 1
Documentation/usb/gadget-testing.txt

@@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value
 in each line. The rules stated above are best illustrated with an example:

 # mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
 # ln -s header/h class/fs
 # ln -s header/h class/ss
 # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p

+ 186 - 0
Documentation/x86/pti.txt

@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications.  When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy.  When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT).  There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled.  It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI.  This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level.  This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel.  This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal.  The only difference is when the kernel
+makes entries in the top (PGD) level.  In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables.  This leaves a single, shared set of
+userspace page tables to manage.  One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important.  But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+  a. Each process now needs an order-1 PGD instead of order-0.
+     (Consumes an additional 4k per process).
+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+     aligned so that it can be mapped by setting a single PMD
+     entry.  This consumes nearly 2MB of RAM once the kernel
+     is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+  a. CR3 manipulation to switch between the page table copies
+     must be done at interrupt, syscall, and exception entry
+     and exit (it can be skipped when the kernel is interrupted,
+     though.)  Moves to CR3 are on the order of a hundred
+     cycles, and are required at every entry and exit.
+  b. A "trampoline" must be used for SYSCALL entry.  This
+     trampoline depends on a smaller set of resources than the
+     non-PTI SYSCALL entry code, so requires mapping fewer
+     things into the userspace page tables.  The downside is
+     that stacks must be switched at entry time.
+  c. Global pages are disabled for all kernel structures not
+     mapped into both kernel and userspace page tables.  This
+     feature of the MMU allows different processes to share TLB
+     entries mapping the kernel.  Losing the feature means more
+     TLB misses after a context switch.  The actual loss of
+     performance is very small, however, never exceeding 1%.
+  d. Process Context IDentifiers (PCID) is a CPU feature that
+     allows us to skip flushing the entire TLB when switching page
+     tables by setting a special bit in CR3 when the page tables
+     are changed.  This makes switching the page tables (at context
+     switch, or kernel entry/exit) cheaper.  But, on systems with
+     PCID support, the context switch code must flush both the user
+     and kernel entries out of the TLB.  The user PCID TLB flush is
+     deferred until the exit to userspace, minimizing the cost.
+     See intel.com/sdm for the gory PCID/INVPCID details.
+  e. The userspace page tables must be populated for each new
+     process.  Even without PTI, the shared kernel mappings
+     are created by copying top-level (PGD) entries into each
+     new process.  But, with PTI, there are now *two* kernel
+     mappings: one in the kernel page tables that maps everything
+     and one for the entry/exit structures.  At fork(), we need to
+     copy both.
+  f. In addition to the fork()-time copying, there must also
+     be an update to the userspace PGD any time a set_pgd() is done
+     on a PGD used to map userspace.  This ensures that the kernel
+     and userspace copies always map the same userspace
+     memory.
+  g. On systems without PCID support, each CR3 write flushes
+     the entire TLB.  That means that each syscall, interrupt
+     or exception flushes the TLB.
+  h. INVPCID is a TLB-flushing instruction which allows flushing
+     of TLB entries for non-current PCIDs.  Some systems support
+     PCIDs, but do not support INVPCID.  On these systems, addresses
+     can only be flushed from the TLB for the current PCID.  When
+     flushing a kernel address, we need to flush all PCIDs, so a
+     single kernel address flush will require a TLB-flushing CR3
+     write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+   unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+   boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
+   several minutes.  These tests frequently uncover corner cases in the
+   kernel entry code.  In general, old kernels might cause these tests
+   themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+   frequent performance monitoring non-maskable interrupts (see "NMI"
+   in /proc/interrupts).  This exercises the NMI entry/exit code which
+   is known to trigger bugs in code paths that did not expect to be
+   interrupted, including nested NMIs.  Using "-c" boosts the rate of
+   NMIs, and using two -c with separate counters encourages nested NMIs
+   and less deterministic behavior.
+
+	while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+   This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code.  Usually a bug in one of the
+   more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup.  Bugs
+   in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
+   like screwing up a page table switch.  Also caused by
+   incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI.  The NMI code is separate from main
+   interrupt handlers and can have bugs that do not affect
+   normal interrupts.  Also caused by incorrectly mapping NMI
+   code.  NMIs that interrupt the entry code must be very
+   careful and can be the cause of crashes that show up when
+   running perf.
+ * Kernel crashes at the first exit to userspace.  entry_64.S
+   bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+   in entry_64.S that return to userspace are sometimes separate
+   from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+   faults upon page faults.  Caused by touching non-pti-mapped
+   data in the entry code, or forgetting to switch to kernel
+   CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+   as mount(8) failing to mount the rootfs.  These have
+   tended to be TLB invalidation issues.  Usually invalidating
+   the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf

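The PGD-sharing rules in items (e) and (f) of the Overhead section reduce to one invariant: any top-level entry that maps userspace must be written into both PGDs. A conceptual C sketch of that invariant (all helper names below are invented for illustration; the real logic is in arch/x86/mm/pti.c and the x86 pgtable headers, which this merge also touches):

	/* Conceptual sketch only: mirror userspace PGD updates into the
	 * user copy so both page-table sets map the same user memory.
	 */
	static void sketch_set_pgd(pgd_t *pgdp, pgd_t pgd)
	{
		*pgdp = pgd;				/* kernel page tables */

		if (sketch_entry_maps_user(pgdp))	/* hypothetical test */
			*sketch_user_pgdp(pgdp) = pgd;	/* user page tables */
	}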
+ 2 - 2
MAINTAINERS

@@ -9660,8 +9660,8 @@ F:	include/uapi/linux/sunrpc/
 NILFS2 FILESYSTEM
 M:	Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
 L:	linux-nilfs@vger.kernel.org
-W:	http://nilfs.sourceforge.net/
-W:	http://nilfs.osdn.jp/
+W:	https://nilfs.sourceforge.io/
+W:	https://nilfs.osdn.jp/
 T:	git git://github.com/konis/nilfs2.git
 S:	Supported
 F:	Documentation/filesystems/nilfs2.txt

+ 24 - 21
Makefile

@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Fearless Coyote

 # *DOCUMENTATION*
@@ -484,26 +484,6 @@ CLANG_GCC_TC	:= --gcc-toolchain=$(GCC_TOOLCHAIN)
 endif
 KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
-KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
-# Quiet clang warning: comparison of unsigned expression < 0 is always false
-KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
-# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
-# source of a reference will be _MergedGlobals and not on of the whitelisted names.
-# See modpost pattern 2
-KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
-KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
-KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
-else
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif

 ifeq ($(config-targets),1)
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
 endif
 KBUILD_CFLAGS += $(stackp-flag)

+ifeq ($(cc-name),clang)
+KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+# Quiet clang warning: comparison of unsigned expression < 0 is always false
+KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
+# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
+# source of a reference will be _MergedGlobals and not on of the whitelisted names.
+# See modpost pattern 2
+KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
+else
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+endif
+
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else

+ 1 - 1
arch/ia64/kernel/time.c

@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
 	}

 	if (ti->softirq_time) {
-		delta = cycle_to_nsec(ti->softirq_time));
+		delta = cycle_to_nsec(ti->softirq_time);
 		account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
 	}


+ 6 - 0
arch/powerpc/include/asm/exception-64e.h

@@ -209,5 +209,11 @@ exc_##label##_book3e:
 	ori	r3,r3,vector_offset@l;		\
 	mtspr	SPRN_IVOR##vector_number,r3;

+#define RFI_TO_KERNEL							\
+	rfi
+
+#define RFI_TO_USER							\
+	rfi
+
 #endif /* _ASM_POWERPC_EXCEPTION_64E_H */


+ 55 - 2
arch/powerpc/include/asm/exception-64s.h

@@ -74,6 +74,59 @@
  */
 #define EX_R3		EX_DAR

+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT							\
+	RFI_FLUSH_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop
+
+#define RFI_TO_KERNEL							\
+	rfid
+
+#define RFI_TO_USER							\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL						\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define RFI_TO_GUEST							\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define HRFI_TO_KERNEL							\
+	hrfid
+
+#define HRFI_TO_USER							\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL						\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_GUEST							\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN							\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
 #ifdef CONFIG_RELOCATABLE
 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)			\
 	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	mtspr	SPRN_##h##SRR0,r12;					\
 	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
 	mtspr	SPRN_##h##SRR1,r10;					\
-	h##rfid;							\
+	h##RFI_TO_KERNEL;						\
 	b	.	/* prevent speculative execution */
 #define EXCEPTION_PROLOG_PSERIES_1(label, h)				\
 	__EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	mtspr	SPRN_##h##SRR0,r12;					\
 	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
 	mtspr	SPRN_##h##SRR1,r10;					\
-	h##rfid;							\
+	h##RFI_TO_KERNEL;						\
 	b	.	/* prevent speculative execution */

 #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h)			\

+ 13 - 0
arch/powerpc/include/asm/feature-fixups.h

@@ -187,7 +187,20 @@ label##3:					       	\
 	FTR_ENTRY_OFFSET label##1b-label##3b;		\
 	.popsection;

+#define RFI_FLUSH_FIXUP_SECTION				\
+951:							\
+	.pushsection __rfi_flush_fixup,"a";		\
+	.align 2;					\
+952:							\
+	FTR_ENTRY_OFFSET 951b-952b;			\
+	.popsection;
+
+
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
 void apply_feature_fixups(void);
 void setup_feature_keys(void);
 #endif

+ 17 - 0
arch/powerpc/include/asm/hvcall.h

@@ -241,6 +241,7 @@
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING		0x1D8
@@ -330,6 +331,17 @@
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS		-2
 /* >= 0 values are CPU number */

+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31	(1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED	(1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2
+
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK	0x18
 #define PROC_TABLE_DEREG	0x10
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
 	}
 }

+struct h_cpu_char_result {
+	u64 character;
+	u64 behaviour;
+};
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */

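A note on the "IBM bit" comments above: IBM documentation numbers bits from the most-significant end, so IBM bit n corresponds to (1ull << (63 - n)) — e.g. IBM bit 2 is 1ull << 61, matching H_CPU_CHAR_L1D_FLUSH_ORI30. A one-line helper expressing the convention (the kernel carries a similar PPC_BIT() macro; the name below is illustrative):

	/* IBM (MSB-first) bit numbering: bit 0 is the most significant. */
	#define IBM_BIT(n)	(1ull << (63 - (n)))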
+ 10 - 0
arch/powerpc/include/asm/paca.h

@@ -232,6 +232,16 @@ struct paca_struct {
 	struct sibling_subcore_state *sibling_subcore_state;
 #endif
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * rfi fallback flush must be in its own cacheline to prevent
+	 * other paca data leaking into the L1d
+	 */
+	u64 exrfi[EX_SIZE] __aligned(0x80);
+	void *rfi_flush_fallback_area;
+	u64 l1d_flush_congruence;
+	u64 l1d_flush_sets;
+#endif
 };

 extern void copy_mm_to_paca(struct mm_struct *mm);

+ 14 - 0
arch/powerpc/include/asm/plpar_wrappers.h

@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
 	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }

+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+	if (rc == H_SUCCESS) {
+		p->character = retbuf[0];
+		p->behaviour = retbuf[1];
+	}
+
+	return rc;
+}
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */

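Putting the pieces together, the new wrapper and the H_CPU_CHAR_*/H_CPU_BEHAV_* bits are consumed at boot roughly as follows. This is a condensed sketch; the full version lands in arch/powerpc/platforms/pseries/setup.c, which is part of this merge but not shown above:

	/* Condensed sketch of selecting L1d flush types from the
	 * hypervisor-reported CPU characteristics.
	 */
	static void sketch_pseries_rfi_flush_setup(void)
	{
		struct h_cpu_char_result result;
		enum l1d_flush_type types = L1D_FLUSH_FALLBACK;
		bool enable = true;

		if (plpar_get_cpu_characteristics(&result) == H_SUCCESS) {
			if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
				types |= L1D_FLUSH_MTTRIG;
			if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
				types |= L1D_FLUSH_ORI;
			if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
				enable = false;
		}

		setup_rfi_flush(types, enable);
	}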
+ 13 - 0
arch/powerpc/include/asm/setup.h

@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
 static inline void pseries_little_endian_exceptions(void) {}
 #endif /* CONFIG_PPC_PSERIES */

+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+	L1D_FLUSH_NONE		= 0x1,
+	L1D_FLUSH_FALLBACK	= 0x2,
+	L1D_FLUSH_ORI		= 0x4,
+	L1D_FLUSH_MTTRIG	= 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
 #endif /* !__ASSEMBLY__ */

 #endif	/* _ASM_POWERPC_SETUP_H */

+ 5 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -237,6 +237,11 @@ int main(void)
 	OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
 	OFFSET(PACA_IN_MCE, paca_struct, in_mce);
 	OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+	OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+	OFFSET(PACA_EXRFI, paca_struct, exrfi);
+	OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+	OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+
 #endif
 	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
 	OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);

+ 36 - 8
arch/powerpc/kernel/entry_64.S

@@ -37,6 +37,11 @@
 #include <asm/tm.h>
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
 #include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
 
 
 /*
  * System calls.
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)

 	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
+	ld	r2,GPR2(r1)
+	ld	r1,GPR1(r1)
+	mtlr	r4
+	mtcr	r5
+	mtspr	SPRN_SRR0,r7
+	mtspr	SPRN_SRR1,r8
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
+
+	/* exit to kernel */
 1:	ld	r2,GPR2(r1)
 	ld	r1,GPR1(r1)
 	mtlr	r4
 	mtcr	r5
 	mtspr	SPRN_SRR0,r7
 	mtspr	SPRN_SRR1,r8
-	RFI
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */

 .Lsyscall_error:
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	mtmsrd	r10, 1
 	mtspr	SPRN_SRR0, r11
 	mtspr	SPRN_SRR1, r12
-
-	rfid
+	RFI_TO_USER
 	b	.	/* prevent speculative execution */
 #endif
 _ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
 	REST_GPR(13, r1)
-1:
+
 	mtspr	SPRN_SRR1,r3

 	ld	r2,_CCR(r1)
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r3,GPR3(r1)
 	ld	r4,GPR4(r1)
 	ld	r1,GPR1(r1)
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
 
 
-	rfid
+1:	mtspr	SPRN_SRR1,r3
+
+	ld	r2,_CCR(r1)
+	mtcrf	0xFF,r2
+	ld	r2,_NIP(r1)
+	mtspr	SPRN_SRR0,r2
+
+	ld	r0,GPR0(r1)
+	ld	r2,GPR2(r1)
+	ld	r3,GPR3(r1)
+	ld	r4,GPR4(r1)
+	ld	r1,GPR1(r1)
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */

 #endif /* CONFIG_PPC_BOOK3E */
@@ -1073,7 +1101,7 @@ __enter_rtas:
 	
 	
 	mtspr	SPRN_SRR0,r5
 	mtspr	SPRN_SRR1,r6
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */

 rtas_return_loc:
@@ -1098,7 +1126,7 @@ rtas_return_loc:
 
 
 	mtspr	SPRN_SRR0,r3
 	mtspr	SPRN_SRR1,r4
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 _ASM_NOKPROBE_SYMBOL(__enter_rtas)
 _ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
 	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
 	andc	r11,r11,r12
 	mtsrr1	r11
-	rfid
+	RFI_TO_KERNEL
 #endif /* CONFIG_PPC_BOOK3E */

 1:	/* Return from OF */

+ 124 - 13
arch/powerpc/kernel/exceptions-64s.S

@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
 	LOAD_HANDLER(r12, machine_check_handle_early)
 1:	mtspr	SPRN_SRR0,r12
 	mtspr	SPRN_SRR1,r11
-	rfid
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 2:
 	/* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	li	r3,MSR_ME
 	andc	r10,r10,r3		/* Turn off MSR_ME */
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.
 2:
 	/*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 */
 	bl	machine_check_queue_event
 	MACHINE_CHECK_HANDLER_WINDUP
-	rfid
+	RFI_TO_USER_OR_KERNEL
 9:
 	/* Deliver the machine check to host kernel in V mode. */
 	MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
 	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
 	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */

+	andi.	r9,r11,MSR_PR	// Check for exception from userspace
+	cmpdi	cr4,r9,MSR_PR	// And save the result in CR4 for later
+
 	/*
 	 * Test MSR_RI before calling slb_allocate_realmode, because the
 	 * MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
 
 	/* All done -- return from exception. */

+	bne	cr4,1f		/* returning to kernel */
+
 .machine	push
 .machine	"power4"
 	mtcrf	0x80,r9
+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
 	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
 	mtcrf	0x02,r9		/* I/D indication is in cr6 */
 	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	ld	r11,PACA_EXSLB+EX_R11(r13)
 	ld	r12,PACA_EXSLB+EX_R12(r13)
 	ld	r13,PACA_EXSLB+EX_R13(r13)
-	rfid
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
+1:
+.machine	push
+.machine	"power4"
+	mtcrf	0x80,r9
+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
+	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
+	mtcrf	0x02,r9		/* I/D indication is in cr6 */
+	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
+.machine	pop
+
+	RESTORE_CTR(r9, PACA_EXSLB)
+	RESTORE_PPR_PACA(PACA_EXSLB, r9)
+	mr	r3,r12
+	ld	r9,PACA_EXSLB+EX_R9(r13)
+	ld	r10,PACA_EXSLB+EX_R10(r13)
+	ld	r11,PACA_EXSLB+EX_R11(r13)
+	ld	r12,PACA_EXSLB+EX_R12(r13)
+	ld	r13,PACA_EXSLB+EX_R13(r13)
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */

+
 2:	std     r3,PACA_EXSLB+EX_DAR(r13)
 	mr	r3,r12
 	mfspr	r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	mtspr	SPRN_SRR0,r10
 	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.

 8:	std     r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	mtspr	SPRN_SRR0,r10
 	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
-	rfid
+	RFI_TO_KERNEL
 	b	.

 EXC_COMMON_BEGIN(unrecov_slb)
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 	mtspr	SPRN_SRR0,r10 ; 				\
 	ld	r10,PACAKMSR(r13) ;				\
 	mtspr	SPRN_SRR1,r10 ; 				\
-	rfid ; 							\
+	RFI_TO_KERNEL ;						\
 	b	. ;	/* prevent speculative execution */

 #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
 	xori	r12,r12,MSR_LE ;				\
 	mtspr	SPRN_SRR1,r12 ;					\
 	mr	r13,r9 ;					\
-	rfid ;		/* return to userspace */		\
+	RFI_TO_USER ;	/* return to userspace */		\
 	b	. ;	/* prevent speculative execution */
 #else
 #define SYSCALL_FASTENDIAN_TEST
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 	mtcr	r11
 	REST_GPR(11, r1)
 	ld	r1,GPR1(r1)
-	hrfid
+	HRFI_TO_USER_OR_KERNEL
 
 
 1:	mtcr	r11
 	REST_GPR(11, r1)
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r12,PACA_EXGEN+EX_R12(r13)
 	ld	r13,PACA_EXGEN+EX_R13(r13)
-	HRFID
+	HRFI_TO_UNKNOWN
 	b	.
 #endif

@@ -1418,10 +1445,94 @@ masked_##_H##interrupt:					\
 	ld	r10,PACA_EXGEN+EX_R10(r13);		\
 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
 	/* returns to kernel where r13 must be set up, so don't restore it */ \
-	##_H##rfid;					\
+	##_H##RFI_TO_KERNEL;				\
 	b	.;					\
 	MASKED_DEC_HANDLER(_H)

+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+	SET_SCRATCH0(r13);
+	GET_PACA(r13);
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	std	r12,PACA_EXRFI+EX_R12(r13)
+	std	r8,PACA_EXRFI+EX_R13(r13)
+	mfctr	r9
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+	ld	r11,PACA_L1D_FLUSH_SETS(r13)
+	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+	addi	r12,r12,8
+	mtctr	r11
+	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+1:	li	r8,0
+	.rept	8 /* 8-way set associative */
+	ldx	r11,r10,r8
+	add	r8,r8,r12
+	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
+	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
+	.endr
+	addi	r10,r10,128 /* 128 byte cache line */
+	bdnz	1b
+
+	mtctr	r9
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	ld	r12,PACA_EXRFI+EX_R12(r13)
+	ld	r8,PACA_EXRFI+EX_R13(r13)
+	GET_SCRATCH0(r13);
+	rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+	SET_SCRATCH0(r13);
+	GET_PACA(r13);
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	std	r12,PACA_EXRFI+EX_R12(r13)
+	std	r8,PACA_EXRFI+EX_R13(r13)
+	mfctr	r9
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+	ld	r11,PACA_L1D_FLUSH_SETS(r13)
+	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+	/*
+	 * The load adresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+	addi	r12,r12,8
+	mtctr	r11
+	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+1:	li	r8,0
+	.rept	8 /* 8-way set associative */
+	ldx	r11,r10,r8
+	add	r8,r8,r12
+	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
+	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
+	.endr
+	addi	r10,r10,128 /* 128 byte cache line */
+	bdnz	1b
+
+	mtctr	r9
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	ld	r12,PACA_EXRFI+EX_R12(r13)
+	ld	r8,PACA_EXRFI+EX_R13(r13)
+	GET_SCRATCH0(r13);
+	hrfid
+
 /*
  * Real mode exceptions actually use this too, but alternate
  * instruction code patches (which end up in the common .text area)
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
 	addi	r13, r13, 4
 	mtspr	SPRN_SRR0, r13
 	GET_SCRATCH0(r13)
-	rfid
+	RFI_TO_KERNEL
 	b	.

 TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
 	addi	r13, r13, 4
 	mtspr	SPRN_HSRR0, r13
 	GET_SCRATCH0(r13)
-	hrfid
+	HRFI_TO_KERNEL
 	b	.
 #endif


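Both fallback trampolines above implement the same displacement flush. As a readability model only (assuming the 8-way, 128-byte-line geometry the assembly comments describe), the loop structure is:

	/* C model of the assembly flush loop: touch every way of every
	 * set of the fallback area so the whole L1d is displaced.
	 * Not kernel code; for illustration only.
	 */
	static void model_displacement_flush(const char *area,
					     unsigned long sets,
					     unsigned long congruence)
	{
		unsigned long stride = congruence + 8;	/* staggered, as in the asm */

		for (unsigned long set = 0; set < sets; set++) {
			unsigned long offset = 0;

			for (int way = 0; way < 8; way++) {	/* 8-way */
				(void)*(volatile const char *)(area + offset);
				offset += stride;
			}
			area += 128;	/* next 128-byte cache line */
		}
	}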
+ 101 - 0
arch/powerpc/kernel/setup_64.c

@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
 	return 0;
 }
 early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+	pr_info("rfi-flush: disabled on command line.");
+	no_rfi_flush = true;
+	return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+	handle_no_rfi_flush(NULL);
+	return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+	/*
+	 * We don't need to do the flush explicitly, just enter+exit kernel is
+	 * sufficient, the RFI exit handlers will do the right thing.
+	 */
+}
+
+void rfi_flush_enable(bool enable)
+{
+	if (rfi_flush == enable)
+		return;
+
+	if (enable) {
+		do_rfi_flush_fixups(enabled_flush_types);
+		on_each_cpu(do_nothing, NULL, 1);
+	} else
+		do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+	rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+	u64 l1d_size, limit;
+	int cpu;
+
+	l1d_size = ppc64_caches.l1d.size;
+	limit = min(safe_stack_limit(), ppc64_rma_size);
+
+	/*
+	 * Align to L1d size, and size it at 2x L1d size, to catch possible
+	 * hardware prefetch runoff. We don't have a recipe for load patterns to
+	 * reliably avoid the prefetcher.
+	 */
+	l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+	for_each_possible_cpu(cpu) {
+		/*
+		 * The fallback flush is currently coded for 8-way
+		 * associativity. Different associativity is possible, but it
+		 * will be treated as 8-way and may not evict the lines as
+		 * effectively.
+		 *
+		 * 128 byte lines are mandatory.
+		 */
+		u64 c = l1d_size / 8;
+
+		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+		paca[cpu].l1d_flush_congruence = c;
+		paca[cpu].l1d_flush_sets = c / 128;
+	}
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+	if (types & L1D_FLUSH_FALLBACK) {
+		pr_info("rfi-flush: Using fallback displacement flush\n");
+		init_fallback_flush();
+	}
+
+	if (types & L1D_FLUSH_ORI)
+		pr_info("rfi-flush: Using ori type flush\n");
+
+	if (types & L1D_FLUSH_MTTRIG)
+		pr_info("rfi-flush: Using mttrig type flush\n");
+
+	enabled_flush_types = types;
+
+	if (!no_rfi_flush)
+		rfi_flush_enable(enable);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */

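To make the arithmetic in init_fallback_flush() concrete: assuming a 64kB, 8-way L1d (the real size is read at boot from ppc64_caches.l1d.size, so this is only an example), c = 65536 / 8 = 8192, giving l1d_flush_congruence = 8192 and l1d_flush_sets = 8192 / 128 = 64. The fallback loop then walks 64 cache lines in each of the 8 ways, and the fallback area itself is sized at 2 * 65536 bytes to absorb hardware-prefetch runoff.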
+ 9 - 0
arch/powerpc/kernel/vmlinux.lds.S

@@ -132,6 +132,15 @@ SECTIONS
 	/* Read-only data */
 	RO_DATA(PAGE_SIZE)

+#ifdef CONFIG_PPC64
+	. = ALIGN(8);
+	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+		__start___rfi_flush_fixup = .;
+		*(__rfi_flush_fixup)
+		__stop___rfi_flush_fixup = .;
+	}
+#endif
+
 	EXCEPTION_TABLE(0)

 	NOTES :kernel :notes

+ 1 - 0
arch/powerpc/kvm/book3s_64_mmu.c

@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		gpte->may_read = true;
 		gpte->may_read = true;
 		gpte->may_write = true;
 		gpte->may_write = true;
 		gpte->page_size = MMU_PAGE_4K;
 		gpte->page_size = MMU_PAGE_4K;
+		gpte->wimg = HPTE_R_M;
 
 
 		return 0;
 	}

+ 61 - 29
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
 	u32 order;

 	/* These fields protected by kvm->lock */
+
+	/* Possible values and their usage:
+	 *  <0     an error occurred during allocation,
+	 *  -EBUSY allocation is in the progress,
+	 *  0      allocation made successfuly.
+	 */
 	int error;
-	bool prepare_done;
 
 
-	/* Private to the work thread, until prepare_done is true,
-	 * then protected by kvm->resize_hpt_sem */
+	/* Private to the work thread, until error != -EBUSY,
+	 * then protected by kvm->lock.
+	 */
 	struct kvm_hpt_info hpt;
 };

@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 		 * Reset all the reverse-mapping chains for all memslots
 		 */
 		kvmppc_rmap_reset(kvm);
-		/* Ensure that each vcpu will flush its TLB on next entry. */
-		cpumask_setall(&kvm->arch.need_tlb_flush);
 		err = 0;
 		goto out;
 	}
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 	kvmppc_set_hpt(kvm, &info);

 out:
+	if (err == 0)
+		/* Ensure that each vcpu will flush its TLB on next entry. */
+		cpumask_setall(&kvm->arch.need_tlb_flush);
+
 	mutex_unlock(&kvm->lock);
 	return err;
 }
@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
 
 
 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
 {
-	BUG_ON(kvm->arch.resize_hpt != resize);
+	if (WARN_ON(!mutex_is_locked(&kvm->lock)))
+		return;
 
 
 	if (!resize)
 		return;

-	if (resize->hpt.virt)
-		kvmppc_free_hpt(&resize->hpt);
+	if (resize->error != -EBUSY) {
+		if (resize->hpt.virt)
+			kvmppc_free_hpt(&resize->hpt);
+		kfree(resize);
+	}
 
 
-	kvm->arch.resize_hpt = NULL;
-	kfree(resize);
+	if (kvm->arch.resize_hpt == resize)
+		kvm->arch.resize_hpt = NULL;
 }

 static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
 						     struct kvm_resize_hpt,
 						     work);
 	struct kvm *kvm = resize->kvm;
-	int err;
+	int err = 0;
 
 
-	resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
-			 resize->order);
-
-	err = resize_hpt_allocate(resize);
+	if (WARN_ON(resize->error != -EBUSY))
+		return;
 
 
 	mutex_lock(&kvm->lock);

+	/* Request is still current? */
+	if (kvm->arch.resize_hpt == resize) {
+		/* We may request large allocations here:
+		 * do not sleep with kvm->lock held for a while.
+		 */
+		mutex_unlock(&kvm->lock);
+
+		resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+				 resize->order);
+
+		err = resize_hpt_allocate(resize);
+
+		/* We have strict assumption about -EBUSY
+		 * when preparing for HPT resize.
+		 */
+		if (WARN_ON(err == -EBUSY))
+			err = -EINPROGRESS;
+
+		mutex_lock(&kvm->lock);
+		/* It is possible that kvm->arch.resize_hpt != resize
+		 * after we grab kvm->lock again.
+		 */
+	}
+
 	resize->error = err;
 	resize->error = err;
-	resize->prepare_done = true;
+
+	if (kvm->arch.resize_hpt != resize)
+		resize_hpt_release(kvm, resize);
 
 
 	mutex_unlock(&kvm->lock);
 }
@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 
 
 	if (resize) {
 		if (resize->order == shift) {
-			/* Suitable resize in progress */
-			if (resize->prepare_done) {
-				ret = resize->error;
-				if (ret != 0)
-					resize_hpt_release(kvm, resize);
-			} else {
+			/* Suitable resize in progress? */
+			ret = resize->error;
+			if (ret == -EBUSY)
 				ret = 100; /* estimated time in ms */
-			}
+			else if (ret)
+				resize_hpt_release(kvm, resize);
 
 			goto out;
 		}
@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 		ret = -ENOMEM;
 		goto out;
 	}
+
+	resize->error = -EBUSY;
 	resize->order = shift;
 	resize->kvm = kvm;
 	INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
 	if (!resize || (resize->order != shift))
 		goto out;
 
-	ret = -EBUSY;
-	if (!resize->prepare_done)
-		goto out;
-
 	ret = resize->error;
-	if (ret != 0)
+	if (ret)
 		goto out;
 
 	ret = resize_hpt_rehash(resize);
-	if (ret != 0)
+	if (ret)
 		goto out;
 
 	resize_hpt_pivot(resize);
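
The hunks above fold the old prepare_done flag into resize->error, which now encodes a small state machine: -EBUSY while the worker is still allocating, 0 once a prepared HPT is ready to commit, and any other negative value on failure. A minimal userspace C sketch of that lifecycle, with made-up names (the real logic is split across the two ioctl handlers patched above):

#include <errno.h>
#include <stdio.h>

static int resize_error = -EBUSY;	/* set when the request is queued */

/* Roughly what kvm_vm_ioctl_resize_hpt_prepare() reports after the change. */
static long prepare_status(void)
{
	if (resize_error == -EBUSY)
		return 100;		/* estimated time in ms: retry later */
	return resize_error;		/* 0: ready to commit, <0: failed */
}

int main(void)
{
	printf("while allocating: %ld\n", prepare_status());	/* 100 */
	resize_error = 0;		/* worker finished, as in prepare_work */
	printf("after completion: %ld\n", prepare_status());	/* 0 */
	return 0;
}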

+ 4 - 5
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	mtmsrd	r0,1		/* clear RI in MSR */
 	mtsrr0	r5
 	mtsrr1	r6
-	RFI
+	RFI_TO_KERNEL
 
 kvmppc_call_hv_entry:
 BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	RFI
+	RFI_TO_KERNEL
 
 	/* Virtual-mode return */
 .Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	ld	r0, VCPU_GPR(R0)(r4)
 	ld	r4, VCPU_GPR(R4)(r4)
-
-	hrfid
+	HRFI_TO_GUEST
 	b	.
 
 secondary_too_late:
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	ld	r4, PACAKMSR(r13)
 	mtspr	SPRN_SRR0, r3
 	mtspr	SPRN_SRR1, r4
-	rfid
+	RFI_TO_KERNEL
 9:	addi	r3, r1, STACK_FRAME_OVERHEAD
 	bl	kvmppc_bad_interrupt
 	b	9b

+ 2 - 0
arch/powerpc/kvm/book3s_pr.c

@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 #define MSR_USER32 MSR_USER
 #define MSR_USER64 MSR_USER
 #define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M   _PAGE_COHERENT
 #endif
 
 static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		pte.eaddr = eaddr;
 		pte.vpage = eaddr >> 12;
 		pte.page_size = MMU_PAGE_64K;
+		pte.wimg = HPTE_R_M;
 	}
 
 	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {

+ 5 - 2
arch/powerpc/kvm/book3s_rmhandlers.S

@@ -46,6 +46,9 @@
 
 #define FUNC(name)		name
 
+#define RFI_TO_KERNEL	RFI
+#define RFI_TO_GUEST	RFI
+
 .macro INTERRUPT_TRAMPOLINE intno
 
 .global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
 	GET_SCRATCH0(r13)
 
 	/* And get back into the code */
-	RFI
+	RFI_TO_KERNEL
 #endif
 
 /*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
 	ori	r5, r5, MSR_EE
 	mtsrr0	r7
 	mtsrr1	r6
-	RFI
+	RFI_TO_KERNEL
 
 #include "book3s_segment.S"

+ 2 - 2
arch/powerpc/kvm/book3s_segment.S

@@ -156,7 +156,7 @@ no_dcbz32_on:
 	PPC_LL	r9, SVCPU_R9(r3)
 	PPC_LL	r3, (SVCPU_R3)(r3)
 
-	RFI
+	RFI_TO_GUEST
 kvmppc_handler_trampoline_enter_end:
 
 
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL
 	beqa	BOOK3S_INTERRUPT_DOORBELL
 
-	RFI
+	RFI_TO_KERNEL
 kvmppc_handler_trampoline_exit_end:

+ 41 - 0
arch/powerpc/lib/feature-fixups.c

@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 	}
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+	unsigned int instrs[3], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___rfi_flush_fixup);
+	end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+
+	if (types & L1D_FLUSH_FALLBACK)
+		/* b .+16 to fallback flush */
+		instrs[0] = 0x48000010;
+
+	i = 0;
+	if (types & L1D_FLUSH_ORI) {
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
+	}
+
+	if (types & L1D_FLUSH_MTTRIG)
+		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+		patch_instruction(dest + 1, instrs[1]);
+		patch_instruction(dest + 2, instrs[2]);
+	}
+
+	printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
 	long *start, *end;
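
The patch loop in do_rfi_flush_fixups() resolves each table entry with dest = (void *)start + *start: the __rfi_flush_fixup section stores self-relative offsets rather than absolute addresses, so the table needs no relocation. A small standalone C sketch of that addressing scheme (the buffer and table here are illustrative, not the kernel's sections):

#include <stdio.h>
#include <stdint.h>

static char patch_site[16];		/* stand-in for a code location */

int main(void)
{
	/* Emit one self-relative entry: target minus the entry's address. */
	intptr_t table[1];
	table[0] = (intptr_t)(patch_site - (char *)&table[0]);

	/* Resolution, mirroring dest = (void *)start + *start above. */
	char *dest = (char *)&table[0] + table[0];

	printf("resolved %p, expected %p\n", (void *)dest, (void *)patch_site);
	return 0;
}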

+ 49 - 0
arch/powerpc/platforms/powernv/setup.c

@@ -37,13 +37,62 @@
 #include <asm/kexec.h>
 #include <asm/smp.h>
 #include <asm/tm.h>
+#include <asm/setup.h>
 
 #include "powernv.h"
 
+static void pnv_setup_rfi_flush(void)
+{
+	struct device_node *np, *fw_features;
+	enum l1d_flush_type type;
+	int enable;
+
+	/* Default to fallback in case fw-features are not available */
+	type = L1D_FLUSH_FALLBACK;
+	enable = 1;
+
+	np = of_find_node_by_name(NULL, "ibm,opal");
+	fw_features = of_get_child_by_name(np, "fw-features");
+	of_node_put(np);
+
+	if (fw_features) {
+		np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+		if (np && of_property_read_bool(np, "enabled"))
+			type = L1D_FLUSH_MTTRIG;
+
+		of_node_put(np);
+
+		np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+		if (np && of_property_read_bool(np, "enabled"))
+			type = L1D_FLUSH_ORI;
+
+		of_node_put(np);
+
+		/* Enable unless firmware says NOT to */
+		enable = 2;
+		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+		if (np && of_property_read_bool(np, "disabled"))
+			enable--;
+
+		of_node_put(np);
+
+		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+		if (np && of_property_read_bool(np, "disabled"))
+			enable--;
+
+		of_node_put(np);
+		of_node_put(fw_features);
+	}
+
+	setup_rfi_flush(type, enable > 0);
+}
+
 static void __init pnv_setup_arch(void)
 {
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
+	pnv_setup_rfi_flush();
+
 	/* Initialize SMP */
 	pnv_smp_init();
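
The "enable unless firmware says NOT to" logic in pnv_setup_rfi_flush() starts from 2 and subtracts one per needs-l1d-flush-* property that firmware marks "disabled", so the flush stays on unless both are off. A compact C sketch of that gating, with the two bools standing in for the device-tree lookups:

#include <stdbool.h>
#include <stdio.h>

static bool rfi_flush_wanted(bool hv_disabled, bool pr_disabled)
{
	int enable = 2;			/* enable unless firmware says NOT to */

	if (hv_disabled)
		enable--;
	if (pr_disabled)
		enable--;

	return enable > 0;
}

int main(void)
{
	printf("%d\n", rfi_flush_wanted(false, false));	/* 1: flush on */
	printf("%d\n", rfi_flush_wanted(true, false));	/* 1: still on */
	printf("%d\n", rfi_flush_wanted(true, true));	/* 0: flush off */
	return 0;
}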
 
 

+ 18 - 3
arch/powerpc/platforms/pseries/dlpar.c

@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 
 static CLASS_ATTR_RW(dlpar);
 
-static int __init pseries_dlpar_init(void)
+int __init dlpar_workqueue_init(void)
 {
+	if (pseries_hp_wq)
+		return 0;
+
 	pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
-					WQ_UNBOUND, 1);
+			WQ_UNBOUND, 1);
+
+	return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+static int __init dlpar_sysfs_init(void)
+{
+	int rc;
+
+	rc = dlpar_workqueue_init();
+	if (rc)
+		return rc;
+
 	return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
 }
-machine_device_initcall(pseries, pseries_dlpar_init);
+machine_device_initcall(pseries, dlpar_sysfs_init);
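
dlpar_workqueue_init() is made idempotent above because it now has two callers, dlpar_sysfs_init() and init_ras_IRQ(), which may run in either order. A tiny C sketch of the same create-once pattern (the pointer assignment stands in for alloc_workqueue(), which this sketch does not call):

#include <stdio.h>

static void *pseries_hp_wq;		/* stand-in for the workqueue */

static int workqueue_init(void)
{
	if (pseries_hp_wq)		/* already created by the other caller */
		return 0;

	pseries_hp_wq = &pseries_hp_wq;	/* placeholder allocation */
	return pseries_hp_wq ? 0 : -1;	/* -ENOMEM in the kernel */
}

int main(void)
{
	printf("first caller:  %d\n", workqueue_init());
	printf("second caller: %d\n", workqueue_init());	/* no-op */
	return 0;
}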
 
 

+ 2 - 0
arch/powerpc/platforms/pseries/pseries.h

@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
 	return CMO_PageSize;
 }
 
+int dlpar_workqueue_init(void);
+
 #endif /* _PSERIES_PSERIES_H */

+ 2 - 1
arch/powerpc/platforms/pseries/ras.c

@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
 	/* Hotplug Events */
 	np = of_find_node_by_path("/event-sources/hot-plug-events");
 	if (np != NULL) {
-		request_event_sources_irqs(np, ras_hotplug_interrupt,
+		if (dlpar_workqueue_init() == 0)
+			request_event_sources_irqs(np, ras_hotplug_interrupt,
 					   "RAS_HOTPLUG");
 					   "RAS_HOTPLUG");
 		of_node_put(np);
 		of_node_put(np);
 	}
 	}

+ 35 - 0
arch/powerpc/platforms/pseries/setup.c

@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
 	of_pci_check_probe_only();
 }
 
+static void pseries_setup_rfi_flush(void)
+{
+	struct h_cpu_char_result result;
+	enum l1d_flush_type types;
+	bool enable;
+	long rc;
+
+	/* Enable by default */
+	enable = true;
+
+	rc = plpar_get_cpu_characteristics(&result);
+	if (rc == H_SUCCESS) {
+		types = L1D_FLUSH_NONE;
+
+		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+			types |= L1D_FLUSH_MTTRIG;
+		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+			types |= L1D_FLUSH_ORI;
+
+		/* Use fallback if nothing set in hcall */
+		if (types == L1D_FLUSH_NONE)
+			types = L1D_FLUSH_FALLBACK;
+
+		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+			enable = false;
+	} else {
+		/* Default to fallback in case hcall is not available */
+		types = L1D_FLUSH_FALLBACK;
+	}
+
+	setup_rfi_flush(types, enable);
+}
+
 static void __init pSeries_setup_arch(void)
 {
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
 
 	fwnmi_init();
 
+	pseries_setup_rfi_flush();
+
 	/* By default, only probe PCI (can be overridden by rtas_pci) */
 	pci_add_flags(PCI_PROBE_ONLY);
 

+ 14 - 1
arch/x86/Kconfig

@@ -55,7 +55,6 @@ config X86
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_PMEM_API		if X86_64
-	# Causing hangs/crashes, see the commit that added this change for details.
 	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IOMAP
@@ -430,6 +430,19 @@ config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
 
+config RETPOLINE
+	bool "Avoid speculative indirect branches in kernel"
+	default y
+	help
+	  Compile kernel with the retpoline compiler options to guard against
+	  kernel-to-user data leaks by avoiding speculative indirect
+	  branches. Requires a compiler with -mindirect-branch=thunk-extern
+	  support for full protection. The kernel may run slower.
+
+	  Without compiler support, at least indirect branches in assembler
+	  code are eliminated. Since this includes the syscall entry path,
+	  it is not entirely pointless.
+
 config INTEL_RDT
 	bool "Intel Resource Director Technology support"
 	default n

+ 8 - 0
arch/x86/Makefile

@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
 #
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+    ifneq ($(RETPOLINE_CFLAGS),)
+        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+    endif
+endif
+
 archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
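
For context on what RETPOLINE_CFLAGS requests: with a compiler supporting -mindirect-branch=thunk-extern, every indirect call or jump is emitted as a transfer to an external __x86_indirect_thunk_<reg> symbol, which the kernel then provides (see asm-prototypes.h later in this diff). A hedged userspace demonstration; the file and function names are made up, and the interesting part is the generated assembly rather than the program output:

/* gcc -O2 -mindirect-branch=thunk-extern -mindirect-branch-register -S demo.c
 * emits "jmp __x86_indirect_thunk_rax" (or similar) for the indirect call in
 * apply() below; inspect demo.s to see it.
 */
static int add_one(int x)
{
	return x + 1;
}

int apply(int (*op)(int), int x)
{
	return op(x);	/* indirect call: becomes a retpoline thunk call */
}

int main(void)
{
	return apply(add_one, 41) == 42 ? 0 : 1;
}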
 
 

+ 3 - 2
arch/x86/crypto/aesni-intel_asm.S

@@ -32,6 +32,7 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
 	pxor INC, STATE4
 	movdqu IV, 0x30(OUTP)
 
-	call *%r11
+	CALL_NOSPEC %r11
 
 	movdqu 0x00(OUTP), INC
 	pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
 	_aesni_gf128mul_x_ble()
 	movups IV, (IVP)
 
-	call *%r11
+	CALL_NOSPEC %r11
 
 	movdqu 0x40(OUTP), INC
 	pxor INC, STATE1

+ 2 - 1
arch/x86/crypto/camellia-aesni-avx-asm_64.S

@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
 	vpxor 14 * 16(%rax), %xmm15, %xmm14;
 	vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-	call *%r9;
+	CALL_NOSPEC %r9;
 
 	addq $(16 * 16), %rsp;
 

+ 2 - 1
arch/x86/crypto/camellia-aesni-avx2-asm_64.S

@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
 	vpxor 14 * 32(%rax), %ymm15, %ymm14;
 	vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-	call *%r9;
+	CALL_NOSPEC %r9;
 
 	addq $(16 * 32), %rsp;
 

+ 2 - 1
arch/x86/crypto/crc32c-pcl-intel-asm_64.S

@@ -45,6 +45,7 @@
 
 #include <asm/inst.h>
 #include <linux/linkage.h>
+#include <asm/nospec-branch.h>
 
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
@@ -172,7 +173,7 @@ continue_block:
 	movzxw  (bufp, %rax, 2), len
 	lea	crc_array(%rip), bufp
 	lea     (bufp, len, 1), bufp
-	jmp     *bufp
+	JMP_NOSPEC bufp
 
 	################################################################
 	## 2a) PROCESS FULL BLOCKS:

+ 19 - 17
arch/x86/entry/calling.h

@@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
  * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
  * halves:
  */
-#define PTI_SWITCH_PGTABLES_MASK	(1<<PAGE_SHIFT)
-#define PTI_SWITCH_MASK		(PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+#define PTI_USER_PGTABLE_BIT		PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK		(1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT		X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK		(1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
 
 .macro SET_NOFLUSH_BIT	reg:req
 	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
@@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro ADJUST_KERNEL_CR3 reg:req
 	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
 	/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
-	andq    $(~PTI_SWITCH_MASK), \reg
+	andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
 .endm
 
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
@@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
 	/* Flush needed, clear the bit */
 	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
 	movq	\scratch_reg2, \scratch_reg
-	jmp	.Lwrcr3_\@
+	jmp	.Lwrcr3_pcid_\@
 
 .Lnoflush_\@:
 	movq	\scratch_reg2, \scratch_reg
 	SET_NOFLUSH_BIT \scratch_reg
 
+.Lwrcr3_pcid_\@:
+	/* Flip the ASID to the user version */
+	orq	$(PTI_USER_PCID_MASK), \scratch_reg
+
 .Lwrcr3_\@:
-	/* Flip the PGD and ASID to the user version */
-	orq     $(PTI_SWITCH_MASK), \scratch_reg
+	/* Flip the PGD to the user version */
+	orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
 	mov	\scratch_reg, %cr3
 .Lend_\@:
 .endm
@@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
 	movq	%cr3, \scratch_reg
 	movq	\scratch_reg, \save_reg
 	/*
-	 * Is the "switch mask" all zero?  That means that both of
-	 * these are zero:
-	 *
-	 *	1. The user/kernel PCID bit, and
-	 *	2. The user/kernel "bit" that points CR3 to the
-	 *	   bottom half of the 8k PGD
-	 *
-	 * That indicates a kernel CR3 value, not a user CR3.
+	 * Test the user pagetable bit. If set, then the user page tables
+	 * are active. If clear, CR3 already has the kernel page tables
+	 * active.
 	 */
-	testq	$(PTI_SWITCH_MASK), \scratch_reg
-	jz	.Ldone_\@
+	bt	$PTI_USER_PGTABLE_BIT, \scratch_reg
+	jnc	.Ldone_\@
 
 	ADJUST_KERNEL_CR3 \scratch_reg
 	movq	\scratch_reg, %cr3
@@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
 	 * KERNEL pages can always resume with NOFLUSH as we do
 	 * explicit flushes.
 	 */
-	bt	$X86_CR3_PTI_SWITCH_BIT, \save_reg
+	bt	$PTI_USER_PGTABLE_BIT, \save_reg
 	jnc	.Lnoflush_\@
 
 	/*

+ 3 - 2
arch/x86/entry/entry_32.S

@@ -44,6 +44,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 	.section .entry.text, "ax"
 
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
 
 	/* kernel thread */
 1:	movl	%edi, %eax
-	call	*%ebx
+	CALL_NOSPEC %ebx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
 	movl	%ecx, %es
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
-	call	*%edi
+	CALL_NOSPEC %edi
 	jmp	ret_from_exception
 END(common_exception)
 

+ 9 - 3
arch/x86/entry/entry_64.S

@@ -37,6 +37,7 @@
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 #include <linux/err.h>
 
 #include "calling.h"
@@ -191,7 +192,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
 	 */
 	pushq	%rdi
 	movq	$entry_SYSCALL_64_stage2, %rdi
-	jmp	*%rdi
+	JMP_NOSPEC %rdi
 END(entry_SYSCALL_64_trampoline)
 
 	.popsection
@@ -270,7 +271,12 @@ entry_SYSCALL_64_fastpath:
 	 * It might end up jumping to the slow path.  If it jumps, RAX
 	 * and all argument registers are clobbered.
 	 */
+#ifdef CONFIG_RETPOLINE
+	movq	sys_call_table(, %rax, 8), %rax
+	call	__x86_indirect_thunk_rax
+#else
 	call	*sys_call_table(, %rax, 8)
+#endif
 .Lentry_SYSCALL_64_after_fastpath_call:
 
 	movq	%rax, RAX(%rsp)
@@ -442,7 +448,7 @@ ENTRY(stub_ptregs_64)
 	jmp	entry_SYSCALL64_slow_path
 
 1:
-	jmp	*%rax				/* Called from C */
+	JMP_NOSPEC %rax				/* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -521,7 +527,7 @@ ENTRY(ret_from_fork)
 1:
 	/* kernel thread */
 	movq	%r12, %rdi
-	call	*%rbx
+	CALL_NOSPEC %rbx
 	/*
 	 * A kernel thread is allowed to return here after successfully
 	 * calling do_execve().  Exit to userspace to complete the execve()

+ 18 - 0
arch/x86/events/intel/bts.c

@@ -582,6 +582,24 @@ static __init int bts_init(void)
 	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
 		return -ENODEV;
 
+	if (boot_cpu_has(X86_FEATURE_PTI)) {
+		/*
+		 * BTS hardware writes through a virtual memory map; we must
+		 * either use the kernel physical map, or the user mapping of
+		 * the AUX buffer.
+		 *
+		 * However, since this driver supports per-CPU and per-task inherit,
+		 * we cannot use the user mapping, since it will not be available
+		 * if we're not running the owning process.
+		 *
+		 * With PTI we can't use the kernel map either, because it's not
+		 * there when we run userspace.
+		 *
+		 * For now, disable this driver when using PTI.
+		 */
+		return -ENODEV;
+	}
+
 	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
 				  PERF_PMU_CAP_EXCLUSIVE;
 	bts_pmu.task_ctx_nr	= perf_sw_context;

+ 25 - 0
arch/x86/include/asm/asm-prototypes.h

@@ -11,7 +11,32 @@
 #include <asm/pgtable.h>
 #include <asm/special_insns.h>
 #include <asm/preempt.h>
+#include <asm/asm.h>
 
 #ifndef CONFIG_X86_CMPXCHG64
 extern void cmpxchg8b_emu(void);
 #endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */

+ 4 - 0
arch/x86/include/asm/cpufeatures.h

@@ -203,6 +203,8 @@
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE		( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -342,5 +344,7 @@
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
 #define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 
 #endif /* _ASM_X86_CPUFEATURES_H */

+ 10 - 8
arch/x86/include/asm/mshyperv.h

@@ -7,6 +7,7 @@
 #include <linux/nmi.h>
 #include <asm/io.h>
 #include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 		return U64_MAX;
 
 	__asm__ __volatile__("mov %4, %%r8\n"
-			     "call *%5"
+			     CALL_NOSPEC
 			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 			       "+c" (control), "+d" (input_address)
-			     :  "r" (output_address), "m" (hv_hypercall_pg)
+			     :  "r" (output_address),
+				THUNK_TARGET(hv_hypercall_pg)
 			     : "cc", "memory", "r8", "r9", "r10", "r11");
 #else
 	u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	if (!hv_hypercall_pg)
 		return U64_MAX;
 
-	__asm__ __volatile__("call *%7"
+	__asm__ __volatile__(CALL_NOSPEC
 			     : "=A" (hv_status),
 			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
 			     : "A" (control),
 			       "b" (input_address_hi),
 			       "D"(output_address_hi), "S"(output_address_lo),
-			       "m" (hv_hypercall_pg)
+			       THUNK_TARGET(hv_hypercall_pg)
 			     : "cc", "memory");
 #endif /* !x86_64 */
 	return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 
 #ifdef CONFIG_X86_64
 	{
-		__asm__ __volatile__("call *%4"
+		__asm__ __volatile__(CALL_NOSPEC
				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
				       "+c" (control), "+d" (input1)
-				     : "m" (hv_hypercall_pg)
+				     : THUNK_TARGET(hv_hypercall_pg)
				     : "cc", "r8", "r9", "r10", "r11");
 	}
 #else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 		u32 input1_hi = upper_32_bits(input1);
 		u32 input1_lo = lower_32_bits(input1);
 
-		__asm__ __volatile__ ("call *%5"
+		__asm__ __volatile__ (CALL_NOSPEC
				      : "=A"(hv_status),
					"+c"(input1_lo),
					ASM_CALL_CONSTRAINT
				      :	"A" (control),
					"b" (input1_hi),
-					"m" (hv_hypercall_pg)
+					THUNK_TARGET(hv_hypercall_pg)
				      : "cc", "edi", "esi");
 	}
 #endif

+ 3 - 0
arch/x86/include/asm/msr-index.h

@@ -355,6 +355,9 @@
 #define FAM10H_MMIO_CONF_BASE_MASK	0xfffffffULL
 #define FAM10H_MMIO_CONF_BASE_SHIFT	20
 #define MSR_FAM10H_NODE_ID		0xc001100c
+#define MSR_F10H_DECFG			0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT	1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE		BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
 
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a

+ 214 - 0
arch/x86/include/asm/nospec-branch.h

@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;				\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative.  It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+	.Lannotate_\@:
+	.pushsection .discard.nospec
+	.long .Lannotate_\@ - .
+	.popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+	call	.Ldo_rop_\@
+.Lspec_trap_\@:
+	pause
+	jmp	.Lspec_trap_\@
+.Ldo_rop_\@:
+	mov	\reg, (%_ASM_SP)
+	ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+	jmp	.Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+	RETPOLINE_JMP \reg
+.Ldo_call_\@:
+	call	.Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE_2 __stringify(jmp *\reg),				\
+		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\
+		__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+	jmp	*\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE_2 __stringify(call *\reg),				\
+		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+		__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+	call	*\reg
+#endif
+.endm
+
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
+		\ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE				\
+	"999:\n\t"						\
+	".pushsection .discard.nospec\n\t"			\
+	".long 999b - .\n\t"					\
+	".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC						\
+	ANNOTATE_NOSPEC_ALTERNATIVE				\
+	ALTERNATIVE(						\
+	"call *%[thunk_target]\n",				\
+	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
+	X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",	\
+	"       jmp    904f;\n"					\
+	"       .align 16\n"					\
+	"901:	call   903f;\n"					\
+	"902:	pause;\n"					\
+	"       jmp    902b;\n"					\
+	"       .align 16\n"					\
+	"903:	addl   $4, %%esp;\n"				\
+	"       pushl  %[thunk_target];\n"			\
+	"       ret;\n"						\
+	"       .align 16\n"					\
+	"904:	call   901b;\n",				\
+	X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+	SPECTRE_V2_NONE,
+	SPECTRE_V2_RETPOLINE_MINIMAL,
+	SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+	SPECTRE_V2_RETPOLINE_GENERIC,
+	SPECTRE_V2_RETPOLINE_AMD,
+	SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+	unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+		      ALTERNATIVE("jmp 910f",
+				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+				  X86_FEATURE_RETPOLINE)
+		      "910:"
+		      : "=&r" (loops), ASM_CALL_CONSTRAINT
+		      : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
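
A userspace sketch of the RETPOLINE_CALL sequence defined above: instead of "call *%rbx", the target is reached by overwriting a just-pushed return address and executing ret, so the indirect branch predictor is never consulted; the pause loop catches any speculative return. x86-64 GCC/Clang inline asm, purely illustrative, not the kernel macro itself:

#include <stdio.h>

static int hits;

static void bump(void)
{
	hits++;			/* leaf function, no stack-alignment needs */
}

static void retpoline_call(void (*fn)(void))
{
	asm volatile("jmp 3f\n"
		     "1:	call 2f\n"		/* push trap address */
		     "4:	pause\n"
		     "	jmp 4b\n"			/* speculation trap */
		     "2:	mov %%rbx, (%%rsp)\n"	/* overwrite with fn */
		     "	ret\n"				/* architectural jump to fn */
		     "3:	call 1b\n"		/* fn's ret lands here */
		     : : "b" (fn)
		     : "rax", "rcx", "rdx", "rsi", "rdi",
		       "r8", "r9", "r10", "r11", "memory", "cc");
}

int main(void)
{
	retpoline_call(bump);
	printf("hits = %d\n", hits);	/* 1 */
	return 0;
}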

+ 1 - 0
arch/x86/include/asm/pci_x86.h

@@ -38,6 +38,7 @@ do {						\
 #define PCI_NOASSIGN_ROMS	0x80000
 #define PCI_ROOT_NO_CRS		0x100000
 #define PCI_NOASSIGN_BARS	0x200000
+#define PCI_BIG_ROOT_WINDOW	0x400000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;

+ 1 - 1
arch/x86/include/asm/processor-flags.h

@@ -40,7 +40,7 @@
 #define CR3_NOFLUSH	BIT_ULL(63)
 
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-# define X86_CR3_PTI_SWITCH_BIT	11
+# define X86_CR3_PTI_PCID_USER_BIT	11
 #endif
 
 #else

+ 3 - 3
arch/x86/include/asm/tlbflush.h

@@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
 	 * Make sure that the dynamic ASID space does not conflict with the
 	 * bit we are using to switch between user and kernel ASIDs.
 	 */
-	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
 
 	/*
 	 * The ASID being passed in here should have respected the
 	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
 	 */
-	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
 #endif
 	/*
 	 * The dynamically-assigned ASIDs that get passed in are small
@@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
 {
 	u16 ret = kern_pcid(asid);
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-	ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+	ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
 #endif
 	return ret;
 }

+ 3 - 2
arch/x86/include/asm/xen/hypercall.h

@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/smap.h>
+#include <asm/nospec-branch.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
 	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
 
 	stac();
-	asm volatile("call *%[call]"
+	asm volatile(CALL_NOSPEC
		     : __HYPERCALL_5PARAM
-		     : [call] "a" (&hypercall_page[call])
+		     : [thunk_target] "a" (&hypercall_page[call])
		     : __HYPERCALL_CLOBBER5);
	clac();
 

+ 5 - 2
arch/x86/kernel/alternative.c

@@ -344,9 +344,12 @@ done:
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
 	unsigned long flags;
+	int i;
 
-	if (instr[0] != 0x90)
-		return;
+	for (i = 0; i < a->padlen; i++) {
+		if (instr[i] != 0x90)
+			return;
+	}
 
 	local_irq_save(flags);
 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);

+ 26 - 2
arch/x86/kernel/cpu/amd.c

@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_K8);
 
 	if (cpu_has(c, X86_FEATURE_XMM2)) {
-		/* MFENCE stops RDTSC speculation */
-		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		unsigned long long val;
+		int ret;
+
+		/*
+		 * A serializing LFENCE has less overhead than MFENCE, so
+		 * use it for execution serialization.  On families which
+		 * don't have that MSR, LFENCE is already serializing.
+		 * msr_set_bit() uses the safe accessors, too, even if the MSR
+		 * is not present.
+		 */
+		msr_set_bit(MSR_F10H_DECFG,
+			    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+		/*
+		 * Verify that the MSR write was successful (could be running
+		 * under a hypervisor) and only then assume that LFENCE is
+		 * serializing.
+		 */
+		ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+		if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+			/* A serializing LFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+		} else {
+			/* MFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		}
 	}
 
 	/*

+ 185 - 0
arch/x86/kernel/cpu/bugs.c

@@ -10,6 +10,10 @@
  */
 #include <linux/init.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 
+static void __init spectre_v2_select_mitigation(void);
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
 		print_cpu_info(&boot_cpu_data);
 	}
 
+	/* Select the proper spectre mitigation before patching alternatives */
+	spectre_v2_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
 		set_memory_4k((unsigned long)__va(0), 1);
 #endif
 }
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+	SPECTRE_V2_CMD_NONE,
+	SPECTRE_V2_CMD_AUTO,
+	SPECTRE_V2_CMD_FORCE,
+	SPECTRE_V2_CMD_RETPOLINE,
+	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+	SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+	[SPECTRE_V2_NONE]			= "Vulnerable",
+	[SPECTRE_V2_RETPOLINE_MINIMAL]		= "Vulnerable: Minimal generic ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_MINIMAL_AMD]	= "Vulnerable: Minimal AMD ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
+	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+	return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+	int len = strlen(opt);
+
+	return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+	char arg[20];
+	int ret;
+
+	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+				  sizeof(arg));
+	if (ret > 0)  {
+		if (match_option(arg, ret, "off")) {
+			goto disable;
+		} else if (match_option(arg, ret, "on")) {
+			spec2_print_if_secure("force enabled on command line.");
+			return SPECTRE_V2_CMD_FORCE;
+		} else if (match_option(arg, ret, "retpoline")) {
+			spec2_print_if_insecure("retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE;
+		} else if (match_option(arg, ret, "retpoline,amd")) {
+			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+				return SPECTRE_V2_CMD_AUTO;
+			}
+			spec2_print_if_insecure("AMD retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_AMD;
+		} else if (match_option(arg, ret, "retpoline,generic")) {
+			spec2_print_if_insecure("generic retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+		} else if (match_option(arg, ret, "auto")) {
+			return SPECTRE_V2_CMD_AUTO;
+		}
+	}
+
+	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+		return SPECTRE_V2_CMD_AUTO;
+disable:
+	spec2_print_if_insecure("disabled on command line.");
+	return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+	/*
+	 * If the CPU is not affected and the command line mode is NONE or AUTO
+	 * then nothing to do.
+	 */
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+		return;
+
+	switch (cmd) {
+	case SPECTRE_V2_CMD_NONE:
+		return;
+
+	case SPECTRE_V2_CMD_FORCE:
+		/* FALLTHRU */
+	case SPECTRE_V2_CMD_AUTO:
+		goto retpoline_auto;
+
+	case SPECTRE_V2_CMD_RETPOLINE_AMD:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_amd;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_generic;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_auto;
+		break;
+	}
+	pr_err("kernel not compiled with retpoline; no mitigation available!");
+	return;
+
+retpoline_auto:
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+	retpoline_amd:
+		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+			pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+			goto retpoline_generic;
+		}
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	} else {
+	retpoline_generic:
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+					 SPECTRE_V2_RETPOLINE_MINIMAL;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	}
+
+	spectre_v2_enabled = mode;
+	pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		return sprintf(buf, "Not affected\n");
+	if (boot_cpu_has(X86_FEATURE_PTI))
+		return sprintf(buf, "Mitigation: PTI\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+		return sprintf(buf, "Not affected\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif

+ 3 - 0
arch/x86/kernel/cpu/common.c

@@ -926,6 +926,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	if (c->x86_vendor != X86_VENDOR_AMD)
 		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
 
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
 	fpu__init_system(c);
 
 #ifdef CONFIG_X86_32

+ 11 - 2
arch/x86/kernel/cpu/microcode/intel.c

@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-		pr_err_once("late loading on model 79 is disabled.\n");
+	/*
+	 * Late loading on model 79 with microcode revision less than 0x0b000021
+	 * may result in a system hang. This behavior is documented in item
+	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+	 */
+	if (c->x86 == 6 &&
+	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
+	    c->x86_mask == 0x01 &&
+	    c->microcode < 0x0b000021) {
+		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 		return true;
 	}
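
The blacklist predicate above gates on four conditions at once. A standalone C restatement of the erratum BDF90 check, using plain integers in place of the struct cpuinfo_x86 fields (model 79 is INTEL_FAM6_BROADWELL_X, per the original comment):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool bdf90_blacklisted(int family, int model, int stepping,
			      uint32_t microcode)
{
	return family == 6 && model == 79 && stepping == 0x01 &&
	       microcode < 0x0b000021;
}

int main(void)
{
	printf("%d\n", bdf90_blacklisted(6, 79, 1, 0x0b000010));	/* 1 */
	printf("%d\n", bdf90_blacklisted(6, 79, 1, 0x0b000021));	/* 0 */
	return 0;
}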
 
 

+ 4 - 2
arch/x86/kernel/ftrace_32.S

@@ -8,6 +8,7 @@
 #include <asm/segment.h>
 #include <asm/export.h>
 #include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CC_USING_FENTRY
 # define function_hook	__fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
 	movl	0x4(%ebp), %edx
 	subl	$MCOUNT_INSN_SIZE, %eax
 
-	call	*ftrace_trace_function
+	movl	ftrace_trace_function, %ecx
+	CALL_NOSPEC %ecx
 
 	popl	%edx
 	popl	%ecx
@@ -241,5 +243,5 @@ return_to_handler:
 	movl	%eax, %ecx
 	popl	%edx
 	popl	%eax
-	jmp	*%ecx
+	JMP_NOSPEC %ecx
 #endif

+ 4 - 4
arch/x86/kernel/ftrace_64.S

@@ -7,7 +7,7 @@
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
 #include <asm/export.h>
-
+#include <asm/nospec-branch.h>
 
 	.code64
 	.section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
 	 * ip and parent ip are used and the list function is called when
 	 * function tracing is enabled.
 	 */
-	call   *ftrace_trace_function
-
+	movq ftrace_trace_function, %r8
+	CALL_NOSPEC %r8
 	restore_mcount_regs
 
 	jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $24, %rsp
-	jmp *%rdi
+	JMP_NOSPEC %rdi
 #endif

+ 5 - 4
arch/x86/kernel/irq_32.c

@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 
 #include <asm/apic.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
 static void call_on_stack(void *func, void *stack)
 {
 	asm volatile("xchgl	%%ebx,%%esp	\n"
-		     "call	*%%edi		\n"
+		     CALL_NOSPEC
		     "movl	%%ebx,%%esp	\n"
		     : "=b" (stack)
		     : "0" (stack),
-		       "D"(func)
+		       [thunk_target] "D"(func)
		     : "memory", "cc", "edx", "ecx", "eax");
 }
 
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 		call_on_stack(print_stack_overflow, isp);
 
 	asm volatile("xchgl	%%ebx,%%esp	\n"
-		     "call	*%%edi		\n"
+		     CALL_NOSPEC
		     "movl	%%ebx,%%esp	\n"
		     : "=a" (arg1), "=b" (isp)
		     :  "0" (desc),   "1" (isp),
-			"D" (desc->handle_irq)
+			[thunk_target] "D" (desc->handle_irq)
		     : "memory", "cc", "ecx");
	return 1;
 }

+ 11 - 0
arch/x86/kernel/tboot.c

@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
 		return -1;
 	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
 	pte_unmap(pte);
+
+	/*
+	 * PTI poisons low addresses in the kernel page tables in the
+	 * name of making them unusable for userspace.  To execute
+	 * code at such a low address, the poison must be cleared.
+	 *
+	 * Note: 'pgd' actually gets set in p4d_alloc() _or_
+	 * pud_alloc() depending on 4/5-level paging.
+	 */
+	pgd->pgd &= ~_PAGE_NX;
+
 	return 0;
 }
 
 

+ 12 - 7
arch/x86/kvm/mmu.c

@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 {
 	if (unlikely(!lapic_in_kernel(vcpu) ||
-		     kvm_event_needs_reinjection(vcpu)))
+		     kvm_event_needs_reinjection(vcpu) ||
+		     vcpu->arch.exception.pending))
 		return false;
 
 	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
 
 int kvm_mmu_module_init(void)
 {
+	int ret = -ENOMEM;
+
 	kvm_mmu_clear_all_pte_masks();
 
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
 					    sizeof(struct pte_list_desc),
 					    0, SLAB_ACCOUNT, NULL);
 	if (!pte_list_desc_cache)
-		goto nomem;
+		goto out;
 
 	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
 						  sizeof(struct kvm_mmu_page),
 						  0, SLAB_ACCOUNT, NULL);
 	if (!mmu_page_header_cache)
-		goto nomem;
+		goto out;
 
 	if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
-		goto nomem;
+		goto out;
 
-	register_shrinker(&mmu_shrinker);
+	ret = register_shrinker(&mmu_shrinker);
+	if (ret)
+		goto out;
 
 	return 0;
 
-nomem:
+out:
 	mmu_destroy_caches();
-	return -ENOMEM;
+	return ret;
 }
 
 /*

+ 5 - 8
arch/x86/kvm/svm.c

@@ -45,6 +45,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *c, *h;
 	struct nested_state *g;
-	u32 h_intercept_exceptions;
 
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 	h = &svm->nested.hsave->control;
 	g = &svm->nested;
 
-	/* No need to intercept #UD if L1 doesn't intercept it */
-	h_intercept_exceptions =
-		h->intercept_exceptions & ~(1U << UD_VECTOR);
-
 	c->intercept_cr = h->intercept_cr | g->intercept_cr;
 	c->intercept_dr = h->intercept_dr | g->intercept_dr;
-	c->intercept_exceptions =
-		h_intercept_exceptions | g->intercept_exceptions;
+	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
 	c->intercept = h->intercept | g->intercept;
 }
 
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
 {
 	int er;
 
-	WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
 	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
 	if (er == EMULATE_USER_EXIT)
 		return 0;
@@ -5034,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 		);
 
+	/* Eliminate branch target predictions from guest mode */
+	vmexit_fill_RSB();
+
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else

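vmexit_fill_RSB(), added to both the SVM and VMX exit paths in this merge, overwrites the CPU's Return Stack Buffer with benign entries after a guest has run. A heavily simplified, illustrative sketch of one fill iteration; the real __FILL_RETURN_BUFFER macro in asm/nospec-branch.h unrolls 32 of these and batches the stack fixup:

asm(".text\n"
    ".globl rsb_fill_one_sketch\n"
    "rsb_fill_one_sketch:\n"
    "	call 1f\n"		/* pushes a return address into the RSB */
    "2:	pause\n"		/* speculation trap if that entry is consumed */
    "	lfence\n"
    "	jmp 2b\n"
    "1:	add $8, %rsp\n"	/* drop the architectural return address */
    "	ret\n");
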
+ 15 - 6
arch/x86/kvm/vmx.c

@@ -50,6 +50,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
 {
 	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
 
-	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
-	    vmcs_field_to_offset_table[field] == 0)
+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+		return -ENOENT;
+
+	/*
+	 * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
+	 * generic mechanism.
+	 */
+	asm("lfence");
+
+	if (vmcs_field_to_offset_table[field] == 0)
 		return -ENOENT;
 
 	return vmcs_field_to_offset_table[field];
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
 	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
 	if ((vcpu->guest_debug &
 	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 */
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
-	else
-		eb |= 1u << UD_VECTOR;
 
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_invalid_opcode(intr_info)) {
-		WARN_ON_ONCE(is_guest_mode(vcpu));
 		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
 		if (er == EMULATE_USER_EXIT)
 			return 0;
@@ -9485,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
+	/* Eliminate branch target predictions from guest mode */
+	vmexit_fill_RSB();
+
 	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
 	if (debugctlmsr)
 		update_debugctlmsr(debugctlmsr);

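The lfence in vmcs_field_to_offset() above is the standard Spectre-v1 (CVE-2017-5753) shape: validate the index, then stop speculation before using it. As a generic, illustrative sketch:

#include <stddef.h>

static int read_checked(const int *table, size_t size, size_t idx)
{
	if (idx >= size)
		return -1;
	/* Serialize: no speculative load of table[idx] with a
	 * bounds-check-bypassed idx can pass this point. */
	asm volatile("lfence" ::: "memory");
	return table[idx];
}
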
+ 1 - 0
arch/x86/lib/Makefile

@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 

+ 4 - 3
arch/x86/lib/checksum_32.S

@@ -29,7 +29,8 @@
 #include <asm/errno.h>
 #include <asm/asm.h>
 #include <asm/export.h>
-				
+#include <asm/nospec-branch.h>
+
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
 	negl %ebx
 	lea 45f(%ebx,%ebx,2), %ebx
 	testl %esi, %esi
-	jmp *%ebx
+	JMP_NOSPEC %ebx
 
 	# Handle 2-byte-aligned regions
 20:	addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
 	andl $-32,%edx
 	lea 3f(%ebx,%ebx), %ebx
 	testl %esi, %esi
-	jmp *%ebx
+	JMP_NOSPEC %ebx
 1:	addl $64,%esi
 	addl $64,%edi
 	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)

+ 48 - 0
arch/x86/lib/retpoline.S

@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+	.section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+	CFI_STARTPROC
+	JMP_NOSPEC %\reg
+	CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif

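Each thunk above simply wraps JMP_NOSPEC around its register; on retpoline-enabled CPUs that macro emits the classic capture sequence. An illustrative sketch for %rax (the real macros live in asm/nospec-branch.h):

asm(".text\n"
    ".globl retpoline_rax_sketch\n"
    "retpoline_rax_sketch:\n"
    "	call 2f\n"		/* RSB now predicts a return to 1: */
    "1:	pause\n"		/* speculative execution parks here */
    "	lfence\n"
    "	jmp 1b\n"
    "2:	mov %rax, (%rsp)\n"	/* overwrite return address with the target */
    "	ret\n");		/* architecturally jumps to *%rax */
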
+ 6 - 26
arch/x86/mm/pti.c

@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
  *
  * Returns a pointer to a P4D on success, or NULL on failure.
  */
-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 {
 	pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 		if (!new_p4d_page)
 			return NULL;
 
-		if (pgd_none(*pgd)) {
-			set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
-			new_p4d_page = 0;
-		}
-		if (new_p4d_page)
-			free_page(new_p4d_page);
+		set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
 	}
 	BUILD_BUG_ON(pgd_large(*pgd) != 0);
 
@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
  *
  * Returns a pointer to a PMD on success, or NULL on failure.
  */
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 {
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 		if (!new_pud_page)
 			return NULL;
 
-		if (p4d_none(*p4d)) {
-			set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
-			new_pud_page = 0;
-		}
-		if (new_pud_page)
-			free_page(new_pud_page);
+		set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
 	}
 
 	pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 		if (!new_pmd_page)
 			return NULL;
 
-		if (pud_none(*pud)) {
-			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
-			new_pmd_page = 0;
-		}
-		if (new_pmd_page)
-			free_page(new_pmd_page);
+		set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
 	}
 
 	return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 		if (!new_pte_page)
 			return NULL;
 
-		if (pmd_none(*pmd)) {
-			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
-			new_pte_page = 0;
-		}
-		if (new_pte_page)
-			free_page(new_pte_page);
+		set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
 	}
 
 	pte = pte_offset_kernel(pmd, address);

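The pti.c hunks above drop the "did someone populate this entry concurrently" fallback because the walkers are now __init and run single-threaded during early boot. The shape that remains, as an illustrative sketch:

	/* Boot-time only, so a plain test-and-populate is race-free. */
	if (pgd_none(*pgd)) {
		unsigned long new_page = __get_free_page(gfp);

		if (!new_page)
			return NULL;
		set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_page)));
	}
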
+ 5 - 0
arch/x86/pci/common.c

@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
 	} else if (!strcmp(str, "nocrs")) {
 		pci_probe |= PCI_ROOT_NO_CRS;
 		return NULL;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	} else if (!strcmp(str, "big_root_window")) {
+		pci_probe |= PCI_BIG_ROOT_WINDOW;
+		return NULL;
+#endif
 	} else if (!strcmp(str, "earlydump")) {
 		pci_early_dump_regs = 1;
 		return NULL;

+ 18 - 11
arch/x86/pci/fixup.c

@@ -662,10 +662,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
  */
 static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 {
-	unsigned i;
 	u32 base, limit, high;
-	struct resource *res, *conflict;
 	struct pci_dev *other;
+	struct resource *res;
+	unsigned i;
+	int r;
+
+	if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
+		return;
 
 	/* Check that we are the only device of that type */
 	other = pci_get_device(dev->vendor, dev->device, NULL);
@@ -699,22 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 	if (!res)
 		return;
 
+	/*
+	 * Allocate a 256GB window directly below the 0xfd00000000 hardware
+	 * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
+	 */
 	res->name = "PCI Bus 0000:00";
 	res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
 		IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
-	res->start = 0x100000000ull;
+	res->start = 0xbd00000000ull;
 	res->end = 0xfd00000000ull - 1;
 
-	/* Just grab the free area behind system memory for this */
-	while ((conflict = request_resource_conflict(&iomem_resource, res))) {
-		if (conflict->end >= res->end) {
-			kfree(res);
-			return;
-		}
-		res->start = conflict->end + 1;
+	r = request_resource(&iomem_resource, res);
+	if (r) {
+		kfree(res);
+		return;
 	}
 
-	dev_info(&dev->dev, "adding root bus resource %pR\n", res);
+	dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+		 res);
+	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 
 	base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
 		AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;

+ 2 - 0
arch/x86/platform/efi/efi_64.c

@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
 				pud[j] = *pud_offset(p4d_k, vaddr);
 			}
 		}
+		pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
 	}
+
 out:
 	__flush_tlb_all();
 

+ 1 - 1
arch/x86/platform/intel-mid/device_libs/platform_bt.c

@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
 	return 0;
 }
 
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
 	.setup	= tng_bt_sfi_setup,
 };
 

+ 3 - 5
arch/x86/xen/mmu_pv.c

@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 {
 	struct {
 		struct mmuext_op op;
-#ifdef CONFIG_SMP
-		DECLARE_BITMAP(mask, num_processors);
-#else
 		DECLARE_BITMAP(mask, NR_CPUS);
-#endif
 	} *args;
 	struct multicall_space mcs;
+	const size_t mc_entry_size = sizeof(args->op) +
+		sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
 
 	trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
 
 	if (cpumask_empty(cpus))
 		return;		/* nothing to do */
 
-	mcs = xen_mc_entry(sizeof(*args));
+	mcs = xen_mc_entry(mc_entry_size);
 	args = mcs.args;
 	args->op.arg2.vcpumask = to_cpumask(args->mask);
 

+ 1 - 1
arch/x86/xen/xen-ops.h

@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 void xen_save_time_memory_area(void);
 void xen_restore_time_memory_area(void);
-void __init xen_init_time_ops(void);
+void __ref xen_init_time_ops(void);
 void __init xen_hvm_init_time_ops(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);

+ 12 - 0
crypto/algapi.c

@@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 
 			spawn->alg = NULL;
 			spawns = &inst->alg.cra_users;
+
+			/*
+			 * We may encounter an unregistered instance here, since
+			 * an instance's spawns are set up prior to the instance
+			 * being registered.  An unregistered instance will have
+			 * NULL ->cra_users.next, since ->cra_users isn't
+			 * properly initialized until registration.  But an
+			 * unregistered instance cannot have any users, so treat
+			 * it the same as ->cra_users being empty.
+			 */
+			if (spawns->next == NULL)
+				break;
 		}
 	} while ((spawns = crypto_more_spawns(alg, &stack, &top,
 					      &secondary_spawns)));

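The check added above relies on the difference between a zeroed and an initialized list_head. A short illustrative sketch:

#include <linux/list.h>

/* A kzalloc()ed but not-yet-registered object has NULL list pointers;
 * INIT_LIST_HEAD() (done at registration) makes the list point at
 * itself, so "next == NULL" identifies the unregistered case. */
static bool never_initialized(const struct list_head *head)
{
	return head->next == NULL;
}
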
+ 3 - 0
drivers/base/Kconfig

@@ -236,6 +236,9 @@ config GENERIC_CPU_DEVICES
 config GENERIC_CPU_AUTOPROBE
 	bool
 
+config GENERIC_CPU_VULNERABILITIES
+	bool
+
 config SOC_BUS
 	bool
 	select GLOB

+ 48 - 0
drivers/base/cpu.c

@@ -511,10 +511,58 @@ static void __init cpu_dev_register_generic(void)
 #endif
 }
 
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+	&dev_attr_meltdown.attr,
+	&dev_attr_spectre_v1.attr,
+	&dev_attr_spectre_v2.attr,
+	NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+	.name  = "vulnerabilities",
+	.attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+	if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+			       &cpu_root_vulnerabilities_group))
+		pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
 void __init cpu_dev_init(void)
 {
 	if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
 		panic("Failed to register CPU subsystem");
 
 	cpu_dev_register_generic();
+	cpu_register_vulnerabilities();
 }

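The __weak handlers above default to "Not affected"; an architecture that is affected overrides them. A sketch modeled on the x86 cpu/bugs.c addition in this same merge (simplified, not the verbatim kernel code):

ssize_t cpu_show_meltdown(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
		return sprintf(buf, "Not affected\n");
	if (boot_cpu_has(X86_FEATURE_PTI))
		return sprintf(buf, "Mitigation: PTI\n");
	return sprintf(buf, "Vulnerable\n");
}

Userspace then reads the status from /sys/devices/system/cpu/vulnerabilities/{meltdown,spectre_v1,spectre_v2}.
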
+ 2 - 2
drivers/gpu/drm/i915/gvt/cmd_parser.c

@@ -2777,12 +2777,12 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 }
 
 static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
-		unsigned int opcode, int rings)
+		unsigned int opcode, unsigned long rings)
 {
 	struct cmd_info *info = NULL;
 	unsigned int ring;
 
-	for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) {
+	for_each_set_bit(ring, &rings, I915_NUM_ENGINES) {
 		info = find_cmd_entry(gvt, opcode, ring);
 		if (info)
 			break;

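The cmd_parser.c change above matters because for_each_set_bit() operates on unsigned long words; casting a 32-bit int's address to (unsigned long *) reads past the variable on 64-bit builds. The safe shape, illustratively (consumer name hypothetical):

	unsigned long rings = ring_mask;	/* full word, safe to scan */
	unsigned int ring;

	for_each_set_bit(ring, &rings, I915_NUM_ENGINES)
		handle_ring(ring);		/* hypothetical consumer */
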
+ 4 - 1
drivers/gpu/drm/i915/gvt/gtt.c

@@ -1359,12 +1359,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
 			return ret;
 	} else {
 		if (!test_bit(index, spt->post_shadow_bitmap)) {
+			int type = spt->shadow_page.type;
+
 			ppgtt_get_shadow_entry(spt, &se, index);
 			ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
 			if (ret)
 				return ret;
+			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+			ppgtt_set_shadow_entry(spt, &se, index);
 		}
-
 		ppgtt_set_post_shadow(spt, index);
 	}
 

+ 1 - 1
drivers/gpu/drm/i915/i915_gem.c

@@ -467,7 +467,7 @@ static void __fence_set_priority(struct dma_fence *fence, int prio)
 	struct drm_i915_gem_request *rq;
 	struct intel_engine_cs *engine;
 
-	if (!dma_fence_is_i915(fence))
+	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
 		return;
 
 	rq = to_request(fence);

+ 2 - 0
drivers/gpu/drm/i915/i915_reg.h

@@ -7027,6 +7027,8 @@ enum {
 #define GEN9_SLICE_COMMON_ECO_CHICKEN0		_MMIO(0x7308)
 #define  DISABLE_PIXEL_MASK_CAMMING		(1<<14)
 
+#define GEN9_SLICE_COMMON_ECO_CHICKEN1		_MMIO(0x731c)
+
 #define GEN7_L3SQCREG1				_MMIO(0xB010)
 #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 

+ 5 - 0
drivers/gpu/drm/i915/intel_engine_cs.c

@@ -1390,6 +1390,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
+	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+	ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+	if (ret)
+		return ret;
+
 	/* WaToEnableHwFixForPushConstHWBug:glk */
 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

+ 3 - 0
drivers/gpu/drm/i915/intel_lrc.c

@@ -974,6 +974,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
+	if (i915_gem_request_completed(request))
+		return;
+
 	if (prio <= READ_ONCE(request->priotree.priority))
 		return;
 

+ 1 - 0
drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c

@@ -174,6 +174,7 @@ gf119_sor = {
 		.links = gf119_sor_dp_links,
 		.power = g94_sor_dp_power,
 		.pattern = gf119_sor_dp_pattern,
+		.drive = gf119_sor_dp_drive,
 		.vcpi = gf119_sor_dp_vcpi,
 		.audio = gf119_sor_dp_audio,
 		.audio_sym = gf119_sor_dp_audio_sym,

+ 3 - 0
drivers/gpu/drm/tegra/sor.c

@@ -2656,6 +2656,9 @@ static int tegra_sor_probe(struct platform_device *pdev)
 				name, err);
 			goto remove;
 		}
+	} else {
+		/* fall back to the module clock on SOR0 (eDP/LVDS only) */
+		sor->clk_out = sor->clk;
 	}
 
 	sor->clk_parent = devm_clk_get(&pdev->dev, "parent");

+ 0 - 3
drivers/gpu/drm/vc4/vc4_irq.c

@@ -209,9 +209,6 @@ vc4_irq_postinstall(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-	/* Undo the effects of a previous vc4_irq_uninstall. */
-	enable_irq(dev->irq);
-
 	/* Enable both the render done and out of memory interrupts. */
 	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
 

+ 3 - 0
drivers/gpu/drm/vc4/vc4_v3d.c

@@ -327,6 +327,9 @@ static int vc4_v3d_runtime_resume(struct device *dev)
 		return ret;
 
 	vc4_v3d_init_hw(vc4->dev);
+
+	/* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
+	enable_irq(vc4->dev->irq);
 	vc4_irq_postinstall(vc4->dev);
 
 	return 0;

+ 2 - 0
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c

@@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
 	}
 
 	view_type = vmw_view_cmd_to_type(header->id);
+	if (view_type == vmw_view_max)
+		return -EINVAL;
 	cmd = container_of(header, typeof(*cmd), header);
 	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 				user_surface_converter,

+ 0 - 6
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c

@@ -697,7 +697,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
 	vps->pinned = 0;
 
 	/* Mapping is managed by prepare_fb/cleanup_fb */
-	memset(&vps->guest_map, 0, sizeof(vps->guest_map));
 	memset(&vps->host_map, 0, sizeof(vps->host_map));
 	vps->cpp = 0;
 
@@ -760,11 +759,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
 
 
 	/* Should have been freed by cleanup_fb */
-	if (vps->guest_map.virtual) {
-		DRM_ERROR("Guest mapping not freed\n");
-		ttm_bo_kunmap(&vps->guest_map);
-	}
-
 	if (vps->host_map.virtual) {
 		DRM_ERROR("Host mapping not freed\n");
 		ttm_bo_kunmap(&vps->host_map);

+ 1 - 1
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h

@@ -175,7 +175,7 @@ struct vmw_plane_state {
 	int pinned;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map, guest_map;
+	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 

+ 12 - 29
drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c

@@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit {
 	bool defined;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map, guest_map;
+	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 
@@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	s32 src_pitch, dst_pitch;
 	u8 *src, *dst;
 	bool not_used;
-
+	struct ttm_bo_kmap_obj guest_map;
+	int ret;
 
 	if (!dirty->num_hits)
 		return;
@@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	if (width == 0 || height == 0)
 		return;
 
+	ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
+			  &guest_map);
+	if (ret) {
+		DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
+			  ret);
+		goto out_cleanup;
+	}
 
 	/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
 	src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
@@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 
 	dst_pitch = ddirty->pitch;
-	dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
+	dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
 	dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 
 
@@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 		vmw_fifo_commit(dev_priv, sizeof(*cmd));
 	}
 
+	ttm_bo_kunmap(&guest_map);
 out_cleanup:
 	ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
 	ddirty->right = ddirty->bottom = S32_MIN;
@@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
 {
 	struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 
-	if (vps->guest_map.virtual)
-		ttm_bo_kunmap(&vps->guest_map);
-
 	if (vps->host_map.virtual)
 		ttm_bo_kunmap(&vps->host_map);
 
@@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 	 */
 	if (vps->content_fb_type == SEPARATE_DMA &&
 	    !(dev_priv->capabilities & SVGA_CAP_3D)) {
-
-		struct vmw_framebuffer_dmabuf *new_vfbd;
-
-		new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
-
-		ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
-				     NULL);
-		if (ret)
-			goto out_srf_unpin;
-
-		ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
-				  new_vfbd->buffer->base.num_pages,
-				  &vps->guest_map);
-
-		ttm_bo_unreserve(&new_vfbd->buffer->base);
-
-		if (ret) {
-			DRM_ERROR("Failed to map content buffer to CPU\n");
-			goto out_srf_unpin;
-		}
-
 		ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
 				  vps->surf->res.backup->base.num_pages,
 				  &vps->host_map);
 		if (ret) {
 			DRM_ERROR("Failed to map display buffer to CPU\n");
-			ttm_bo_kunmap(&vps->guest_map);
 			goto out_srf_unpin;
 		}
 
@@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
 	stdu->display_srf = vps->surf;
 	stdu->content_fb_type = vps->content_fb_type;
 	stdu->cpp = vps->cpp;
-	memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
 	memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 
 	if (!stdu->defined)

+ 2 - 2
drivers/infiniband/hw/hfi1/file_ops.c

@@ -763,11 +763,11 @@ static int complete_subctxt(struct hfi1_filedata *fd)
 	}
 
 	if (ret) {
-		hfi1_rcd_put(fd->uctxt);
-		fd->uctxt = NULL;
 		spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
 		__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
 		spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
+		hfi1_rcd_put(fd->uctxt);
+		fd->uctxt = NULL;
 	}
 
 	return ret;

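The file_ops.c reordering above is a use-after-free fix in miniature: the locked section still dereferences fd->uctxt, so the reference may only be dropped afterwards. The safe ordering, schematically:

	spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
	__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);	/* still live */
	spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
	hfi1_rcd_put(fd->uctxt);	/* may free the context */
	fd->uctxt = NULL;		/* never touch it again */
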
+ 7 - 4
drivers/infiniband/hw/mlx5/main.c

@@ -1324,7 +1324,8 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
 		return err;
 
 	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
-	    !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+	    (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
+	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 		return err;
 
 	mutex_lock(&dev->lb_mutex);
@@ -1342,7 +1343,8 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
 	mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
 
 	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
-	    !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+	    (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
+	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 		return;
 
 	mutex_lock(&dev->lb_mutex);
@@ -4158,7 +4160,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		goto err_cnt;
 
 	dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
-	if (!dev->mdev->priv.uar)
+	if (IS_ERR(dev->mdev->priv.uar))
 		goto err_cong;
 
 	err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
@@ -4187,7 +4189,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	}
 
 	if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
-	    MLX5_CAP_GEN(mdev, disable_local_lb))
+	    (MLX5_CAP_GEN(mdev, disable_local_lb_uc) ||
+	     MLX5_CAP_GEN(mdev, disable_local_lb_mc)))
 		mutex_init(&dev->lb_mutex);
 
 	dev->ib_active = true;

+ 3 - 4
drivers/infiniband/hw/mlx5/qp.c

@@ -4362,12 +4362,11 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
 
 	memset(ah_attr, 0, sizeof(*ah_attr));
 
-	ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
-	rdma_ah_set_port_num(ah_attr, path->port);
-	if (rdma_ah_get_port_num(ah_attr) == 0 ||
-	    rdma_ah_get_port_num(ah_attr) > MLX5_CAP_GEN(dev, num_ports))
+	if (!path->port || path->port > MLX5_CAP_GEN(dev, num_ports))
 		return;
 
+	ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
+
 	rdma_ah_set_port_num(ah_attr, path->port);
 	rdma_ah_set_sl(ah_attr, path->dci_cfi_prio_sl & 0xf);
 

+ 1 - 0
drivers/infiniband/ulp/isert/ib_isert.c

@@ -741,6 +741,7 @@ isert_connect_error(struct rdma_cm_id *cma_id)
 {
 	struct isert_conn *isert_conn = cma_id->qp->qp_context;
 
+	ib_drain_qp(isert_conn->qp);
 	list_del_init(&isert_conn->node);
 	isert_conn->cm_id = NULL;
 	isert_put_conn(isert_conn);

+ 3 - 0
drivers/mmc/host/renesas_sdhi_core.c

@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
@@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
+
+MODULE_LICENSE("GPL v2");

+ 4 - 2
drivers/mmc/host/s3cmci.c

@@ -1424,7 +1424,9 @@ static const struct file_operations s3cmci_fops_state = {
 struct s3cmci_reg {
 	unsigned short	addr;
 	unsigned char	*name;
-} debug_regs[] = {
+};
+
+static const struct s3cmci_reg debug_regs[] = {
 	DBG_REG(CON),
 	DBG_REG(PRE),
 	DBG_REG(CMDARG),
@@ -1446,7 +1448,7 @@ struct s3cmci_reg {
 static int s3cmci_regs_show(struct seq_file *seq, void *v)
 {
 	struct s3cmci_host *host = seq->private;
-	struct s3cmci_reg *rptr = debug_regs;
+	const struct s3cmci_reg *rptr = debug_regs;
 
 	for (; rptr->name; rptr++)
 		seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name,

+ 3 - 1
drivers/mux/core.c

@@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data)
 	return dev->of_node == data;
 }
 
+/* Note this function returns a reference to the mux_chip dev. */
 static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
 {
 	struct device *dev;
@@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
 	    (!args.args_count && (mux_chip->controllers > 1))) {
 		dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
 			np, args.np);
+		put_device(&mux_chip->dev);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
 	if (controller >= mux_chip->controllers) {
 		dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
 			np, controller, args.np);
+		put_device(&mux_chip->dev);
 		return ERR_PTR(-EINVAL);
 	}
 
-	get_device(&mux_chip->dev);
 	return &mux_chip->mux[controller];
 }
 EXPORT_SYMBOL_GPL(mux_control_get);

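The mux/core.c change above moves reference acquisition into the lookup helper, so every error return after a successful lookup must now pair with put_device(). The ownership rule, schematically (condition name illustrative):

	struct mux_chip *chip = of_find_mux_chip_by_node(np);	/* +1 ref */

	if (invalid_args) {
		put_device(&chip->dev);	/* error path drops the ref */
		return ERR_PTR(-EINVAL);
	}
	return &chip->mux[controller];	/* success hands the ref to the caller */
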
+ 4 - 0
drivers/net/ethernet/cirrus/cs89x0.c

@@ -1913,3 +1913,7 @@ static struct platform_driver cs89x0_driver = {
 module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe);
 
 #endif /* CONFIG_CS89x0_PLATFORM */
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Crystal Semiconductor (Now Cirrus Logic) CS89[02]0 network driver");
+MODULE_AUTHOR("Russell Nelson <nelson@crynwr.com>");

+ 7 - 6
drivers/net/ethernet/ibm/ibmvnic.c

@@ -757,6 +757,12 @@ static int ibmvnic_login(struct net_device *netdev)
 		}
 	} while (adapter->renegotiate);
 
+	/* handle pending MAC address changes after successful login */
+	if (adapter->mac_change_pending) {
+		__ibmvnic_set_mac(netdev, &adapter->desired.mac);
+		adapter->mac_change_pending = false;
+	}
+
 	return 0;
 }
 
@@ -994,11 +1000,6 @@ static int ibmvnic_open(struct net_device *netdev)
 
 	mutex_lock(&adapter->reset_lock);
 
-	if (adapter->mac_change_pending) {
-		__ibmvnic_set_mac(netdev, &adapter->desired.mac);
-		adapter->mac_change_pending = false;
-	}
-
 	if (adapter->state != VNIC_CLOSED) {
 		rc = ibmvnic_login(netdev);
 		if (rc) {
@@ -1532,7 +1533,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	struct sockaddr *addr = p;
 
-	if (adapter->state != VNIC_OPEN) {
+	if (adapter->state == VNIC_PROBED) {
 		memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr));
 		adapter->mac_change_pending = true;
 		return 0;

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно