7 years ago · c86d95cb6b
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -375,3 +375,19 @@ Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 
				 Description:	information about CPUs heterogeneity.
			
 
				 
			
 
				 		cpu_capacity: capacity of cpu#.
			
 
				+
			
 
				+What:		/sys/devices/system/cpu/vulnerabilities
			
 
				+		/sys/devices/system/cpu/vulnerabilities/meltdown
			
 
				+		/sys/devices/system/cpu/vulnerabilities/spectre_v1
			
 
				+		/sys/devices/system/cpu/vulnerabilities/spectre_v2
			
 
				+Date:		January 2018
			
 
				+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
			
 
				+Description:	Information about CPU vulnerabilities
			
 
				+
			
 
				+		The files are named after the code names of CPU
			
 
				+		vulnerabilities. The output of those files reflects the
			
 
				+		state of the CPUs in the system. Possible output values:
			
 
				+
			
 
				+		"Not affected"	  CPU is not affected by the vulnerability
			
 
				+		"Vulnerable"	  CPU is affected and no mitigation in effect
			
 
				+		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
			
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -713,9 +713,6 @@
 
				 			It will be ignored when crashkernel=X,high is not used
			
 
				 			or memory reserved is below 4G.
			
 
				 
			
 
				-	crossrelease_fullstack
			
 
				-			[KNL] Allow to record full stack trace in cross-release
			
 
				-
			
 
				 	cryptomgr.notests
			
 
				                         [KNL] Disable crypto self-tests
			
 
				 
			
@@ -2626,6 +2623,11 @@
 
				 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
			
 
				 			Equivalent to smt=1.
			
 
				 
			
 
				+	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
			
 
				+			(indirect branch prediction) vulnerability. System may
			
 
				+			allow data leaks with this option, which is equivalent
			
 
				+			to spectre_v2=off.
			
 
				+
			
 
				 	noxsave		[BUGS=X86] Disables x86 extended register state save
			
 
				 			and restore using xsave. The kernel will fallback to
			
 
				 			enabling legacy floating-point and sse state.
			
@@ -2712,8 +2714,6 @@
 
				 			steal time is computed, but won't influence scheduler
			
 
				 			behaviour
			
 
				 
			
 
				-	nopti		[X86-64] Disable kernel page table isolation
			
 
				-
			
 
				 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
			
 
				 
			
 
				 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
			
@@ -3100,6 +3100,12 @@
 
				 		pcie_scan_all	Scan all possible PCIe devices.  Otherwise we
			
 
				 				only look for one device below a PCIe downstream
			
 
				 				port.
			
 
				+		big_root_window	Try to add a big 64bit memory window to the PCIe
			
 
				+				root complex on AMD CPUs. Some GFX hardware
			
 
				+				can resize a BAR to allow access to all VRAM.
			
 
				+				Adding the window is slightly risky (it may
			
 
				+				conflict with unreported devices), so this
			
 
				+				taints the kernel.
			
 
				 
			
 
				 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
			
 
				 			Management.
			
@@ -3288,11 +3294,20 @@
 
				 	pt.		[PARIDE]
			
 
				 			See Documentation/blockdev/paride.txt.
			
 
				 
			
 
				-	pti=		[X86_64]
			
 
				-			Control user/kernel address space isolation:
			
 
				-			on - enable
			
 
				-			off - disable
			
 
				-			auto - default setting
			
 
				+	pti=		[X86_64] Control Page Table Isolation of user and
			
 
				+			kernel address spaces.  Disabling this feature
			
 
				+			removes hardening, but improves performance of
			
 
				+			system calls and interrupts.
			
 
				+
			
 
				+			on   - unconditionally enable
			
 
				+			off  - unconditionally disable
			
 
				+			auto - kernel detects whether your CPU model is
			
 
				+			       vulnerable to issues that PTI mitigates
			
 
				+
			
 
				+			Not specifying this option is equivalent to pti=auto.
			
 
				+
			
 
				+	nopti		[X86_64]
			
 
				+			Equivalent to pti=off
			
 
				 
			
 
				 	pty.legacy_count=
			
 
				 			[KNL] Number of legacy pty's. Overwrites compiled-in
			
@@ -3943,6 +3958,29 @@
 
				 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
			
 
				 			See Documentation/laptops/sonypi.txt
			
 
				 
			
 
				+	spectre_v2=	[X86] Control mitigation of Spectre variant 2
			
 
				+			(indirect branch speculation) vulnerability.
			
 
				+
			
 
				+			on   - unconditionally enable
			
 
				+			off  - unconditionally disable
			
 
				+			auto - kernel detects whether your CPU model is
			
 
				+			       vulnerable
			
 
				+
			
 
				+			Selecting 'on' will, and 'auto' may, choose a
			
 
				+			mitigation method at run time according to the
			
 
				+			CPU, the available microcode, the setting of the
			
 
				+			CONFIG_RETPOLINE configuration option, and the
			
 
				+			compiler with which the kernel was built.
			
 
				+
			
 
				+			Specific mitigations can also be selected manually:
			
 
				+
			
 
				+			retpoline	  - replace indirect branches
			
 
				+			retpoline,generic - Google's original retpoline
			
 
				+			retpoline,amd     - AMD-specific minimal thunk
			
 
				+
			
 
				+			Not specifying this option is equivalent to
			
 
				+			spectre_v2=auto.
			
 
				+
			
 
				 	spia_io_base=	[HW,MTD]
			
 
				 	spia_fio_base=
			
 
				 	spia_pedr=
			
--- a/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt
+++ b/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt
@@ -1,10 +1,31 @@
 
				 * Freescale MXS audio complex with SGTL5000 codec
			
 
				 
			
 
				 Required properties:
			
 
				-- compatible: "fsl,mxs-audio-sgtl5000"
			
 
				-- model: The user-visible name of this sound complex
			
 
				-- saif-controllers: The phandle list of the MXS SAIF controller
			
 
				-- audio-codec: The phandle of the SGTL5000 audio codec
			
 
				+- compatible		: "fsl,mxs-audio-sgtl5000"
			
 
				+- model			: The user-visible name of this sound complex
			
 
				+- saif-controllers	: The phandle list of the MXS SAIF controller
			
 
				+- audio-codec		: The phandle of the SGTL5000 audio codec
			
 
				+- audio-routing		: A list of the connections between audio components.
			
 
				+			  Each entry is a pair of strings, the first being the
			
 
				+			  connection's sink, the second being the connection's
			
 
				+			  source. Valid names could be power supplies, SGTL5000
			
 
				+			  pins, and the jacks on the board:
			
 
				+
			
 
				+			  Power supplies:
			
 
				+			   * Mic Bias
			
 
				+
			
 
				+			  SGTL5000 pins:
			
 
				+			   * MIC_IN
			
 
				+			   * LINE_IN
			
 
				+			   * HP_OUT
			
 
				+			   * LINE_OUT
			
 
				+
			
 
				+			  Board connectors:
			
 
				+			   * Mic Jack
			
 
				+			   * Line In Jack
			
 
				+			   * Headphone Jack
			
 
				+			   * Line Out Jack
			
 
				+			   * Ext Spk
			
 
				 
			
 
				 Example:
			
 
				 
			
@@ -14,4 +35,8 @@ sound {
 
				 	model = "imx28-evk-sgtl5000";
			
 
				 	saif-controllers = <&saif0 &saif1>;
			
 
				 	audio-codec = <&sgtl5000>;
			
 
				+	audio-routing =
			
 
				+		"MIC_IN", "Mic Jack",
			
 
				+		"Mic Jack", "Mic Bias",
			
 
				+		"Headphone Jack", "HP_OUT";
			
 
				 };
			
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -25,8 +25,8 @@ available from the following download page.  At least "mkfs.nilfs2",
 
				 cleaner or garbage collector) are required.  Details on the tools are
			
 
				 described in the man pages included in the package.
			
 
				 
			
 
				-Project web page:    http://nilfs.sourceforge.net/
			
 
				-Download page:       http://nilfs.sourceforge.net/en/download.html
			
 
				+Project web page:    https://nilfs.sourceforge.io/
			
 
				+Download page:       https://nilfs.sourceforge.io/en/download.html
			
 
				 List info:           http://vger.kernel.org/vger-lists.html#linux-nilfs
			
 
				 
			
 
				 Caveats
			
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
 
				 <expr> ::= <symbol>                             (1)
			
 
				            <symbol> '=' <symbol>                (2)
			
 
				            <symbol> '!=' <symbol>               (3)
			
 
				-           '(' <expr> ')'                       (4)
			
 
				-           '!' <expr>                           (5)
			
 
				-           <expr> '&&' <expr>                   (6)
			
 
				-           <expr> '||' <expr>                   (7)
			
 
				+           <symbol1> '<' <symbol2>              (4)
			
 
				+           <symbol1> '>' <symbol2>              (4)
			
 
				+           <symbol1> '<=' <symbol2>             (4)
			
 
				+           <symbol1> '>=' <symbol2>             (4)
			
 
				+           '(' <expr> ')'                       (5)
			
 
				+           '!' <expr>                           (6)
			
 
				+           <expr> '&&' <expr>                   (7)
			
 
				+           <expr> '||' <expr>                   (8)
			
 
				 
			
 
				 Expressions are listed in decreasing order of precedence. 
			
 
				 
			
@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
 
				     otherwise 'n'.
			
 
				 (3) If the values of both symbols are equal, it returns 'n',
			
 
				     otherwise 'y'.
			
 
				-(4) Returns the value of the expression. Used to override precedence.
			
 
				-(5) Returns the result of (2-/expr/).
			
 
				-(6) Returns the result of min(/expr/, /expr/).
			
 
				-(7) Returns the result of max(/expr/, /expr/).
			
 
				+(4) If the value of <symbol1> is respectively lower, greater, lower-or-equal,
			
 
				+    or greater-or-equal than the value of <symbol2>, it returns 'y',
			
 
				+    otherwise 'n'.
			
 
				+(5) Returns the value of the expression. Used to override precedence.
			
 
				+(6) Returns the result of (2-/expr/).
			
 
				+(7) Returns the result of min(/expr/, /expr/).
			
 
				+(8) Returns the result of max(/expr/, /expr/).
			
 
				 
			
 
				 An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
			
 
				 respectively for calculations). A menu entry becomes visible when its
			
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -9,6 +9,7 @@ Contents:
 
				    batman-adv
			
 
				    kapi
			
 
				    z8530book
			
 
				+   msg_zerocopy
			
 
				 
			
 
				 .. only::  subproject
			
 
				 
			
@@ -16,4 +17,3 @@ Contents:
 
				    =======
			
 
				 
			
 
				    * :ref:`genindex`
			
 
				-
			
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option:
 
				 	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
			
 
				 		error(1, errno, "setsockopt zerocopy");
			
 
				 
			
 
				+Setting the socket option only works when the socket is in its initial
			
 
				+(TCP_CLOSED) state.  Trying to set the option for a socket returned by accept(),
			
 
				+for example, will lead to an EBUSY error. In this case, the option should be set
			
 
				+on the listening socket and it will be inherited by the accepted sockets.
			
 
				 
			
 
				 Transmission
			
 
				 ------------
			
--- a/Documentation/usb/gadget-testing.txt
+++ b/Documentation/usb/gadget-testing.txt
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value
 
				 in each line. The rules stated above are best illustrated with an example:
			
 
				 
			
 
				 # mkdir functions/uvc.usb0/control/header/h
			
 
				-# cd functions/uvc.usb0/control/header/h
			
 
				+# cd functions/uvc.usb0/control/
			
 
				 # ln -s header/h class/fs
			
 
				 # ln -s header/h class/ss
			
 
				 # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
			
--- a/Documentation/x86/pti.txt
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
 
				+Overview
			
 
				+========
			
 
				+
			
 
				+Page Table Isolation (pti, previously known as KAISER[1]) is a
			
 
				+countermeasure against attacks on the shared user/kernel address
			
 
				+space such as the "Meltdown" approach[2].
			
 
				+
			
 
				+To mitigate this class of attacks, we create an independent set of
			
 
				+page tables for use only when running userspace applications.  When
			
 
				+the kernel is entered via syscalls, interrupts or exceptions, the
			
 
				+page tables are switched to the full "kernel" copy.  When the system
			
 
				+switches back to user mode, the user copy is used again.
			
 
				+
			
 
				+The userspace page tables contain only a minimal amount of kernel
			
 
				+data: only what is needed to enter/exit the kernel such as the
			
 
				+entry/exit functions themselves and the interrupt descriptor table
			
 
				+(IDT).  There are a few strictly unnecessary things that get mapped
			
 
				+such as the first C function when entering an interrupt (see
			
 
				+comments in pti.c).
			
 
				+
			
 
				+This approach helps to ensure that side-channel attacks leveraging
			
 
				+the paging structures do not function when PTI is enabled.  It can be
			
 
				+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
			
 
				+Once enabled at compile-time, it can be disabled at boot with the
			
 
				+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
			
 
				+
			
 
				+Page Table Management
			
 
				+=====================
			
 
				+
			
 
				+When PTI is enabled, the kernel manages two sets of page tables.
			
 
				+The first set is very similar to the single set which is present in
			
 
				+kernels without PTI.  This includes a complete mapping of userspace
			
 
				+that the kernel can use for things like copy_to_user().
			
 
				+
			
 
				+Although _complete_, the user portion of the kernel page tables is
			
 
				+crippled by setting the NX bit in the top level.  This ensures
			
 
				+that any missed kernel->user CR3 switch will immediately crash
			
 
				+userspace upon executing its first instruction.
			
 
				+
			
 
				+The userspace page tables map only the kernel data needed to enter
			
 
				+and exit the kernel.  This data is entirely contained in the 'struct
			
 
				+cpu_entry_area' structure which is placed in the fixmap which gives
			
 
				+each CPU's copy of the area a compile-time-fixed virtual address.
			
 
				+
			
 
				+For new userspace mappings, the kernel makes the entries in its
			
 
				+page tables like normal.  The only difference is when the kernel
			
 
				+makes entries in the top (PGD) level.  In addition to setting the
			
 
				+entry in the main kernel PGD, a copy of the entry is made in the
			
 
				+userspace page tables' PGD.
			
 
				+
			
 
				+This sharing at the PGD level also inherently shares all the lower
			
 
				+layers of the page tables.  This leaves a single, shared set of
			
 
				+userspace page tables to manage.  One PTE to lock, one set of
			
 
				+accessed bits, dirty bits, etc...
			
 
				+
			
 
				+Overhead
			
 
				+========
			
 
				+
			
 
				+Protection against side-channel attacks is important.  But,
			
 
				+this protection comes at a cost:
			
 
				+
			
 
				+1. Increased Memory Use
			
 
				+  a. Each process now needs an order-1 PGD instead of order-0.
			
 
				+     (Consumes an additional 4k per process).
			
 
				+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
			
 
				+     aligned so that it can be mapped by setting a single PMD
			
 
				+     entry.  This consumes nearly 2MB of RAM once the kernel
			
 
				+     is decompressed, but no space in the kernel image itself.
			
 
				+
			
 
				+2. Runtime Cost
			
 
				+  a. CR3 manipulation to switch between the page table copies
			
 
				+     must be done at interrupt, syscall, and exception entry
			
 
				+     and exit (it can be skipped when the kernel is interrupted,
			
 
				+     though.)  Moves to CR3 are on the order of a hundred
			
 
				+     cycles, and are required at every entry and exit.
			
 
				+  b. A "trampoline" must be used for SYSCALL entry.  This
			
 
				+     trampoline depends on a smaller set of resources than the
			
 
				+     non-PTI SYSCALL entry code, so requires mapping fewer
			
 
				+     things into the userspace page tables.  The downside is
			
 
				+     that stacks must be switched at entry time.
			
 
				+  c. Global pages are disabled for all kernel structures not
			
 
				+     mapped into both kernel and userspace page tables.  This
			
 
				+     feature of the MMU allows different processes to share TLB
			
 
				+     entries mapping the kernel.  Losing the feature means more
			
 
				+     TLB misses after a context switch.  The actual loss of
			
 
				+     performance is very small, however, never exceeding 1%.
			
 
				+  d. Process Context IDentifiers (PCID) is a CPU feature that
			
 
				+     allows us to skip flushing the entire TLB when switching page
			
 
				+     tables by setting a special bit in CR3 when the page tables
			
 
				+     are changed.  This makes switching the page tables (at context
			
 
				+     switch, or kernel entry/exit) cheaper.  But, on systems with
			
 
				+     PCID support, the context switch code must flush both the user
			
 
				+     and kernel entries out of the TLB.  The user PCID TLB flush is
			
 
				+     deferred until the exit to userspace, minimizing the cost.
			
 
				+     See intel.com/sdm for the gory PCID/INVPCID details.
			
 
				+  e. The userspace page tables must be populated for each new
			
 
				+     process.  Even without PTI, the shared kernel mappings
			
 
				+     are created by copying top-level (PGD) entries into each
			
 
				+     new process.  But, with PTI, there are now *two* kernel
			
 
				+     mappings: one in the kernel page tables that maps everything
			
 
				+     and one for the entry/exit structures.  At fork(), we need to
			
 
				+     copy both.
			
 
				+  f. In addition to the fork()-time copying, there must also
			
 
				+     be an update to the userspace PGD any time a set_pgd() is done
			
 
				+     on a PGD used to map userspace.  This ensures that the kernel
			
 
				+     and userspace copies always map the same userspace
			
 
				+     memory.
			
 
				+  g. On systems without PCID support, each CR3 write flushes
			
 
				+     the entire TLB.  That means that each syscall, interrupt
			
 
				+     or exception flushes the TLB.
			
 
				+  h. INVPCID is a TLB-flushing instruction which allows flushing
			
 
				+     of TLB entries for non-current PCIDs.  Some systems support
			
 
				+     PCIDs, but do not support INVPCID.  On these systems, addresses
			
 
				+     can only be flushed from the TLB for the current PCID.  When
			
 
				+     flushing a kernel address, we need to flush all PCIDs, so a
			
 
				+     single kernel address flush will require a TLB-flushing CR3
			
 
				+     write upon the next use of every PCID.
			
 
				+
			
 
				+Possible Future Work
			
 
				+====================
			
 
				+1. We can be more careful about not actually writing to CR3
			
 
				+   unless its value is actually changed.
			
 
				+2. Allow PTI to be enabled/disabled at runtime in addition to the
			
 
				+   boot-time switching.
			
 
				+
			
 
				+Testing
			
 
				+=======
			
 
				+
			
 
				+To test stability of PTI, the following test procedure is recommended,
			
 
				+ideally doing all of these in parallel:
			
 
				+
			
 
				+1. Set CONFIG_DEBUG_ENTRY=y
			
 
				+2. Run several copies of all of the tools/testing/selftests/x86/ tests
			
 
				+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
			
 
				+   several minutes.  These tests frequently uncover corner cases in the
			
 
				+   kernel entry code.  In general, old kernels might cause these tests
			
 
				+   themselves to crash, but they should never crash the kernel.
			
 
				+3. Run the 'perf' tool in a mode (top or record) that generates many
			
 
				+   frequent performance monitoring non-maskable interrupts (see "NMI"
			
 
				+   in /proc/interrupts).  This exercises the NMI entry/exit code which
			
 
				+   is known to trigger bugs in code paths that did not expect to be
			
 
				+   interrupted, including nested NMIs.  Using "-c" boosts the rate of
			
 
				+   NMIs, and using two -c with separate counters encourages nested NMIs
			
 
				+   and less deterministic behavior.
			
 
				+
			
 
				+	while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
			
 
				+
			
 
				+4. Launch a KVM virtual machine.
			
 
				+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
			
 
				+   This has been a lightly-tested code path and needs extra scrutiny.
			
 
				+
			
 
				+Debugging
			
 
				+=========
			
 
				+
			
 
				+Bugs in PTI cause a few different signatures of crashes
			
 
				+that are worth noting here.
			
 
				+
			
 
				+ * Failures of the selftests/x86 code.  Usually a bug in one of the
			
 
				+   more obscure corners of entry_64.S
			
 
				+ * Crashes in early boot, especially around CPU bringup.  Bugs
			
 
				+   in the trampoline code or mappings cause these.
			
 
				+ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
			
 
				+   like screwing up a page table switch.  Also caused by
			
 
				+   incorrectly mapping the IRQ handler entry code.
			
 
				+ * Crashes at the first NMI.  The NMI code is separate from main
			
 
				+   interrupt handlers and can have bugs that do not affect
			
 
				+   normal interrupts.  Also caused by incorrectly mapping NMI
			
 
				+   code.  NMIs that interrupt the entry code must be very
			
 
				+   careful and can be the cause of crashes that show up when
			
 
				+   running perf.
			
 
				+ * Kernel crashes at the first exit to userspace.  entry_64.S
			
 
				+   bugs, or failing to map some of the exit code.
			
 
				+ * Crashes at first interrupt that interrupts userspace. The paths
			
 
				+   in entry_64.S that return to userspace are sometimes separate
			
 
				+   from the ones that return to the kernel.
			
 
				+ * Double faults: overflowing the kernel stack because of page
			
 
				+   faults upon page faults.  Caused by touching non-pti-mapped
			
 
				+   data in the entry code, or forgetting to switch to kernel
			
 
				+   CR3 before calling into C functions which are not pti-mapped.
			
 
				+ * Userspace segfaults early in boot, sometimes manifesting
			
 
				+   as mount(8) failing to mount the rootfs.  These have
			
 
				+   tended to be TLB invalidation issues.  Usually invalidating
			
 
				+   the wrong PCID, or otherwise missing an invalidation.
			
 
				+
			
 
				+1. https://gruss.cc/files/kaiser.pdf
			
 
				+2. https://meltdownattack.com/meltdown.pdf
			
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9638,8 +9638,8 @@ F:	include/uapi/linux/sunrpc/
 
				 NILFS2 FILESYSTEM
			
 
				 M:	Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
			
 
				 L:	linux-nilfs@vger.kernel.org
			
 
				-W:	http://nilfs.sourceforge.net/
			
 
				-W:	http://nilfs.osdn.jp/
			
 
				+W:	https://nilfs.sourceforge.io/
			
 
				+W:	https://nilfs.osdn.jp/
			
 
				 T:	git git://github.com/konis/nilfs2.git
			
 
				 S:	Supported
			
 
				 F:	Documentation/filesystems/nilfs2.txt
			
@@ -10135,7 +10135,7 @@ F:	drivers/irqchip/irq-ompic.c
 
				 F:	drivers/irqchip/irq-or1k-*
			
 
				 
			
 
				 OPENVSWITCH
			
 
				-M:	Pravin Shelar <pshelar@nicira.com>
			
 
				+M:	Pravin B Shelar <pshelar@ovn.org>
			
 
				 L:	netdev@vger.kernel.org
			
 
				 L:	dev@openvswitch.org
			
 
				 W:	http://openvswitch.org
			
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 
				 VERSION = 4
			
 
				 PATCHLEVEL = 15
			
 
				 SUBLEVEL = 0
			
 
				-EXTRAVERSION = -rc7
			
 
				+EXTRAVERSION = -rc8
			
 
				 NAME = Fearless Coyote
			
 
				 
			
 
				 # *DOCUMENTATION*
			
@@ -484,26 +484,6 @@ CLANG_GCC_TC	:= --gcc-toolchain=$(GCC_TOOLCHAIN)
 
				 endif
			
 
				 KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
			
 
				 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
			
 
				-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
			
 
				-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
			
 
				-KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
			
 
				-KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
			
 
				-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
			
 
				-# Quiet clang warning: comparison of unsigned expression < 0 is always false
			
 
				-KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
			
 
				-# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
			
 
				-# source of a reference will be _MergedGlobals and not on of the whitelisted names.
			
 
				-# See modpost pattern 2
			
 
				-KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
			
 
				-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
			
 
				-KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
			
 
				-KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
			
 
				-else
			
 
				-
			
 
				-# These warnings generated too much noise in a regular build.
			
 
				-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
			
 
				-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
			
 
				-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
			
 
				 endif
			
 
				 
			
 
				 ifeq ($(config-targets),1)
			
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
 
				 endif
			
 
				 KBUILD_CFLAGS += $(stackp-flag)
			
 
				 
			
 
				+ifeq ($(cc-name),clang)
			
 
				+KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
			
 
				+KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
			
 
				+KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
			
 
				+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
			
 
				+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
			
 
				+# Quiet clang warning: comparison of unsigned expression < 0 is always false
			
 
				+KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
			
 
				+# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
			
 
				+# source of a reference will be _MergedGlobals and not one of the whitelisted names.
			
 
				+# See modpost pattern 2
			
 
				+KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
			
 
				+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
			
 
				+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
			
 
				+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
			
 
				+else
			
 
				+
			
 
				+# These warnings generated too much noise in a regular build.
			
 
				+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
			
 
				+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
			
 
				+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
			
 
				+endif
			
 
				+
			
 
				 ifdef CONFIG_FRAME_POINTER
			
 
				 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
			
 
				 else
			
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
 
				 	}
			
 
				 
			
 
				 	if (ti->softirq_time) {
			
 
				-		delta = cycle_to_nsec(ti->softirq_time));
			
 
				+		delta = cycle_to_nsec(ti->softirq_time);
			
 
				 		account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
			
 
				 	}
			
 
				 
			
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -235,6 +235,7 @@ LEAF(mips_cps_core_init)
 
				 	has_mt	t0, 3f
			
 
				 
			
 
				 	.set	push
			
 
				+	.set	MIPS_ISA_LEVEL_RAW
			
 
				 	.set	mt
			
 
				 
			
 
				 	/* Only allow 1 TC per VPE to execute... */
			
@@ -388,6 +389,7 @@ LEAF(mips_cps_boot_vpes)
 
				 #elif defined(CONFIG_MIPS_MT)
			
 
				 
			
 
				 	.set	push
			
 
				+	.set	MIPS_ISA_LEVEL_RAW
			
 
				 	.set	mt
			
 
				 
			
 
				 	/* If the core doesn't support MT then return */
			
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
 
				 	struct task_struct *t;
			
 
				 	int max_users;
			
 
				 
			
 
				+	/* If nothing to change, return right away, successfully.  */
			
 
				+	if (value == mips_get_process_fp_mode(task))
			
 
				+		return 0;
			
 
				+
			
 
				+	/* Only accept a mode change if 64-bit FP enabled for o32.  */
			
 
				+	if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
			
 
				+		return -EOPNOTSUPP;
			
 
				+
			
 
				+	/* And only for o32 tasks.  */
			
 
				+	if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
			
 
				+		return -EOPNOTSUPP;
			
 
				+
			
 
				 	/* Check the value is valid */
			
 
				 	if (value & ~known_bits)
			
 
				 		return -EOPNOTSUPP;
			
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -419,63 +419,160 @@ static int gpr64_set(struct task_struct *target,
 
				 
			
 
				 #endif /* CONFIG_64BIT */
			
 
				 
			
 
				+/*
			
 
				+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
			
 
				+ * !CONFIG_CPU_HAS_MSA variant.  FP context's general register slots
			
 
				+ * correspond 1:1 to buffer slots.  Only general registers are copied.
			
 
				+ */
			
 
				+static int fpr_get_fpa(struct task_struct *target,
			
 
				+		       unsigned int *pos, unsigned int *count,
			
 
				+		       void **kbuf, void __user **ubuf)
			
 
				+{
			
 
				+	return user_regset_copyout(pos, count, kbuf, ubuf,
			
 
				+				   &target->thread.fpu,
			
 
				+				   0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
			
 
				+ * CONFIG_CPU_HAS_MSA variant.  Only lower 64 bits of FP context's
			
 
				+ * general register slots are copied to buffer slots.  Only general
			
 
				+ * registers are copied.
			
 
				+ */
			
 
				+static int fpr_get_msa(struct task_struct *target,
			
 
				+		       unsigned int *pos, unsigned int *count,
			
 
				+		       void **kbuf, void __user **ubuf)
			
 
				+{
			
 
				+	unsigned int i;
			
 
				+	u64 fpr_val;
			
 
				+	int err;
			
 
				+
			
 
				+	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
			
 
				+	for (i = 0; i < NUM_FPU_REGS; i++) {
			
 
				+		fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
			
 
				+		err = user_regset_copyout(pos, count, kbuf, ubuf,
			
 
				+					  &fpr_val, i * sizeof(elf_fpreg_t),
			
 
				+					  (i + 1) * sizeof(elf_fpreg_t));
			
 
				+		if (err)
			
 
				+			return err;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
			
 
				+ * Choose the appropriate helper for general registers, and then copy
			
 
				+ * the FCSR register separately.
			
 
				+ */
			
 
				 static int fpr_get(struct task_struct *target,
			
 
				 		   const struct user_regset *regset,
			
 
				 		   unsigned int pos, unsigned int count,
			
 
				 		   void *kbuf, void __user *ubuf)
			
 
				 {
			
 
				-	unsigned i;
			
 
				+	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
			
 
				 	int err;
			
 
				-	u64 fpr_val;
			
 
				 
			
 
				-	/* XXX fcr31  */
			
 
				+	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
			
 
				+		err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
			
 
				+	else
			
 
				+		err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				 
			
 
				-	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
			
 
				-		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
			
 
				-					   &target->thread.fpu,
			
 
				-					   0, sizeof(elf_fpregset_t));
			
 
				+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
			
 
				+				  &target->thread.fpu.fcr31,
			
 
				+				  fcr31_pos, fcr31_pos + sizeof(u32));
			
 
				 
			
 
				-	for (i = 0; i < NUM_FPU_REGS; i++) {
			
 
				-		fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
			
 
				-		err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
			
 
				-					  &fpr_val, i * sizeof(elf_fpreg_t),
			
 
				-					  (i + 1) * sizeof(elf_fpreg_t));
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
			
 
				+ * !CONFIG_CPU_HAS_MSA variant.   Buffer slots correspond 1:1 to FP
			
 
				+ * context's general register slots.  Only general registers are copied.
			
 
				+ */
			
 
				+static int fpr_set_fpa(struct task_struct *target,
			
 
				+		       unsigned int *pos, unsigned int *count,
			
 
				+		       const void **kbuf, const void __user **ubuf)
			
 
				+{
			
 
				+	return user_regset_copyin(pos, count, kbuf, ubuf,
			
 
				+				  &target->thread.fpu,
			
 
				+				  0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
			
 
				+ * CONFIG_CPU_HAS_MSA variant.  Buffer slots are copied to lower 64
			
 
				+ * bits only of FP context's general register slots.  Only general
			
 
				+ * registers are copied.
			
 
				+ */
			
 
				+static int fpr_set_msa(struct task_struct *target,
			
 
				+		       unsigned int *pos, unsigned int *count,
			
 
				+		       const void **kbuf, const void __user **ubuf)
			
 
				+{
			
 
				+	unsigned int i;
			
 
				+	u64 fpr_val;
			
 
				+	int err;
			
 
				+
			
 
				+	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
			
 
				+	for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
			
 
				+		err = user_regset_copyin(pos, count, kbuf, ubuf,
			
 
				+					 &fpr_val, i * sizeof(elf_fpreg_t),
			
 
				+					 (i + 1) * sizeof(elf_fpreg_t));
			
 
				 		if (err)
			
 
				 			return err;
			
 
				+		set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
			
 
				+ * Choose the appropriate helper for general registers, and then copy
			
 
				+ * the FCSR register separately.
			
 
				+ *
			
 
				+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
			
 
				+ * which is supposed to have been guaranteed by the kernel before
			
 
				+ * calling us, e.g. in `ptrace_regset'.  We enforce that requirement,
			
 
				+ * so that we can safely avoid preinitializing temporaries for
			
 
				+ * partial register writes.
			
 
				+ */
			
 
				 static int fpr_set(struct task_struct *target,
			
 
				 		   const struct user_regset *regset,
			
 
				 		   unsigned int pos, unsigned int count,
			
 
				 		   const void *kbuf, const void __user *ubuf)
			
 
				 {
			
 
				-	unsigned i;
			
 
				+	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
			
 
				+	u32 fcr31;
			
 
				 	int err;
			
 
				-	u64 fpr_val;
			
 
				 
			
 
				-	/* XXX fcr31  */
			
 
				+	BUG_ON(count % sizeof(elf_fpreg_t));
			
 
				+
			
 
				+	if (pos + count > sizeof(elf_fpregset_t))
			
 
				+		return -EIO;
			
 
				 
			
 
				 	init_fp_ctx(target);
			
 
				 
			
 
				-	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
			
 
				-		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
			
 
				-					  &target->thread.fpu,
			
 
				-					  0, sizeof(elf_fpregset_t));
			
 
				+	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
			
 
				+		err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
			
 
				+	else
			
 
				+		err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				 
			
 
				-	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
			
 
				-	for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
			
 
				+	if (count > 0) {
			
 
				 		err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
			
 
				-					 &fpr_val, i * sizeof(elf_fpreg_t),
			
 
				-					 (i + 1) * sizeof(elf_fpreg_t));
			
 
				+					 &fcr31,
			
 
				+					 fcr31_pos, fcr31_pos + sizeof(u32));
			
 
				 		if (err)
			
 
				 			return err;
			
 
				-		set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
			
 
				+
			
 
				+		ptrace_setfcr31(target, fcr31);
			
 
				 	}
			
 
				 
			
 
				-	return 0;
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 enum mips_regset {
			
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -209,5 +209,11 @@ exc_##label##_book3e:
 
				 	ori	r3,r3,vector_offset@l;		\
			
 
				 	mtspr	SPRN_IVOR##vector_number,r3;
			
 
				 
			
 
				+#define RFI_TO_KERNEL							\
			
 
				+	rfi
			
 
				+
			
 
				+#define RFI_TO_USER							\
			
 
				+	rfi
			
 
				+
			
 
				 #endif /* _ASM_POWERPC_EXCEPTION_64E_H */
			
 
				 
			
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,59 @@
 
				  */
			
 
				 #define EX_R3		EX_DAR
			
 
				 
			
 
				+/*
			
 
				+ * Macros for annotating the expected destination of (h)rfid
			
 
				+ *
			
 
				+ * The nop instructions allow us to insert one or more instructions to flush the
			
 
				+ * L1-D cache when returning to userspace or a guest.
			
 
				+ */
			
 
				+#define RFI_FLUSH_SLOT							\
			
 
				+	RFI_FLUSH_FIXUP_SECTION;					\
			
 
				+	nop;								\
			
 
				+	nop;								\
			
 
				+	nop
			
 
				+
			
 
				+#define RFI_TO_KERNEL							\
			
 
				+	rfid
			
 
				+
			
 
				+#define RFI_TO_USER							\
			
 
				+	RFI_FLUSH_SLOT;							\
			
 
				+	rfid;								\
			
 
				+	b	rfi_flush_fallback
			
 
				+
			
 
				+#define RFI_TO_USER_OR_KERNEL						\
			
 
				+	RFI_FLUSH_SLOT;							\
			
 
				+	rfid;								\
			
 
				+	b	rfi_flush_fallback
			
 
				+
			
 
				+#define RFI_TO_GUEST							\
			
 
				+	RFI_FLUSH_SLOT;							\
			
 
				+	rfid;								\
			
 
				+	b	rfi_flush_fallback
			
 
				+
			
 
				+#define HRFI_TO_KERNEL							\
			
 
				+	hrfid
			
 
				+
			
 
				+#define HRFI_TO_USER							\
			
 
				+	RFI_FLUSH_SLOT;							\
			
 
				+	hrfid;								\
			
 
				+	b	hrfi_flush_fallback
			
 
				+
			
 
				+#define HRFI_TO_USER_OR_KERNEL						\
			
 
				+	RFI_FLUSH_SLOT;							\
			
 
				+	hrfid;								\
			
 
				+	b	hrfi_flush_fallback
			
 
				+
			
 
				+#define HRFI_TO_GUEST							\
			
 
				+	RFI_FLUSH_SLOT;							\
			
 
				+	hrfid;								\
			
 
				+	b	hrfi_flush_fallback
			
 
				+
			
 
				+#define HRFI_TO_UNKNOWN							\
			
 
				+	RFI_FLUSH_SLOT;							\
			
 
				+	hrfid;								\
			
 
				+	b	hrfi_flush_fallback
			
 
				+
			
 
				 #ifdef CONFIG_RELOCATABLE
			
 
				 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)			\
			
 
				 	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
			
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
				 	mtspr	SPRN_##h##SRR0,r12;					\
			
 
				 	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
			
 
				 	mtspr	SPRN_##h##SRR1,r10;					\
			
 
				-	h##rfid;							\
			
 
				+	h##RFI_TO_KERNEL;						\
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 #define EXCEPTION_PROLOG_PSERIES_1(label, h)				\
			
 
				 	__EXCEPTION_PROLOG_PSERIES_1(label, h)
			
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
				 	mtspr	SPRN_##h##SRR0,r12;					\
			
 
				 	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
			
 
				 	mtspr	SPRN_##h##SRR1,r10;					\
			
 
				-	h##rfid;							\
			
 
				+	h##RFI_TO_KERNEL;						\
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 
			
 
				 #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h)			\
			
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,7 +187,20 @@ label##3:					       	\
 
				 	FTR_ENTRY_OFFSET label##1b-label##3b;		\
			
 
				 	.popsection;
			
 
				 
			
 
				+#define RFI_FLUSH_FIXUP_SECTION				\
			
 
				+951:							\
			
 
				+	.pushsection __rfi_flush_fixup,"a";		\
			
 
				+	.align 2;					\
			
 
				+952:							\
			
 
				+	FTR_ENTRY_OFFSET 951b-952b;			\
			
 
				+	.popsection;
			
 
				+
			
 
				+
			
 
				 #ifndef __ASSEMBLY__
			
 
				+#include <linux/types.h>
			
 
				+
			
 
				+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
			
 
				+
			
 
				 void apply_feature_fixups(void);
			
 
				 void setup_feature_keys(void);
			
 
				 #endif
			
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -241,6 +241,7 @@
 
				 #define H_GET_HCA_INFO          0x1B8
			
 
				 #define H_GET_PERF_COUNT        0x1BC
			
 
				 #define H_MANAGE_TRACE          0x1C0
			
 
				+#define H_GET_CPU_CHARACTERISTICS 0x1C8
			
 
				 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
			
 
				 #define H_QUERY_INT_STATE       0x1E4
			
 
				 #define H_POLL_PENDING		0x1D8
			
@@ -330,6 +331,17 @@
 
				 #define H_SIGNAL_SYS_RESET_ALL_OTHERS		-2
			
 
				 /* >= 0 values are CPU number */
			
 
				 
			
 
				+/* H_GET_CPU_CHARACTERISTICS return values */
			
 
				+#define H_CPU_CHAR_SPEC_BAR_ORI31	(1ull << 63) // IBM bit 0
			
 
				+#define H_CPU_CHAR_BCCTRL_SERIALISED	(1ull << 62) // IBM bit 1
			
 
				+#define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
			
 
				+#define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
			
 
				+#define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
			
 
				+
			
 
				+#define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
			
 
				+#define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
			
 
				+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2
			
 
				+
			
 
				 /* Flag values used in H_REGISTER_PROC_TBL hcall */
			
 
				 #define PROC_TABLE_OP_MASK	0x18
			
 
				 #define PROC_TABLE_DEREG	0x10
			
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+struct h_cpu_char_result {
			
 
				+	u64 character;
			
 
				+	u64 behaviour;
			
 
				+};
			
 
				+
			
 
				 #endif /* __ASSEMBLY__ */
			
 
				 #endif /* __KERNEL__ */
			
 
				 #endif /* _ASM_POWERPC_HVCALL_H */
			
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -232,6 +232,16 @@ struct paca_struct {
 
				 	struct sibling_subcore_state *sibling_subcore_state;
			
 
				 #endif
			
 
				 #endif
			
 
				+#ifdef CONFIG_PPC_BOOK3S_64
			
 
				+	/*
			
 
				+	 * rfi fallback flush must be in its own cacheline to prevent
			
 
				+	 * other paca data leaking into the L1d
			
 
				+	 */
			
 
				+	u64 exrfi[EX_SIZE] __aligned(0x80);
			
 
				+	void *rfi_flush_fallback_area;
			
 
				+	u64 l1d_flush_congruence;
			
 
				+	u64 l1d_flush_sets;
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 extern void copy_mm_to_paca(struct mm_struct *mm);
			
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
 
				 	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
			
 
				 }
			
 
				 
			
 
				+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
			
 
				+{
			
 
				+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
			
 
				+	long rc;
			
 
				+
			
 
				+	rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
			
 
				+	if (rc == H_SUCCESS) {
			
 
				+		p->character = retbuf[0];
			
 
				+		p->behaviour = retbuf[1];
			
 
				+	}
			
 
				+
			
 
				+	return rc;
			
 
				+}
			
 
				+
			
 
				 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
			
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
 
				 static inline void pseries_little_endian_exceptions(void) {}
			
 
				 #endif /* CONFIG_PPC_PSERIES */
			
 
				 
			
 
				+void rfi_flush_enable(bool enable);
			
 
				+
			
 
				+/* These are bit flags */
			
 
				+enum l1d_flush_type {
			
 
				+	L1D_FLUSH_NONE		= 0x1,
			
 
				+	L1D_FLUSH_FALLBACK	= 0x2,
			
 
				+	L1D_FLUSH_ORI		= 0x4,
			
 
				+	L1D_FLUSH_MTTRIG	= 0x8,
			
 
				+};
			
 
				+
			
 
				+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
			
 
				+void do_rfi_flush_fixups(enum l1d_flush_type types);
			
 
				+
			
 
				 #endif /* !__ASSEMBLY__ */
			
 
				 
			
 
				 #endif	/* _ASM_POWERPC_SETUP_H */
			
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -237,6 +237,11 @@ int main(void)
 
				 	OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
			
 
				 	OFFSET(PACA_IN_MCE, paca_struct, in_mce);
			
 
				 	OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
			
 
				+	OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
			
 
				+	OFFSET(PACA_EXRFI, paca_struct, exrfi);
			
 
				+	OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
			
 
				+	OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
			
 
				+
			
 
				 #endif
			
 
				 	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
			
 
				 	OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
			
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -37,6 +37,11 @@
 
				 #include <asm/tm.h>
			
 
				 #include <asm/ppc-opcode.h>
			
 
				 #include <asm/export.h>
			
 
				+#ifdef CONFIG_PPC_BOOK3S
			
 
				+#include <asm/exception-64s.h>
			
 
				+#else
			
 
				+#include <asm/exception-64e.h>
			
 
				+#endif
			
 
				 
			
 
				 /*
			
 
				  * System calls.
			
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
 
				 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
			
 
				 
			
 
				 	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
			
 
				+	ld	r2,GPR2(r1)
			
 
				+	ld	r1,GPR1(r1)
			
 
				+	mtlr	r4
			
 
				+	mtcr	r5
			
 
				+	mtspr	SPRN_SRR0,r7
			
 
				+	mtspr	SPRN_SRR1,r8
			
 
				+	RFI_TO_USER
			
 
				+	b	.	/* prevent speculative execution */
			
 
				+
			
 
				+	/* exit to kernel */
			
 
				 1:	ld	r2,GPR2(r1)
			
 
				 	ld	r1,GPR1(r1)
			
 
				 	mtlr	r4
			
 
				 	mtcr	r5
			
 
				 	mtspr	SPRN_SRR0,r7
			
 
				 	mtspr	SPRN_SRR1,r8
			
 
				-	RFI
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 
			
 
				 .Lsyscall_error:
			
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
				 	mtmsrd	r10, 1
			
 
				 	mtspr	SPRN_SRR0, r11
			
 
				 	mtspr	SPRN_SRR1, r12
			
 
				-
			
 
				-	rfid
			
 
				+	RFI_TO_USER
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 #endif
			
 
				 _ASM_NOKPROBE_SYMBOL(system_call_common);
			
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
 
				 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
			
 
				 	ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
			
 
				 	REST_GPR(13, r1)
			
 
				-1:
			
 
				+
			
 
				 	mtspr	SPRN_SRR1,r3
			
 
				 
			
 
				 	ld	r2,_CCR(r1)
			
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
				 	ld	r3,GPR3(r1)
			
 
				 	ld	r4,GPR4(r1)
			
 
				 	ld	r1,GPR1(r1)
			
 
				+	RFI_TO_USER
			
 
				+	b	.	/* prevent speculative execution */
			
 
				 
			
 
				-	rfid
			
 
				+1:	mtspr	SPRN_SRR1,r3
			
 
				+
			
 
				+	ld	r2,_CCR(r1)
			
 
				+	mtcrf	0xFF,r2
			
 
				+	ld	r2,_NIP(r1)
			
 
				+	mtspr	SPRN_SRR0,r2
			
 
				+
			
 
				+	ld	r0,GPR0(r1)
			
 
				+	ld	r2,GPR2(r1)
			
 
				+	ld	r3,GPR3(r1)
			
 
				+	ld	r4,GPR4(r1)
			
 
				+	ld	r1,GPR1(r1)
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 
			
 
				 #endif /* CONFIG_PPC_BOOK3E */
			
@@ -1073,7 +1101,7 @@ __enter_rtas:
 
				 	
			
 
				 	mtspr	SPRN_SRR0,r5
			
 
				 	mtspr	SPRN_SRR1,r6
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 
			
 
				 rtas_return_loc:
			
@@ -1098,7 +1126,7 @@ rtas_return_loc:
 
				 
			
 
				 	mtspr	SPRN_SRR0,r3
			
 
				 	mtspr	SPRN_SRR1,r4
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 _ASM_NOKPROBE_SYMBOL(__enter_rtas)
			
 
				 _ASM_NOKPROBE_SYMBOL(rtas_return_loc)
			
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
 
				 	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
			
 
				 	andc	r11,r11,r12
			
 
				 	mtsrr1	r11
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 #endif /* CONFIG_PPC_BOOK3E */
			
 
				 
			
 
				 1:	/* Return from OF */
			
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
 
				 	LOAD_HANDLER(r12, machine_check_handle_early)
			
 
				 1:	mtspr	SPRN_SRR0,r12
			
 
				 	mtspr	SPRN_SRR1,r11
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 2:
			
 
				 	/* Stack overflow. Stay on emergency stack and panic.
			
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 
				 	li	r3,MSR_ME
			
 
				 	andc	r10,r10,r3		/* Turn off MSR_ME */
			
 
				 	mtspr	SPRN_SRR1,r10
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.
			
 
				 2:
			
 
				 	/*
			
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 
				 	 */
			
 
				 	bl	machine_check_queue_event
			
 
				 	MACHINE_CHECK_HANDLER_WINDUP
			
 
				-	rfid
			
 
				+	RFI_TO_USER_OR_KERNEL
			
 
				 9:
			
 
				 	/* Deliver the machine check to host kernel in V mode. */
			
 
				 	MACHINE_CHECK_HANDLER_WINDUP
			
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
 
				 	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
			
 
				 	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */
			
 
				 
			
 
				+	andi.	r9,r11,MSR_PR	// Check for exception from userspace
			
 
				+	cmpdi	cr4,r9,MSR_PR	// And save the result in CR4 for later
			
 
				+
			
 
				 	/*
			
 
				 	 * Test MSR_RI before calling slb_allocate_realmode, because the
			
 
				 	 * MSR in r11 gets clobbered. However we still want to allocate
			
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
				 
			
 
				 	/* All done -- return from exception. */
			
 
				 
			
 
				+	bne	cr4,1f		/* returning to kernel */
			
 
				+
			
 
				 .machine	push
			
 
				 .machine	"power4"
			
 
				 	mtcrf	0x80,r9
			
 
				+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
			
 
				 	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
			
 
				 	mtcrf	0x02,r9		/* I/D indication is in cr6 */
			
 
				 	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
			
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
				 	ld	r11,PACA_EXSLB+EX_R11(r13)
			
 
				 	ld	r12,PACA_EXSLB+EX_R12(r13)
			
 
				 	ld	r13,PACA_EXSLB+EX_R13(r13)
			
 
				-	rfid
			
 
				+	RFI_TO_USER
			
 
				+	b	.	/* prevent speculative execution */
			
 
				+1:
			
 
				+.machine	push
			
 
				+.machine	"power4"
			
 
				+	mtcrf	0x80,r9
			
 
				+	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
			
 
				+	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
			
 
				+	mtcrf	0x02,r9		/* I/D indication is in cr6 */
			
 
				+	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
			
 
				+.machine	pop
			
 
				+
			
 
				+	RESTORE_CTR(r9, PACA_EXSLB)
			
 
				+	RESTORE_PPR_PACA(PACA_EXSLB, r9)
			
 
				+	mr	r3,r12
			
 
				+	ld	r9,PACA_EXSLB+EX_R9(r13)
			
 
				+	ld	r10,PACA_EXSLB+EX_R10(r13)
			
 
				+	ld	r11,PACA_EXSLB+EX_R11(r13)
			
 
				+	ld	r12,PACA_EXSLB+EX_R12(r13)
			
 
				+	ld	r13,PACA_EXSLB+EX_R13(r13)
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.	/* prevent speculative execution */
			
 
				 
			
 
				+
			
 
				 2:	std     r3,PACA_EXSLB+EX_DAR(r13)
			
 
				 	mr	r3,r12
			
 
				 	mfspr	r11,SPRN_SRR0
			
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
				 	mtspr	SPRN_SRR0,r10
			
 
				 	ld	r10,PACAKMSR(r13)
			
 
				 	mtspr	SPRN_SRR1,r10
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.
			
 
				 
			
 
				 8:	std     r3,PACA_EXSLB+EX_DAR(r13)
			
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
				 	mtspr	SPRN_SRR0,r10
			
 
				 	ld	r10,PACAKMSR(r13)
			
 
				 	mtspr	SPRN_SRR1,r10
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.
			
 
				 
			
 
				 EXC_COMMON_BEGIN(unrecov_slb)
			
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 
				 	mtspr	SPRN_SRR0,r10 ; 				\
			
 
				 	ld	r10,PACAKMSR(r13) ;				\
			
 
				 	mtspr	SPRN_SRR1,r10 ; 				\
			
 
				-	rfid ; 							\
			
 
				+	RFI_TO_KERNEL ;						\
			
 
				 	b	. ;	/* prevent speculative execution */
			
 
				 
			
 
				 #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
			
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\
 
				 	xori	r12,r12,MSR_LE ;				\
			
 
				 	mtspr	SPRN_SRR1,r12 ;					\
			
 
				 	mr	r13,r9 ;					\
			
 
				-	rfid ;		/* return to userspace */		\
			
 
				+	RFI_TO_USER ;	/* return to userspace */		\
			
 
				 	b	. ;	/* prevent speculative execution */
			
 
				 #else
			
 
				 #define SYSCALL_FASTENDIAN_TEST
			
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 
				 	mtcr	r11
			
 
				 	REST_GPR(11, r1)
			
 
				 	ld	r1,GPR1(r1)
			
 
				-	hrfid
			
 
				+	HRFI_TO_USER_OR_KERNEL
			
 
				 
			
 
				 1:	mtcr	r11
			
 
				 	REST_GPR(11, r1)
			
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 
				 	ld	r11,PACA_EXGEN+EX_R11(r13)
			
 
				 	ld	r12,PACA_EXGEN+EX_R12(r13)
			
 
				 	ld	r13,PACA_EXGEN+EX_R13(r13)
			
 
				-	HRFID
			
 
				+	HRFI_TO_UNKNOWN
			
 
				 	b	.
			
 
				 #endif
			
 
				 
			
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt:					\
 
				 	ld	r10,PACA_EXGEN+EX_R10(r13);		\
			
 
				 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
			
 
				 	/* returns to kernel where r13 must be set up, so don't restore it */ \
			
 
				-	##_H##rfid;					\
			
 
				+	##_H##RFI_TO_KERNEL;				\
			
 
				 	b	.;					\
			
 
				 	MASKED_DEC_HANDLER(_H)
			
 
				 
			
 
				+TRAMP_REAL_BEGIN(rfi_flush_fallback)
			
 
				+	SET_SCRATCH0(r13);
			
 
				+	GET_PACA(r13);
			
 
				+	std	r9,PACA_EXRFI+EX_R9(r13)
			
 
				+	std	r10,PACA_EXRFI+EX_R10(r13)
			
 
				+	std	r11,PACA_EXRFI+EX_R11(r13)
			
 
				+	std	r12,PACA_EXRFI+EX_R12(r13)
			
 
				+	std	r8,PACA_EXRFI+EX_R13(r13)
			
 
				+	mfctr	r9
			
 
				+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
			
 
				+	ld	r11,PACA_L1D_FLUSH_SETS(r13)
			
 
				+	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
			
 
				+	/*
			
 
				+	 * The load addresses are at staggered offsets within cachelines,
			
 
				+	 * which suits some pipelines better (on others it should not
			
 
				+	 * hurt).
			
 
				+	 */
			
 
				+	addi	r12,r12,8
			
 
				+	mtctr	r11
			
 
				+	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
			
 
				+
			
 
				+	/* order ld/st prior to dcbt stop all streams with flushing */
			
 
				+	sync
			
 
				+1:	li	r8,0
			
 
				+	.rept	8 /* 8-way set associative */
			
 
				+	ldx	r11,r10,r8
			
 
				+	add	r8,r8,r12
			
 
				+	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
			
 
				+	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
			
 
				+	.endr
			
 
				+	addi	r10,r10,128 /* 128 byte cache line */
			
 
				+	bdnz	1b
			
 
				+
			
 
				+	mtctr	r9
			
 
				+	ld	r9,PACA_EXRFI+EX_R9(r13)
			
 
				+	ld	r10,PACA_EXRFI+EX_R10(r13)
			
 
				+	ld	r11,PACA_EXRFI+EX_R11(r13)
			
 
				+	ld	r12,PACA_EXRFI+EX_R12(r13)
			
 
				+	ld	r8,PACA_EXRFI+EX_R13(r13)
			
 
				+	GET_SCRATCH0(r13);
			
 
				+	rfid
			
 
				+
			
 
				+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
			
 
				+	SET_SCRATCH0(r13);
			
 
				+	GET_PACA(r13);
			
 
				+	std	r9,PACA_EXRFI+EX_R9(r13)
			
 
				+	std	r10,PACA_EXRFI+EX_R10(r13)
			
 
				+	std	r11,PACA_EXRFI+EX_R11(r13)
			
 
				+	std	r12,PACA_EXRFI+EX_R12(r13)
			
 
				+	std	r8,PACA_EXRFI+EX_R13(r13)
			
 
				+	mfctr	r9
			
 
				+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
			
 
				+	ld	r11,PACA_L1D_FLUSH_SETS(r13)
			
 
				+	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
			
 
				+	/*
			
 
				+	 * The load addresses are at staggered offsets within cachelines,
			
 
				+	 * which suits some pipelines better (on others it should not
			
 
				+	 * hurt).
			
 
				+	 */
			
 
				+	addi	r12,r12,8
			
 
				+	mtctr	r11
			
 
				+	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
			
 
				+
			
 
				+	/* order ld/st prior to dcbt stop all streams with flushing */
			
 
				+	sync
			
 
				+1:	li	r8,0
			
 
				+	.rept	8 /* 8-way set associative */
			
 
				+	ldx	r11,r10,r8
			
 
				+	add	r8,r8,r12
			
 
				+	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not
			
 
				+	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx
			
 
				+	.endr
			
 
				+	addi	r10,r10,128 /* 128 byte cache line */
			
 
				+	bdnz	1b
			
 
				+
			
 
				+	mtctr	r9
			
 
				+	ld	r9,PACA_EXRFI+EX_R9(r13)
			
 
				+	ld	r10,PACA_EXRFI+EX_R10(r13)
			
 
				+	ld	r11,PACA_EXRFI+EX_R11(r13)
			
 
				+	ld	r12,PACA_EXRFI+EX_R12(r13)
			
 
				+	ld	r8,PACA_EXRFI+EX_R13(r13)
			
 
				+	GET_SCRATCH0(r13);
			
 
				+	hrfid
			
 
				+
			
 
				 /*
			
 
				  * Real mode exceptions actually use this too, but alternate
			
 
				  * instruction code patches (which end up in the common .text area)
			
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
 
				 	addi	r13, r13, 4
			
 
				 	mtspr	SPRN_SRR0, r13
			
 
				 	GET_SCRATCH0(r13)
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 	b	.
			
 
				 
			
 
				 TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
			
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
 
				 	addi	r13, r13, 4
			
 
				 	mtspr	SPRN_HSRR0, r13
			
 
				 	GET_SCRATCH0(r13)
			
 
				-	hrfid
			
 
				+	HRFI_TO_KERNEL
			
 
				 	b	.
			
 
				 #endif
			
 
				 
			
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
 
				 	return 0;
			
 
				 }
			
 
				 early_initcall(disable_hardlockup_detector);
			
 
				+
			
 
				+#ifdef CONFIG_PPC_BOOK3S_64
			
 
				+static enum l1d_flush_type enabled_flush_types;
			
 
				+static void *l1d_flush_fallback_area;
			
 
				+static bool no_rfi_flush;
			
 
				+bool rfi_flush;
			
 
				+
			
 
				+static int __init handle_no_rfi_flush(char *p)
			
 
				+{
			
 
				+	pr_info("rfi-flush: disabled on command line.");
			
 
				+	no_rfi_flush = true;
			
 
				+	return 0;
			
 
				+}
			
 
				+early_param("no_rfi_flush", handle_no_rfi_flush);
			
 
				+
			
 
				+/*
			
 
				+ * The RFI flush is not KPTI, but because users will see doco that says to use
			
 
				+ * nopti we hijack that option here to also disable the RFI flush.
			
 
				+ */
			
 
				+static int __init handle_no_pti(char *p)
			
 
				+{
			
 
				+	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
			
 
				+	handle_no_rfi_flush(NULL);
			
 
				+	return 0;
			
 
				+}
			
 
				+early_param("nopti", handle_no_pti);
			
 
				+
			
 
				+static void do_nothing(void *unused)
			
 
				+{
			
 
				+	/*
			
 
				+	 * We don't need to do the flush explicitly, just enter+exit kernel is
			
 
				+	 * sufficient, the RFI exit handlers will do the right thing.
			
 
				+	 */
			
 
				+}
			
 
				+
			
 
				+void rfi_flush_enable(bool enable)
			
 
				+{
			
 
				+	if (rfi_flush == enable)
			
 
				+		return;
			
 
				+
			
 
				+	if (enable) {
			
 
				+		do_rfi_flush_fixups(enabled_flush_types);
			
 
				+		on_each_cpu(do_nothing, NULL, 1);
			
 
				+	} else
			
 
				+		do_rfi_flush_fixups(L1D_FLUSH_NONE);
			
 
				+
			
 
				+	rfi_flush = enable;
			
 
				+}
			
 
				+
			
 
				+static void init_fallback_flush(void)
			
 
				+{
			
 
				+	u64 l1d_size, limit;
			
 
				+	int cpu;
			
 
				+
			
 
				+	l1d_size = ppc64_caches.l1d.size;
			
 
				+	limit = min(safe_stack_limit(), ppc64_rma_size);
			
 
				+
			
 
				+	/*
			
 
				+	 * Align to L1d size, and size it at 2x L1d size, to catch possible
			
 
				+	 * hardware prefetch runoff. We don't have a recipe for load patterns to
			
 
				+	 * reliably avoid the prefetcher.
			
 
				+	 */
			
 
				+	l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
			
 
				+	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
			
 
				+
			
 
				+	for_each_possible_cpu(cpu) {
			
 
				+		/*
			
 
				+		 * The fallback flush is currently coded for 8-way
			
 
				+		 * associativity. Different associativity is possible, but it
			
 
				+		 * will be treated as 8-way and may not evict the lines as
			
 
				+		 * effectively.
			
 
				+		 *
			
 
				+		 * 128 byte lines are mandatory.
			
 
				+		 */
			
 
				+		u64 c = l1d_size / 8;
			
 
				+
			
 
				+		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
			
 
				+		paca[cpu].l1d_flush_congruence = c;
			
 
				+		paca[cpu].l1d_flush_sets = c / 128;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
			
 
				+{
			
 
				+	if (types & L1D_FLUSH_FALLBACK) {
			
 
				+		pr_info("rfi-flush: Using fallback displacement flush\n");
			
 
				+		init_fallback_flush();
			
 
				+	}
			
 
				+
			
 
				+	if (types & L1D_FLUSH_ORI)
			
 
				+		pr_info("rfi-flush: Using ori type flush\n");
			
 
				+
			
 
				+	if (types & L1D_FLUSH_MTTRIG)
			
 
				+		pr_info("rfi-flush: Using mttrig type flush\n");
			
 
				+
			
 
				+	enabled_flush_types = types;
			
 
				+
			
 
				+	if (!no_rfi_flush)
			
 
				+		rfi_flush_enable(enable);
			
 
				+}
			
 
				+#endif /* CONFIG_PPC_BOOK3S_64 */
			
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@ SECTIONS
 
				 	/* Read-only data */
			
 
				 	RO_DATA(PAGE_SIZE)
			
 
				 
			
 
				+#ifdef CONFIG_PPC64
			
 
				+	. = ALIGN(8);
			
 
				+	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
			
 
				+		__start___rfi_flush_fixup = .;
			
 
				+		*(__rfi_flush_fixup)
			
 
				+		__stop___rfi_flush_fixup = .;
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				 	EXCEPTION_TABLE(0)
			
 
				 
			
 
				 	NOTES :kernel :notes
			
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 
				 		gpte->may_read = true;
			
 
				 		gpte->may_write = true;
			
 
				 		gpte->page_size = MMU_PAGE_4K;
			
 
				+		gpte->wimg = HPTE_R_M;
			
 
				 
			
 
				 		return 0;
			
 
				 	}
			
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
 
				 	u32 order;
			
 
				 
			
 
				 	/* These fields protected by kvm->lock */
			
 
				+
			
 
				+	/* Possible values and their usage:
			
 
				+	 *  <0     an error occurred during allocation,
			
 
				+	 *  -EBUSY allocation is in progress,
			
 
				+	 *  0      allocation made successfully.
			
 
				+	 */
			
 
				 	int error;
			
 
				-	bool prepare_done;
			
 
				 
			
 
				-	/* Private to the work thread, until prepare_done is true,
			
 
				-	 * then protected by kvm->resize_hpt_sem */
			
 
				+	/* Private to the work thread, until error != -EBUSY,
			
 
				+	 * then protected by kvm->lock.
			
 
				+	 */
			
 
				 	struct kvm_hpt_info hpt;
			
 
				 };
			
 
				 
			
@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 
				 		 * Reset all the reverse-mapping chains for all memslots
			
 
				 		 */
			
 
				 		kvmppc_rmap_reset(kvm);
			
 
				-		/* Ensure that each vcpu will flush its TLB on next entry. */
			
 
				-		cpumask_setall(&kvm->arch.need_tlb_flush);
			
 
				 		err = 0;
			
 
				 		goto out;
			
 
				 	}
			
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 
				 	kvmppc_set_hpt(kvm, &info);
			
 
				 
			
 
				 out:
			
 
				+	if (err == 0)
			
 
				+		/* Ensure that each vcpu will flush its TLB on next entry. */
			
 
				+		cpumask_setall(&kvm->arch.need_tlb_flush);
			
 
				+
			
 
				 	mutex_unlock(&kvm->lock);
			
 
				 	return err;
			
 
				 }
			
@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
 
				 
			
 
				 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
			
 
				 {
			
 
				-	BUG_ON(kvm->arch.resize_hpt != resize);
			
 
				+	if (WARN_ON(!mutex_is_locked(&kvm->lock)))
			
 
				+		return;
			
 
				 
			
 
				 	if (!resize)
			
 
				 		return;
			
 
				 
			
 
				-	if (resize->hpt.virt)
			
 
				-		kvmppc_free_hpt(&resize->hpt);
			
 
				+	if (resize->error != -EBUSY) {
			
 
				+		if (resize->hpt.virt)
			
 
				+			kvmppc_free_hpt(&resize->hpt);
			
 
				+		kfree(resize);
			
 
				+	}
			
 
				 
			
 
				-	kvm->arch.resize_hpt = NULL;
			
 
				-	kfree(resize);
			
 
				+	if (kvm->arch.resize_hpt == resize)
			
 
				+		kvm->arch.resize_hpt = NULL;
			
 
				 }
			
 
				 
			
 
				 static void resize_hpt_prepare_work(struct work_struct *work)
			
@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
 
				 						     struct kvm_resize_hpt,
			
 
				 						     work);
			
 
				 	struct kvm *kvm = resize->kvm;
			
 
				-	int err;
			
 
				+	int err = 0;
			
 
				 
			
 
				-	resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
			
 
				-			 resize->order);
			
 
				-
			
 
				-	err = resize_hpt_allocate(resize);
			
 
				+	if (WARN_ON(resize->error != -EBUSY))
			
 
				+		return;
			
 
				 
			
 
				 	mutex_lock(&kvm->lock);
			
 
				 
			
 
				+	/* Request is still current? */
			
 
				+	if (kvm->arch.resize_hpt == resize) {
			
 
				+		/* We may request large allocations here:
			
 
				+		 * do not sleep with kvm->lock held for a while.
			
 
				+		 */
			
 
				+		mutex_unlock(&kvm->lock);
			
 
				+
			
 
				+		resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
			
 
				+				 resize->order);
			
 
				+
			
 
				+		err = resize_hpt_allocate(resize);
			
 
				+
			
 
				+		/* We have strict assumption about -EBUSY
			
 
				+		 * when preparing for HPT resize.
			
 
				+		 */
			
 
				+		if (WARN_ON(err == -EBUSY))
			
 
				+			err = -EINPROGRESS;
			
 
				+
			
 
				+		mutex_lock(&kvm->lock);
			
 
				+		/* It is possible that kvm->arch.resize_hpt != resize
			
 
				+		 * after we grab kvm->lock again.
			
 
				+		 */
			
 
				+	}
			
 
				+
			
 
				 	resize->error = err;
			
 
				-	resize->prepare_done = true;
			
 
				+
			
 
				+	if (kvm->arch.resize_hpt != resize)
			
 
				+		resize_hpt_release(kvm, resize);
			
 
				 
			
 
				 	mutex_unlock(&kvm->lock);
			
 
				 }
			
@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 
				 
			
 
				 	if (resize) {
			
 
				 		if (resize->order == shift) {
			
 
				-			/* Suitable resize in progress */
			
 
				-			if (resize->prepare_done) {
			
 
				-				ret = resize->error;
			
 
				-				if (ret != 0)
			
 
				-					resize_hpt_release(kvm, resize);
			
 
				-			} else {
			
 
				+			/* Suitable resize in progress? */
			
 
				+			ret = resize->error;
			
 
				+			if (ret == -EBUSY)
			
 
				 				ret = 100; /* estimated time in ms */
			
 
				-			}
			
 
				+			else if (ret)
			
 
				+				resize_hpt_release(kvm, resize);
			
 
				 
			
 
				 			goto out;
			
 
				 		}
			
@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 
				 		ret = -ENOMEM;
			
 
				 		goto out;
			
 
				 	}
			
 
				+
			
 
				+	resize->error = -EBUSY;
			
 
				 	resize->order = shift;
			
 
				 	resize->kvm = kvm;
			
 
				 	INIT_WORK(&resize->work, resize_hpt_prepare_work);
			
@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
 
				 	if (!resize || (resize->order != shift))
			
 
				 		goto out;
			
 
				 
			
 
				-	ret = -EBUSY;
			
 
				-	if (!resize->prepare_done)
			
 
				-		goto out;
			
 
				-
			
 
				 	ret = resize->error;
			
 
				-	if (ret != 0)
			
 
				+	if (ret)
			
 
				 		goto out;
			
 
				 
			
 
				 	ret = resize_hpt_rehash(resize);
			
 
				-	if (ret != 0)
			
 
				+	if (ret)
			
 
				 		goto out;
			
 
				 
			
 
				 	resize_hpt_pivot(resize);
			
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 
				 	mtmsrd	r0,1		/* clear RI in MSR */
			
 
				 	mtsrr0	r5
			
 
				 	mtsrr1	r6
			
 
				-	RFI
			
 
				+	RFI_TO_KERNEL
			
 
				 
			
 
				 kvmppc_call_hv_entry:
			
 
				 BEGIN_FTR_SECTION
			
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
				 	mtmsrd	r6, 1			/* Clear RI in MSR */
			
 
				 	mtsrr0	r8
			
 
				 	mtsrr1	r7
			
 
				-	RFI
			
 
				+	RFI_TO_KERNEL
			
 
				 
			
 
				 	/* Virtual-mode return */
			
 
				 .Lvirt_return:
			
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
				 
			
 
				 	ld	r0, VCPU_GPR(R0)(r4)
			
 
				 	ld	r4, VCPU_GPR(R4)(r4)
			
 
				-
			
 
				-	hrfid
			
 
				+	HRFI_TO_GUEST
			
 
				 	b	.
			
 
				 
			
 
				 secondary_too_late:
			
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 
				 	ld	r4, PACAKMSR(r13)
			
 
				 	mtspr	SPRN_SRR0, r3
			
 
				 	mtspr	SPRN_SRR1, r4
			
 
				-	rfid
			
 
				+	RFI_TO_KERNEL
			
 
				 9:	addi	r3, r1, STACK_FRAME_OVERHEAD
			
 
				 	bl	kvmppc_bad_interrupt
			
 
				 	b	9b
			
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 
				 #define MSR_USER32 MSR_USER
			
 
				 #define MSR_USER64 MSR_USER
			
 
				 #define HW_PAGE_SIZE PAGE_SIZE
			
 
				+#define HPTE_R_M   _PAGE_COHERENT
			
 
				 #endif
			
 
				 
			
 
				 static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
			
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
				 		pte.eaddr = eaddr;
			
 
				 		pte.vpage = eaddr >> 12;
			
 
				 		pte.page_size = MMU_PAGE_64K;
			
 
				+		pte.wimg = HPTE_R_M;
			
 
				 	}
			
 
				 
			
 
				 	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
			
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -46,6 +46,9 @@
 
				 
			
 
				 #define FUNC(name)		name
			
 
				 
			
 
				+#define RFI_TO_KERNEL	RFI
			
 
				+#define RFI_TO_GUEST	RFI
			
 
				+
			
 
				 .macro INTERRUPT_TRAMPOLINE intno
			
 
				 
			
 
				 .global kvmppc_trampoline_\intno
			
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
 
				 	GET_SCRATCH0(r13)
			
 
				 
			
 
				 	/* And get back into the code */
			
 
				-	RFI
			
 
				+	RFI_TO_KERNEL
			
 
				 #endif
			
 
				 
			
 
				 /*
			
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
 
				 	ori	r5, r5, MSR_EE
			
 
				 	mtsrr0	r7
			
 
				 	mtsrr1	r6
			
 
				-	RFI
			
 
				+	RFI_TO_KERNEL
			
 
				 
			
 
				 #include "book3s_segment.S"
			
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -156,7 +156,7 @@ no_dcbz32_on:
 
				 	PPC_LL	r9, SVCPU_R9(r3)
			
 
				 	PPC_LL	r3, (SVCPU_R3)(r3)
			
 
				 
			
 
				-	RFI
			
 
				+	RFI_TO_GUEST
			
 
				 kvmppc_handler_trampoline_enter_end:
			
 
				 
			
 
				 
			
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
				 	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL
			
 
				 	beqa	BOOK3S_INTERRUPT_DOORBELL
			
 
				 
			
 
				-	RFI
			
 
				+	RFI_TO_KERNEL
			
 
				 kvmppc_handler_trampoline_exit_end:
			
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_PPC_BOOK3S_64
			
 
				+void do_rfi_flush_fixups(enum l1d_flush_type types)
			
 
				+{
			
 
				+	unsigned int instrs[3], *dest;
			
 
				+	long *start, *end;
			
 
				+	int i;
			
 
				+
			
 
				+	start = PTRRELOC(&__start___rfi_flush_fixup),
			
 
				+	end = PTRRELOC(&__stop___rfi_flush_fixup);
			
 
				+
			
 
				+	instrs[0] = 0x60000000; /* nop */
			
 
				+	instrs[1] = 0x60000000; /* nop */
			
 
				+	instrs[2] = 0x60000000; /* nop */
			
 
				+
			
 
				+	if (types & L1D_FLUSH_FALLBACK)
			
 
				+		/* b .+16 to fallback flush */
			
 
				+		instrs[0] = 0x48000010;
			
 
				+
			
 
				+	i = 0;
			
 
				+	if (types & L1D_FLUSH_ORI) {
			
 
				+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
			
 
				+		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
			
 
				+	}
			
 
				+
			
 
				+	if (types & L1D_FLUSH_MTTRIG)
			
 
				+		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
			
 
				+
			
 
				+	for (i = 0; start < end; start++, i++) {
			
 
				+		dest = (void *)start + *start;
			
 
				+
			
 
				+		pr_devel("patching dest %lx\n", (unsigned long)dest);
			
 
				+
			
 
				+		patch_instruction(dest, instrs[0]);
			
 
				+		patch_instruction(dest + 1, instrs[1]);
			
 
				+		patch_instruction(dest + 2, instrs[2]);
			
 
				+	}
			
 
				+
			
 
				+	printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
			
 
				+}
			
 
				+#endif /* CONFIG_PPC_BOOK3S_64 */
			
 
				+
			
 
				 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
			
 
				 {
			
 
				 	long *start, *end;
			
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -37,13 +37,62 @@
 
				 #include <asm/kexec.h>
			
 
				 #include <asm/smp.h>
			
 
				 #include <asm/tm.h>
			
 
				+#include <asm/setup.h>
			
 
				 
			
 
				 #include "powernv.h"
			
 
				 
			
 
				+static void pnv_setup_rfi_flush(void)
			
 
				+{
			
 
				+	struct device_node *np, *fw_features;
			
 
				+	enum l1d_flush_type type;
			
 
				+	int enable;
			
 
				+
			
 
				+	/* Default to fallback in case fw-features are not available */
			
 
				+	type = L1D_FLUSH_FALLBACK;
			
 
				+	enable = 1;
			
 
				+
			
 
				+	np = of_find_node_by_name(NULL, "ibm,opal");
			
 
				+	fw_features = of_get_child_by_name(np, "fw-features");
			
 
				+	of_node_put(np);
			
 
				+
			
 
				+	if (fw_features) {
			
 
				+		np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
			
 
				+		if (np && of_property_read_bool(np, "enabled"))
			
 
				+			type = L1D_FLUSH_MTTRIG;
			
 
				+
			
 
				+		of_node_put(np);
			
 
				+
			
 
				+		np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
			
 
				+		if (np && of_property_read_bool(np, "enabled"))
			
 
				+			type = L1D_FLUSH_ORI;
			
 
				+
			
 
				+		of_node_put(np);
			
 
				+
			
 
				+		/* Enable unless firmware says NOT to */
			
 
				+		enable = 2;
			
 
				+		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
			
 
				+		if (np && of_property_read_bool(np, "disabled"))
			
 
				+			enable--;
			
 
				+
			
 
				+		of_node_put(np);
			
 
				+
			
 
				+		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
			
 
				+		if (np && of_property_read_bool(np, "disabled"))
			
 
				+			enable--;
			
 
				+
			
 
				+		of_node_put(np);
			
 
				+		of_node_put(fw_features);
			
 
				+	}
			
 
				+
			
 
				+	setup_rfi_flush(type, enable > 0);
			
 
				+}
			
 
				+
			
 
				 static void __init pnv_setup_arch(void)
			
 
				 {
			
 
				 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
			
 
				 
			
 
				+	pnv_setup_rfi_flush();
			
 
				+
			
 
				 	/* Initialize SMP */
			
 
				 	pnv_smp_init();
			
 
				 
			
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 
				 
			
 
				 static CLASS_ATTR_RW(dlpar);
			
 
				 
			
 
				-static int __init pseries_dlpar_init(void)
			
 
				+int __init dlpar_workqueue_init(void)
			
 
				 {
			
 
				+	if (pseries_hp_wq)
			
 
				+		return 0;
			
 
				+
			
 
				 	pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
			
 
				-					WQ_UNBOUND, 1);
			
 
				+			WQ_UNBOUND, 1);
			
 
				+
			
 
				+	return pseries_hp_wq ? 0 : -ENOMEM;
			
 
				+}
			
 
				+
			
 
				+static int __init dlpar_sysfs_init(void)
			
 
				+{
			
 
				+	int rc;
			
 
				+
			
 
				+	rc = dlpar_workqueue_init();
			
 
				+	if (rc)
			
 
				+		return rc;
			
 
				+
			
 
				 	return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
			
 
				 }
			
 
				-machine_device_initcall(pseries, pseries_dlpar_init);
			
 
				+machine_device_initcall(pseries, dlpar_sysfs_init);
			
 
				 
			
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
 
				 	return CMO_PageSize;
			
 
				 }
			
 
				 
			
 
				+int dlpar_workqueue_init(void);
			
 
				+
			
 
				 #endif /* _PSERIES_PSERIES_H */
			
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
 
				 	/* Hotplug Events */
			
 
				 	np = of_find_node_by_path("/event-sources/hot-plug-events");
			
 
				 	if (np != NULL) {
			
 
				-		request_event_sources_irqs(np, ras_hotplug_interrupt,
			
 
				+		if (dlpar_workqueue_init() == 0)
			
 
				+			request_event_sources_irqs(np, ras_hotplug_interrupt,
			
 
				 					   "RAS_HOTPLUG");
			
 
				 		of_node_put(np);
			
 
				 	}
			
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
 
				 	of_pci_check_probe_only();
			
 
				 }
			
 
				 
			
 
				+static void pseries_setup_rfi_flush(void)
			
 
				+{
			
 
				+	struct h_cpu_char_result result;
			
 
				+	enum l1d_flush_type types;
			
 
				+	bool enable;
			
 
				+	long rc;
			
 
				+
			
 
				+	/* Enable by default */
			
 
				+	enable = true;
			
 
				+
			
 
				+	rc = plpar_get_cpu_characteristics(&result);
			
 
				+	if (rc == H_SUCCESS) {
			
 
				+		types = L1D_FLUSH_NONE;
			
 
				+
			
 
				+		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
			
 
				+			types |= L1D_FLUSH_MTTRIG;
			
 
				+		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
			
 
				+			types |= L1D_FLUSH_ORI;
			
 
				+
			
 
				+		/* Use fallback if nothing set in hcall */
			
 
				+		if (types == L1D_FLUSH_NONE)
			
 
				+			types = L1D_FLUSH_FALLBACK;
			
 
				+
			
 
				+		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
			
 
				+			enable = false;
			
 
				+	} else {
			
 
				+		/* Default to fallback in case hcall is not available */
			
 
				+		types = L1D_FLUSH_FALLBACK;
			
 
				+	}
			
 
				+
			
 
				+	setup_rfi_flush(types, enable);
			
 
				+}
			
 
				+
			
 
				 static void __init pSeries_setup_arch(void)
			
 
				 {
			
 
				 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
			
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
 
				 
			
 
				 	fwnmi_init();
			
 
				 
			
 
				+	pseries_setup_rfi_flush();
			
 
				+
			
 
				 	/* By default, only probe PCI (can be overridden by rtas_pci) */
			
 
				 	pci_add_flags(PCI_PROBE_ONLY);
			
 
				 
			
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -0,0 +1,75 @@
 
				+CONFIG_SMP=y
			
 
				+CONFIG_PCI=y
			
 
				+CONFIG_PCIE_XILINX=y
			
 
				+CONFIG_SYSVIPC=y
			
 
				+CONFIG_POSIX_MQUEUE=y
			
 
				+CONFIG_IKCONFIG=y
			
 
				+CONFIG_IKCONFIG_PROC=y
			
 
				+CONFIG_CGROUPS=y
			
 
				+CONFIG_CGROUP_SCHED=y
			
 
				+CONFIG_CFS_BANDWIDTH=y
			
 
				+CONFIG_CGROUP_BPF=y
			
 
				+CONFIG_NAMESPACES=y
			
 
				+CONFIG_USER_NS=y
			
 
				+CONFIG_BLK_DEV_INITRD=y
			
 
				+CONFIG_EXPERT=y
			
 
				+CONFIG_CHECKPOINT_RESTORE=y
			
 
				+CONFIG_BPF_SYSCALL=y
			
 
				+CONFIG_NET=y
			
 
				+CONFIG_PACKET=y
			
 
				+CONFIG_UNIX=y
			
 
				+CONFIG_INET=y
			
 
				+CONFIG_IP_MULTICAST=y
			
 
				+CONFIG_IP_ADVANCED_ROUTER=y
			
 
				+CONFIG_IP_PNP=y
			
 
				+CONFIG_IP_PNP_DHCP=y
			
 
				+CONFIG_IP_PNP_BOOTP=y
			
 
				+CONFIG_IP_PNP_RARP=y
			
 
				+CONFIG_NETLINK_DIAG=y
			
 
				+CONFIG_DEVTMPFS=y
			
 
				+CONFIG_BLK_DEV_LOOP=y
			
 
				+CONFIG_VIRTIO_BLK=y
			
 
				+CONFIG_BLK_DEV_SD=y
			
 
				+CONFIG_BLK_DEV_SR=y
			
 
				+CONFIG_ATA=y
			
 
				+CONFIG_SATA_AHCI=y
			
 
				+CONFIG_SATA_AHCI_PLATFORM=y
			
 
				+CONFIG_NETDEVICES=y
			
 
				+CONFIG_VIRTIO_NET=y
			
 
				+CONFIG_MACB=y
			
 
				+CONFIG_E1000E=y
			
 
				+CONFIG_R8169=y
			
 
				+CONFIG_MICROSEMI_PHY=y
			
 
				+CONFIG_INPUT_MOUSEDEV=y
			
 
				+CONFIG_SERIAL_8250=y
			
 
				+CONFIG_SERIAL_8250_CONSOLE=y
			
 
				+CONFIG_SERIAL_OF_PLATFORM=y
			
 
				+# CONFIG_PTP_1588_CLOCK is not set
			
 
				+CONFIG_DRM=y
			
 
				+CONFIG_DRM_RADEON=y
			
 
				+CONFIG_FRAMEBUFFER_CONSOLE=y
			
 
				+CONFIG_USB=y
			
 
				+CONFIG_USB_XHCI_HCD=y
			
 
				+CONFIG_USB_XHCI_PLATFORM=y
			
 
				+CONFIG_USB_EHCI_HCD=y
			
 
				+CONFIG_USB_EHCI_HCD_PLATFORM=y
			
 
				+CONFIG_USB_OHCI_HCD=y
			
 
				+CONFIG_USB_OHCI_HCD_PLATFORM=y
			
 
				+CONFIG_USB_STORAGE=y
			
 
				+CONFIG_USB_UAS=y
			
 
				+CONFIG_VIRTIO_MMIO=y
			
 
				+CONFIG_RAS=y
			
 
				+CONFIG_EXT4_FS=y
			
 
				+CONFIG_EXT4_FS_POSIX_ACL=y
			
 
				+CONFIG_AUTOFS4_FS=y
			
 
				+CONFIG_MSDOS_FS=y
			
 
				+CONFIG_VFAT_FS=y
			
 
				+CONFIG_TMPFS=y
			
 
				+CONFIG_TMPFS_POSIX_ACL=y
			
 
				+CONFIG_NFS_FS=y
			
 
				+CONFIG_NFS_V4=y
			
 
				+CONFIG_NFS_V4_1=y
			
 
				+CONFIG_NFS_V4_2=y
			
 
				+CONFIG_ROOT_NFS=y
			
 
				+# CONFIG_RCU_TRACE is not set
			
 
				+CONFIG_CRYPTO_USER_API_HASH=y
			
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -17,10 +17,10 @@
 
				 #include <linux/const.h>
			
 
				 
			
 
				 /* Status register flags */
			
 
				-#define SR_IE   _AC(0x00000002, UL) /* Interrupt Enable */
			
 
				-#define SR_PIE  _AC(0x00000020, UL) /* Previous IE */
			
 
				-#define SR_PS   _AC(0x00000100, UL) /* Previously Supervisor */
			
 
				-#define SR_SUM  _AC(0x00040000, UL) /* Supervisor may access User Memory */
			
 
				+#define SR_SIE	_AC(0x00000002, UL) /* Supervisor Interrupt Enable */
			
 
				+#define SR_SPIE	_AC(0x00000020, UL) /* Previous Supervisor IE */
			
 
				+#define SR_SPP	_AC(0x00000100, UL) /* Previously Supervisor */
			
 
				+#define SR_SUM	_AC(0x00040000, UL) /* Supervisor may access User Memory */
			
 
				 
			
 
				 #define SR_FS           _AC(0x00006000, UL) /* Floating-point Status */
			
 
				 #define SR_FS_OFF       _AC(0x00000000, UL)
			
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -21,8 +21,6 @@
 
				 
			
 
				 #include <linux/types.h>
			
 
				 
			
 
				-#ifdef CONFIG_MMU
			
 
				-
			
 
				 extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
			
 
				 
			
 
				 /*
			
@@ -36,8 +34,6 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
				 
			
 
				 extern void iounmap(volatile void __iomem *addr);
			
 
				 
			
 
				-#endif /* CONFIG_MMU */
			
 
				-
			
 
				 /* Generic IO read/write.  These perform native-endian accesses. */
			
 
				 #define __raw_writeb __raw_writeb
			
 
				 static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
			
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -27,25 +27,25 @@ static inline unsigned long arch_local_save_flags(void)
 
				 /* unconditionally enable interrupts */
			
 
				 static inline void arch_local_irq_enable(void)
			
 
				 {
			
 
				-	csr_set(sstatus, SR_IE);
			
 
				+	csr_set(sstatus, SR_SIE);
			
 
				 }
			
 
				 
			
 
				 /* unconditionally disable interrupts */
			
 
				 static inline void arch_local_irq_disable(void)
			
 
				 {
			
 
				-	csr_clear(sstatus, SR_IE);
			
 
				+	csr_clear(sstatus, SR_SIE);
			
 
				 }
			
 
				 
			
 
				 /* get status and disable interrupts */
			
 
				 static inline unsigned long arch_local_irq_save(void)
			
 
				 {
			
 
				-	return csr_read_clear(sstatus, SR_IE);
			
 
				+	return csr_read_clear(sstatus, SR_SIE);
			
 
				 }
			
 
				 
			
 
				 /* test flags */
			
 
				 static inline int arch_irqs_disabled_flags(unsigned long flags)
			
 
				 {
			
 
				-	return !(flags & SR_IE);
			
 
				+	return !(flags & SR_SIE);
			
 
				 }
			
 
				 
			
 
				 /* test hardware interrupt enable bit */
			
@@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void)
 
				 /* set interrupt enabled status */
			
 
				 static inline void arch_local_irq_restore(unsigned long flags)
			
 
				 {
			
 
				-	csr_set(sstatus, flags & SR_IE);
			
 
				+	csr_set(sstatus, flags & SR_SIE);
			
 
				 }
			
 
				 
			
 
				 #endif /* _ASM_RISCV_IRQFLAGS_H */
			
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -20,8 +20,6 @@
 
				 
			
 
				 #ifndef __ASSEMBLY__
			
 
				 
			
 
				-#ifdef CONFIG_MMU
			
 
				-
			
 
				 /* Page Upper Directory not used in RISC-V */
			
 
				 #include <asm-generic/pgtable-nopud.h>
			
 
				 #include <asm/page.h>
			
@@ -413,8 +411,6 @@ static inline void pgtable_cache_init(void)
 
				 	/* No page table caches to initialize */
			
 
				 }
			
 
				 
			
 
				-#endif /* CONFIG_MMU */
			
 
				-
			
 
				 #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
			
 
				 #define VMALLOC_END      (PAGE_OFFSET - 1)
			
 
				 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
			
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -66,7 +66,7 @@ struct pt_regs {
 
				 #define REG_FMT "%08lx"
			
 
				 #endif
			
 
				 
			
 
				-#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0)
			
 
				+#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
			
 
				 
			
 
				 
			
 
				 /* Helpers for working with the instruction pointer */
			
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -15,8 +15,6 @@
 
				 #ifndef _ASM_RISCV_TLBFLUSH_H
			
 
				 #define _ASM_RISCV_TLBFLUSH_H
			
 
				 
			
 
				-#ifdef CONFIG_MMU
			
 
				-
			
 
				 #include <linux/mm_types.h>
			
 
				 
			
 
				 /*
			
@@ -64,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 
				 	flush_tlb_all();
			
 
				 }
			
 
				 
			
 
				-#endif /* CONFIG_MMU */
			
 
				-
			
 
				 #endif /* _ASM_RISCV_TLBFLUSH_H */
			
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state);
 
				  * call.
			
 
				  */
			
 
				 
			
 
				-#ifdef CONFIG_MMU
			
 
				 #define __get_user_asm(insn, x, ptr, err)			\
			
 
				 do {								\
			
 
				 	uintptr_t __tmp;					\
			
@@ -153,13 +152,11 @@ do {								\
 
				 	__disable_user_access();				\
			
 
				 	(x) = __x;						\
			
 
				 } while (0)
			
 
				-#endif /* CONFIG_MMU */
			
 
				 
			
 
				 #ifdef CONFIG_64BIT
			
 
				 #define __get_user_8(x, ptr, err) \
			
 
				 	__get_user_asm("ld", x, ptr, err)
			
 
				 #else /* !CONFIG_64BIT */
			
 
				-#ifdef CONFIG_MMU
			
 
				 #define __get_user_8(x, ptr, err)				\
			
 
				 do {								\
			
 
				 	u32 __user *__ptr = (u32 __user *)(ptr);		\
			
@@ -193,7 +190,6 @@ do {								\
 
				 	(x) = (__typeof__(x))((__typeof__((x)-(x)))(		\
			
 
				 		(((u64)__hi << 32) | __lo)));			\
			
 
				 } while (0)
			
 
				-#endif /* CONFIG_MMU */
			
 
				 #endif /* CONFIG_64BIT */
			
 
				 
			
 
				 
			
@@ -267,8 +263,6 @@ do {								\
 
				 		((x) = 0, -EFAULT);				\
			
 
				 })
			
 
				 
			
 
				-
			
 
				-#ifdef CONFIG_MMU
			
 
				 #define __put_user_asm(insn, x, ptr, err)			\
			
 
				 do {								\
			
 
				 	uintptr_t __tmp;					\
			
@@ -292,14 +286,11 @@ do {								\
 
				 		: "rJ" (__x), "i" (-EFAULT));			\
			
 
				 	__disable_user_access();				\
			
 
				 } while (0)
			
 
				-#endif /* CONFIG_MMU */
			
 
				-
			
 
				 
			
 
				 #ifdef CONFIG_64BIT
			
 
				 #define __put_user_8(x, ptr, err) \
			
 
				 	__put_user_asm("sd", x, ptr, err)
			
 
				 #else /* !CONFIG_64BIT */
			
 
				-#ifdef CONFIG_MMU
			
 
				 #define __put_user_8(x, ptr, err)				\
			
 
				 do {								\
			
 
				 	u32 __user *__ptr = (u32 __user *)(ptr);		\
			
@@ -329,7 +320,6 @@ do {								\
 
				 		: "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT));	\
			
 
				 	__disable_user_access();				\
			
 
				 } while (0)
			
 
				-#endif /* CONFIG_MMU */
			
 
				 #endif /* CONFIG_64BIT */
			
 
				 
			
 
				 
			
@@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
 
				  * will set "err" to -EFAULT, while successful accesses return the previous
			
 
				  * value.
			
 
				  */
			
 
				-#ifdef CONFIG_MMU
			
 
				 #define __cmpxchg_user(ptr, old, new, err, size, lrb, scb)	\
			
 
				 ({								\
			
 
				 	__typeof__(ptr) __ptr = (ptr);				\
			
@@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
 
				 	(err) = __err;						\
			
 
				 	__ret;							\
			
 
				 })
			
 
				-#endif /* CONFIG_MMU */
			
 
				 
			
 
				 #endif /* _ASM_RISCV_UACCESS_H */
			
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -14,3 +14,4 @@
 
				 #define __ARCH_HAVE_MMU
			
 
				 #define __ARCH_WANT_SYS_CLONE
			
 
				 #include <uapi/asm/unistd.h>
			
 
				+#include <uapi/asm/syscalls.h>
			
--- a/arch/riscv/include/asm/vdso-syscalls.h
+++ b/arch/riscv/include/asm/vdso-syscalls.h
@@ -1,28 +0,0 @@
 
				-/*
			
 
				- * Copyright (C) 2017 SiFive
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU General Public License version 2 as
			
 
				- * published by the Free Software Foundation.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful,
			
 
				- * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				- * GNU General Public License for more details.
			
 
				- *
			
 
				- * You should have received a copy of the GNU General Public License
			
 
				- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				- */
			
 
				-
			
 
				-#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
			
 
				-#define _ASM_RISCV_VDSO_SYSCALLS_H
			
 
				-
			
 
				-#ifdef CONFIG_SMP
			
 
				-
			
 
				-/* These syscalls are only used by the vDSO and are not in the uapi. */
			
 
				-#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
			
 
				-__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-#endif /* _ASM_RISCV_VDSO_H */
			
--- a/arch/riscv/include/uapi/asm/syscalls.h
+++ b/arch/riscv/include/uapi/asm/syscalls.h
@@ -0,0 +1,26 @@
 
				+/* SPDX-License-Identifier: GPL-2.0 */
			
 
				+/*
			
 
				+ * Copyright (C) 2017 SiFive
			
 
				+ */
			
 
				+
			
 
				+#ifndef _ASM__UAPI__SYSCALLS_H
			
 
				+#define _ASM__UAPI__SYSCALLS_H
			
 
				+
			
 
				+/*
			
 
				+ * Allows the instruction cache to be flushed from userspace.  Despite RISC-V
			
 
				+ * having a direct 'fence.i' instruction available to userspace (which we
			
 
				+ * can't trap!), that's not actually viable when running on Linux because the
			
 
				+ * kernel might schedule a process on another hart.  There is no way for
			
 
				+ * userspace to handle this without invoking the kernel (as it doesn't know the
			
 
				+ * thread->hart mappings), so we've defined a RISC-V specific system call to
			
 
				+ * flush the instruction cache.
			
 
				+ *
			
 
				+ * __NR_riscv_flush_icache is defined to flush the instruction cache over an
			
 
				+ * address range, with the flush applying to either all threads or just the
			
 
				+ * caller.  We don't currently do anything with the address range, that's just
			
 
				+ * in there for forwards compatibility.
			
 
				+ */
			
 
				+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
			
 
				+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
			
 
				+
			
 
				+#endif
			
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -196,7 +196,7 @@ handle_syscall:
 
				 	addi s2, s2, 0x4
			
 
				 	REG_S s2, PT_SEPC(sp)
			
 
				 	/* System calls run with interrupts enabled */
			
 
				-	csrs sstatus, SR_IE
			
 
				+	csrs sstatus, SR_SIE
			
 
				 	/* Trace syscalls, but only if requested by the user. */
			
 
				 	REG_L t0, TASK_TI_FLAGS(tp)
			
 
				 	andi t0, t0, _TIF_SYSCALL_TRACE
			
@@ -224,8 +224,8 @@ ret_from_syscall:
 
				 
			
 
				 ret_from_exception:
			
 
				 	REG_L s0, PT_SSTATUS(sp)
			
 
				-	csrc sstatus, SR_IE
			
 
				-	andi s0, s0, SR_PS
			
 
				+	csrc sstatus, SR_SIE
			
 
				+	andi s0, s0, SR_SPP
			
 
				 	bnez s0, restore_all
			
 
				 
			
 
				 resume_userspace:
			
@@ -255,7 +255,7 @@ work_pending:
 
				 	bnez s1, work_resched
			
 
				 work_notifysig:
			
 
				 	/* Handle pending signals and notify-resume requests */
			
 
				-	csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */
			
 
				+	csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
			
 
				 	move a0, sp /* pt_regs */
			
 
				 	move a1, s0 /* current_thread_info->flags */
			
 
				 	tail do_notify_resume
			
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs)
 
				 void start_thread(struct pt_regs *regs, unsigned long pc,
			
 
				 	unsigned long sp)
			
 
				 {
			
 
				-	regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL;
			
 
				+	regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
			
 
				 	regs->sepc = pc;
			
 
				 	regs->sp = sp;
			
 
				 	set_fs(USER_DS);
			
@@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
				 		const register unsigned long gp __asm__ ("gp");
			
 
				 		memset(childregs, 0, sizeof(struct pt_regs));
			
 
				 		childregs->gp = gp;
			
 
				-		childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */
			
 
				+		childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
			
 
				 
			
 
				 		p->thread.ra = (unsigned long)ret_from_kernel_thread;
			
 
				 		p->thread.s[0] = usp; /* fn */
			
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -23,5 +23,4 @@
 
				 void *sys_call_table[__NR_syscalls] = {
			
 
				 	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
			
 
				 #include <asm/unistd.h>
			
 
				-#include <asm/vdso-syscalls.h>
			
 
				 };
			
--- a/arch/riscv/kernel/vdso/flush_icache.S
+++ b/arch/riscv/kernel/vdso/flush_icache.S
@@ -13,7 +13,6 @@
 
				 
			
 
				 #include <linux/linkage.h>
			
 
				 #include <asm/unistd.h>
			
 
				-#include <asm/vdso-syscalls.h>
			
 
				 
			
 
				 	.text
			
 
				 /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
			
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 
				 		goto vmalloc_fault;
			
 
				 
			
 
				 	/* Enable interrupts if they were enabled in the parent context. */
			
 
				-	if (likely(regs->sstatus & SR_PIE))
			
 
				+	if (likely(regs->sstatus & SR_SPIE))
			
 
				 		local_irq_enable();
			
 
				 
			
 
				 	/*
			
--- a/arch/sh/boards/mach-se/770x/setup.c
+++ b/arch/sh/boards/mach-se/770x/setup.c
@@ -9,6 +9,7 @@
 
				  */
			
 
				 #include <linux/init.h>
			
 
				 #include <linux/platform_device.h>
			
 
				+#include <linux/sh_eth.h>
			
 
				 #include <mach-se/mach/se.h>
			
 
				 #include <mach-se/mach/mrshpc.h>
			
 
				 #include <asm/machvec.h>
			
@@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = {
 
				 #if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
			
 
				 	defined(CONFIG_CPU_SUBTYPE_SH7712)
			
 
				 /* SH771X Ethernet driver */
			
 
				+static struct sh_eth_plat_data sh_eth_plat = {
			
 
				+	.phy = PHY_ID,
			
 
				+	.phy_interface = PHY_INTERFACE_MODE_MII,
			
 
				+};
			
 
				+
			
 
				 static struct resource sh_eth0_resources[] = {
			
 
				 	[0] = {
			
 
				 		.start = SH_ETH0_BASE,
			
 
				-		.end = SH_ETH0_BASE + 0x1B8,
			
 
				+		.end = SH_ETH0_BASE + 0x1B8 - 1,
			
 
				 		.flags = IORESOURCE_MEM,
			
 
				 	},
			
 
				 	[1] = {
			
 
				+		.start = SH_TSU_BASE,
			
 
				+		.end = SH_TSU_BASE + 0x200 - 1,
			
 
				+		.flags = IORESOURCE_MEM,
			
 
				+	},
			
 
				+	[2] = {
			
 
				 		.start = SH_ETH0_IRQ,
			
 
				 		.end = SH_ETH0_IRQ,
			
 
				 		.flags = IORESOURCE_IRQ,
			
@@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = {
 
				 	.name = "sh771x-ether",
			
 
				 	.id = 0,
			
 
				 	.dev = {
			
 
				-		.platform_data = PHY_ID,
			
 
				+		.platform_data = &sh_eth_plat,
			
 
				 	},
			
 
				 	.num_resources = ARRAY_SIZE(sh_eth0_resources),
			
 
				 	.resource = sh_eth0_resources,
			
@@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = {
 
				 static struct resource sh_eth1_resources[] = {
			
 
				 	[0] = {
			
 
				 		.start = SH_ETH1_BASE,
			
 
				-		.end = SH_ETH1_BASE + 0x1B8,
			
 
				+		.end = SH_ETH1_BASE + 0x1B8 - 1,
			
 
				 		.flags = IORESOURCE_MEM,
			
 
				 	},
			
 
				 	[1] = {
			
 
				+		.start = SH_TSU_BASE,
			
 
				+		.end = SH_TSU_BASE + 0x200 - 1,
			
 
				+		.flags = IORESOURCE_MEM,
			
 
				+	},
			
 
				+	[2] = {
			
 
				 		.start = SH_ETH1_IRQ,
			
 
				 		.end = SH_ETH1_IRQ,
			
 
				 		.flags = IORESOURCE_IRQ,
			
@@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = {
 
				 	.name = "sh771x-ether",
			
 
				 	.id = 1,
			
 
				 	.dev = {
			
 
				-		.platform_data = PHY_ID,
			
 
				+		.platform_data = &sh_eth_plat,
			
 
				 	},
			
 
				 	.num_resources = ARRAY_SIZE(sh_eth1_resources),
			
 
				 	.resource = sh_eth1_resources,
			
--- a/arch/sh/include/mach-se/mach/se.h
+++ b/arch/sh/include/mach-se/mach/se.h
@@ -100,6 +100,7 @@
 
				 /* Base address */
			
 
				 #define SH_ETH0_BASE 0xA7000000
			
 
				 #define SH_ETH1_BASE 0xA7000400
			
 
				+#define SH_TSU_BASE  0xA7000800
			
 
				 /* PHY ID */
			
 
				 #if defined(CONFIG_CPU_SUBTYPE_SH7710)
			
 
				 # define PHY_ID 0x00
			
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,7 +55,6 @@ config X86
 
				 	select ARCH_HAS_GCOV_PROFILE_ALL
			
 
				 	select ARCH_HAS_KCOV			if X86_64
			
 
				 	select ARCH_HAS_PMEM_API		if X86_64
			
 
				-	# Causing hangs/crashes, see the commit that added this change for details.
			
 
				 	select ARCH_HAS_REFCOUNT
			
 
				 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
			
 
				 	select ARCH_HAS_SET_MEMORY
			
@@ -89,6 +88,7 @@ config X86
 
				 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
			
 
				 	select GENERIC_CMOS_UPDATE
			
 
				 	select GENERIC_CPU_AUTOPROBE
			
 
				+	select GENERIC_CPU_VULNERABILITIES
			
 
				 	select GENERIC_EARLY_IOREMAP
			
 
				 	select GENERIC_FIND_FIRST_BIT
			
 
				 	select GENERIC_IOMAP
			
@@ -429,6 +429,19 @@ config GOLDFISH
 
				        def_bool y
			
 
				        depends on X86_GOLDFISH
			
 
				 
			
 
				+config RETPOLINE
			
 
				+	bool "Avoid speculative indirect branches in kernel"
			
 
				+	default y
			
 
				+	help
			
 
				+	  Compile kernel with the retpoline compiler options to guard against
			
 
				+	  kernel-to-user data leaks by avoiding speculative indirect
			
 
				+	  branches. Requires a compiler with -mindirect-branch=thunk-extern
			
 
				+	  support for full protection. The kernel may run slower.
			
 
				+
			
 
				+	  Without compiler support, at least indirect branches in assembler
			
 
				+	  code are eliminated. Since this includes the syscall entry path,
			
 
				+	  it is not entirely pointless.
			
 
				+
			
 
				 config INTEL_RDT
			
 
				 	bool "Intel Resource Director Technology support"
			
 
				 	default n
			
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
 
				 #
			
 
				 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
			
 
				 
			
 
				+# Avoid indirect branches in kernel to deal with Spectre
			
 
				+ifdef CONFIG_RETPOLINE
			
 
				+    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
			
 
				+    ifneq ($(RETPOLINE_CFLAGS),)
			
 
				+        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
			
 
				+    endif
			
 
				+endif
			
 
				+
			
 
				 archscripts: scripts_basic
			
 
				 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
			
 
				 
			
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
 
				 #include <linux/linkage.h>
			
 
				 #include <asm/inst.h>
			
 
				 #include <asm/frame.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 /*
			
 
				  * The following macros are used to move an (un)aligned 16 byte value to/from
			
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
 
				 	pxor INC, STATE4
			
 
				 	movdqu IV, 0x30(OUTP)
			
 
				 
			
 
				-	call *%r11
			
 
				+	CALL_NOSPEC %r11
			
 
				 
			
 
				 	movdqu 0x00(OUTP), INC
			
 
				 	pxor INC, STATE1
			
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
 
				 	_aesni_gf128mul_x_ble()
			
 
				 	movups IV, (IVP)
			
 
				 
			
 
				-	call *%r11
			
 
				+	CALL_NOSPEC %r11
			
 
				 
			
 
				 	movdqu 0x40(OUTP), INC
			
 
				 	pxor INC, STATE1
			
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
 
				 
			
 
				 #include <linux/linkage.h>
			
 
				 #include <asm/frame.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 #define CAMELLIA_TABLE_BYTE_LEN 272
			
 
				 
			
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
 
				 	vpxor 14 * 16(%rax), %xmm15, %xmm14;
			
 
				 	vpxor 15 * 16(%rax), %xmm15, %xmm15;
			
 
				 
			
 
				-	call *%r9;
			
 
				+	CALL_NOSPEC %r9;
			
 
				 
			
 
				 	addq $(16 * 16), %rsp;
			
 
				 
			
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
 
				 
			
 
				 #include <linux/linkage.h>
			
 
				 #include <asm/frame.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 #define CAMELLIA_TABLE_BYTE_LEN 272
			
 
				 
			
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
 
				 	vpxor 14 * 32(%rax), %ymm15, %ymm14;
			
 
				 	vpxor 15 * 32(%rax), %ymm15, %ymm15;
			
 
				 
			
 
				-	call *%r9;
			
 
				+	CALL_NOSPEC %r9;
			
 
				 
			
 
				 	addq $(16 * 32), %rsp;
			
 
				 
			
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
 
				 
			
 
				 #include <asm/inst.h>
			
 
				 #include <linux/linkage.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
			
 
				 
			
@@ -172,7 +173,7 @@ continue_block:
 
				 	movzxw  (bufp, %rax, 2), len
			
 
				 	lea	crc_array(%rip), bufp
			
 
				 	lea     (bufp, len, 1), bufp
			
 
				-	jmp     *bufp
			
 
				+	JMP_NOSPEC bufp
			
 
				 
			
 
				 	################################################################
			
 
				 	## 2a) PROCESS FULL BLOCKS:
			
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
 
				  * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
			
 
				  * halves:
			
 
				  */
			
 
				-#define PTI_SWITCH_PGTABLES_MASK	(1<<PAGE_SHIFT)
			
 
				-#define PTI_SWITCH_MASK		(PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
			
 
				+#define PTI_USER_PGTABLE_BIT		PAGE_SHIFT
			
 
				+#define PTI_USER_PGTABLE_MASK		(1 << PTI_USER_PGTABLE_BIT)
			
 
				+#define PTI_USER_PCID_BIT		X86_CR3_PTI_PCID_USER_BIT
			
 
				+#define PTI_USER_PCID_MASK		(1 << PTI_USER_PCID_BIT)
			
 
				+#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
			
 
				 
			
 
				 .macro SET_NOFLUSH_BIT	reg:req
			
 
				 	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
			
@@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
 
				 .macro ADJUST_KERNEL_CR3 reg:req
			
 
				 	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
			
 
				 	/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
			
 
				-	andq    $(~PTI_SWITCH_MASK), \reg
			
 
				+	andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
			
 
				 .endm
			
 
				 
			
 
				 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
			
@@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
 
				 	/* Flush needed, clear the bit */
			
 
				 	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
			
 
				 	movq	\scratch_reg2, \scratch_reg
			
 
				-	jmp	.Lwrcr3_\@
			
 
				+	jmp	.Lwrcr3_pcid_\@
			
 
				 
			
 
				 .Lnoflush_\@:
			
 
				 	movq	\scratch_reg2, \scratch_reg
			
 
				 	SET_NOFLUSH_BIT \scratch_reg
			
 
				 
			
 
				+.Lwrcr3_pcid_\@:
			
 
				+	/* Flip the ASID to the user version */
			
 
				+	orq	$(PTI_USER_PCID_MASK), \scratch_reg
			
 
				+
			
 
				 .Lwrcr3_\@:
			
 
				-	/* Flip the PGD and ASID to the user version */
			
 
				-	orq     $(PTI_SWITCH_MASK), \scratch_reg
			
 
				+	/* Flip the PGD to the user version */
			
 
				+	orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
			
 
				 	mov	\scratch_reg, %cr3
			
 
				 .Lend_\@:
			
 
				 .endm
			
@@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
 
				 	movq	%cr3, \scratch_reg
			
 
				 	movq	\scratch_reg, \save_reg
			
 
				 	/*
			
 
				-	 * Is the "switch mask" all zero?  That means that both of
			
 
				-	 * these are zero:
			
 
				-	 *
			
 
				-	 *	1. The user/kernel PCID bit, and
			
 
				-	 *	2. The user/kernel "bit" that points CR3 to the
			
 
				-	 *	   bottom half of the 8k PGD
			
 
				-	 *
			
 
				-	 * That indicates a kernel CR3 value, not a user CR3.
			
 
				+	 * Test the user pagetable bit. If set, then the user page tables
			
 
				+	 * are active. If clear CR3 already has the kernel page table
			
 
				+	 * active.
			
 
				 	 */
			
 
				-	testq	$(PTI_SWITCH_MASK), \scratch_reg
			
 
				-	jz	.Ldone_\@
			
 
				+	bt	$PTI_USER_PGTABLE_BIT, \scratch_reg
			
 
				+	jnc	.Ldone_\@
			
 
				 
			
 
				 	ADJUST_KERNEL_CR3 \scratch_reg
			
 
				 	movq	\scratch_reg, %cr3
			
@@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
 
				 	 * KERNEL pages can always resume with NOFLUSH as we do
			
 
				 	 * explicit flushes.
			
 
				 	 */
			
 
				-	bt	$X86_CR3_PTI_SWITCH_BIT, \save_reg
			
 
				+	bt	$PTI_USER_PGTABLE_BIT, \save_reg
			
 
				 	jnc	.Lnoflush_\@
			
 
				 
			
 
				 	/*
			
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
 
				 #include <asm/asm.h>
			
 
				 #include <asm/smap.h>
			
 
				 #include <asm/frame.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 	.section .entry.text, "ax"
			
 
				 
			
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
 
				 
			
 
				 	/* kernel thread */
			
 
				 1:	movl	%edi, %eax
			
 
				-	call	*%ebx
			
 
				+	CALL_NOSPEC %ebx
			
 
				 	/*
			
 
				 	 * A kernel thread is allowed to return here after successfully
			
 
				 	 * calling do_execve().  Exit to userspace to complete the execve()
			
@@ -919,7 +920,7 @@ common_exception:
 
				 	movl	%ecx, %es
			
 
				 	TRACE_IRQS_OFF
			
 
				 	movl	%esp, %eax			# pt_regs pointer
			
 
				-	call	*%edi
			
 
				+	CALL_NOSPEC %edi
			
 
				 	jmp	ret_from_exception
			
 
				 END(common_exception)
			
 
				 
			
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -37,6 +37,7 @@
 
				 #include <asm/pgtable_types.h>
			
 
				 #include <asm/export.h>
			
 
				 #include <asm/frame.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 #include <linux/err.h>
			
 
				 
			
 
				 #include "calling.h"
			
@@ -191,7 +192,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
 
				 	 */
			
 
				 	pushq	%rdi
			
 
				 	movq	$entry_SYSCALL_64_stage2, %rdi
			
 
				-	jmp	*%rdi
			
 
				+	JMP_NOSPEC %rdi
			
 
				 END(entry_SYSCALL_64_trampoline)
			
 
				 
			
 
				 	.popsection
			
@@ -270,7 +271,12 @@ entry_SYSCALL_64_fastpath:
 
				 	 * It might end up jumping to the slow path.  If it jumps, RAX
			
 
				 	 * and all argument registers are clobbered.
			
 
				 	 */
			
 
				+#ifdef CONFIG_RETPOLINE
			
 
				+	movq	sys_call_table(, %rax, 8), %rax
			
 
				+	call	__x86_indirect_thunk_rax
			
 
				+#else
			
 
				 	call	*sys_call_table(, %rax, 8)
			
 
				+#endif
			
 
				 .Lentry_SYSCALL_64_after_fastpath_call:
			
 
				 
			
 
				 	movq	%rax, RAX(%rsp)
			
@@ -442,7 +448,7 @@ ENTRY(stub_ptregs_64)
 
				 	jmp	entry_SYSCALL64_slow_path
			
 
				 
			
 
				 1:
			
 
				-	jmp	*%rax				/* Called from C */
			
 
				+	JMP_NOSPEC %rax				/* Called from C */
			
 
				 END(stub_ptregs_64)
			
 
				 
			
 
				 .macro ptregs_stub func
			
@@ -521,7 +527,7 @@ ENTRY(ret_from_fork)
 
				 1:
			
 
				 	/* kernel thread */
			
 
				 	movq	%r12, %rdi
			
 
				-	call	*%rbx
			
 
				+	CALL_NOSPEC %rbx
			
 
				 	/*
			
 
				 	 * A kernel thread is allowed to return here after successfully
			
 
				 	 * calling do_execve().  Exit to userspace to complete the execve()
			
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -582,6 +582,24 @@ static __init int bts_init(void)
 
				 	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
			
 
				 		return -ENODEV;
			
 
				 
			
 
				+	if (boot_cpu_has(X86_FEATURE_PTI)) {
			
 
				+		/*
			
 
				+		 * BTS hardware writes through a virtual memory map, so we must
			
 
				+		 * either use the kernel physical map, or the user mapping of
			
 
				+		 * the AUX buffer.
			
 
				+		 *
			
 
				+		 * However, since this driver supports per-CPU and per-task inherit
			
 
				+		 * we cannot use the user mapping since it will not be available
			
 
				+		 * if we're not running the owning process.
			
 
				+		 *
			
 
				+		 * With PTI we can't use the kernel map either, because it's not
			
 
				+		 * there when we run userspace.
			
 
				+		 *
			
 
				+		 * For now, disable this driver when using PTI.
			
 
				+		 */
			
 
				+		return -ENODEV;
			
 
				+	}
			
 
				+
			
 
				 	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
			
 
				 				  PERF_PMU_CAP_EXCLUSIVE;
			
 
				 	bts_pmu.task_ctx_nr	= perf_sw_context;
			
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -11,7 +11,32 @@
 
				 #include <asm/pgtable.h>
			
 
				 #include <asm/special_insns.h>
			
 
				 #include <asm/preempt.h>
			
 
				+#include <asm/asm.h>
			
 
				 
			
 
				 #ifndef CONFIG_X86_CMPXCHG64
			
 
				 extern void cmpxchg8b_emu(void);
			
 
				 #endif
			
 
				+
			
 
				+#ifdef CONFIG_RETPOLINE
			
 
				+#ifdef CONFIG_X86_32
			
 
				+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
			
 
				+#else
			
 
				+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
			
 
				+INDIRECT_THUNK(8)
			
 
				+INDIRECT_THUNK(9)
			
 
				+INDIRECT_THUNK(10)
			
 
				+INDIRECT_THUNK(11)
			
 
				+INDIRECT_THUNK(12)
			
 
				+INDIRECT_THUNK(13)
			
 
				+INDIRECT_THUNK(14)
			
 
				+INDIRECT_THUNK(15)
			
 
				+#endif
			
 
				+INDIRECT_THUNK(ax)
			
 
				+INDIRECT_THUNK(bx)
			
 
				+INDIRECT_THUNK(cx)
			
 
				+INDIRECT_THUNK(dx)
			
 
				+INDIRECT_THUNK(si)
			
 
				+INDIRECT_THUNK(di)
			
 
				+INDIRECT_THUNK(bp)
			
 
				+INDIRECT_THUNK(sp)
			
 
				+#endif /* CONFIG_RETPOLINE */
			
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -203,6 +203,8 @@
 
				 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
			
 
				 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
			
 
				 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
			
 
				+#define X86_FEATURE_RETPOLINE		( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
			
 
				+#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
			
 
				 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
			
 
				 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
			
 
				 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
			
@@ -342,5 +344,7 @@
 
				 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
			
 
				 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
			
 
				 #define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
			
 
				+#define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
			
 
				+#define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
			
 
				 
			
 
				 #endif /* _ASM_X86_CPUFEATURES_H */
			
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
 
				 #include <linux/nmi.h>
			
 
				 #include <asm/io.h>
			
 
				 #include <asm/hyperv.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 /*
			
 
				  * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
			
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
				 		return U64_MAX;
			
 
				 
			
 
				 	__asm__ __volatile__("mov %4, %%r8\n"
			
 
				-			     "call *%5"
			
 
				+			     CALL_NOSPEC
			
 
				 			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
			
 
				 			       "+c" (control), "+d" (input_address)
			
 
				-			     :  "r" (output_address), "m" (hv_hypercall_pg)
			
 
				+			     :  "r" (output_address),
			
 
				+				THUNK_TARGET(hv_hypercall_pg)
			
 
				 			     : "cc", "memory", "r8", "r9", "r10", "r11");
			
 
				 #else
			
 
				 	u32 input_address_hi = upper_32_bits(input_address);
			
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
				 	if (!hv_hypercall_pg)
			
 
				 		return U64_MAX;
			
 
				 
			
 
				-	__asm__ __volatile__("call *%7"
			
 
				+	__asm__ __volatile__(CALL_NOSPEC
			
 
				 			     : "=A" (hv_status),
			
 
				 			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
			
 
				 			     : "A" (control),
			
 
				 			       "b" (input_address_hi),
			
 
				 			       "D"(output_address_hi), "S"(output_address_lo),
			
 
				-			       "m" (hv_hypercall_pg)
			
 
				+			       THUNK_TARGET(hv_hypercall_pg)
			
 
				 			     : "cc", "memory");
			
 
				 #endif /* !x86_64 */
			
 
				 	return hv_status;
			
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 
				 
			
 
				 #ifdef CONFIG_X86_64
			
 
				 	{
			
 
				-		__asm__ __volatile__("call *%4"
			
 
				+		__asm__ __volatile__(CALL_NOSPEC
			
 
				 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
			
 
				 				       "+c" (control), "+d" (input1)
			
 
				-				     : "m" (hv_hypercall_pg)
			
 
				+				     : THUNK_TARGET(hv_hypercall_pg)
			
 
				 				     : "cc", "r8", "r9", "r10", "r11");
			
 
				 	}
			
 
				 #else
			
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 
				 		u32 input1_hi = upper_32_bits(input1);
			
 
				 		u32 input1_lo = lower_32_bits(input1);
			
 
				 
			
 
				-		__asm__ __volatile__ ("call *%5"
			
 
				+		__asm__ __volatile__ (CALL_NOSPEC
			
 
				 				      : "=A"(hv_status),
			
 
				 					"+c"(input1_lo),
			
 
				 					ASM_CALL_CONSTRAINT
			
 
				 				      :	"A" (control),
			
 
				 					"b" (input1_hi),
			
 
				-					"m" (hv_hypercall_pg)
			
 
				+					THUNK_TARGET(hv_hypercall_pg)
			
 
				 				      : "cc", "edi", "esi");
			
 
				 	}
			
 
				 #endif
			
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -355,6 +355,9 @@
 
				 #define FAM10H_MMIO_CONF_BASE_MASK	0xfffffffULL
			
 
				 #define FAM10H_MMIO_CONF_BASE_SHIFT	20
			
 
				 #define MSR_FAM10H_NODE_ID		0xc001100c
			
 
				+#define MSR_F10H_DECFG			0xc0011029
			
 
				+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT	1
			
 
				+#define MSR_F10H_DECFG_LFENCE_SERIALIZE		BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
			
 
				 
			
 
				 /* K8 MSRs */
			
 
				 #define MSR_K8_TOP_MEM1			0xc001001a
			
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,214 @@
 
				+/* SPDX-License-Identifier: GPL-2.0 */
			
 
				+
			
 
				+#ifndef __NOSPEC_BRANCH_H__
			
 
				+#define __NOSPEC_BRANCH_H__
			
 
				+
			
 
				+#include <asm/alternative.h>
			
 
				+#include <asm/alternative-asm.h>
			
 
				+#include <asm/cpufeatures.h>
			
 
				+
			
 
				+/*
			
 
				+ * Fill the CPU return stack buffer.
			
 
				+ *
			
 
				+ * Each entry in the RSB, if used for a speculative 'ret', contains an
			
 
				+ * infinite 'pause; jmp' loop to capture speculative execution.
			
 
				+ *
			
 
				+ * This is required in various cases for retpoline and IBRS-based
			
 
				+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
			
 
				+ * eliminate potentially bogus entries from the RSB, and sometimes
			
 
				+ * purely to ensure that it doesn't get empty, which on some CPUs would
			
 
				+ * allow predictions from other (unwanted!) sources to be used.
			
 
				+ *
			
 
				+ * We define a CPP macro such that it can be used from both .S files and
			
 
				+ * inline assembly. It's possible to do a .macro and then include that
			
 
				+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
			
 
				+ */
			
 
				+
			
 
				+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
			
 
				+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
			
 
				+
			
 
				+/*
			
 
				+ * Google experimented with loop-unrolling and this turned out to be
			
 
				+ * the optimal version — two calls, each with their own speculation
			
 
				+ * trap should their return address end up getting used, in a loop.
			
 
				+ */
			
 
				+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
			
 
				+	mov	$(nr/2), reg;			\
			
 
				+771:						\
			
 
				+	call	772f;				\
			
 
				+773:	/* speculation trap */			\
			
 
				+	pause;					\
			
 
				+	jmp	773b;				\
			
 
				+772:						\
			
 
				+	call	774f;				\
			
 
				+775:	/* speculation trap */			\
			
 
				+	pause;					\
			
 
				+	jmp	775b;				\
			
 
				+774:						\
			
 
				+	dec	reg;				\
			
 
				+	jnz	771b;				\
			
 
				+	add	$(BITS_PER_LONG/8) * nr, sp;
			
 
				+
			
 
				+#ifdef __ASSEMBLY__
			
 
				+
			
 
				+/*
			
 
				+ * This should be used immediately before a retpoline alternative.  It tells
			
 
				+ * objtool where the retpolines are so that it can make sense of the control
			
 
				+ * flow by just reading the original instruction(s) and ignoring the
			
 
				+ * alternatives.
			
 
				+ */
			
 
				+.macro ANNOTATE_NOSPEC_ALTERNATIVE
			
 
				+	.Lannotate_\@:
			
 
				+	.pushsection .discard.nospec
			
 
				+	.long .Lannotate_\@ - .
			
 
				+	.popsection
			
 
				+.endm
			
 
				+
			
 
				+/*
			
 
				+ * These are the bare retpoline primitives for indirect jmp and call.
			
 
				+ * Do not use these directly; they only exist to make the ALTERNATIVE
			
 
				+ * invocation below less ugly.
			
 
				+ */
			
 
				+.macro RETPOLINE_JMP reg:req
			
 
				+	call	.Ldo_rop_\@
			
 
				+.Lspec_trap_\@:
			
 
				+	pause
			
 
				+	jmp	.Lspec_trap_\@
			
 
				+.Ldo_rop_\@:
			
 
				+	mov	\reg, (%_ASM_SP)
			
 
				+	ret
			
 
				+.endm
			
 
				+
			
 
				+/*
			
 
				+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
			
 
				+ * returns to the instruction after the macro.
			
 
				+ */
			
 
				+.macro RETPOLINE_CALL reg:req
			
 
				+	jmp	.Ldo_call_\@
			
 
				+.Ldo_retpoline_jmp_\@:
			
 
				+	RETPOLINE_JMP \reg
			
 
				+.Ldo_call_\@:
			
 
				+	call	.Ldo_retpoline_jmp_\@
			
 
				+.endm
			
 
				+
			
 
				+/*
			
 
				+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
			
 
				+ * indirect jmp/call which may be susceptible to the Spectre variant 2
			
 
				+ * attack.
			
 
				+ */
			
 
				+.macro JMP_NOSPEC reg:req
			
 
				+#ifdef CONFIG_RETPOLINE
			
 
				+	ANNOTATE_NOSPEC_ALTERNATIVE
			
 
				+	ALTERNATIVE_2 __stringify(jmp *\reg),				\
			
 
				+		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\
			
 
				+		__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
			
 
				+#else
			
 
				+	jmp	*\reg
			
 
				+#endif
			
 
				+.endm
			
 
				+
			
 
				+.macro CALL_NOSPEC reg:req
			
 
				+#ifdef CONFIG_RETPOLINE
			
 
				+	ANNOTATE_NOSPEC_ALTERNATIVE
			
 
				+	ALTERNATIVE_2 __stringify(call *\reg),				\
			
 
				+		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
			
 
				+		__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
			
 
				+#else
			
 
				+	call	*\reg
			
 
				+#endif
			
 
				+.endm
			
 
				+
			
 
				+ /*
			
 
				+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
			
 
				+  * monstrosity above, manually.
			
 
				+  */
			
 
				+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
			
 
				+#ifdef CONFIG_RETPOLINE
			
 
				+	ANNOTATE_NOSPEC_ALTERNATIVE
			
 
				+	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
			
 
				+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
			
 
				+		\ftr
			
 
				+.Lskip_rsb_\@:
			
 
				+#endif
			
 
				+.endm
			
 
				+
			
 
				+#else /* __ASSEMBLY__ */
			
 
				+
			
 
				+#define ANNOTATE_NOSPEC_ALTERNATIVE				\
			
 
				+	"999:\n\t"						\
			
 
				+	".pushsection .discard.nospec\n\t"			\
			
 
				+	".long 999b - .\n\t"					\
			
 
				+	".popsection\n\t"
			
 
				+
			
 
				+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
			
 
				+
			
 
				+/*
			
 
				+ * Since the inline asm uses the %V modifier which is only in newer GCC,
			
 
				+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
			
 
				+ */
			
 
				+# define CALL_NOSPEC						\
			
 
				+	ANNOTATE_NOSPEC_ALTERNATIVE				\
			
 
				+	ALTERNATIVE(						\
			
 
				+	"call *%[thunk_target]\n",				\
			
 
				+	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
			
 
				+	X86_FEATURE_RETPOLINE)
			
 
				+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
			
 
				+
			
 
				+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
			
 
				+/*
			
 
				+ * For i386 we use the original ret-equivalent retpoline, because
			
 
				+ * otherwise we'll run out of registers. We don't care about CET
			
 
				+ * here, anyway.
			
 
				+ */
			
 
				+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",	\
			
 
				+	"       jmp    904f;\n"					\
			
 
				+	"       .align 16\n"					\
			
 
				+	"901:	call   903f;\n"					\
			
 
				+	"902:	pause;\n"					\
			
 
				+	"       jmp    902b;\n"					\
			
 
				+	"       .align 16\n"					\
			
 
				+	"903:	addl   $4, %%esp;\n"				\
			
 
				+	"       pushl  %[thunk_target];\n"			\
			
 
				+	"       ret;\n"						\
			
 
				+	"       .align 16\n"					\
			
 
				+	"904:	call   901b;\n",				\
			
 
				+	X86_FEATURE_RETPOLINE)
			
 
				+
			
 
				+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
			
 
				+#else /* No retpoline for C / inline asm */
			
 
				+# define CALL_NOSPEC "call *%[thunk_target]\n"
			
 
				+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
			
 
				+#endif
			
 
				+
			
 
				+/* The Spectre V2 mitigation variants */
			
 
				+enum spectre_v2_mitigation {
			
 
				+	SPECTRE_V2_NONE,
			
 
				+	SPECTRE_V2_RETPOLINE_MINIMAL,
			
 
				+	SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
			
 
				+	SPECTRE_V2_RETPOLINE_GENERIC,
			
 
				+	SPECTRE_V2_RETPOLINE_AMD,
			
 
				+	SPECTRE_V2_IBRS,
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
			
 
				+ * can be followed in the host, by overwriting the RSB completely. Both
			
 
				+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
			
 
				+ * CPUs with IBRS_ATT *might* it be avoided.
			
 
				+ */
			
 
				+static inline void vmexit_fill_RSB(void)
			
 
				+{
			
 
				+#ifdef CONFIG_RETPOLINE
			
 
				+	unsigned long loops = RSB_CLEAR_LOOPS / 2;
			
 
				+
			
 
				+	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
			
 
				+		      ALTERNATIVE("jmp 910f",
			
 
				+				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
			
 
				+				  X86_FEATURE_RETPOLINE)
			
 
				+		      "910:"
			
 
				+		      : "=&r" (loops), ASM_CALL_CONSTRAINT
			
 
				+		      : "r" (loops) : "memory" );
			
 
				+#endif
			
 
				+}
			
 
				+#endif /* __ASSEMBLY__ */
			
 
				+#endif /* __NOSPEC_BRANCH_H__ */
			
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -38,6 +38,7 @@ do {						\
 
				 #define PCI_NOASSIGN_ROMS	0x80000
			
 
				 #define PCI_ROOT_NO_CRS		0x100000
			
 
				 #define PCI_NOASSIGN_BARS	0x200000
			
 
				+#define PCI_BIG_ROOT_WINDOW	0x400000
			
 
				 
			
 
				 extern unsigned int pci_probe;
			
 
				 extern unsigned long pirq_table_addr;
			
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -40,7 +40,7 @@
 
				 #define CR3_NOFLUSH	BIT_ULL(63)
			
 
				 
			
 
				 #ifdef CONFIG_PAGE_TABLE_ISOLATION
			
 
				-# define X86_CR3_PTI_SWITCH_BIT	11
			
 
				+# define X86_CR3_PTI_PCID_USER_BIT	11
			
 
				 #endif
			
 
				 
			
 
				 #else
			
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
 
				 	 * Make sure that the dynamic ASID space does not confict with the
			
 
				 	 * bit we are using to switch between user and kernel ASIDs.
			
 
				 	 */
			
 
				-	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
			
 
				+	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
			
 
				 
			
 
				 	/*
			
 
				 	 * The ASID being passed in here should have respected the
			
 
				 	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
			
 
				 	 */
			
 
				-	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
			
 
				+	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
			
 
				 #endif
			
 
				 	/*
			
 
				 	 * The dynamically-assigned ASIDs that get passed in are small
			
@@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
 
				 {
			
 
				 	u16 ret = kern_pcid(asid);
			
 
				 #ifdef CONFIG_PAGE_TABLE_ISOLATION
			
 
				-	ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
			
 
				+	ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
			
 
				 #endif
			
 
				 	return ret;
			
 
				 }
			
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
 
				 #include <asm/page.h>
			
 
				 #include <asm/pgtable.h>
			
 
				 #include <asm/smap.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 #include <xen/interface/xen.h>
			
 
				 #include <xen/interface/sched.h>
			
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
 
				 	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
			
 
				 
			
 
				 	stac();
			
 
				-	asm volatile("call *%[call]"
			
 
				+	asm volatile(CALL_NOSPEC
			
 
				 		     : __HYPERCALL_5PARAM
			
 
				-		     : [call] "a" (&hypercall_page[call])
			
 
				+		     : [thunk_target] "a" (&hypercall_page[call])
			
 
				 		     : __HYPERCALL_CLOBBER5);
			
 
				 	clac();
			
 
				 
			
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -344,9 +344,12 @@ done:
 
				 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
			
 
				 {
			
 
				 	unsigned long flags;
			
 
				+	int i;
			
 
				 
			
 
				-	if (instr[0] != 0x90)
			
 
				-		return;
			
 
				+	for (i = 0; i < a->padlen; i++) {
			
 
				+		if (instr[i] != 0x90)
			
 
				+			return;
			
 
				+	}
			
 
				 
			
 
				 	local_irq_save(flags);
			
 
				 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
			
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
 
				 		set_cpu_cap(c, X86_FEATURE_K8);
			
 
				 
			
 
				 	if (cpu_has(c, X86_FEATURE_XMM2)) {
			
 
				-		/* MFENCE stops RDTSC speculation */
			
 
				-		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
			
 
				+		unsigned long long val;
			
 
				+		int ret;
			
 
				+
			
 
				+		/*
			
 
				+		 * A serializing LFENCE has less overhead than MFENCE, so
			
 
				+		 * use it for execution serialization.  On families which
			
 
				+		 * don't have that MSR, LFENCE is already serializing.
			
 
				+		 * msr_set_bit() uses the safe accessors, too, even if the MSR
			
 
				+		 * is not present.
			
 
				+		 */
			
 
				+		msr_set_bit(MSR_F10H_DECFG,
			
 
				+			    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
			
 
				+
			
 
				+		/*
			
 
				+		 * Verify that the MSR write was successful (could be running
			
 
				+		 * under a hypervisor) and only then assume that LFENCE is
			
 
				+		 * serializing.
			
 
				+		 */
			
 
				+		ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
			
 
				+		if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
			
 
				+			/* A serializing LFENCE stops RDTSC speculation */
			
 
				+			set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
			
 
				+		} else {
			
 
				+			/* MFENCE stops RDTSC speculation */
			
 
				+			set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/*
			
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
 
				  */
			
 
				 #include <linux/init.h>
			
 
				 #include <linux/utsname.h>
			
 
				+#include <linux/cpu.h>
			
 
				+
			
 
				+#include <asm/nospec-branch.h>
			
 
				+#include <asm/cmdline.h>
			
 
				 #include <asm/bugs.h>
			
 
				 #include <asm/processor.h>
			
 
				 #include <asm/processor-flags.h>
			
@@ -20,6 +24,8 @@
 
				 #include <asm/pgtable.h>
			
 
				 #include <asm/set_memory.h>
			
 
				 
			
 
				+static void __init spectre_v2_select_mitigation(void);
			
 
				+
			
 
				 void __init check_bugs(void)
			
 
				 {
			
 
				 	identify_boot_cpu();
			
@@ -29,6 +35,9 @@ void __init check_bugs(void)
 
				 		print_cpu_info(&boot_cpu_data);
			
 
				 	}
			
 
				 
			
 
				+	/* Select the proper spectre mitigation before patching alternatives */
			
 
				+	spectre_v2_select_mitigation();
			
 
				+
			
 
				 #ifdef CONFIG_X86_32
			
 
				 	/*
			
 
				 	 * Check whether we are able to run this kernel safely on SMP.
			
@@ -60,3 +69,179 @@ void __init check_bugs(void)
 
				 		set_memory_4k((unsigned long)__va(0), 1);
			
 
				 #endif
			
 
				 }
			
 
				+
			
 
				+/* The kernel command line selection */
			
 
				+enum spectre_v2_mitigation_cmd {
			
 
				+	SPECTRE_V2_CMD_NONE,
			
 
				+	SPECTRE_V2_CMD_AUTO,
			
 
				+	SPECTRE_V2_CMD_FORCE,
			
 
				+	SPECTRE_V2_CMD_RETPOLINE,
			
 
				+	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
			
 
				+	SPECTRE_V2_CMD_RETPOLINE_AMD,
			
 
				+};
			
 
				+
			
 
				+static const char *spectre_v2_strings[] = {
			
 
				+	[SPECTRE_V2_NONE]			= "Vulnerable",
			
 
				+	[SPECTRE_V2_RETPOLINE_MINIMAL]		= "Vulnerable: Minimal generic ASM retpoline",
			
 
				+	[SPECTRE_V2_RETPOLINE_MINIMAL_AMD]	= "Vulnerable: Minimal AMD ASM retpoline",
			
 
				+	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
			
 
				+	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
			
 
				+};
			
 
				+
			
 
				+#undef pr_fmt
			
 
				+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
			
 
				+
			
 
				+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
			
 
				+
			
 
				+static void __init spec2_print_if_insecure(const char *reason)
			
 
				+{
			
 
				+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
			
 
				+		pr_info("%s\n", reason);
			
 
				+}
			
 
				+
			
 
				+static void __init spec2_print_if_secure(const char *reason)
			
 
				+{
			
 
				+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
			
 
				+		pr_info("%s\n", reason);
			
 
				+}
			
 
				+
			
 
				+static inline bool retp_compiler(void)
			
 
				+{
			
 
				+	return __is_defined(RETPOLINE);
			
 
				+}
			
 
				+
			
 
				+static inline bool match_option(const char *arg, int arglen, const char *opt)
			
 
				+{
			
 
				+	int len = strlen(opt);
			
 
				+
			
 
				+	return len == arglen && !strncmp(arg, opt, len);
			
 
				+}
			
 
				+
			
 
				+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
			
 
				+{
			
 
				+	char arg[20];
			
 
				+	int ret;
			
 
				+
			
 
				+	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
			
 
				+				  sizeof(arg));
			
 
				+	if (ret > 0)  {
			
 
				+		if (match_option(arg, ret, "off")) {
			
 
				+			goto disable;
			
 
				+		} else if (match_option(arg, ret, "on")) {
			
 
				+			spec2_print_if_secure("force enabled on command line.");
			
 
				+			return SPECTRE_V2_CMD_FORCE;
			
 
				+		} else if (match_option(arg, ret, "retpoline")) {
			
 
				+			spec2_print_if_insecure("retpoline selected on command line.");
			
 
				+			return SPECTRE_V2_CMD_RETPOLINE;
			
 
				+		} else if (match_option(arg, ret, "retpoline,amd")) {
			
 
				+			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
			
 
				+				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
			
 
				+				return SPECTRE_V2_CMD_AUTO;
			
 
				+			}
			
 
				+			spec2_print_if_insecure("AMD retpoline selected on command line.");
			
 
				+			return SPECTRE_V2_CMD_RETPOLINE_AMD;
			
 
				+		} else if (match_option(arg, ret, "retpoline,generic")) {
			
 
				+			spec2_print_if_insecure("generic retpoline selected on command line.");
			
 
				+			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
			
 
				+		} else if (match_option(arg, ret, "auto")) {
			
 
				+			return SPECTRE_V2_CMD_AUTO;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
			
 
				+		return SPECTRE_V2_CMD_AUTO;
			
 
				+disable:
			
 
				+	spec2_print_if_insecure("disabled on command line.");
			
 
				+	return SPECTRE_V2_CMD_NONE;
			
 
				+}
			
 
				+
			
 
				+static void __init spectre_v2_select_mitigation(void)
			
 
				+{
			
 
				+	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
			
 
				+	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
			
 
				+
			
 
				+	/*
			
 
				+	 * If the CPU is not affected and the command line mode is NONE or AUTO
			
 
				+	 * then nothing to do.
			
 
				+	 */
			
 
				+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
			
 
				+	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
			
 
				+		return;
			
 
				+
			
 
				+	switch (cmd) {
			
 
				+	case SPECTRE_V2_CMD_NONE:
			
 
				+		return;
			
 
				+
			
 
				+	case SPECTRE_V2_CMD_FORCE:
			
 
				+		/* FALLTHRU */
			
 
				+	case SPECTRE_V2_CMD_AUTO:
			
 
				+		goto retpoline_auto;
			
 
				+
			
 
				+	case SPECTRE_V2_CMD_RETPOLINE_AMD:
			
 
				+		if (IS_ENABLED(CONFIG_RETPOLINE))
			
 
				+			goto retpoline_amd;
			
 
				+		break;
			
 
				+	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
			
 
				+		if (IS_ENABLED(CONFIG_RETPOLINE))
			
 
				+			goto retpoline_generic;
			
 
				+		break;
			
 
				+	case SPECTRE_V2_CMD_RETPOLINE:
			
 
				+		if (IS_ENABLED(CONFIG_RETPOLINE))
			
 
				+			goto retpoline_auto;
			
 
				+		break;
			
 
				+	}
			
 
				+	pr_err("kernel not compiled with retpoline; no mitigation available!\n");
			
 
				+	return;
			
 
				+
			
 
				+retpoline_auto:
			
 
				+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
			
 
				+	retpoline_amd:
			
 
				+		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
			
 
				+			pr_err("LFENCE not serializing. Switching to generic retpoline\n");
			
 
				+			goto retpoline_generic;
			
 
				+		}
			
 
				+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
			
 
				+					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
			
 
				+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
			
 
				+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
			
 
				+	} else {
			
 
				+	retpoline_generic:
			
 
				+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
			
 
				+					 SPECTRE_V2_RETPOLINE_MINIMAL;
			
 
				+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
			
 
				+	}
			
 
				+
			
 
				+	spectre_v2_enabled = mode;
			
 
				+	pr_info("%s\n", spectre_v2_strings[mode]);
			
 
				+}
			
 
				+
			
 
				+#undef pr_fmt
			
 
				+
			
 
				+#ifdef CONFIG_SYSFS
			
 
				+ssize_t cpu_show_meltdown(struct device *dev,
			
 
				+			  struct device_attribute *attr, char *buf)
			
 
				+{
			
 
				+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
			
 
				+		return sprintf(buf, "Not affected\n");
			
 
				+	if (boot_cpu_has(X86_FEATURE_PTI))
			
 
				+		return sprintf(buf, "Mitigation: PTI\n");
			
 
				+	return sprintf(buf, "Vulnerable\n");
			
 
				+}
			
 
				+
			
 
				+ssize_t cpu_show_spectre_v1(struct device *dev,
			
 
				+			    struct device_attribute *attr, char *buf)
			
 
				+{
			
 
				+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
			
 
				+		return sprintf(buf, "Not affected\n");
			
 
				+	return sprintf(buf, "Vulnerable\n");
			
 
				+}
			
 
				+
			
 
				+ssize_t cpu_show_spectre_v2(struct device *dev,
			
 
				+			    struct device_attribute *attr, char *buf)
			
 
				+{
			
 
				+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
			
 
				+		return sprintf(buf, "Not affected\n");
			
 
				+
			
 
				+	return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
			
 
				+}
			
 
				+#endif
			
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -926,6 +926,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
				 	if (c->x86_vendor != X86_VENDOR_AMD)
			
 
				 		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
			
 
				 
			
 
				+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
			
 
				+	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
			
 
				+
			
 
				 	fpu__init_system(c);
			
 
				 
			
 
				 #ifdef CONFIG_X86_32
			
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 
				 {
			
 
				 	struct cpuinfo_x86 *c = &cpu_data(cpu);
			
 
				 
			
 
				-	if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
			
 
				-		pr_err_once("late loading on model 79 is disabled.\n");
			
 
				+	/*
			
 
				+	 * Late loading on model 79 with microcode revision less than 0x0b000021
			
 
				+	 * may result in a system hang. This behavior is documented in item
			
 
				+	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
			
 
				+	 */
			
 
				+	if (c->x86 == 6 &&
			
 
				+	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
			
 
				+	    c->x86_mask == 0x01 &&
			
 
				+	    c->microcode < 0x0b000021) {
			
 
				+		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
			
 
				+		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
			
 
				 		return true;
			
 
				 	}
			
 
				 
			
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -8,6 +8,7 @@
 
				 #include <asm/segment.h>
			
 
				 #include <asm/export.h>
			
 
				 #include <asm/ftrace.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 #ifdef CC_USING_FENTRY
			
 
				 # define function_hook	__fentry__
			
@@ -197,7 +198,8 @@ ftrace_stub:
 
				 	movl	0x4(%ebp), %edx
			
 
				 	subl	$MCOUNT_INSN_SIZE, %eax
			
 
				 
			
 
				-	call	*ftrace_trace_function
			
 
				+	movl	ftrace_trace_function, %ecx
			
 
				+	CALL_NOSPEC %ecx
			
 
				 
			
 
				 	popl	%edx
			
 
				 	popl	%ecx
			
@@ -241,5 +243,5 @@ return_to_handler:
 
				 	movl	%eax, %ecx
			
 
				 	popl	%edx
			
 
				 	popl	%eax
			
 
				-	jmp	*%ecx
			
 
				+	JMP_NOSPEC %ecx
			
 
				 #endif
			
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -7,7 +7,7 @@
 
				 #include <asm/ptrace.h>
			
 
				 #include <asm/ftrace.h>
			
 
				 #include <asm/export.h>
			
 
				-
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 	.code64
			
 
				 	.section .entry.text, "ax"
			
@@ -286,8 +286,8 @@ trace:
 
				 	 * ip and parent ip are used and the list function is called when
			
 
				 	 * function tracing is enabled.
			
 
				 	 */
			
 
				-	call   *ftrace_trace_function
			
 
				-
			
 
				+	movq ftrace_trace_function, %r8
			
 
				+	CALL_NOSPEC %r8
			
 
				 	restore_mcount_regs
			
 
				 
			
 
				 	jmp fgraph_trace
			
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
 
				 	movq 8(%rsp), %rdx
			
 
				 	movq (%rsp), %rax
			
 
				 	addq $24, %rsp
			
 
				-	jmp *%rdi
			
 
				+	JMP_NOSPEC %rdi
			
 
				 #endif
			
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -20,6 +20,7 @@
 
				 #include <linux/mm.h>
			
 
				 
			
 
				 #include <asm/apic.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 #ifdef CONFIG_DEBUG_STACKOVERFLOW
			
 
				 
			
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
 
				 static void call_on_stack(void *func, void *stack)
			
 
				 {
			
 
				 	asm volatile("xchgl	%%ebx,%%esp	\n"
			
 
				-		     "call	*%%edi		\n"
			
 
				+		     CALL_NOSPEC
			
 
				 		     "movl	%%ebx,%%esp	\n"
			
 
				 		     : "=b" (stack)
			
 
				 		     : "0" (stack),
			
 
				-		       "D"(func)
			
 
				+		       [thunk_target] "D"(func)
			
 
				 		     : "memory", "cc", "edx", "ecx", "eax");
			
 
				 }
			
 
				 
			
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 
				 		call_on_stack(print_stack_overflow, isp);
			
 
				 
			
 
				 	asm volatile("xchgl	%%ebx,%%esp	\n"
			
 
				-		     "call	*%%edi		\n"
			
 
				+		     CALL_NOSPEC
			
 
				 		     "movl	%%ebx,%%esp	\n"
			
 
				 		     : "=a" (arg1), "=b" (isp)
			
 
				 		     :  "0" (desc),   "1" (isp),
			
 
				-			"D" (desc->handle_irq)
			
 
				+			[thunk_target] "D" (desc->handle_irq)
			
 
				 		     : "memory", "cc", "ecx");
			
 
				 	return 1;
			
 
				 }
			
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
 
				 		return -1;
			
 
				 	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
			
 
				 	pte_unmap(pte);
			
 
				+
			
 
				+	/*
			
 
				+	 * PTI poisons low addresses in the kernel page tables in the
			
 
				+	 * name of making them unusable for userspace.  To execute
			
 
				+	 * code at such a low address, the poison must be cleared.
			
 
				+	 *
			
 
				+	 * Note: 'pgd' actually gets set in p4d_alloc() _or_
			
 
				+	 * pud_alloc() depending on 4/5-level paging.
			
 
				+	 */
			
 
				+	pgd->pgd &= ~_PAGE_NX;
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 
				 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				 	if (unlikely(!lapic_in_kernel(vcpu) ||
			
 
				-		     kvm_event_needs_reinjection(vcpu)))
			
 
				+		     kvm_event_needs_reinjection(vcpu) ||
			
 
				+		     vcpu->arch.exception.pending))
			
 
				 		return false;
			
 
				 
			
 
				 	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
			
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
 
				 
			
 
				 int kvm_mmu_module_init(void)
			
 
				 {
			
 
				+	int ret = -ENOMEM;
			
 
				+
			
 
				 	kvm_mmu_clear_all_pte_masks();
			
 
				 
			
 
				 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
			
 
				 					    sizeof(struct pte_list_desc),
			
 
				 					    0, SLAB_ACCOUNT, NULL);
			
 
				 	if (!pte_list_desc_cache)
			
 
				-		goto nomem;
			
 
				+		goto out;
			
 
				 
			
 
				 	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
			
 
				 						  sizeof(struct kvm_mmu_page),
			
 
				 						  0, SLAB_ACCOUNT, NULL);
			
 
				 	if (!mmu_page_header_cache)
			
 
				-		goto nomem;
			
 
				+		goto out;
			
 
				 
			
 
				 	if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
			
 
				-		goto nomem;
			
 
				+		goto out;
			
 
				 
			
 
				-	register_shrinker(&mmu_shrinker);
			
 
				+	ret = register_shrinker(&mmu_shrinker);
			
 
				+	if (ret)
			
 
				+		goto out;
			
 
				 
			
 
				 	return 0;
			
 
				 
			
 
				-nomem:
			
 
				+out:
			
 
				 	mmu_destroy_caches();
			
 
				-	return -ENOMEM;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -45,6 +45,7 @@
 
				 #include <asm/debugreg.h>
			
 
				 #include <asm/kvm_para.h>
			
 
				 #include <asm/irq_remapping.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 #include <asm/virtext.h>
			
 
				 #include "trace.h"
			
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 
				 {
			
 
				 	struct vmcb_control_area *c, *h;
			
 
				 	struct nested_state *g;
			
 
				-	u32 h_intercept_exceptions;
			
 
				 
			
 
				 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
			
 
				 
			
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 
				 	h = &svm->nested.hsave->control;
			
 
				 	g = &svm->nested;
			
 
				 
			
 
				-	/* No need to intercept #UD if L1 doesn't intercept it */
			
 
				-	h_intercept_exceptions =
			
 
				-		h->intercept_exceptions & ~(1U << UD_VECTOR);
			
 
				-
			
 
				 	c->intercept_cr = h->intercept_cr | g->intercept_cr;
			
 
				 	c->intercept_dr = h->intercept_dr | g->intercept_dr;
			
 
				-	c->intercept_exceptions =
			
 
				-		h_intercept_exceptions | g->intercept_exceptions;
			
 
				+	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
			
 
				 	c->intercept = h->intercept | g->intercept;
			
 
				 }
			
 
				 
			
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
 
				 {
			
 
				 	int er;
			
 
				 
			
 
				-	WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
			
 
				 	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
			
 
				 	if (er == EMULATE_USER_EXIT)
			
 
				 		return 0;
			
@@ -5034,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
				 #endif
			
 
				 		);
			
 
				 
			
 
				+	/* Eliminate branch target predictions from guest mode */
			
 
				+	vmexit_fill_RSB();
			
 
				+
			
 
				 #ifdef CONFIG_X86_64
			
 
				 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
			
 
				 #else
			
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,7 @@
 
				 #include <asm/apic.h>
			
 
				 #include <asm/irq_remapping.h>
			
 
				 #include <asm/mmu_context.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				 
			
 
				 #include "trace.h"
			
 
				 #include "pmu.h"
			
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
 
				 {
			
 
				 	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
			
 
				 
			
 
				-	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
			
 
				-	    vmcs_field_to_offset_table[field] == 0)
			
 
				+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
			
 
				+		return -ENOENT;
			
 
				+
			
 
				+	/*
			
 
				+	 * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
			
 
				+	 * generic mechanism.
			
 
				+	 */
			
 
				+	asm("lfence");
			
 
				+
			
 
				+	if (vmcs_field_to_offset_table[field] == 0)
			
 
				 		return -ENOENT;
			
 
				 
			
 
				 	return vmcs_field_to_offset_table[field];
			
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
				 {
			
 
				 	u32 eb;
			
 
				 
			
 
				-	eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
			
 
				+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
			
 
				 	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
			
 
				 	if ((vcpu->guest_debug &
			
 
				 	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
			
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
				 	 */
			
 
				 	if (is_guest_mode(vcpu))
			
 
				 		eb |= get_vmcs12(vcpu)->exception_bitmap;
			
 
				-	else
			
 
				-		eb |= 1u << UD_VECTOR;
			
 
				 
			
 
				 	vmcs_write32(EXCEPTION_BITMAP, eb);
			
 
				 }
			
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
				 		return 1;  /* already handled by vmx_vcpu_run() */
			
 
				 
			
 
				 	if (is_invalid_opcode(intr_info)) {
			
 
				-		WARN_ON_ONCE(is_guest_mode(vcpu));
			
 
				 		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
			
 
				 		if (er == EMULATE_USER_EXIT)
			
 
				 			return 0;
			
@@ -9485,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
				 #endif
			
 
				 	      );
			
 
				 
			
 
				+	/* Eliminate branch target predictions from guest mode */
			
 
				+	vmexit_fill_RSB();
			
 
				+
			
 
				 	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
			
 
				 	if (debugctlmsr)
			
 
				 		update_debugctlmsr(debugctlmsr);
			
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
 
				 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
			
 
				 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
			
 
				 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
			
 
				+lib-$(CONFIG_RETPOLINE) += retpoline.o
			
 
				 
			
 
				 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
			
 
				 
			
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
 
				 #include <asm/errno.h>
			
 
				 #include <asm/asm.h>
			
 
				 #include <asm/export.h>
			
 
				-				
			
 
				+#include <asm/nospec-branch.h>
			
 
				+
			
 
				 /*
			
 
				  * computes a partial checksum, e.g. for TCP/UDP fragments
			
 
				  */
			
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
 
				 	negl %ebx
			
 
				 	lea 45f(%ebx,%ebx,2), %ebx
			
 
				 	testl %esi, %esi
			
 
				-	jmp *%ebx
			
 
				+	JMP_NOSPEC %ebx
			
 
				 
			
 
				 	# Handle 2-byte-aligned regions
			
 
				 20:	addw (%esi), %ax
			
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
 
				 	andl $-32,%edx
			
 
				 	lea 3f(%ebx,%ebx), %ebx
			
 
				 	testl %esi, %esi 
			
 
				-	jmp *%ebx
			
 
				+	JMP_NOSPEC %ebx
			
 
				 1:	addl $64,%esi
			
 
				 	addl $64,%edi 
			
 
				 	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
			
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,48 @@
 
				+/* SPDX-License-Identifier: GPL-2.0 */
			
 
				+
			
 
				+#include <linux/stringify.h>
			
 
				+#include <linux/linkage.h>
			
 
				+#include <asm/dwarf2.h>
			
 
				+#include <asm/cpufeatures.h>
			
 
				+#include <asm/alternative-asm.h>
			
 
				+#include <asm/export.h>
			
 
				+#include <asm/nospec-branch.h>
			
 
				+
			
 
				+.macro THUNK reg
			
 
				+	.section .text.__x86.indirect_thunk.\reg
			
 
				+
			
 
				+ENTRY(__x86_indirect_thunk_\reg)
			
 
				+	CFI_STARTPROC
			
 
				+	JMP_NOSPEC %\reg
			
 
				+	CFI_ENDPROC
			
 
				+ENDPROC(__x86_indirect_thunk_\reg)
			
 
				+.endm
			
 
				+
			
 
				+/*
			
 
				+ * Despite being an assembler file we can't just use .irp here
			
 
				+ * because __KSYM_DEPS__ only uses the C preprocessor and would
			
 
				+ * only see one instance of "__x86_indirect_thunk_\reg" rather
			
 
				+ * than one per register with the correct names. So we do it
			
 
				+ * the simple and nasty way...
			
 
				+ */
			
 
				+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
			
 
				+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
			
 
				+
			
 
				+GENERATE_THUNK(_ASM_AX)
			
 
				+GENERATE_THUNK(_ASM_BX)
			
 
				+GENERATE_THUNK(_ASM_CX)
			
 
				+GENERATE_THUNK(_ASM_DX)
			
 
				+GENERATE_THUNK(_ASM_SI)
			
 
				+GENERATE_THUNK(_ASM_DI)
			
 
				+GENERATE_THUNK(_ASM_BP)
			
 
				+GENERATE_THUNK(_ASM_SP)
			
 
				+#ifdef CONFIG_64BIT
			
 
				+GENERATE_THUNK(r8)
			
 
				+GENERATE_THUNK(r9)
			
 
				+GENERATE_THUNK(r10)
			
 
				+GENERATE_THUNK(r11)
			
 
				+GENERATE_THUNK(r12)
			
 
				+GENERATE_THUNK(r13)
			
 
				+GENERATE_THUNK(r14)
			
 
				+GENERATE_THUNK(r15)
			
 
				+#endif
			
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
 
				  *
			
 
				  * Returns a pointer to a P4D on success, or NULL on failure.
			
 
				  */
			
 
				-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
			
 
				+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
			
 
				 {
			
 
				 	pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
			
 
				 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
			
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 
				 		if (!new_p4d_page)
			
 
				 			return NULL;
			
 
				 
			
 
				-		if (pgd_none(*pgd)) {
			
 
				-			set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
			
 
				-			new_p4d_page = 0;
			
 
				-		}
			
 
				-		if (new_p4d_page)
			
 
				-			free_page(new_p4d_page);
			
 
				+		set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
			
 
				 	}
			
 
				 	BUILD_BUG_ON(pgd_large(*pgd) != 0);
			
 
				 
			
@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 
				  *
			
 
				  * Returns a pointer to a PMD on success, or NULL on failure.
			
 
				  */
			
 
				-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
			
 
				+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
			
 
				 {
			
 
				 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
			
 
				 	p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
			
@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 
				 		if (!new_pud_page)
			
 
				 			return NULL;
			
 
				 
			
 
				-		if (p4d_none(*p4d)) {
			
 
				-			set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
			
 
				-			new_pud_page = 0;
			
 
				-		}
			
 
				-		if (new_pud_page)
			
 
				-			free_page(new_pud_page);
			
 
				+		set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
			
 
				 	}
			
 
				 
			
 
				 	pud = pud_offset(p4d, address);
			
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 
				 		if (!new_pmd_page)
			
 
				 			return NULL;
			
 
				 
			
 
				-		if (pud_none(*pud)) {
			
 
				-			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
			
 
				-			new_pmd_page = 0;
			
 
				-		}
			
 
				-		if (new_pmd_page)
			
 
				-			free_page(new_pmd_page);
			
 
				+		set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
			
 
				 	}
			
 
				 
			
 
				 	return pmd_offset(pud, address);
			
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 
				 		if (!new_pte_page)
			
 
				 			return NULL;
			
 
				 
			
 
				-		if (pmd_none(*pmd)) {
			
 
				-			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
			
 
				-			new_pte_page = 0;
			
 
				-		}
			
 
				-		if (new_pte_page)
			
 
				-			free_page(new_pte_page);
			
 
				+		set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
			
 
				 	}
			
 
				 
			
 
				 	pte = pte_offset_kernel(pmd, address);
			
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
 
				 	} else if (!strcmp(str, "nocrs")) {
			
 
				 		pci_probe |= PCI_ROOT_NO_CRS;
			
 
				 		return NULL;
			
 
				+#ifdef CONFIG_PHYS_ADDR_T_64BIT
			
 
				+	} else if (!strcmp(str, "big_root_window")) {
			
 
				+		pci_probe |= PCI_BIG_ROOT_WINDOW;
			
 
				+		return NULL;
			
 
				+#endif
			
 
				 	} else if (!strcmp(str, "earlydump")) {
			
 
				 		pci_early_dump_regs = 1;
			
 
				 		return NULL;
			
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -662,10 +662,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
 
				  */
			
 
				 static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
			
 
				 {
			
 
				-	unsigned i;
			
 
				 	u32 base, limit, high;
			
 
				-	struct resource *res, *conflict;
			
 
				 	struct pci_dev *other;
			
 
				+	struct resource *res;
			
 
				+	unsigned i;
			
 
				+	int r;
			
 
				+
			
 
				+	if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
			
 
				+		return;
			
 
				 
			
 
				 	/* Check that we are the only device of that type */
			
 
				 	other = pci_get_device(dev->vendor, dev->device, NULL);
			
@@ -699,22 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 
				 	if (!res)
			
 
				 		return;
			
 
				 
			
 
				+	/*
			
 
				+	 * Allocate a 256GB window directly below the 0xfd00000000 hardware
			
 
				+	 * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
			
 
				+	 */
			
 
				 	res->name = "PCI Bus 0000:00";
			
 
				 	res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
			
 
				 		IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
			
 
				-	res->start = 0x100000000ull;
			
 
				+	res->start = 0xbd00000000ull;
			
 
				 	res->end = 0xfd00000000ull - 1;
			
 
				 
			
 
				-	/* Just grab the free area behind system memory for this */
			
 
				-	while ((conflict = request_resource_conflict(&iomem_resource, res))) {
			
 
				-		if (conflict->end >= res->end) {
			
 
				-			kfree(res);
			
 
				-			return;
			
 
				-		}
			
 
				-		res->start = conflict->end + 1;
			
 
				+	r = request_resource(&iomem_resource, res);
			
 
				+	if (r) {
			
 
				+		kfree(res);
			
 
				+		return;
			
 
				 	}
			
 
				 
			
 
				-	dev_info(&dev->dev, "adding root bus resource %pR\n", res);
			
 
				+	dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
			
 
				+		 res);
			
 
				+	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
			
 
				 
			
 
				 	base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
			
 
				 		AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
			
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
 
				 				pud[j] = *pud_offset(p4d_k, vaddr);
			
 
				 			}
			
 
				 		}
			
 
				+		pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
			
 
				 	}
			
 
				+
			
 
				 out:
			
 
				 	__flush_tlb_all();
			
 
				 
			
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
			
 
				+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
			
 
				 	.setup	= tng_bt_sfi_setup,
			
 
				 };
			
 
				 
			
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 
				 {
			
 
				 	struct {
			
 
				 		struct mmuext_op op;
			
 
				-#ifdef CONFIG_SMP
			
 
				-		DECLARE_BITMAP(mask, num_processors);
			
 
				-#else
			
 
				 		DECLARE_BITMAP(mask, NR_CPUS);
			
 
				-#endif
			
 
				 	} *args;
			
 
				 	struct multicall_space mcs;
			
 
				+	const size_t mc_entry_size = sizeof(args->op) +
			
 
				+		sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
			
 
				 
			
 
				 	trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
			
 
				 
			
 
				 	if (cpumask_empty(cpus))
			
 
				 		return;		/* nothing to do */
			
 
				 
			
 
				-	mcs = xen_mc_entry(sizeof(*args));
			
 
				+	mcs = xen_mc_entry(mc_entry_size);
			
 
				 	args = mcs.args;
			
 
				 	args->op.arg2.vcpumask = to_cpumask(args->mask);
			
 
				 
			
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
 
				 void xen_setup_cpu_clockevents(void);
			
 
				 void xen_save_time_memory_area(void);
			
 
				 void xen_restore_time_memory_area(void);
			
 
				-void __init xen_init_time_ops(void);
			
 
				+void __ref xen_init_time_ops(void);
			
 
				 void __init xen_hvm_init_time_ops(void);
			
 
				 
			
 
				 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
			
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -562,6 +562,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+void blk_drain_queue(struct request_queue *q)
			
 
				+{
			
 
				+	spin_lock_irq(q->queue_lock);
			
 
				+	__blk_drain_queue(q, true);
			
 
				+	spin_unlock_irq(q->queue_lock);
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * blk_queue_bypass_start - enter queue bypass mode
			
 
				  * @q: queue of interest
			
@@ -689,8 +696,6 @@ void blk_cleanup_queue(struct request_queue *q)
 
				 	 */
			
 
				 	blk_freeze_queue(q);
			
 
				 	spin_lock_irq(lock);
			
 
				-	if (!q->mq_ops)
			
 
				-		__blk_drain_queue(q, true);
			
 
				 	queue_flag_set(QUEUE_FLAG_DEAD, q);
			
 
				 	spin_unlock_irq(lock);
			
 
				 
			
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -161,6 +161,8 @@ void blk_freeze_queue(struct request_queue *q)
 
				 	 * exported to drivers as the only user for unfreeze is blk_mq.
			
 
				 	 */
			
 
				 	blk_freeze_queue_start(q);
			
 
				+	if (!q->mq_ops)
			
 
				+		blk_drain_queue(q);
			
 
				 	blk_mq_freeze_queue_wait(q);
			
 
				 }