
Merge 4.15-rc6 into driver-core-next

We want the fixes in here as well.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Greg Kroah-Hartman 7 years ago
commit 8c9076b07c
100 files changed, 1325 additions and 1216 deletions
  1. 1 0
      Documentation/admin-guide/kernel-parameters.rst
  2. 15 3
      Documentation/admin-guide/kernel-parameters.txt
  3. 1 1
      Documentation/admin-guide/thunderbolt.rst
  4. 1 0
      Documentation/arm64/silicon-errata.txt
  5. 7 0
      Documentation/cgroup-v2.txt
  6. 0 2
      Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
  7. 1 1
      Documentation/devicetree/bindings/sound/da7218.txt
  8. 1 1
      Documentation/devicetree/bindings/sound/da7219.txt
  9. 12 6
      Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
  10. 34 0
      Documentation/filesystems/overlayfs.txt
  11. 0 874
      Documentation/locking/crossrelease.txt
  12. 21 1
      Documentation/vm/zswap.txt
  13. 14 15
      Documentation/x86/x86_64/mm.txt
  14. 10 10
      MAINTAINERS
  15. 4 1
      Makefile
  16. 2 2
      arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
  17. 4 0
      arch/arm/lib/csumpartialcopyuser.S
  18. 11 1
      arch/arm64/Kconfig
  19. 10 0
      arch/arm64/include/asm/assembler.h
  20. 3 0
      arch/arm64/include/asm/cpufeature.h
  21. 2 0
      arch/arm64/include/asm/cputype.h
  22. 22 19
      arch/arm64/include/asm/pgtable.h
  23. 1 0
      arch/arm64/kernel/cpu-reset.S
  24. 2 1
      arch/arm64/kernel/cpufeature.c
  25. 2 0
      arch/arm64/kernel/efi-entry.S
  26. 1 1
      arch/arm64/kernel/fpsimd.c
  27. 1 0
      arch/arm64/kernel/head.S
  28. 1 1
      arch/arm64/kernel/hw_breakpoint.c
  29. 1 0
      arch/arm64/kernel/relocate_kernel.S
  30. 1 0
      arch/arm64/kvm/hyp-init.S
  31. 3 0
      arch/arm64/kvm/hyp/debug-sr.c
  32. 1 1
      arch/arm64/mm/dump.c
  33. 2 3
      arch/arm64/mm/fault.c
  34. 2 1
      arch/arm64/mm/init.c
  35. 2 2
      arch/parisc/boot/compressed/misc.c
  36. 5 0
      arch/parisc/include/asm/thread_info.h
  37. 9 3
      arch/parisc/kernel/entry.S
  38. 1 0
      arch/parisc/kernel/hpmc.S
  39. 0 1
      arch/parisc/kernel/unwind.c
  40. 0 2
      arch/parisc/lib/delay.c
  41. 3 2
      arch/powerpc/include/asm/mmu_context.h
  42. 1 1
      arch/powerpc/kernel/process.c
  43. 4 3
      arch/powerpc/kvm/book3s_xive.c
  44. 4 2
      arch/powerpc/net/bpf_jit_comp64.c
  45. 6 2
      arch/powerpc/perf/core-book3s.c
  46. 16 1
      arch/powerpc/perf/imc-pmu.c
  47. 3 1
      arch/powerpc/sysdev/fsl_msi.c
  48. 19 0
      arch/riscv/include/asm/barrier.h
  49. 0 11
      arch/riscv/kernel/setup.c
  50. 1 1
      arch/riscv/kernel/sys_riscv.c
  51. 0 6
      arch/s390/include/asm/pgtable.h
  52. 1 0
      arch/s390/kernel/compat_linux.c
  53. 5 6
      arch/s390/net/bpf_jit_comp.c
  54. 2 2
      arch/sparc/lib/hweight.S
  55. 1 1
      arch/sparc/mm/fault_32.c
  56. 1 1
      arch/sparc/mm/fault_64.c
  57. 2 2
      arch/sparc/mm/gup.c
  58. 4 2
      arch/sparc/net/bpf_jit_comp_64.c
  59. 1 0
      arch/um/include/asm/Kbuild
  60. 2 1
      arch/um/include/asm/mmu_context.h
  61. 1 1
      arch/um/kernel/trap.c
  62. 3 2
      arch/unicore32/include/asm/mmu_context.h
  63. 2 1
      arch/x86/Kconfig
  64. 1 0
      arch/x86/Kconfig.debug
  65. 1 0
      arch/x86/boot/compressed/Makefile
  66. 12 4
      arch/x86/boot/compressed/head_64.S
  67. 16 0
      arch/x86/boot/compressed/misc.c
  68. 3 0
      arch/x86/boot/compressed/pagetable.c
  69. 28 0
      arch/x86/boot/compressed/pgtable_64.c
  70. 19 13
      arch/x86/boot/genimage.sh
  71. 0 7
      arch/x86/crypto/salsa20_glue.c
  72. 145 0
      arch/x86/entry/calling.h
  73. 8 6
      arch/x86/entry/entry_32.S
  74. 205 32
      arch/x86/entry/entry_64.S
  75. 28 3
      arch/x86/entry/entry_64_compat.S
  76. 37 1
      arch/x86/entry/vsyscall/vsyscall_64.c
  77. 4 1
      arch/x86/events/intel/core.c
  78. 83 47
      arch/x86/events/intel/ds.c
  79. 4 19
      arch/x86/events/perf_event.h
  80. 2 0
      arch/x86/include/asm/asm.h
  81. 81 0
      arch/x86/include/asm/cpu_entry_area.h
  82. 2 0
      arch/x86/include/asm/cpufeature.h
  83. 3 1
      arch/x86/include/asm/cpufeatures.h
  84. 5 9
      arch/x86/include/asm/desc.h
  85. 7 1
      arch/x86/include/asm/disabled-features.h
  86. 4 3
      arch/x86/include/asm/espfix.h
  87. 1 6
      arch/x86/include/asm/fixmap.h
  88. 15 10
      arch/x86/include/asm/hypervisor.h
  89. 36 0
      arch/x86/include/asm/intel_ds.h
  90. 53 0
      arch/x86/include/asm/invpcid.h
  91. 1 1
      arch/x86/include/asm/irqdomain.h
  92. 3 0
      arch/x86/include/asm/irqflags.h
  93. 1 0
      arch/x86/include/asm/kdebug.h
  94. 3 1
      arch/x86/include/asm/mmu.h
  95. 71 42
      arch/x86/include/asm/mmu_context.h
  96. 9 0
      arch/x86/include/asm/paravirt.h
  97. 11 0
      arch/x86/include/asm/pgalloc.h
  98. 26 4
      arch/x86/include/asm/pgtable.h
  99. 12 3
      arch/x86/include/asm/pgtable_32_types.h
  100. 92 0
      arch/x86/include/asm/pgtable_64.h

+ 1 - 0
Documentation/admin-guide/kernel-parameters.rst

@@ -109,6 +109,7 @@ parameter is applicable::
 	IPV6	IPv6 support is enabled.
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
+	ISOL	CPU Isolation is enabled.
 	JOY	Appropriate joystick support is enabled.
 	KGDB	Kernel debugger support is enabled.
 	KVM	Kernel Virtual Machine support is enabled.

+ 15 - 3
Documentation/admin-guide/kernel-parameters.txt

@@ -328,11 +328,15 @@
 			not play well with APC CPU idle - disable it if you have
 			APC and your system crashes randomly.
 
-	apic=		[APIC,X86-32] Advanced Programmable Interrupt Controller
+	apic=		[APIC,X86] Advanced Programmable Interrupt Controller
 			Change the output verbosity whilst booting
 			Format: { quiet (default) | verbose | debug }
 			Change the amount of debugging information output
 			when initialising the APIC and IO-APIC components.
+			For X86-32, this can also be used to specify an APIC
+			driver name.
+			Format: apic=driver_name
+			Examples: apic=bigsmp
 
 	apic_extnmi=	[APIC,X86] External NMI delivery setting
 			Format: { bsp (default) | all | none }
@@ -1737,7 +1741,7 @@
 	isapnp=		[ISAPNP]
 			Format: <RDP>,<reset>,<pci_scan>,<verbosity>
 
-	isolcpus=	[KNL,SMP] Isolate a given set of CPUs from disturbance.
+	isolcpus=	[KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance.
 			[Deprecated - use cpusets instead]
 			Format: [flag-list,]<cpu-list>
 
@@ -2662,7 +2666,7 @@
 			Valid arguments: on, off
 			Default: on
 
-	nohz_full=	[KNL,BOOT]
+	nohz_full=	[KNL,BOOT,SMP,ISOL]
 			The argument is a cpu list, as described above.
 			In kernels built with CONFIG_NO_HZ_FULL=y, set
 			the specified list of CPUs whose tick will be stopped
@@ -2708,6 +2712,8 @@
 			steal time is computed, but won't influence scheduler
 			behaviour
 
+	nopti		[X86-64] Disable kernel page table isolation
+
 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
 
 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
@@ -3282,6 +3288,12 @@
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 
+	pti=		[X86_64]
+			Control user/kernel address space isolation:
+			on - enable
+			off - disable
+			auto - default setting
+
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.
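
As an aside (not part of this commit), the new nopti/pti= switches documented above are ordinary kernel command line options; the hedged C sketch below merely scans /proc/cmdline for them from userspace. Only the /proc/cmdline path is standard kernel behaviour; everything else here is illustrative.

/* Illustrative only: report whether the running kernel was booted with
 * "nopti" or "pti=off" (naive substring match on the command line). */
#include <stdio.h>
#include <string.h>

int main(void)
{
    char cmdline[4096] = "";
    FILE *f = fopen("/proc/cmdline", "r");

    if (!f) {
        perror("/proc/cmdline");
        return 1;
    }
    if (!fgets(cmdline, sizeof(cmdline), f))
        cmdline[0] = '\0';
    fclose(f);

    if (strstr(cmdline, "nopti") || strstr(cmdline, "pti=off"))
        printf("page table isolation disabled on the command line\n");
    else
        printf("no pti override found\n");
    return 0;
}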

+ 1 - 1
Documentation/admin-guide/thunderbolt.rst

@@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
 a sysfs attribute called "force_power".
 
 For example the intel-wmi-thunderbolt driver exposes this attribute in:
-  /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
+  /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
 
   To force the power to on, write 1 to this attribute file.
   To disable force power, write 0 to this attribute file.

+ 1 - 0
Documentation/arm64/silicon-errata.txt

@@ -75,3 +75,4 @@ stable kernels.
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
+| Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |

+ 7 - 0
Documentation/cgroup-v2.txt

@@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for
 normal scheduling policy and absolute bandwidth allocation model for
 realtime scheduling policy.
 
+WARNING: cgroup2 doesn't yet support control of realtime processes and
+the cpu controller can only be enabled when all RT processes are in
+the root cgroup.  Be aware that system management software may already
+have placed RT processes into nonroot cgroups during the system boot
+process, and these processes may need to be moved to the root cgroup
+before the cpu controller can be enabled.
+
 
 CPU Interface Files
 ~~~~~~~~~~~~~~~~~~~
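
As an aside (not part of this commit), the warning above can be seen in practice with the hedged C sketch below: it tries to enable the cpu controller by writing "+cpu" to cgroup.subtree_control on a cgroup2 hierarchy assumed to be mounted at /sys/fs/cgroup, and the write is expected to fail while realtime processes still live outside the root cgroup.

/* Illustrative only: attempt to enable the cpu controller for child cgroups. */
#include <stdio.h>

int main(void)
{
    const char *path = "/sys/fs/cgroup/cgroup.subtree_control";
    FILE *f = fopen(path, "w");

    if (!f) {
        perror(path);
        return 1;
    }
    /* The write fails while RT tasks sit in non-root cgroups, per the warning. */
    if (fprintf(f, "+cpu\n") < 0 || fclose(f) != 0) {
        perror(path);
        return 1;
    }
    printf("cpu controller enabled\n");
    return 0;
}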

+ 0 - 2
Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt

@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10
@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16

+ 1 - 1
Documentation/devicetree/bindings/sound/da7218.txt

@@ -73,7 +73,7 @@ Example:
 		compatible = "dlg,da7218";
 		reg = <0x1a>;
 		interrupt-parent = <&gpio6>;
-		interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 		wakeup-source;
 
 		VDD-supply = <&reg_audio>;

+ 1 - 1
Documentation/devicetree/bindings/sound/da7219.txt

@@ -77,7 +77,7 @@ Example:
 		reg = <0x1a>;
 
 		interrupt-parent = <&gpio6>;
-		interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 
 		VDD-supply = <&reg_audio>;
 		VDDMIC-supply = <&reg_audio>;

+ 12 - 6
Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt

@@ -12,24 +12,30 @@ Required properties:
   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
 See the clock consumer binding,
 	Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-		Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
 
-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
+select lines can be used, they appear to always generate a pulse between each
+word of a transfer.  Most use cases will require GPIO based chip selects to
+generate a valid transaction.
 
 Optional properties:
+- num-cs :  Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
 the SPI_READY mode-flag needs to be set too.
 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
 
+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:
 
 ecspi@70010000 {

+ 34 - 0
Documentation/filesystems/overlayfs.txt

@@ -156,6 +156,40 @@ handle it in two different ways:
    root of the overlay.  Finally the directory is moved to the new
    location.
 
+There are several ways to tune the "redirect_dir" feature.
+
+Kernel config options:
+
+- OVERLAY_FS_REDIRECT_DIR:
+    If this is enabled, then redirect_dir is turned on by  default.
+- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
+    If this is enabled, then redirects are always followed by default. Enabling
+    this results in a less secure configuration.  Enable this option only when
+    worried about backward compatibility with kernels that have the redirect_dir
+    feature and follow redirects even if turned off.
+
+Module options (can also be changed through /sys/module/overlay/parameters/*):
+
+- "redirect_dir=BOOL":
+    See OVERLAY_FS_REDIRECT_DIR kernel config option above.
+- "redirect_always_follow=BOOL":
+    See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
+- "redirect_max=NUM":
+    The maximum number of bytes in an absolute redirect (default is 256).
+
+Mount options:
+
+- "redirect_dir=on":
+    Redirects are enabled.
+- "redirect_dir=follow":
+    Redirects are not created, but followed.
+- "redirect_dir=off":
+    Redirects are not created and only followed if "redirect_always_follow"
+    feature is enabled in the kernel/module config.
+- "redirect_dir=nofollow":
+    Redirects are not created and not followed (equivalent to "redirect_dir=off"
+    if "redirect_always_follow" feature is not enabled).
+
 Non-directories
 ---------------
 
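
As an aside (not part of this commit), the mount options documented above can be exercised with the hedged C sketch below, which mounts an overlay with redirect_dir=on through mount(2); the /lower, /upper, /work and /merged paths are made-up placeholders.

/* Illustrative only: overlay mount with the redirect_dir feature enabled. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
    const char *opts =
        "lowerdir=/lower,upperdir=/upper,workdir=/work,redirect_dir=on";

    if (mount("overlay", "/merged", "overlay", 0, opts) != 0) {
        perror("mount");
        return 1;
    }
    printf("overlay mounted with redirect_dir=on\n");
    return 0;
}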

+ 0 - 874
Documentation/locking/crossrelease.txt

@@ -1,874 +0,0 @@
-Crossrelease
-============
-
-Started by Byungchul Park <byungchul.park@lge.com>
-
-Contents:
-
- (*) Background
-
-     - What causes deadlock
-     - How lockdep works
-
- (*) Limitation
-
-     - Limit lockdep
-     - Pros from the limitation
-     - Cons from the limitation
-     - Relax the limitation
-
- (*) Crossrelease
-
-     - Introduce crossrelease
-     - Introduce commit
-
- (*) Implementation
-
-     - Data structures
-     - How crossrelease works
-
- (*) Optimizations
-
-     - Avoid duplication
-     - Lockless for hot paths
-
- (*) APPENDIX A: What lockdep does to work aggresively
-
- (*) APPENDIX B: How to avoid adding false dependencies
-
-
-==========
-Background
-==========
-
-What causes deadlock
---------------------
-
-A deadlock occurs when a context is waiting for an event to happen,
-which is impossible because another (or the) context who can trigger the
-event is also waiting for another (or the) event to happen, which is
-also impossible due to the same reason.
-
-For example:
-
-   A context going to trigger event C is waiting for event A to happen.
-   A context going to trigger event A is waiting for event B to happen.
-   A context going to trigger event B is waiting for event C to happen.
-
-A deadlock occurs when these three wait operations run at the same time,
-because event C cannot be triggered if event A does not happen, which in
-turn cannot be triggered if event B does not happen, which in turn
-cannot be triggered if event C does not happen. After all, no event can
-be triggered since any of them never meets its condition to wake up.
-
-A dependency might exist between two waiters and a deadlock might happen
-due to an incorrect releationship between dependencies. Thus, we must
-define what a dependency is first. A dependency exists between them if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-Each wait in the example creates its dependency like:
-
-   Event C depends on event A.
-   Event A depends on event B.
-   Event B depends on event C.
-
-   NOTE: Precisely speaking, a dependency is one between whether a
-   waiter for an event can be woken up and whether another waiter for
-   another event can be woken up. However from now on, we will describe
-   a dependency as if it's one between an event and another event for
-   simplicity.
-
-And they form circular dependencies like:
-
-    -> C -> A -> B -
-   /                \
-   \                /
-    ----------------
-
-   where 'A -> B' means that event A depends on event B.
-
-Such circular dependencies lead to a deadlock since no waiter can meet
-its condition to wake up as described.
-
-CONCLUSION
-
-Circular dependencies cause a deadlock.
-
-
-How lockdep works
------------------
-
-Lockdep tries to detect a deadlock by checking dependencies created by
-lock operations, acquire and release. Waiting for a lock corresponds to
-waiting for an event, and releasing a lock corresponds to triggering an
-event in the previous section.
-
-In short, lockdep does:
-
-   1. Detect a new dependency.
-   2. Add the dependency into a global graph.
-   3. Check if that makes dependencies circular.
-   4. Report a deadlock or its possibility if so.
-
-For example, consider a graph built by lockdep that looks like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-Lockdep will add a dependency into the graph on detection of a new
-dependency. For example, it will add a dependency 'E -> C' when a new
-dependency between lock E and lock C is detected. Then the graph will be:
-
-       A -> B -
-               \
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where A, B,..., E are different lock classes.
-
-This graph contains a subgraph which demonstrates circular dependencies:
-
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where C, D and E are different lock classes.
-
-This is the condition under which a deadlock might occur. Lockdep
-reports it on detection after adding a new dependency. This is the way
-how lockdep works.
-
-CONCLUSION
-
-Lockdep detects a deadlock or its possibility by checking if circular
-dependencies were created after adding each new dependency.
-
-
-==========
-Limitation
-==========
-
-Limit lockdep
--------------
-
-Limiting lockdep to work on only typical locks e.g. spin locks and
-mutexes, which are released within the acquire context, the
-implementation becomes simple but its capacity for detection becomes
-limited. Let's check pros and cons in next section.
-
-
-Pros from the limitation
-------------------------
-
-Given the limitation, when acquiring a lock, locks in a held_locks
-cannot be released if the context cannot acquire it so has to wait to
-acquire it, which means all waiters for the locks in the held_locks are
-stuck. It's an exact case to create dependencies between each lock in
-the held_locks and the lock to acquire.
-
-For example:
-
-   CONTEXT X
-   ---------
-   acquire A
-   acquire B /* Add a dependency 'A -> B' */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-When acquiring lock A, the held_locks of CONTEXT X is empty thus no
-dependency is added. But when acquiring lock B, lockdep detects and adds
-a new dependency 'A -> B' between lock A in the held_locks and lock B.
-They can be simply added whenever acquiring each lock.
-
-And data required by lockdep exists in a local structure, held_locks
-embedded in task_struct. Forcing to access the data within the context,
-lockdep can avoid racy problems without explicit locks while handling
-the local data.
-
-Lastly, lockdep only needs to keep locks currently being held, to build
-a dependency graph. However, relaxing the limitation, it needs to keep
-even locks already released, because a decision whether they created
-dependencies might be long-deferred.
-
-To sum up, we can expect several advantages from the limitation:
-
-   1. Lockdep can easily identify a dependency when acquiring a lock.
-   2. Races are avoidable while accessing local locks in a held_locks.
-   3. Lockdep only needs to keep locks currently being held.
-
-CONCLUSION
-
-Given the limitation, the implementation becomes simple and efficient.
-
-
-Cons from the limitation
-------------------------
-
-Given the limitation, lockdep is applicable only to typical locks. For
-example, page locks for page access or completions for synchronization
-cannot work with lockdep.
-
-Can we detect deadlocks below, under the limitation?
-
-Example 1:
-
-   CONTEXT X	   CONTEXT Y	   CONTEXT Z
-   ---------	   ---------	   ----------
-		   mutex_lock A
-   lock_page B
-		   lock_page B
-				   mutex_lock A /* DEADLOCK */
-				   unlock_page B held by X
-		   unlock_page B
-		   mutex_unlock A
-				   mutex_unlock A
-
-   where A and B are different lock classes.
-
-No, we cannot.
-
-Example 2:
-
-   CONTEXT X		   CONTEXT Y
-   ---------		   ---------
-			   mutex_lock A
-   mutex_lock A
-			   wait_for_complete B /* DEADLOCK */
-   complete B
-			   mutex_unlock A
-   mutex_unlock A
-
-   where A is a lock class and B is a completion variable.
-
-No, we cannot.
-
-CONCLUSION
-
-Given the limitation, lockdep cannot detect a deadlock or its
-possibility caused by page locks or completions.
-
-
-Relax the limitation
---------------------
-
-Under the limitation, things to create dependencies are limited to
-typical locks. However, synchronization primitives like page locks and
-completions, which are allowed to be released in any context, also
-create dependencies and can cause a deadlock. So lockdep should track
-these locks to do a better job. We have to relax the limitation for
-these locks to work with lockdep.
-
-Detecting dependencies is very important for lockdep to work because
-adding a dependency means adding an opportunity to check whether it
-causes a deadlock. The more lockdep adds dependencies, the more it
-thoroughly works. Thus Lockdep has to do its best to detect and add as
-many true dependencies into a graph as possible.
-
-For example, considering only typical locks, lockdep builds a graph like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-On the other hand, under the relaxation, additional dependencies might
-be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
-added thanks to the relaxation, the graph will be:
-
-         A -> B -
-                 \
-                  -> E -> GX
-                 /
-   FX -> C -> D -
-
-   where A, B,..., E, FX and GX are different lock classes, and a suffix
-   'X' is added on non-typical locks.
-
-The latter graph gives us more chances to check circular dependencies
-than the former. However, it might suffer performance degradation since
-relaxing the limitation, with which design and implementation of lockdep
-can be efficient, might introduce inefficiency inevitably. So lockdep
-should provide two options, strong detection and efficient detection.
-
-Choosing efficient detection:
-
-   Lockdep works with only locks restricted to be released within the
-   acquire context. However, lockdep works efficiently.
-
-Choosing strong detection:
-
-   Lockdep works with all synchronization primitives. However, lockdep
-   suffers performance degradation.
-
-CONCLUSION
-
-Relaxing the limitation, lockdep can add additional dependencies giving
-additional opportunities to check circular dependencies.
-
-
-============
-Crossrelease
-============
-
-Introduce crossrelease
-----------------------
-
-In order to allow lockdep to handle additional dependencies by what
-might be released in any context, namely 'crosslock', we have to be able
-to identify those created by crosslocks. The proposed 'crossrelease'
-feature provoides a way to do that.
-
-Crossrelease feature has to do:
-
-   1. Identify dependencies created by crosslocks.
-   2. Add the dependencies into a dependency graph.
-
-That's all. Once a meaningful dependency is added into graph, then
-lockdep would work with the graph as it did. The most important thing
-crossrelease feature has to do is to correctly identify and add true
-dependencies into the global graph.
-
-A dependency e.g. 'A -> B' can be identified only in the A's release
-context because a decision required to identify the dependency can be
-made only in the release context. That is to decide whether A can be
-released so that a waiter for A can be woken up. It cannot be made in
-other than the A's release context.
-
-It's no matter for typical locks because each acquire context is same as
-its release context, thus lockdep can decide whether a lock can be
-released in the acquire context. However for crosslocks, lockdep cannot
-make the decision in the acquire context but has to wait until the
-release context is identified.
-
-Therefore, deadlocks by crosslocks cannot be detected just when it
-happens, because those cannot be identified until the crosslocks are
-released. However, deadlock possibilities can be detected and it's very
-worth. See 'APPENDIX A' section to check why.
-
-CONCLUSION
-
-Using crossrelease feature, lockdep can work with what might be released
-in any context, namely crosslock.
-
-
-Introduce commit
-----------------
-
-Since crossrelease defers the work adding true dependencies of
-crosslocks until they are actually released, crossrelease has to queue
-all acquisitions which might create dependencies with the crosslocks.
-Then it identifies dependencies using the queued data in batches at a
-proper time. We call it 'commit'.
-
-There are four types of dependencies:
-
-1. TT type: 'typical lock A -> typical lock B'
-
-   Just when acquiring B, lockdep can see it's in the A's release
-   context. So the dependency between A and B can be identified
-   immediately. Commit is unnecessary.
-
-2. TC type: 'typical lock A -> crosslock BX'
-
-   Just when acquiring BX, lockdep can see it's in the A's release
-   context. So the dependency between A and BX can be identified
-   immediately. Commit is unnecessary, too.
-
-3. CT type: 'crosslock AX -> typical lock B'
-
-   When acquiring B, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-
-4. CC type: 'crosslock AX -> crosslock BX'
-
-   When acquiring BX, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-   But, handling CC type is not implemented yet. It's a future work.
-
-Lockdep can work without commit for typical locks, but commit step is
-necessary once crosslocks are involved. Introducing commit, lockdep
-performs three steps. What lockdep does in each step is:
-
-1. Acquisition: For typical locks, lockdep does what it originally did
-   and queues the lock so that CT type dependencies can be checked using
-   it at the commit step. For crosslocks, it saves data which will be
-   used at the commit step and increases a reference count for it.
-
-2. Commit: No action is reauired for typical locks. For crosslocks,
-   lockdep adds CT type dependencies using the data saved at the
-   acquisition step.
-
-3. Release: No changes are required for typical locks. When a crosslock
-   is released, it decreases a reference count for it.
-
-CONCLUSION
-
-Crossrelease introduces commit step to handle dependencies of crosslocks
-in batches at a proper time.
-
-
-==============
-Implementation
-==============
-
-Data structures
----------------
-
-Crossrelease introduces two main data structures.
-
-1. hist_lock
-
-   This is an array embedded in task_struct, for keeping lock history so
-   that dependencies can be added using them at the commit step. Since
-   it's local data, it can be accessed locklessly in the owner context.
-   The array is filled at the acquisition step and consumed at the
-   commit step. And it's managed in circular manner.
-
-2. cross_lock
-
-   One per lockdep_map exists. This is for keeping data of crosslocks
-   and used at the commit step.
-
-
-How crossrelease works
-----------------------
-
-It's the key of how crossrelease works, to defer necessary works to an
-appropriate point in time and perform in at once at the commit step.
-Let's take a look with examples step by step, starting from how lockdep
-works without crossrelease for typical locks.
-
-   acquire A /* Push A onto held_locks */
-   acquire B /* Push B onto held_locks and add 'A -> B' */
-   acquire C /* Push C onto held_locks and add 'B -> C' */
-   release C /* Pop C from held_locks */
-   release B /* Pop B from held_locks */
-   release A /* Pop A from held_locks */
-
-   where A, B and C are different lock classes.
-
-   NOTE: This document assumes that readers already understand how
-   lockdep works without crossrelease thus omits details. But there's
-   one thing to note. Lockdep pretends to pop a lock from held_locks
-   when releasing it. But it's subtly different from the original pop
-   operation because lockdep allows other than the top to be poped.
-
-In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
-dependency every time acquiring a lock.
-
-After adding 'A -> B', a dependency graph will be:
-
-   A -> B
-
-   where A and B are different lock classes.
-
-And after adding 'B -> C', the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-Let's performs commit step even for typical locks to add dependencies.
-Of course, commit step is not necessary for them, however, it would work
-well because this is a more general way.
-
-   acquire A
-   /*
-    * Queue A into hist_locks
-    *
-    * In hist_locks: A
-    * In graph: Empty
-    */
-
-   acquire B
-   /*
-    * Queue B into hist_locks
-    *
-    * In hist_locks: A, B
-    * In graph: Empty
-    */
-
-   acquire C
-   /*
-    * Queue C into hist_locks
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   commit C
-   /*
-    * Add 'C -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire C: Nothing
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   release C
-
-   commit B
-   /*
-    * Add 'B -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire B: C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C'
-    */
-
-   release B
-
-   commit A
-   /*
-    * Add 'A -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire A: B, C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C', 'A -> B', 'A -> C'
-    */
-
-   release A
-
-   where A, B and C are different lock classes.
-
-In this case, dependencies are added at the commit step as described.
-
-After commits for A, B and C, the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-   NOTE: A dependency 'A -> C' is optimized out.
-
-We can see the former graph built without commit step is same as the
-latter graph built using commit steps. Of course the former way leads to
-earlier finish for building the graph, which means we can detect a
-deadlock or its possibility sooner. So the former way would be prefered
-when possible. But we cannot avoid using the latter way for crosslocks.
-
-Let's look at how commit steps work for crosslocks. In this case, the
-commit step is performed only on crosslock AX as real. And it assumes
-that the AX release context is different from the AX acquire context.
-
-   BX RELEASE CONTEXT		   BX ACQUIRE CONTEXT
-   ------------------		   ------------------
-				   acquire A
-				   /*
-				    * Push A onto held_locks
-				    * Queue A into hist_locks
-				    *
-				    * In held_locks: A
-				    * In hist_locks: A
-				    * In graph: Empty
-				    */
-
-				   acquire BX
-				   /*
-				    * Add 'the top of held_locks -> BX'
-				    *
-				    * In held_locks: A
-				    * In hist_locks: A
-				    * In graph: 'A -> BX'
-				    */
-
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-   It must be guaranteed that the following operations are seen after
-   acquiring BX globally. It can be done by things like barrier.
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-   acquire C
-   /*
-    * Push C onto held_locks
-    * Queue C into hist_locks
-    *
-    * In held_locks: C
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-
-   release C
-   /*
-    * Pop C from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-				   acquire D
-				   /*
-				    * Push D onto held_locks
-				    * Queue D into hist_locks
-				    * Add 'the top of held_locks -> D'
-				    *
-				    * In held_locks: A, D
-				    * In hist_locks: A, D
-				    * In graph: 'A -> BX', 'A -> D'
-				    */
-   acquire E
-   /*
-    * Push E onto held_locks
-    * Queue E into hist_locks
-    *
-    * In held_locks: E
-    * In hist_locks: C, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-
-   release E
-   /*
-    * Pop E from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-				   release D
-				   /*
-				    * Pop D from held_locks
-				    *
-				    * In held_locks: A
-				    * In hist_locks: A, D
-				    * In graph: 'A -> BX', 'A -> D'
-				    */
-   commit BX
-   /*
-    * Add 'BX -> ?'
-    * What has been queued since acquire BX: C, E
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-
-   release BX
-   /*
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-				   release A
-				   /*
-				    * Pop A from held_locks
-				    *
-				    * In held_locks: Empty
-				    * In hist_locks: A, D
-				    * In graph: 'A -> BX', 'A -> D',
-				    *           'BX -> C', 'BX -> E'
-				    */
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Crossrelease considers all acquisitions after acqiuring BX are
-candidates which might create dependencies with BX. True dependencies
-will be determined when identifying the release context of BX. Meanwhile,
-all typical locks are queued so that they can be used at the commit step.
-And then two dependencies 'BX -> C' and 'BX -> E' are added at the
-commit step when identifying the release context.
-
-The final graph will be, with crossrelease:
-
-               -> C
-              /
-       -> BX -
-      /       \
-   A -         -> E
-      \
-       -> D
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-However, the final graph will be, without crossrelease:
-
-   A -> D
-
-   where A and D are different lock classes.
-
-The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
-'BX -> E' giving additional opportunities to check if they cause
-deadlocks. This way lockdep can detect a deadlock or its possibility
-caused by crosslocks.
-
-CONCLUSION
-
-We checked how crossrelease works with several examples.
-
-
-=============
-Optimizations
-=============
-
-Avoid duplication
------------------
-
-Crossrelease feature uses a cache like what lockdep already uses for
-dependency chains, but this time it's for caching CT type dependencies.
-Once that dependency is cached, the same will never be added again.
-
-
-Lockless for hot paths
-----------------------
-
-To keep all locks for later use at the commit step, crossrelease adopts
-a local array embedded in task_struct, which makes access to the data
-lockless by forcing it to happen only within the owner context. It's
-like how lockdep handles held_locks. Lockless implmentation is important
-since typical locks are very frequently acquired and released.
-
-
-=================================================
-APPENDIX A: What lockdep does to work aggresively
-=================================================
-
-A deadlock actually occurs when all wait operations creating circular
-dependencies run at the same time. Even though they don't, a potential
-deadlock exists if the problematic dependencies exist. Thus it's
-meaningful to detect not only an actual deadlock but also its potential
-possibility. The latter is rather valuable. When a deadlock occurs
-actually, we can identify what happens in the system by some means or
-other even without lockdep. However, there's no way to detect possiblity
-without lockdep unless the whole code is parsed in head. It's terrible.
-Lockdep does the both, and crossrelease only focuses on the latter.
-
-Whether or not a deadlock actually occurs depends on several factors.
-For example, what order contexts are switched in is a factor. Assuming
-circular dependencies exist, a deadlock would occur when contexts are
-switched so that all wait operations creating the dependencies run
-simultaneously. Thus to detect a deadlock possibility even in the case
-that it has not occured yet, lockdep should consider all possible
-combinations of dependencies, trying to:
-
-1. Use a global dependency graph.
-
-   Lockdep combines all dependencies into one global graph and uses them,
-   regardless of which context generates them or what order contexts are
-   switched in. Aggregated dependencies are only considered so they are
-   prone to be circular if a problem exists.
-
-2. Check dependencies between classes instead of instances.
-
-   What actually causes a deadlock are instances of lock. However,
-   lockdep checks dependencies between classes instead of instances.
-   This way lockdep can detect a deadlock which has not happened but
-   might happen in future by others but the same class.
-
-3. Assume all acquisitions lead to waiting.
-
-   Although locks might be acquired without waiting which is essential
-   to create dependencies, lockdep assumes all acquisitions lead to
-   waiting since it might be true some time or another.
-
-CONCLUSION
-
-Lockdep detects not only an actual deadlock but also its possibility,
-and the latter is more valuable.
-
-
-==================================================
-APPENDIX B: How to avoid adding false dependencies
-==================================================
-
-Remind what a dependency is. A dependency exists if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-For example:
-
-   acquire A
-   acquire B /* A dependency 'A -> B' exists */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-A depedency 'A -> B' exists since:
-
-   1. A waiter for A and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for A can be woken up depends on whether the
-      other can. IOW, TASK X cannot release A if it fails to acquire B.
-
-For another example:
-
-   TASK X			   TASK Y
-   ------			   ------
-				   acquire AX
-   acquire B /* A dependency 'AX -> B' exists */
-   release B
-   release AX held by Y
-
-   where AX and B are different lock classes, and a suffix 'X' is added
-   on crosslocks.
-
-Even in this case involving crosslocks, the same rule can be applied. A
-depedency 'AX -> B' exists since:
-
-   1. A waiter for AX and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for AX can be woken up depends on whether the
-      other can. IOW, TASK X cannot release AX if it fails to acquire B.
-
-Let's take a look at more complicated example:
-
-   TASK X			   TASK Y
-   ------			   ------
-   acquire B
-   release B
-   fork Y
-				   acquire AX
-   acquire C /* A dependency 'AX -> C' exists */
-   release C
-   release AX held by Y
-
-   where AX, B and C are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Does a dependency 'AX -> B' exist? Nope.
-
-Two waiters are essential to create a dependency. However, waiters for
-AX and B to create 'AX -> B' cannot exist at the same time in this
-example. Thus the dependency 'AX -> B' cannot be created.
-
-It would be ideal if the full set of true ones can be considered. But
-we can ensure nothing but what actually happened. Relying on what
-actually happens at runtime, we can anyway add only true ones, though
-they might be a subset of true ones. It's similar to how lockdep works
-for typical locks. There might be more true dependencies than what
-lockdep has detected in runtime. Lockdep has no choice but to rely on
-what actually happens. Crossrelease also relies on it.
-
-CONCLUSION
-
-Relying on what actually happens, lockdep can avoid adding false
-dependencies.

+ 21 - 1
Documentation/vm/zswap.txt

@@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its
 original compressor.  Once all pages are removed from an old zpool, the zpool
 and its compressor are freed.
 
+Some of the pages in zswap are same-value filled pages (i.e. contents of the
+page have same value or repetitive pattern). These pages include zero-filled
+pages and they are handled differently. During store operation, a page is
+checked if it is a same-value filled page before compressing it. If true, the
+compressed length of the page is set to zero and the pattern or same-filled
+value is stored.
+
+Same-value filled pages identification feature is enabled by default and can be
+disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
+e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
+runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
+
+echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
+
+When zswap same-filled page identification is disabled at runtime, it will stop
+checking for the same-value filled pages during store operation. However, the
+existing pages which are marked as same-value filled pages remain stored
+unchanged in zswap until they are either loaded or invalidated.
+
 A debugfs interface is provided for various statistic about pool size, number
-of pages stored, and various counters for the reasons pages are rejected.
+of pages stored, same-value filled pages and various counters for the reasons
+pages are rejected.
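
As an aside (not part of this commit), the same-value filled page check described above amounts to scanning the page word by word; the hedged C sketch below is an illustration, not the kernel's actual code, and assumes a 4096-byte page.

/* Illustrative only: detect a same-value filled page. */
#include <stdbool.h>
#include <stddef.h>

#define PAGE_SIZE 4096

static bool page_same_filled(const void *page, unsigned long *value)
{
    const unsigned long *word = page;
    size_t i, nwords = PAGE_SIZE / sizeof(unsigned long);

    for (i = 1; i < nwords; i++) {
        if (word[i] != word[0])
            return false;        /* not same-filled: compress normally */
    }
    *value = word[0];            /* only this repeating value needs storing */
    return true;
}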

+ 14 - 15
Documentation/x86/x86_64/mm.txt

@@ -1,6 +1,4 @@
 
-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,16 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
+fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Virtual memory map with 5 level page tables:
@@ -29,26 +30,29 @@ Virtual memory map with 5 level page tables:
 hole caused by [56:63] sign extension
 ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
 ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes hole between user space and kernel addresses if you interpret them
+as unsigned.
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
@@ -58,9 +62,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.
 
-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
@@ -72,5 +73,3 @@ following fixmap section.
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
-
--Andi Kleen, Jul 2004
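
As an aside (not part of this commit), the sign-extension rule stated above can be checked with the hedged C sketch below; it assumes 48-bit (4-level) virtual addresses, where bits 63..47 must replicate bit 47 for an address to be canonical.

/* Illustrative only: canonical-address test for a 48-bit virtual address. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_canonical_48(uint64_t va)
{
    /* Shift the 48 significant bits to the top, arithmetic-shift them back,
     * and compare: equal only when bits 63..47 are a sign extension of bit 47. */
    return (uint64_t)(((int64_t)(va << 16)) >> 16) == va;
}

int main(void)
{
    printf("%d\n", is_canonical_48(0x00007fffffffffffULL)); /* 1: top of user space */
    printf("%d\n", is_canonical_48(0xffff800000000000ULL)); /* 1: start of kernel range */
    printf("%d\n", is_canonical_48(0x0000800000000000ULL)); /* 0: inside the hole */
    return 0;
}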

+ 10 - 10
MAINTAINERS

@@ -2621,24 +2621,22 @@ F:	fs/bfs/
 F:	include/uapi/linux/bfs_fs.h
 
 BLACKFIN ARCHITECTURE
-M:	Steven Miao <realmz6@gmail.com>
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 T:	git git://git.code.sf.net/p/adi-linux/code
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	arch/blackfin/
 
 BLACKFIN EMAC DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/net/ethernet/adi/
 
 BLACKFIN MEDIA DRIVER
-M:	Scott Jiang <scott.jiang.linux@gmail.com>
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org/
-S:	Supported
+S:	Orphan
 F:	drivers/media/platform/blackfin/
 F:	drivers/media/i2c/adv7183*
 F:	drivers/media/i2c/vs6624*
@@ -2646,25 +2644,25 @@ F:	drivers/media/i2c/vs6624*
 BLACKFIN RTC DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/rtc/rtc-bfin.c
 
 BLACKFIN SDH DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/mmc/host/bfin_sdh.c
 
 BLACKFIN SERIAL DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/tty/serial/bfin_uart.c
 
 BLACKFIN WATCHDOG DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/watchdog/bfin_wdt.c
 
 BLINKM RGB LED DRIVER
@@ -5431,7 +5429,7 @@ F:	drivers/media/tuners/fc2580*
 
 FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
 M:	Johannes Thumshirn <jth@kernel.org>
-L:	fcoe-devel@open-fcoe.org
+L:	linux-scsi@vger.kernel.org
 W:	www.Open-FCoE.org
 S:	Supported
 F:	drivers/scsi/libfc/
@@ -13117,6 +13115,7 @@ F:	drivers/dma/dw/
 
 SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
 M:	Jie Deng <jiedeng@synopsys.com>
+M:	Jose Abreu <Jose.Abreu@synopsys.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/synopsys/
@@ -13492,6 +13491,7 @@ M:	Mika Westerberg <mika.westerberg@linux.intel.com>
 M:	Yehezkel Bernat <yehezkel.bernat@intel.com>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
 S:	Maintained
+F:	Documentation/admin-guide/thunderbolt.rst
 F:	drivers/thunderbolt/
 F:	drivers/thunderbolt/
 F:	include/linux/thunderbolt.h
 F:	include/linux/thunderbolt.h
 
 

+ 4 - 1
Makefile

@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc6
 NAME = Fearless Coyote

 # *DOCUMENTATION*
@@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)

+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)


+ 2 - 2
arch/arm/boot/dts/vf610-zii-dev-rev-c.dts

@@ -121,7 +121,7 @@
 					switch0port10: port@10 {
 						reg = <10>;
 						label = "dsa";
-						phy-mode = "xgmii";
+						phy-mode = "xaui";
 						link = <&switch1port10>;
 					};
 				};
@@ -208,7 +208,7 @@
 					switch1port10: port@10 {
 						reg = <10>;
 						label = "dsa";
-						phy-mode = "xgmii";
+						phy-mode = "xaui";
 						link = <&switch0port10>;
 					};
 				};

+ 4 - 0
arch/arm/lib/csumpartialcopyuser.S

@@ -85,7 +85,11 @@
 		.pushsection .text.fixup,"ax"
 		.align	4
 9001:		mov	r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+		ldr	r5, [sp, #9*4]		@ *err_ptr
+#else
 		ldr	r5, [sp, #8*4]		@ *err_ptr
+#endif
 		str	r4, [r5]
 		ldmia	sp, {r1, r2}		@ retrieve dst, len
 		add	r2, r2, r1

+ 11 - 1
arch/arm64/Kconfig

@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065

 	  If unsure, say Y.

-
 config SOCIONEXT_SYNQUACER_PREITS
 	bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
 	default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
 	  a 128kB offset to be applied to the target address in this commands.

 	  If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_E1041
+	bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+	default y
+	help
+	  Falkor CPU may speculatively fetch instructions from an improper
+	  memory location when MMU translation is changed from SCTLR_ELn[M]=1
+	  to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
+
+	  If unsure, say Y.
+
 endmenu



+ 10 - 0
arch/arm64/include/asm/assembler.h

@@ -512,4 +512,14 @@ alternative_else_nop_endif
 #endif
 	.endm

+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+	.macro pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+	isb
+#endif
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */

+ 3 - 0
arch/arm64/include/asm/cpufeature.h

@@ -60,6 +60,9 @@ enum ftr_type {
 #define FTR_VISIBLE	true	/* Feature visible to the user space */
 #define FTR_HIDDEN	false	/* Feature is hidden from the user */

+#define FTR_VISIBLE_IF_IS_ENABLED(config)		\
+	(IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
 struct arm64_ftr_bits {
 	bool		sign;	/* Value is signed ? */
 	bool		visible;

+ 2 - 0
arch/arm64/include/asm/cputype.h

@@ -91,6 +91,7 @@
 #define BRCM_CPU_PART_VULCAN		0x516

 #define QCOM_CPU_PART_FALKOR_V1		0x800
+#define QCOM_CPU_PART_FALKOR		0xC00

 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -99,6 +100,7 @@
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)

 #ifndef __ASSEMBLY__


+ 22 - 19
arch/arm64/include/asm/pgtable.h

@@ -42,6 +42,8 @@
 #include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>

 extern void __pte_error(const char *file, int line, unsigned long val);
 extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)

 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }

 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+	if (pte_write(pte))
+		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }

 static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 	}
 }

-struct mm_struct;
-struct vm_area_struct;
-
 extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);

 /*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * hardware updates of the pte (ptep_set_access_flags safely changes
 	 * valid ptes without going through an invalid entry).
 	 */
-	if (pte_valid(*ptep) && pte_valid(pte)) {
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+	   (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
 		VM_WARN_ONCE(!pte_young(pte),
 			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
 			     __func__, pte_val(*ptep), pte_val(pte));
@@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */

 /*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
  */
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
 	pte_t old_pte, pte;

-	/*
-	 * ptep_set_wrprotect() is only called on CoW mappings which are
-	 * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
-	 * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
-	 * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
-	 * protection_map[]. There is no race with the hardware update of the
-	 * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
-	 * is set.
-	 */
-	VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
-		     "%s: potential race with hardware DBM", __func__);
 	pte = READ_ONCE(*ptep);
 	do {
 		old_pte = pte;
+		/*
+		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+		 * clear), set the PTE_DIRTY bit.
+		 */
+		if (pte_hw_dirty(pte))
+			pte = pte_mkdirty(pte);
 		pte = pte_wrprotect(pte);
 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
 					       pte_val(old_pte), pte_val(pte));

+ 1 - 0
arch/arm64/kernel/cpu-reset.S

@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
 	mrs	x12, sctlr_el1
 	ldr	x13, =SCTLR_ELx_FLAGS
 	bic	x12, x12, x13
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x12
 	isb


+ 2 - 1
arch/arm64/kernel/cpufeature.c

@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };

 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),

+ 2 - 0
arch/arm64/kernel/efi-entry.S

@@ -96,6 +96,7 @@ ENTRY(entry)
 	mrs	x0, sctlr_el2
 	bic	x0, x0, #1 << 0	// clear SCTLR.M
 	bic	x0, x0, #1 << 2	// clear SCTLR.C
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x0
 	isb
 	b	2f
@@ -103,6 +104,7 @@ ENTRY(entry)
 	mrs	x0, sctlr_el1
 	bic	x0, x0, #1 << 0	// clear SCTLR.M
 	bic	x0, x0, #1 << 2	// clear SCTLR.C
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x0
 	isb
 2:

+ 1 - 1
arch/arm64/kernel/fpsimd.c

@@ -1043,7 +1043,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)

 	local_bh_disable();

-	current->thread.fpsimd_state = *state;
+	current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);


+ 1 - 0
arch/arm64/kernel/head.S

@@ -750,6 +750,7 @@ __primary_switch:
 	 * to take into account by discarding the current kernel mapping and
 	 * creating a new one.
 	 */
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x20			// disable the MMU
 	isb
 	bl	__create_page_tables		// recreate kernel mapping

+ 1 - 1
arch/arm64/kernel/hw_breakpoint.c

@@ -28,6 +28,7 @@
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/smp.h>
+#include <linux/uaccess.h>

 #include <asm/compat.h>
 #include <asm/current.h>
@@ -36,7 +37,6 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
-#include <asm/uaccess.h>

 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);

+ 1 - 0
arch/arm64/kernel/relocate_kernel.S

@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
 	mrs	x0, sctlr_el2
 	ldr	x1, =SCTLR_ELx_FLAGS
 	bic	x0, x0, x1
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x0
 	isb
 1:

+ 1 - 0
arch/arm64/kvm/hyp-init.S

@@ -151,6 +151,7 @@ reset:
 	mrs	x5, sctlr_el2
 	ldr	x6, =SCTLR_ELx_FLAGS
 	bic	x5, x5, x6		// Clear SCTL_M and etc
+	pre_disable_mmu_workaround
 	msr	sctlr_el2, x5
 	isb


+ 3 - 0
arch/arm64/kvm/hyp/debug-sr.c

@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
 	u64 reg;

+	/* Clear pmscr in case of early return */
+	*pmscr_el1 = 0;
+
 	/* SPE present on this CPU? */
 	if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
 						  ID_AA64DFR0_PMSVER_SHIFT))

+ 1 - 1
arch/arm64/mm/dump.c

@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
 		.check_wx = true,
 	};

-	walk_pgd(&st, &init_mm, 0);
+	walk_pgd(&st, &init_mm, VA_START);
 	note_page(&st, 0, 0, 0);
 	if (st.wx_pages || st.uxn_pages)
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",

+ 2 - 3
arch/arm64/mm/fault.c

@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
 	struct siginfo info;
 	const struct fault_info *inf;
-	int ret = 0;

 	inf = esr_to_fault_info(esr);
 	pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 		if (interrupts_enabled(regs))
 			nmi_enter();

-		ret = ghes_notify_sea();
+		ghes_notify_sea();

 		if (interrupts_enabled(regs))
 			nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 		info.si_addr  = (void __user *)addr;
 	arm64_notify_die("", regs, &info, esr);

-	return ret;
+	return 0;
 }

 static const struct fault_info fault_info[] = {

+ 2 - 1
arch/arm64/mm/init.c

@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)

 	reserve_elfcorehdr();

+	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+
 	dma_contiguous_reserve(arm64_dma_phys_limit);

 	memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
 	sparse_init();
 	zone_sizes_init(min, max);

-	high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
 	memblock_dump_all();
 }


+ 2 - 2
arch/parisc/boot/compressed/misc.c

@@ -123,8 +123,8 @@ int puts(const char *s)
 	while ((nuline = strchr(s, '\n')) != NULL) {
 		if (nuline != s)
 			pdc_iodc_print(s, nuline - s);
-			pdc_iodc_print("\r\n", 2);
-			s = nuline + 1;
+		pdc_iodc_print("\r\n", 2);
+		s = nuline + 1;
 	}
 	if (*s != '\0')
 		pdc_iodc_print(s, strlen(s));

+ 5 - 0
arch/parisc/include/asm/thread_info.h

@@ -35,7 +35,12 @@ struct thread_info {

 /* thread information allocation */

+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER	2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER	3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)

+ 9 - 3
arch/parisc/kernel/entry.S

@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
 	STREG   %r19,PT_SR7(%r16)

 intr_return:
-	/* NOTE: Need to enable interrupts incase we schedule. */
-	ssm     PSW_SM_I, %r0
-
 	/* check for reschedule */
 	mfctl   %cr30,%r1
 	LDREG   TI_FLAGS(%r1),%r19	/* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
 	LDREG	PT_IASQ1(%r16), %r20
 	cmpib,COND(=),n 0,%r20,intr_restore /* backward */

+	/* NOTE: We need to enable interrupts if we have to deliver
+	 * signals. We used to do this earlier but it caused kernel
+	 * stack overflows. */
+	ssm     PSW_SM_I, %r0
+
 	copy	%r0, %r25			/* long in_syscall = 0 */
 #ifdef CONFIG_64BIT
 	ldo	-16(%r30),%r29			/* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
 	cmpib,COND(=)	0, %r20, intr_do_preempt
 	nop

+	/* NOTE: We need to enable interrupts if we schedule.  We used
+	 * to do this earlier but it caused kernel stack overflows. */
+	ssm     PSW_SM_I, %r0
+
 #ifdef CONFIG_64BIT
 	ldo	-16(%r30),%r29		/* Reference param save area */
 #endif

+ 1 - 0
arch/parisc/kernel/hpmc.S

@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)


 	__INITRODATA
+	.align 4
 	.export os_hpmc_size
 os_hpmc_size:
 	.word .os_hpmc_end-.os_hpmc

+ 0 - 1
arch/parisc/kernel/unwind.c

@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
-#include <linux/sched.h>

 #include <linux/uaccess.h>
 #include <asm/assembly.h>

+ 0 - 2
arch/parisc/lib/delay.c

@@ -16,9 +16,7 @@
 #include <linux/preempt.h>
 #include <linux/init.h>

-#include <asm/processor.h>
 #include <asm/delay.h>
-
 #include <asm/special_insns.h>    /* for mfctl() */
 #include <asm/processor.h> /* for boot_cpu_data */


+ 3 - 2
arch/powerpc/include/asm/mmu_context.h

@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }

-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+				struct mm_struct *mm)
 {
+	return 0;
 }

 #ifndef CONFIG_PPC_BOOK3S_64

+ 1 - 1
arch/powerpc/kernel/process.c

@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)

 	printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
 	       regs->nip, regs->link, regs->ctr);
-	printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+	printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
 	       regs, regs->trap, print_tainted(), init_utsname()->release);
 	printk("MSR:  "REG" ", regs->msr);
 	print_msr_bits(regs->msr);

+ 4 - 3
arch/powerpc/kvm/book3s_xive.c

@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)

 	/* Return the per-cpu state for state saving/migration */
 	return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
-	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+	       (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
 }

 int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)

 	/*
 	 * Restore P and Q. If the interrupt was pending, we
-	 * force both P and Q, which will trigger a resend.
+	 * force Q and !P, which will trigger a resend.
 	 *
 	 * That means that a guest that had both an interrupt
 	 * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 	 * is perfectly fine as coalescing interrupts that haven't
 	 * been presented yet is always allowed.
 	 */
-	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+	if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
 		state->old_p = true;
 	if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
 		state->old_q = true;

+ 4 - 2
arch/powerpc/net/bpf_jit_comp64.c

@@ -763,7 +763,8 @@ emit_clear:
 			func = (u8 *) __bpf_call_base + imm;

 			/* Save skb pointer if we need to re-cache skb data */
-			if (bpf_helper_changes_pkt_data(func))
+			if ((ctx->seen & SEEN_SKB) &&
+			    bpf_helper_changes_pkt_data(func))
 				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));

 			bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
 			PPC_MR(b2p[BPF_REG_0], 3);

 			/* refresh skb cache */
-			if (bpf_helper_changes_pkt_data(func)) {
+			if ((ctx->seen & SEEN_SKB) &&
+			    bpf_helper_changes_pkt_data(func)) {
 				/* reload skb pointer to r3 */
 				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
 				bpf_jit_emit_skb_loads(image, ctx);

+ 6 - 2
arch/powerpc/perf/core-book3s.c

@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
 	int ret;
 	__u64 target;

-	if (is_kernel_addr(addr))
-		return branch_target((unsigned int *)addr);
+	if (is_kernel_addr(addr)) {
+		if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+			return 0;
+
+		return branch_target(&instr);
+	}

 	/* Userspace: need copy instruction here then translate it */
 	pagefault_disable();

+ 16 - 1
arch/powerpc/perf/imc-pmu.c

@@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
 	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
 		return 0;

+	/*
+	 * Check whether nest_imc is registered. We could end up here if the
+	 * cpuhotplug callback registration fails. i.e, callback invokes the
+	 * offline path for all successfully registered nodes. At this stage,
+	 * nest_imc pmu will not be registered and we should return here.
+	 *
+	 * We return with a zero since this is not an offline failure. And
+	 * cpuhp_setup_state() returns the actual failure reason to the caller,
+	 * which in turn will call the cleanup routine.
+	 */
+	if (!nest_pmus)
+		return 0;
+
 	/*
 	 * Now that this cpu is one of the designated,
 	 * find a next cpu a) which is online and b) in same chip.
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		if (nest_pmus == 1) {
 			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
 			kfree(nest_imc_refc);
+			kfree(per_nest_pmu_arr);
 		}

 		if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
 	kfree(pmu_ptr);
-	kfree(per_nest_pmu_arr);
 	return;
 }

@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 			ret = nest_pmu_cpumask_init();
 			if (ret) {
 				mutex_unlock(&nest_init_lock);
+				kfree(nest_imc_refc);
+				kfree(per_nest_pmu_arr);
 				goto err_free;
 			}
 		}

+ 3 - 1
arch/powerpc/sysdev/fsl_msi.c

@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
 }

 static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;

 static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
 			       int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
 		dev_err(&dev->dev, "No memory for MSI cascade data\n");
 		return -ENOMEM;
 	}
-	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
+	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+			      &fsl_msi_irq_request_class);
 	cascade_data->index = offset;
 	cascade_data->msi_data = msi;
 	cascade_data->virq = virt_msir;

+ 19 - 0
arch/riscv/include/asm/barrier.h

@@ -38,6 +38,25 @@
 #define smp_rmb()	RISCV_FENCE(r,r)
 #define smp_wmb()	RISCV_FENCE(w,w)

+/*
+ * This is a very specific barrier: it's currently only used in two places in
+ * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
+ * orderings it guarantees, but the "critical section is RCsc" guarantee
+ * mandates a barrier on RISC-V.  The sequence looks like:
+ *
+ *    lr.aq lock
+ *    sc    lock <= LOCKED
+ *    smp_mb__after_spinlock()
+ *    // critical section
+ *    lr    lock
+ *    sc.rl lock <= UNLOCKED
+ *
+ * The AQ/RL pair provides a RCpc critical section, but there's not really any
+ * way we can take advantage of that here because the ordering is only enforced
+ * on that one lock.  Thus, we're just doing a full fence.
+ */
+#define smp_mb__after_spinlock()	RISCV_FENCE(rw,rw)
+
 #include <asm-generic/barrier.h>

 #endif /* __ASSEMBLY__ */

+ 0 - 11
arch/riscv/kernel/setup.c

@@ -38,10 +38,6 @@
 #include <asm/tlbflush.h>
 #include <asm/thread_info.h>

-#ifdef CONFIG_HVC_RISCV_SBI
-#include <asm/hvc_riscv_sbi.h>
-#endif
-
 #ifdef CONFIG_DUMMY_CONSOLE
 struct screen_info screen_info = {
 	.orig_video_lines	= 30,
@@ -212,13 +208,6 @@ static void __init setup_bootmem(void)

 void __init setup_arch(char **cmdline_p)
 {
-#if defined(CONFIG_HVC_RISCV_SBI)
-	if (likely(early_console == NULL)) {
-		early_console = &riscv_sbi_early_console_dev;
-		register_console(early_console);
-	}
-#endif
-
 #ifdef CONFIG_CMDLINE_BOOL
 #ifdef CONFIG_CMDLINE_OVERRIDE
 	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);

+ 1 - 1
arch/riscv/kernel/sys_riscv.c

@@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
 	bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;

 	/* Check the reserved flags. */
-	if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+	if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
 		return -EINVAL;

 	flush_icache_mm(mm, local);

+ 0 - 6
arch/s390/include/asm/pgtable.h

@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
 	return pud;
 }

-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
-	return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
-}
-
 static inline pud_t pud_mkclean(pud_t pud)
 {
 	if (pud_large(pud)) {

+ 1 - 0
arch/s390/kernel/compat_linux.c

@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
 		return retval;
 	}

+	groups_sort(group_info);
 	retval = set_current_groups(group_info);
 	put_group_info(group_info);


+ 5 - 6
arch/s390/net/bpf_jit_comp.c

@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL	8	/* code uses literals */
 #define SEEN_FUNC	16	/* calls C functions */
 #define SEEN_TAIL_CALL	32	/* code uses tail calls */
-#define SEEN_SKB_CHANGE	64	/* code changes skb data */
-#define SEEN_REG_AX	128	/* code uses constant blinding */
+#define SEEN_REG_AX	64	/* code uses constant blinding */
 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)

 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
 	}
-	if (jit->seen & SEEN_SKB)
+	if (jit->seen & SEEN_SKB) {
 		emit_load_skb_data_hlen(jit);
-	if (jit->seen & SEEN_SKB_CHANGE)
 		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
 		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
 			      STK_OFF_SKBP);
+	}
 }

 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		EMIT2(0x0d00, REG_14, REG_W1);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
-		if (bpf_helper_changes_pkt_data((void *)func)) {
-			jit->seen |= SEEN_SKB_CHANGE;
+		if ((jit->seen & SEEN_SKB) &&
+		    bpf_helper_changes_pkt_data((void *)func)) {
 			/* lg %b1,ST_OFF_SKBP(%r15) */
 			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
 				      REG_15, STK_OFF_SKBP);

+ 2 - 2
arch/sparc/lib/hweight.S

@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
 	.previous

 ENTRY(__arch_hweight64)
-	sethi	%hi(__sw_hweight16), %g1
-	jmpl	%g1 + %lo(__sw_hweight16), %g0
+	sethi	%hi(__sw_hweight64), %g1
+	jmpl	%g1 + %lo(__sw_hweight64), %g0
 	 nop
 ENDPROC(__arch_hweight64)
 EXPORT_SYMBOL(__arch_hweight64)

+ 1 - 1
arch/sparc/mm/fault_32.c

@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
 	if (!printk_ratelimit())
 		return;

-	printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 	       tsk->comm, task_pid_nr(tsk), address,
 	       (void *)regs->pc, (void *)regs->u_regs[UREG_I7],

+ 1 - 1
arch/sparc/mm/fault_64.c

@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
 	if (!printk_ratelimit())
 		return;

-	printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 	       tsk->comm, task_pid_nr(tsk), address,
 	       (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],

+ 2 - 2
arch/sparc/mm/gup.c

@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	if (!(pmd_val(pmd) & _PAGE_VALID))
 		return 0;

-	if (!pmd_access_permitted(pmd, write))
+	if (write && !pmd_write(pmd))
 		return 0;

 	refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
 	if (!(pud_val(pud) & _PAGE_VALID))
 		return 0;

-	if (!pud_access_permitted(pud, write))
+	if (write && !pud_write(pud))
 		return 0;

 	refs = 0;

+ 4 - 2
arch/sparc/net/bpf_jit_comp_64.c

@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		u8 *func = ((u8 *)__bpf_call_base) + imm;

 		ctx->saw_call = true;
+		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+			emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);

 		emit_call((u32 *)func, ctx);
 		emit_nop(ctx);

 		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);

-		if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
-			load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+			load_skb_regs(ctx, L7);
 		break;
 	}


+ 1 - 0
arch/um/include/asm/Kbuild

@@ -1,4 +1,5 @@
 generic-y += barrier.h
+generic-y += bpf_perf_event.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += current.h

+ 2 - 1
arch/um/include/asm/mmu_context.h

@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	uml_setup_stubs(mm);
+	return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,

+ 1 - 1
arch/um/kernel/trap.c

@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
 	if (!printk_ratelimit())
 		return;

-	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 		tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
 		(void *)UPT_IP(regs), (void *)UPT_SP(regs),

+ 3 - 2
arch/unicore32/include/asm/mmu_context.h

@@ -81,9 +81,10 @@ do { \
 	} \
 } while (0)

-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+				struct mm_struct *mm)
 {
+	return 0;
 }

 static inline void arch_unmap(struct mm_struct *mm,

+ 2 - 1
arch/x86/Kconfig

@@ -926,7 +926,8 @@ config MAXSMP
 config NR_CPUS
 	int "Maximum number of CPUs" if SMP && !MAXSMP
 	range 2 8 if SMP && X86_32 && !X86_BIGSMP
-	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+	range 2 64 if SMP && X86_32 && X86_BIGSMP
+	range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
 	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
 	default "1" if !SMP
 	default "8192" if MAXSMP

+ 1 - 0
arch/x86/Kconfig.debug

@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
 config UNWINDER_GUESS
 	bool "Guess unwinder"
 	depends on EXPERT
+	depends on !STACKDEPOT
 	---help---
 	  This option enables the "guess" unwinder for unwinding kernel stack
 	  traces.  It scans the stack and reports every kernel text address it

+ 1 - 0
arch/x86/boot/compressed/Makefile

@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
 ifdef CONFIG_X86_64
 	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
 	vmlinux-objs-y += $(obj)/mem_encrypt.o
+	vmlinux-objs-y += $(obj)/pgtable_64.o
 endif

 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone

+ 12 - 4
arch/x86/boot/compressed/head_64.S

@@ -305,10 +305,18 @@ ENTRY(startup_64)
 	leaq	boot_stack_end(%rbx), %rsp

 #ifdef CONFIG_X86_5LEVEL
-	/* Check if 5-level paging has already enabled */
-	movq	%cr4, %rax
-	testl	$X86_CR4_LA57, %eax
-	jnz	lvl5
+	/*
+	 * Check if we need to enable 5-level paging.
+	 * RSI holds real mode data and need to be preserved across
+	 * a function call.
+	 */
+	pushq	%rsi
+	call	l5_paging_required
+	popq	%rsi
+
+	/* If l5_paging_required() returned zero, we're done here. */
+	cmpq	$0, %rax
+	je	lvl5

 	/*
 	 * At this point we are in long mode with 4-level paging enabled,

+ 16 - 0
arch/x86/boot/compressed/misc.c

@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
 	}
 }

+static bool l5_supported(void)
+{
+	/* Check if leaf 7 is supported. */
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
 #if CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
 			       unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	console_init();
 	debug_putstr("early console in extract_kernel\n");

+	if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+		error("This linux kernel as configured requires 5-level paging\n"
+			"This CPU does not support the required 'cr4.la57' feature\n"
+			"Unable to boot - please use a kernel appropriate for your CPU\n");
+	}
+
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;


+ 3 - 0
arch/x86/boot/compressed/pagetable.c

@@ -23,6 +23,9 @@
  */
 #undef CONFIG_AMD_MEM_ENCRYPT

+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
 #include "misc.h"

 /* These actually do the work of building the kernel identity maps. */

+ 28 - 0
arch/x86/boot/compressed/pgtable_64.c

@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+	/* Check if leaf 7 is supported. */
+
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return 0;
+
+	/* Check if 5-level paging has already been enabled. */
+	if (native_read_cr4() & X86_CR4_LA57)
+		return 0;
+
+	return 1;
+}

+ 19 - 13
arch/x86/boot/genimage.sh

@@ -44,9 +44,9 @@ FDINITRD=$6

 # Make sure the files actually exist
 verify "$FBZIMAGE"
-verify "$MTOOLSRC"

 genbzdisk() {
+	verify "$MTOOLSRC"
 	mformat a:
 	syslinux $FIMAGE
 	echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
 }

 genfdimage144() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
 	mformat v:
 	syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
 }

 genfdimage288() {
+	verify "$MTOOLSRC"
 	dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
 	mformat w:
 	syslinux $FIMAGE
@@ -78,39 +80,43 @@ genfdimage288() {
 	mcopy $FBZIMAGE w:linux
 }

-genisoimage() {
+geniso() {
 	tmp_dir=`dirname $FIMAGE`/isoimage
 	rm -rf $tmp_dir
 	mkdir $tmp_dir
-	for i in lib lib64 share end ; do
+	for i in lib lib64 share ; do
 		for j in syslinux ISOLINUX ; do
 			if [ -f /usr/$i/$j/isolinux.bin ] ; then
 				isolinux=/usr/$i/$j/isolinux.bin
-				cp $isolinux $tmp_dir
 			fi
 		done
 		for j in syslinux syslinux/modules/bios ; do
 			if [ -f /usr/$i/$j/ldlinux.c32 ]; then
 				ldlinux=/usr/$i/$j/ldlinux.c32
-				cp $ldlinux $tmp_dir
 			fi
 		done
 		if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
 			break
 		fi
-		if [ $i = end -a -z "$isolinux" ] ; then
-			echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
-			exit 1
-		fi
 	done
 	done
+	if [ -z "$isolinux" ] ; then
+		echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
+		exit 1
+	fi
+	if [ -z "$ldlinux" ] ; then
+		echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
+		exit 1
+	fi
+	cp $isolinux $tmp_dir
+	cp $ldlinux $tmp_dir
 	cp $FBZIMAGE $tmp_dir/linux
 	echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
 	if [ -f "$FDINITRD" ] ; then
 		cp "$FDINITRD" $tmp_dir/initrd.img
 	fi
-	mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
-		-c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
-		$tmp_dir
+	genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
+		-b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
+		-boot-info-table $tmp_dir
 	isohybrid $FIMAGE 2>/dev/null || true
 	rm -rf $tmp_dir
 }
@@ -119,6 +125,6 @@ case $1 in
 	bzdisk)     genbzdisk;;
 	fdimage144) genfdimage144;;
 	fdimage288) genfdimage288;;
-	isoimage)   genisoimage;;
+	isoimage)   geniso;;
 	*)          echo 'Unknown image format'; exit 1;
 esac

+ 0 - 7
arch/x86/crypto/salsa20_glue.c

@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,

 	salsa20_ivsetup(ctx, walk.iv);

-	if (likely(walk.nbytes == nbytes))
-	{
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, nbytes);
-		return blkcipher_walk_done(desc, &walk, 0);
-	}
-
 	while (walk.nbytes >= 64) {
 		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
 				      walk.dst.virt.addr,

+ 145 - 0
arch/x86/entry/calling.h

@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>

 /*

@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_SWITCH_PGTABLES_MASK	(1<<PAGE_SHIFT)
+#define PTI_SWITCH_MASK		(PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+
+.macro SET_NOFLUSH_BIT	reg:req
+	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+	/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+	andq    $(~PTI_SWITCH_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+	ADJUST_KERNEL_CR3 \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask   \
+	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * Test if the ASID needs a flush.
+	 */
+	movq	\scratch_reg, \scratch_reg2
+	andq	$(0x7FF), \scratch_reg		/* mask ASID */
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	/* Flush needed, clear the bit */
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	movq	\scratch_reg2, \scratch_reg
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	movq	\scratch_reg2, \scratch_reg
+	SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_\@:
+	/* Flip the PGD and ASID to the user version */
+	orq     $(PTI_SWITCH_MASK), \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK	scratch_reg:req
+	pushq	%rax
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+	popq	%rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+	movq	%cr3, \scratch_reg
+	movq	\scratch_reg, \save_reg
+	/*
+	 * Is the "switch mask" all zero?  That means that both of
+	 * these are zero:
+	 *
+	 *	1. The user/kernel PCID bit, and
+	 *	2. The user/kernel "bit" that points CR3 to the
+	 *	   bottom half of the 8k PGD
+	 *
+	 * That indicates a kernel CR3 value, not a user CR3.
+	 */
+	testq	$(PTI_SWITCH_MASK), \scratch_reg
+	jz	.Ldone_\@
+
+	ADJUST_KERNEL_CR3 \scratch_reg
+	movq	\scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * KERNEL pages can always resume with NOFLUSH as we do
+	 * explicit flushes.
+	 */
+	bt	$X86_CR3_PTI_SWITCH_BIT, \save_reg
+	jnc	.Lnoflush_\@
+
+	/*
+	 * Check if there's a pending flush for the user ASID we're
+	 * about to set.
+	 */
+	movq	\save_reg, \scratch_reg
+	andq	$(0x7FF), \scratch_reg
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+	/*
+	 * The CR3 write could be avoided when not changing its value,
+	 * but would require a CR3 read *and* a scratch register.
+	 */
+	movq	\save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */

 /*

+ 8 - 6
arch/x86/entry/entry_32.S

@@ -941,9 +941,10 @@ ENTRY(debug)
 	movl	%esp, %eax			# pt_regs pointer

 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Ldebug_from_sysenter_stack

 	TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
 	movl	%esp, %eax			# pt_regs pointer

 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Lnmi_from_sysenter_stack

 	/* Not on SYSENTER stack. */

+ 205 - 32
arch/x86/entry/entry_64.S

@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
@@ -40,6 +39,8 @@
 #include <asm/frame.h>
 #include <linux/err.h>

+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"

@@ -140,6 +141,67 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */

+	.pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH	CPU_ENTRY_AREA_entry_stack + \
+			SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+	UNWIND_HINT_EMPTY
+	swapgs
+
+	/* Stash the user RSP. */
+	movq	%rsp, RSP_SCRATCH
+
+	/* Note: using %rsp as a scratch reg. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Load the top of the task stack into RSP */
+	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+	/* Start building the simulated IRET frame. */
+	pushq	$__USER_DS			/* pt_regs->ss */
+	pushq	RSP_SCRATCH			/* pt_regs->sp */
+	pushq	%r11				/* pt_regs->flags */
+	pushq	$__USER_CS			/* pt_regs->cs */
+	pushq	%rcx				/* pt_regs->ip */
+
+	/*
+	 * x86 lacks a near absolute jump, and we can't jump to the real
+	 * entry text with a relative jump.  We could push the target
+	 * address and then use retq, but this destroys the pipeline on
+	 * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+	 * spill RDI and restore it in a second-stage trampoline.
+	 */
+	pushq	%rdi
+	movq	$entry_SYSCALL_64_stage2, %rdi
+	jmp	*%rdi
+END(entry_SYSCALL_64_trampoline)
+
+	.popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+	UNWIND_HINT_EMPTY
+	popq	%rdi
+	jmp	entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
 ENTRY(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	UNWIND_HINT_EMPTY
 	/*
 	/*
@@ -149,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
 	 */
 	 */
 
 
 	swapgs
 	swapgs
+	/*
+	 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+	 * is not required to switch CR3.
+	 */
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 
@@ -330,8 +396,25 @@ syscall_return_via_sysret:
 	popq	%rsi	/* skip rcx */
 	popq	%rsi	/* skip rcx */
 	popq	%rdx
 	popq	%rdx
 	popq	%rsi
 	popq	%rsi
+
+	/*
+	 * Now all regs are restored except RSP and RDI.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	pushq	RSP-RDI(%rdi)	/* RSP */
+	pushq	(%rdi)		/* RDI */
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
 	popq	%rdi
 	popq	%rdi
-	movq	RSP-ORIG_RAX(%rsp), %rsp
+	popq	%rsp
 	USERGS_SYSRET64
 	USERGS_SYSRET64
 END(entry_SYSCALL_64)
 END(entry_SYSCALL_64)
 
 
@@ -466,12 +549,13 @@ END(irq_entries_start)
 
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
 #ifdef CONFIG_DEBUG_ENTRY
-	pushfq
-	testl $X86_EFLAGS_IF, (%rsp)
+	pushq %rax
+	SAVE_FLAGS(CLBR_RAX)
+	testl $X86_EFLAGS_IF, %eax
 	jz .Lokay_\@
 	jz .Lokay_\@
 	ud2
 	ud2
 .Lokay_\@:
 .Lokay_\@:
-	addq $8, %rsp
+	popq %rax
 #endif
 #endif
 .endm
 .endm
 
 
@@ -563,6 +647,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
 	.macro interrupt func
 	cld
 	cld
+
+	testb	$3, CS-ORIG_RAX(%rsp)
+	jz	1f
+	SWAPGS
+	call	switch_to_thread_stack
+1:
+
 	ALLOC_PT_GPREGS_ON_STACK
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
 	SAVE_EXTRA_REGS
@@ -572,12 +663,8 @@ END(irq_entries_start)
 	jz	1f
 	jz	1f
 
 
 	/*
 	/*
-	 * IRQ from user mode.  Switch to kernel gsbase and inform context
-	 * tracking that we're in kernel mode.
-	 */
-	SWAPGS
-
-	/*
+	 * IRQ from user mode.
+	 *
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
 	 * we fix gsbase, and we should do it before enter_from_user_mode
 	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
 	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +717,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	ud2
 	ud2
 1:
 1:
 #endif
 #endif
-	SWAPGS
 	POP_EXTRA_REGS
 	POP_EXTRA_REGS
-	POP_C_REGS
-	addq	$8, %rsp	/* skip regs->orig_ax */
+	popq	%r11
+	popq	%r10
+	popq	%r9
+	popq	%r8
+	popq	%rax
+	popq	%rcx
+	popq	%rdx
+	popq	%rsi
+
+	/*
+	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	/* Copy the IRET frame to the trampoline stack. */
+	pushq	6*8(%rdi)	/* SS */
+	pushq	5*8(%rdi)	/* RSP */
+	pushq	4*8(%rdi)	/* EFLAGS */
+	pushq	3*8(%rdi)	/* CS */
+	pushq	2*8(%rdi)	/* RIP */
+
+	/* Push user RDI on the trampoline stack. */
+	pushq	(%rdi)
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+	/* Restore RDI. */
+	popq	%rdi
+	SWAPGS
 	INTERRUPT_RETURN
 	INTERRUPT_RETURN
 
 
 
 
@@ -713,7 +833,9 @@ native_irq_return_ldt:
 	 */
 	 */
 
 
 	pushq	%rdi				/* Stash user RDI */
 	pushq	%rdi				/* Stash user RDI */
-	SWAPGS
+	SWAPGS					/* to kernel GS */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -729,7 +851,6 @@ native_irq_return_ldt:
 	/* Now RAX == RSP. */
 	/* Now RAX == RSP. */
 
 
 	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
 	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
-	popq	%rdi				/* Restore user RDI */
 
 
 	/*
 	/*
 	 * espfix_stack[31:16] == 0.  The page tables are set up such that
 	 * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -740,7 +861,11 @@ native_irq_return_ldt:
 	 * still points to an RO alias of the ESPFIX stack.
 	 * still points to an RO alias of the ESPFIX stack.
 	 */
 	 */
 	orq	PER_CPU_VAR(espfix_stack), %rax
 	orq	PER_CPU_VAR(espfix_stack), %rax
-	SWAPGS
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+	SWAPGS					/* to user GS */
+	popq	%rdi				/* Restore user RDI */
+
 	movq	%rax, %rsp
 	movq	%rax, %rsp
 	UNWIND_HINT_IRET_REGS offset=8
 	UNWIND_HINT_IRET_REGS offset=8
 
 
@@ -829,7 +954,35 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 /*
 /*
  * Exception entry points.
  * Exception entry points.
  */
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+	UNWIND_HINT_FUNC
+
+	pushq	%rdi
+	/* Need to switch before accessing the thread stack. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+	pushq	7*8(%rdi)		/* regs->ss */
+	pushq	6*8(%rdi)		/* regs->rsp */
+	pushq	5*8(%rdi)		/* regs->eflags */
+	pushq	4*8(%rdi)		/* regs->cs */
+	pushq	3*8(%rdi)		/* regs->ip */
+	pushq	2*8(%rdi)		/* regs->orig_ax */
+	pushq	8(%rdi)			/* return address */
+	UNWIND_HINT_FUNC
+
+	movq	(%rdi), %rdi
+	ret
+END(switch_to_thread_stack)
 
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
 ENTRY(\sym)
@@ -848,11 +1001,12 @@ ENTRY(\sym)
 
 
 	ALLOC_PT_GPREGS_ON_STACK
 	ALLOC_PT_GPREGS_ON_STACK
 
 
-	.if \paranoid
-	.if \paranoid == 1
+	.if \paranoid < 2
 	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
 	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
-	jnz	1f
+	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
 	.endif
+
+	.if \paranoid
 	call	paranoid_entry
 	call	paranoid_entry
 	.else
 	.else
 	call	error_entry
 	call	error_entry
@@ -894,20 +1048,15 @@ ENTRY(\sym)
 	jmp	error_exit
 	jmp	error_exit
 	.endif
 	.endif
 
 
-	.if \paranoid == 1
+	.if \paranoid < 2
 	/*
 	/*
-	 * Paranoid entry from userspace.  Switch stacks and treat it
+	 * Entry from userspace.  Switch stacks and treat it
 	 * as a normal entry.  This means that paranoid handlers
 	 * as a normal entry.  This means that paranoid handlers
 	 * run in real process context if user_mode(regs).
 	 * run in real process context if user_mode(regs).
 	 */
 	 */
-1:
+.Lfrom_usermode_switch_stack_\@:
 	call	error_entry
 	call	error_entry
 
 
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-	call	sync_regs
-	movq	%rax, %rsp			/* switch stack */
-
 	movq	%rsp, %rdi			/* pt_regs pointer */
 	movq	%rsp, %rdi			/* pt_regs pointer */
 
 
 	.if \has_error_code
 	.if \has_error_code
@@ -1119,7 +1268,11 @@ ENTRY(paranoid_entry)
 	js	1f				/* negative -> in kernel */
 	js	1f				/* negative -> in kernel */
 	SWAPGS
 	SWAPGS
 	xorl	%ebx, %ebx
 	xorl	%ebx, %ebx
-1:	ret
+
+1:
+	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+	ret
 END(paranoid_entry)
 END(paranoid_entry)
 
 
 /*
 /*
@@ -1141,6 +1294,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
 	TRACE_IRQS_IRETQ
+	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
 .Lparanoid_exit_no_swapgs:
@@ -1168,8 +1322,18 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
 	 * from user mode due to an IRET fault.
 	 */
 	 */
 	SWAPGS
 	SWAPGS
+	/* We have user CR3.  Change to kernel CR3. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 
 .Lerror_entry_from_usermode_after_swapgs:
 .Lerror_entry_from_usermode_after_swapgs:
+	/* Put us onto the real thread stack. */
+	popq	%r12				/* save return addr in %12 */
+	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
+	call	sync_regs
+	movq	%rax, %rsp			/* switch stack */
+	ENCODE_FRAME_POINTER
+	pushq	%r12
+
 	/*
 	/*
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
 	 * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1370,7 @@ ENTRY(error_entry)
 	 * .Lgs_change's error handler with kernel gsbase.
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	 */
 	SWAPGS
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 	jmp .Lerror_entry_done
 
 
 .Lbstep_iret:
 .Lbstep_iret:
@@ -1215,10 +1380,11 @@ ENTRY(error_entry)
 
 
 .Lerror_bad_iret:
 .Lerror_bad_iret:
 	/*
 	/*
-	 * We came from an IRET to user mode, so we have user gsbase.
-	 * Switch to kernel gsbase:
+	 * We came from an IRET to user mode, so we have user
+	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
 	 */
 	 */
 	SWAPGS
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 
 	/*
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1416,10 @@ END(error_exit)
 /*
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
  * so we can use real assembly here.
+ *
+ * Registers:
+ *	%r14: Used to save/restore the CR3 of the interrupted context
+ *	      when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
  */
 ENTRY(nmi)
 ENTRY(nmi)
 	UNWIND_HINT_IRET_REGS
 	UNWIND_HINT_IRET_REGS
@@ -1313,6 +1483,7 @@ ENTRY(nmi)
 
 
 	swapgs
 	swapgs
 	cld
 	cld
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1736,8 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	movq	$-1, %rsi
 	call	do_nmi
 	call	do_nmi
 
 
+	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
 	testl	%ebx, %ebx			/* swapgs needed? */
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 	jnz	nmi_restore
 nmi_swapgs:
 nmi_swapgs:

+ 28 - 3
arch/x86/entry/entry_64_compat.S

@@ -48,7 +48,11 @@
  */
  */
 ENTRY(entry_SYSENTER_compat)
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	SWAPGS
+
+	/* We are about to clobber %rsp anyway, clobbering here is OK */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 
 	/*
 	/*
@@ -215,6 +219,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	pushq   $0			/* pt_regs->r14 = 0 */
 	pushq   $0			/* pt_regs->r14 = 0 */
 	pushq   $0			/* pt_regs->r15 = 0 */
 	pushq   $0			/* pt_regs->r15 = 0 */
 
 
+	/*
+	 * We just saved %rdi so it is safe to clobber.  It is not
+	 * preserved during the C calls inside TRACE_IRQS_OFF anyway.
+	 */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
 	/*
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
 	 * User mode is traced as though IRQs are on, and SYSENTER
 	 * turned them off.
 	 * turned them off.
@@ -256,10 +266,22 @@ sysret32_from_system_call:
 	 * when the system call started, which is already known to user
 	 * when the system call started, which is already known to user
 	 * code.  We zero R8-R10 to avoid info leaks.
 	 * code.  We zero R8-R10 to avoid info leaks.
          */
          */
+	movq	RSP-ORIG_RAX(%rsp), %rsp
+
+	/*
+	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+	 * on the process stack which is not mapped to userspace and
+	 * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+	 * switch until after after the last reference to the process
+	 * stack.
+	 *
+	 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+	 */
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
 	xorq	%r8, %r8
 	xorq	%r8, %r8
 	xorq	%r9, %r9
 	xorq	%r9, %r9
 	xorq	%r10, %r10
 	xorq	%r10, %r10
-	movq	RSP-ORIG_RAX(%rsp), %rsp
 	swapgs
 	swapgs
 	sysretl
 	sysretl
 END(entry_SYSCALL_compat)
 END(entry_SYSCALL_compat)
@@ -306,8 +328,11 @@ ENTRY(entry_INT80_compat)
 	 */
 	 */
 	movl	%eax, %eax
 	movl	%eax, %eax
 
 
-	/* Construct struct pt_regs on stack (iret frame is already on stack) */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rax			/* pt_regs->orig_ax */
+
+	/* switch to thread stack expects orig_ax to be pushed */
+	call	switch_to_thread_stack
+
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rdx			/* pt_regs->dx */

+ 37 - 1
arch/x86/entry/vsyscall/vsyscall_64.c

@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 
 #define CREATE_TRACE_POINTS
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 
 	WARN_ON_ONCE(address != regs->ip);
 	WARN_ON_ONCE(address != regs->ip);
 
 
+	/* This should be unreachable in NATIVE mode. */
+	if (WARN_ON(vsyscall_mode == NATIVE))
+		return false;
+
 	if (vsyscall_mode == NONE) {
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
 				  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
 	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 }
 
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+	p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+	p4d->p4d |= _PAGE_USER;
+#endif
+	pud = pud_offset(p4d, VSYSCALL_ADDR);
+	set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+	pmd = pmd_offset(pud, VSYSCALL_ADDR);
+	set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 void __init map_vsyscall(void)
 {
 {
 	extern char __vsyscall_page;
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
 
-	if (vsyscall_mode != NONE)
+	if (vsyscall_mode != NONE) {
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
 			     vsyscall_mode == NATIVE
 			     vsyscall_mode == NATIVE
 			     ? PAGE_KERNEL_VSYSCALL
 			     ? PAGE_KERNEL_VSYSCALL
 			     : PAGE_KERNEL_VVAR);
 			     : PAGE_KERNEL_VVAR);
+		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+	}
 
 
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
 		     (unsigned long)VSYSCALL_ADDR);

+ 4 - 1
arch/x86/events/intel/core.c

@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
 
 
 __init int intel_pmu_init(void)
 __init int intel_pmu_init(void)
 {
 {
+	struct attribute **extra_attr = NULL;
+	struct attribute **to_free = NULL;
 	union cpuid10_edx edx;
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
 	union cpuid10_eax eax;
 	union cpuid10_ebx ebx;
 	union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
 	unsigned int unused;
 	unsigned int unused;
 	struct extra_reg *er;
 	struct extra_reg *er;
 	int version, i;
 	int version, i;
-	struct attribute **extra_attr = NULL;
 	char *name;
 	char *name;
 
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
 			hsw_format_attr : nhm_format_attr;
 		extra_attr = merge_attr(extra_attr, skl_format_attr);
 		extra_attr = merge_attr(extra_attr, skl_format_attr);
+		to_free = extra_attr;
 		x86_pmu.cpu_events = get_hsw_events_attrs();
 		x86_pmu.cpu_events = get_hsw_events_attrs();
 		intel_pmu_pebs_data_source_skl(
 		intel_pmu_pebs_data_source_skl(
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
 		pr_cont("full-width counters, ");
 		pr_cont("full-width counters, ");
 	}
 	}
 
 
+	kfree(to_free);
 	return 0;
 	return 0;
 }
 }
 
 

+ 83 - 47
arch/x86/events/intel/ds.c

@@ -3,16 +3,18 @@
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
 
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 #include <asm/insn.h>
 
 
 #include "../perf_event.h"
 #include "../perf_event.h"
 
 
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 #define BTS_RECORD_SIZE		24
 
 
-#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 
 /*
 /*
@@ -279,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
 
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 static DEFINE_PER_CPU(void *, insn_buffer);
 
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	size_t msz = 0;
+
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
 	int node = cpu_to_node(cpu);
 	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 
 	if (!x86_pmu.pebs)
 	if (!x86_pmu.pebs)
 		return 0;
 		return 0;
 
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 	if (unlikely(!buffer))
 		return -ENOMEM;
 		return -ENOMEM;
 
 
@@ -300,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 			return -ENOMEM;
 		}
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
 	}
-
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 	return 0;
 }
 }
 
 
 static void release_pebs_buffer(int cpu)
 static void release_pebs_buffer(int cpu)
 {
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 
 	if (!ds || !x86_pmu.pebs)
 	if (!ds || !x86_pmu.pebs)
 		return;
 		return;
@@ -326,73 +365,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 	per_cpu(insn_buffer, cpu) = NULL;
 
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 }
 
 
 static int alloc_bts_buffer(int cpu)
 static int alloc_bts_buffer(int cpu)
 {
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 
 	if (!x86_pmu.bts)
 	if (!x86_pmu.bts)
 		return 0;
 		return 0;
 
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 	return 0;
 }
 }
 
 
 static void release_bts_buffer(int cpu)
 static void release_bts_buffer(int cpu)
 {
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 
 	if (!ds || !x86_pmu.bts)
 	if (!ds || !x86_pmu.bts)
 		return;
 		return;
 
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 }
 
 
 static int alloc_ds_buffer(int cpu)
 static int alloc_ds_buffer(int cpu)
 {
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 	return 0;
 }
 }
 
 
 static void release_ds_buffer(int cpu)
 static void release_ds_buffer(int cpu)
 {
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 }
 
 
 void release_ds_buffers(void)
 void release_ds_buffers(void)

+ 4 - 19
arch/x86/events/perf_event.h

@@ -14,6 +14,8 @@
 
 
 #include <linux/perf_event.h>
 #include <linux/perf_event.h>
 
 
+#include <asm/intel_ds.h>
+
 /* To enable MSR tracing please use the generic trace points. */
 /* To enable MSR tracing please use the generic trace points. */
 
 
 /*
 /*
@@ -77,8 +79,6 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 };
 
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		8
 #define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 #define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 
 
 /*
 /*
@@ -95,23 +95,6 @@ struct amd_nb {
 	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
 	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
 	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 #define PEBS_REGS \
 #define PEBS_REGS \
 	(PERF_REG_X86_AX | \
 	(PERF_REG_X86_AX | \
 	 PERF_REG_X86_BX | \
 	 PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 * Intel DebugStore bits
 	 */
 	 */
 	struct debug_store	*ds;
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_pebs;
 	int			n_large_pebs;
 	int			n_large_pebs;

+ 2 - 0
arch/x86/include/asm/asm.h

@@ -136,6 +136,7 @@
 #endif
 #endif
 
 
 #ifndef __ASSEMBLY__
 #ifndef __ASSEMBLY__
+#ifndef __BPF__
 /*
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * This output constraint should be used for any inline asm which has a "call"
  * instruction.  Otherwise the asm may be inserted before the frame pointer
  * instruction.  Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
 register unsigned long current_stack_pointer asm(_ASM_SP);
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
 #endif
+#endif
 
 
 #endif /* _ASM_X86_ASM_H */
 #endif /* _ASM_X86_ASM_H */

+ 81 - 0
arch/x86/include/asm/cpu_entry_area.h

@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+#include <asm/intel_ds.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code.  Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+	char gdt[PAGE_SIZE];
+
+	/*
+	 * The GDT is just below entry_stack and thus serves (on x86_64) as
+	 * a a read-only guard page.
+	 */
+	struct entry_stack_page entry_stack_page;
+
+	/*
+	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+	 * we need task switches to work, and task switches write to the TSS.
+	 */
+	struct tss_struct tss;
+
+	char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Exception stacks used for IST entries.
+	 *
+	 * In the future, this should have a separate slot for each stack
+	 * with guard pages between them.
+	 */
+	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+	/*
+	 * Per CPU debug store for Intel performance monitoring. Wastes a
+	 * full page at the moment.
+	 */
+	struct debug_store cpu_debug_store;
+	/*
+	 * The actual PEBS/BTS buffers must be mapped to user space
+	 * Reserve enough fixmap PTEs.
+	 */
+	struct debug_store_buffers cpu_debug_buffers;
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE			\
+	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif

+ 2 - 0
arch/x86/include/asm/cpufeature.h

@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 } while (0)
 
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * Static testing of CPU features.  Used the same as boot_cpu_has().

+ 3 - 1
arch/x86/include/asm/cpufeatures.h

@@ -197,11 +197,12 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +341,6 @@
 #define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_INSECURE		X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
 
 
 #endif /* _ASM_X86_CPUFEATURES_H */
 #endif /* _ASM_X86_CPUFEATURES_H */

+ 5 - 9
arch/x86/include/asm/desc.h

@@ -7,6 +7,7 @@
 #include <asm/mmu.h>
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
 #include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
 
 
 #include <linux/smp.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
 #include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 
 	desc->type		= (info->read_exec_only ^ 1) << 1;
 	desc->type		= (info->read_exec_only ^ 1) << 1;
 	desc->type	       |= info->contents << 2;
 	desc->type	       |= info->contents << 2;
+	/* Set the ACCESS bit so it can be mapped RO */
+	desc->type	       |= 1;
 
 
 	desc->s			= 1;
 	desc->s			= 1;
 	desc->dpl		= 0x3;
 	desc->dpl		= 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
 	return this_cpu_ptr(&gdt_page)->gdt;
 	return this_cpu_ptr(&gdt_page)->gdt;
 }
 }
 
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-	return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
 {
-	unsigned int idx = get_cpu_gdt_ro_index(cpu);
-	return (struct desc_struct *)__fix_to_virt(idx);
+	return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 }
 
 
 /* Provide the current read-only GDT */
 /* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 #endif
 }
 }
 
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
 {
 	struct desc_struct *d = get_cpu_gdt_rw(cpu);
 	struct desc_struct *d = get_cpu_gdt_rw(cpu);
 	tss_desc tss;
 	tss_desc tss;

+ 7 - 1
arch/x86/include/asm/disabled-features.h

@@ -50,6 +50,12 @@
 # define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
 # define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
 #endif
 #endif
 
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI		0
+#else
+# define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
+#endif
+
 /*
 /*
  * Make sure to add features to the correct mask
  * Make sure to add features to the correct mask
  */
  */
@@ -60,7 +66,7 @@
 #define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
 #define DISABLED_MASK6	0
-#define DISABLED_MASK7	0
+#define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
 #define DISABLED_MASK8	0
 #define DISABLED_MASK9	(DISABLE_MPX)
 #define DISABLED_MASK9	(DISABLE_MPX)
 #define DISABLED_MASK10	0
 #define DISABLED_MASK10	0

+ 4 - 3
arch/x86/include/asm/espfix.h

@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ESPFIX_H
 #ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 
 
 #include <asm/percpu.h>
 #include <asm/percpu.h>
 
 
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 
 extern void init_espfix_bsp(void);
 extern void init_espfix_bsp(void);
 extern void init_espfix_ap(int cpu);
 extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
 
 
 #endif /* _ASM_X86_ESPFIX_H */
 #endif /* _ASM_X86_ESPFIX_H */

+ 1 - 6
arch/x86/include/asm/fixmap.h

@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
 			 PAGE_SIZE)
 			 PAGE_SIZE)
 #endif
 #endif
 
 
-
 /*
 /*
  * Here we define all the compile-time 'special' virtual
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
  * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
 #endif
-	FIX_RO_IDT,	/* Virtual mapping for read-only IDT */
 #ifdef CONFIG_X86_32
 #ifdef CONFIG_X86_32
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 	FIX_LNW_VRTC,
 #endif
 #endif
-	/* Fixmap entries to remap the GDTs, one per processor. */
-	FIX_GDT_REMAP_BEGIN,
-	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
 
 
 #ifdef CONFIG_ACPI_APEI_GHES
 #ifdef CONFIG_ACPI_APEI_GHES
 	/* Used for GHES mapping from assorted contexts */
 	/* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
 extern void reserve_top_address(unsigned long reserve);
 extern void reserve_top_address(unsigned long reserve);
 
 
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
 
 
 extern int fixmaps_set;
 extern int fixmaps_set;
 
 

+ 15 - 10
arch/x86/include/asm/hypervisor.h

@@ -20,16 +20,7 @@
 #ifndef _ASM_X86_HYPERVISOR_H
 #ifndef _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 
 
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types  */
 enum x86_hypervisor_type {
 enum x86_hypervisor_type {
 	X86_HYPER_NATIVE = 0,
 	X86_HYPER_NATIVE = 0,
 	X86_HYPER_VMWARE,
 	X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
 	X86_HYPER_KVM,
 	X86_HYPER_KVM,
 };
 };
 
 
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
 struct hypervisor_x86 {
 struct hypervisor_x86 {
 	/* Hypervisor name */
 	/* Hypervisor name */
 	const char	*name;
 	const char	*name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
 
 extern enum x86_hypervisor_type x86_hyper_type;
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
 extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+	return x86_hyper_type == type;
+}
 #else
 #else
 static inline void init_hypervisor_platform(void) { }
 static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+	return type == X86_HYPER_NATIVE;
+}
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
 #endif /* _ASM_X86_HYPERVISOR_H */

+ 36 - 0
arch/x86/include/asm/intel_ds.h

@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS		8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+	u64	bts_buffer_base;
+	u64	bts_index;
+	u64	bts_absolute_maximum;
+	u64	bts_interrupt_threshold;
+	u64	pebs_buffer_base;
+	u64	pebs_index;
+	u64	pebs_absolute_maximum;
+	u64	pebs_interrupt_threshold;
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+	char	bts_buffer[BTS_BUFFER_SIZE];
+	char	pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif

+ 53 - 0
arch/x86/include/asm/invpcid.h

@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+			     unsigned long type)
+{
+	struct { u64 d[2]; } desc = { { pcid, addr } };
+
+	/*
+	 * The memory clobber is because the whole point is to invalidate
+	 * stale TLB entries and, especially if we're flushing global
+	 * mappings, we don't want the compiler to reorder any subsequent
+	 * memory accesses before the TLB flush.
+	 *
+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+	 * invpcid (%rcx), %rax in long mode.
+	 */
+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR		0
+#define INVPCID_TYPE_SINGLE_CTXT	1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+				     unsigned long addr)
+{
+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */

+ 1 - 1
arch/x86/include/asm/irqdomain.h

@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 			      unsigned int nr_irqs);
 			      unsigned int nr_irqs);
 extern int mp_irqdomain_activate(struct irq_domain *domain,
 extern int mp_irqdomain_activate(struct irq_domain *domain,
-				 struct irq_data *irq_data, bool early);
+				 struct irq_data *irq_data, bool reserve);
 extern void mp_irqdomain_deactivate(struct irq_domain *domain,
 extern void mp_irqdomain_deactivate(struct irq_domain *domain,
 				    struct irq_data *irq_data);
 				    struct irq_data *irq_data);
 extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
 extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);

+ 3 - 0
arch/x86/include/asm/irqflags.h

@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
 	swapgs;					\
 	swapgs;					\
 	sysretl
 	sysretl
 
 
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x)		pushfq; popq %rax
+#endif
 #else
 #else
 #define INTERRUPT_RETURN		iret
 #define INTERRUPT_RETURN		iret
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit

+ 1 - 0
arch/x86/include/asm/kdebug.h

@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
 

+ 3 - 1
arch/x86/include/asm/mmu.h

@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 #define _ASM_X86_MMU_H
 
 
 #include <linux/spinlock.h>
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 #include <linux/atomic.h>
 
 
@@ -27,7 +28,8 @@ typedef struct {
 	atomic64_t tlb_gen;
 	atomic64_t tlb_gen;
 
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-	struct ldt_struct *ldt;
+	struct rw_semaphore	ldt_usr_sem;
+	struct ldt_struct	*ldt;
 #endif
 #endif
 
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64

+ 71 - 42
arch/x86/include/asm/mmu_context.h

@@ -50,22 +50,53 @@ struct ldt_struct {
 	 * call gates.  On native, we could merge the ldt_struct and LDT
 	 * call gates.  On native, we could merge the ldt_struct and LDT
 	 * allocations, but it's not worth trying to optimize.
 	 * allocations, but it's not worth trying to optimize.
 	 */
 	 */
-	struct desc_struct *entries;
-	unsigned int nr_entries;
+	struct desc_struct	*entries;
+	unsigned int		nr_entries;
+
+	/*
+	 * If PTI is in use, then the entries array is not mapped while we're
+	 * in user mode.  The whole array will be aliased at the addressed
+	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
+	 * and map, and enable a new LDT without invalidating the mapping
+	 * of an older, still-in-use LDT.
+	 *
+	 * slot will be -1 if this LDT doesn't have an alias mapping.
+	 */
+	int			slot;
 };
 };
 
 
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+	BUG();
+#endif
+}
+
 /*
 /*
  * Used for LDT copy/destruction.
  * Used for LDT copy/destruction.
  */
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+	mm->context.ldt = NULL;
+	init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
 #else	/* CONFIG_MODIFY_LDT_SYSCALL */
 #else	/* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
-				       struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+				  struct mm_struct *mm)
 {
 {
 	return 0;
 	return 0;
 }
 }
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 #endif
 
 
 static inline void load_mm_ldt(struct mm_struct *mm)
 static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
 	 * that we can see.
 	 * that we can see.
 	 */
 	 */
 
 
-	if (unlikely(ldt))
-		set_ldt(ldt->entries, ldt->nr_entries);
-	else
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
 		clear_LDT();
 		clear_LDT();
+	}
 #else
 #else
 	clear_LDT();
 	clear_LDT();
 #endif
 #endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 static inline int init_new_context(struct task_struct *tsk,
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 				   struct mm_struct *mm)
 {
 {
+	mutex_init(&mm->context.lock);
+
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
 	atomic64_set(&mm->context.tlb_gen, 0);
 	atomic64_set(&mm->context.tlb_gen, 0);
 
 
-	#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
 		/* pkey 0 is the default and always allocated */
 		/* pkey 0 is the default and always allocated */
 		mm->context.pkey_allocation_map = 0x1;
 		mm->context.pkey_allocation_map = 0x1;
 		/* -1 means unallocated or invalid */
 		/* -1 means unallocated or invalid */
 		mm->context.execute_only_pkey = -1;
 		mm->context.execute_only_pkey = -1;
 	}
 	}
-	#endif
-	return init_new_context_ldt(tsk, mm);
+#endif
+	init_new_context_ldt(mm);
+	return 0;
 }
 }
 static inline void destroy_context(struct mm_struct *mm)
 static inline void destroy_context(struct mm_struct *mm)
 {
 {
@@ -176,15 +231,16 @@ do {						\
 } while (0)
 } while (0)
 #endif
 #endif
 
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 {
 	paravirt_arch_dup_mmap(oldmm, mm);
 	paravirt_arch_dup_mmap(oldmm, mm);
+	return ldt_dup_context(oldmm, mm);
 }
 }
 
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
 {
 	paravirt_arch_exit_mmap(mm);
 	paravirt_arch_exit_mmap(mm);
+	ldt_arch_exit_mmap(mm);
 }
 }
 
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_X86_64
@@ -281,33 +337,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 }
 
 
-/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits.  This serves two purposes.  It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero.  It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
-	if (static_cpu_has(X86_FEATURE_PCID)) {
-		VM_WARN_ON_ONCE(asid > 4094);
-		return __sme_pa(mm->pgd) | (asid + 1);
-	} else {
-		VM_WARN_ON_ONCE(asid != 0);
-		return __sme_pa(mm->pgd);
-	}
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
-	VM_WARN_ON_ONCE(asid > 4094);
-	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
 /*
 /*
  * This can be used from process context to figure out what the value of
  * This can be used from process context to figure out what the value of
  * CR3 is without needing to do a (slow) __read_cr3().
  * CR3 is without needing to do a (slow) __read_cr3().
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
  */
  */
 static inline unsigned long __get_current_cr3_fast(void)
 static inline unsigned long __get_current_cr3_fast(void)
 {
 {
-	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
 		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
 
 	/* For now, be very restrictive about when this can be called. */
 	/* For now, be very restrictive about when this can be called. */

+ 9 - 0
arch/x86/include/asm/paravirt.h

@@ -927,6 +927,15 @@ extern void default_banner(void);
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
 		  CLBR_NONE,						\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers)                                        \
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
 #endif	/* CONFIG_X86_32 */
 #endif	/* CONFIG_X86_32 */
 
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ASSEMBLY__ */

+ 11 - 0
arch/x86/include/asm/pgalloc.h

@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
  */
  */
 extern gfp_t __userpte_alloc_gfp;
 extern gfp_t __userpte_alloc_gfp;
 
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
 /*
 /*
  * Allocate and free page tables.
  * Allocate and free page tables.
  */
  */

+ 26 - 4
arch/x86/include/asm/pgtable.h

@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
 
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
 void ptdump_walk_pgd_level_checkwx(void);
 void ptdump_walk_pgd_level_checkwx(void);
 
 
 #ifdef CONFIG_DEBUG_WX
 #ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 
 
 static inline int p4d_bad(p4d_t p4d)
 static inline int p4d_bad(p4d_t p4d)
 {
 {
-	return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+	unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (p4d_flags(p4d) & ~ignore_flags) != 0;
 }
 }
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 
 static inline int pgd_bad(pgd_t pgd)
 static inline int pgd_bad(pgd_t pgd)
 {
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	unsigned long ignore_flags = _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 }
 
 
 static inline int pgd_none(pgd_t pgd)
 static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
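
As a rough user-space model of the pgd_offset_pgd()/pgd_offset() split above: the entry index is derived purely from the upper address bits, and the new macro just makes the top-level table an explicit parameter so the same helper can be pointed at either the kernel or the user copy of the PGD. The shift and table size below assume 4-level x86-64 paging (PGDIR_SHIFT == 39, PTRS_PER_PGD == 512); this is an illustration, not the kernel implementation.

#include <stdio.h>
#include <stdint.h>

#define PGDIR_SHIFT	39		/* assumes 4-level paging */
#define PTRS_PER_PGD	512

typedef struct { uint64_t pgd; } pgd_t;

/* Mirror of pgd_index(): which of the 512 top-level slots covers the address. */
static unsigned long pgd_index(unsigned long address)
{
	return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

/* Mirror of pgd_offset_pgd(): index into whichever top-level table is passed in. */
static pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
{
	return pgd + pgd_index(address);
}

int main(void)
{
	static pgd_t table[PTRS_PER_PGD];
	unsigned long addr = 0x00007f1234567000UL;	/* arbitrary user address */

	printf("entry index: %lu\n", pgd_index(addr));
	printf("entry slot:  %p\n", (void *)pgd_offset_pgd(table, addr));
	return 0;
}
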
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
-       memcpy(dst, src, count * sizeof(pgd_t));
+	memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+	/* Clone the user space pgd as well */
+	memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+	       count * sizeof(pgd_t));
+#endif
 }
 
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
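
The clone_pgd_range() change above copies the requested entries twice when PTI is active: once into the destination's kernel half and once into the user half reached by flipping bit 12. A hedged user-space sketch of that mirroring follows; the 8 KiB-aligned allocations and the pti_enabled flag are stand-ins for the kernel's order-1 PGD pages and the X86_FEATURE_PTI check.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct { uint64_t pgd; } pgd_t;

#define PTI_PGTABLE_SWITCH_BIT	12	/* PAGE_SHIFT on x86 */

static pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
{
	return (pgd_t *)((uintptr_t)pgdp | (1UL << PTI_PGTABLE_SWITCH_BIT));
}

/* Model of clone_pgd_range() with PTI enabled: copy kernel and user halves. */
static void clone_pgd_range(pgd_t *dst, pgd_t *src, int count, int pti_enabled)
{
	memcpy(dst, src, count * sizeof(pgd_t));
	if (!pti_enabled)
		return;
	memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
	       count * sizeof(pgd_t));
}

int main(void)
{
	/* Two order-1 "PGDs": 8 KiB each, 8 KiB-aligned, kernel half first. */
	pgd_t *src = aligned_alloc(8192, 8192);
	pgd_t *dst = aligned_alloc(8192, 8192);

	if (!src || !dst)
		return 1;
	memset(src, 0xaa, 8192);
	memset(dst, 0, 8192);
	clone_pgd_range(dst, src, 16, 1);
	return 0;
}
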

+ 12 - 3
arch/x86/include/asm/pgtable_32_types.h

@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))	\
-		    & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE				\
+	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE		\
+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END	(CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR	VMALLOC_START
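
To see what the reshuffled 32-bit layout above does, the macros can be evaluated with a concrete FIXADDR_START. Everything below is an assumption chosen only for illustration (FIXADDR_START, NR_CPUS and the 4 MiB PMD_MASK are all configuration dependent); what matters is the resulting ordering VMALLOC_END < PKMAP_BASE < CPU_ENTRY_AREA_BASE < FIXADDR_START, with the CPU entry area sized by NR_CPUS * 40 pages.

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define PMD_MASK		(~((1UL << 22) - 1))	/* assumed: non-PAE 4 MiB PMD */
#define NR_CPUS			8			/* assumed config value */
#define FIXADDR_START		0xfff00000UL		/* assumed, config dependent */

#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
#define CPU_ENTRY_AREA_BASE	\
	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
#define PKMAP_BASE		((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
#define VMALLOC_END		(PKMAP_BASE - 2 * PAGE_SIZE)	/* CONFIG_HIGHMEM case */

int main(void)
{
	printf("FIXADDR_START:       %#lx\n", FIXADDR_START);
	printf("CPU_ENTRY_AREA_BASE: %#lx\n", CPU_ENTRY_AREA_BASE);
	printf("PKMAP_BASE:          %#lx\n", PKMAP_BASE);
	printf("VMALLOC_END:         %#lx\n", VMALLOC_END);
	return 0;
}
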

+ 92 - 0
arch/x86/include/asm/pgtable_64.h

@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT	PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr |= BIT(bit);
+	return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr &= ~BIT(bit);
+	return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+	return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+	return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+	return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+	return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+	unsigned long ptr = (unsigned long)__ptr;
+
+	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return pgd;
+	return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	return pgd;
+}
+#endif
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+	p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
 	*p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
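
The helpers added in this hunk encode two conventions: the user copy of an order-1 PGD lives one 4 KiB page above the kernel copy (bit 12), and within a 4 KiB top-level page the first 256 of the 512 entries cover user addresses while the rest cover the kernel. Below is a self-contained user-space model of pgdp_maps_userspace() and the kernel/user pointer conversions; the aligned_alloc call is only a stand-in for a real PGD page.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define PAGE_SIZE		4096UL
#define PAGE_MASK		(~(PAGE_SIZE - 1))
#define PTI_PGTABLE_SWITCH_BIT	12	/* PAGE_SHIFT */

typedef struct { uint64_t pgd; } pgd_t;

static pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
{
	return (pgd_t *)((uintptr_t)pgdp | (1UL << PTI_PGTABLE_SWITCH_BIT));
}

static pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
{
	return (pgd_t *)((uintptr_t)pgdp & ~(1UL << PTI_PGTABLE_SWITCH_BIT));
}

/* Entries in the lower half of a 4 KiB top-level page map userspace. */
static int pgdp_maps_userspace(void *__ptr)
{
	uintptr_t ptr = (uintptr_t)__ptr;

	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
}

int main(void)
{
	pgd_t *pgd = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);	/* order-1 stand-in */

	if (!pgd)
		return 1;
	printf("entry   0 maps userspace: %d\n", pgdp_maps_userspace(&pgd[0]));
	printf("entry 256 maps userspace: %d\n", pgdp_maps_userspace(&pgd[256]));
	printf("user copy of entry 0:     %p\n", (void *)kernel_to_user_pgdp(&pgd[0]));
	printf("back to kernel copy:      %p\n",
	       (void *)user_to_kernel_pgdp(kernel_to_user_pgdp(&pgd[0])));
	return 0;
}
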
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	*pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
 	*pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
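
native_set_pgd() above always writes the kernel entry, but with PTI compiled in it first routes the value through pti_set_user_pgd(), which is a no-op unless the X86_FEATURE_PTI feature is set. A hedged user-space model of that gate follows; the pti_enabled flag and the shadow-table mirroring are illustrative stand-ins, and any flag adjustments done by the real __pti_set_user_pgd() are deliberately not modelled.

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t pgd; } pgd_t;

/* Illustrative stand-ins for the static cpu feature check and the two tables. */
static int pti_enabled = 1;
static pgd_t kernel_pgd[512];
static pgd_t user_pgd[512];

/* Model: mirror the new value into the user table, return what the kernel
 * table should hold. */
static pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
	if (!pti_enabled)
		return pgd;
	user_pgd[pgdp - kernel_pgd] = pgd;
	return pgd;
}

static void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	*pgdp = pti_set_user_pgd(pgdp, pgd);
}

int main(void)
{
	pgd_t val = { .pgd = 0x1234063 };	/* arbitrary entry value */

	native_set_pgd(&kernel_pgd[5], val);
	printf("kernel entry 5: %#llx\n", (unsigned long long)kernel_pgd[5].pgd);
	printf("user entry 5:   %#llx\n", (unsigned long long)user_pgd[5].pgd);
	return 0;
}
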

Too many files were changed in this diff, so some files are not shown.