Browse Source

Merge tag 'v4.16-rc2' into sched/core, to pick up fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 7 years ago
parent
commit
ed02934395
100 changed files with 1404 additions and 839 deletions
  1. 39 0
      Documentation/ABI/testing/sysfs-devices-platform-dock
  2. 75 2
      Documentation/ABI/testing/sysfs-devices-system-cpu
  3. 40 0
      Documentation/ABI/testing/sysfs-platform-dptf
  4. 6 3
      Documentation/admin-guide/kernel-parameters.txt
  5. 6 1
      Documentation/atomic_bitops.txt
  6. 4 0
      Documentation/cpu-freq/cpu-drivers.txt
  7. 8 0
      Documentation/devicetree/bindings/power/mti,mips-cpc.txt
  8. 28 4
      Documentation/driver-api/s390-drivers.rst
  9. 17 32
      Documentation/locking/mutex-design.txt
  10. 3 0
      Documentation/virtual/kvm/00-INDEX
  11. 247 0
      Documentation/virtual/kvm/amd-memory-encryption.rst
  12. 53 1
      Documentation/virtual/kvm/api.txt
  13. 0 187
      Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
  14. 4 0
      Documentation/virtual/kvm/cpuid.txt
  15. 6 1
      MAINTAINERS
  16. 3 4
      Makefile
  17. 1 1
      arch/arm/include/asm/kvm_emulate.h
  18. 2 0
      arch/arm/include/asm/kvm_host.h
  19. 2 1
      arch/arm/include/asm/kvm_hyp.h
  20. 84 15
      arch/arm/include/asm/kvm_mmu.h
  21. 2 2
      arch/arm/include/asm/pgtable.h
  22. 1 0
      arch/arm/kvm/hyp/switch.c
  23. 1 0
      arch/arm/kvm/hyp/tlb.c
  24. 0 1
      arch/arm/mach-vt8500/Kconfig
  25. 21 0
      arch/arm64/include/asm/assembler.h
  26. 7 0
      arch/arm64/include/asm/cacheflush.h
  27. 1 1
      arch/arm64/include/asm/cputype.h
  28. 1 1
      arch/arm64/include/asm/hugetlb.h
  29. 2 0
      arch/arm64/include/asm/kvm_host.h
  30. 0 1
      arch/arm64/include/asm/kvm_hyp.h
  31. 40 14
      arch/arm64/include/asm/kvm_mmu.h
  32. 2 2
      arch/arm64/include/asm/mmu_context.h
  33. 22 22
      arch/arm64/include/asm/pgalloc.h
  34. 2 0
      arch/arm64/include/asm/pgtable-hwdef.h
  35. 2 2
      arch/arm64/include/asm/pgtable-prot.h
  36. 13 10
      arch/arm64/include/asm/pgtable.h
  37. 2 2
      arch/arm64/kernel/acpi.c
  38. 9 0
      arch/arm64/kernel/cpu_errata.c
  39. 1 1
      arch/arm64/kernel/efi.c
  40. 76 72
      arch/arm64/kernel/hibernate.c
  41. 12 3
      arch/arm64/kvm/guest.c
  42. 1 0
      arch/arm64/kvm/hyp/debug-sr.c
  43. 4 1
      arch/arm64/kvm/hyp/switch.c
  44. 1 0
      arch/arm64/kvm/hyp/tlb.c
  45. 22 10
      arch/arm64/mm/cache.S
  46. 30 24
      arch/arm64/mm/dump.c
  47. 25 19
      arch/arm64/mm/fault.c
  48. 43 51
      arch/arm64/mm/hugetlbpage.c
  49. 35 35
      arch/arm64/mm/kasan_init.c
  50. 148 134
      arch/arm64/mm/mmu.c
  51. 17 15
      arch/arm64/mm/pageattr.c
  52. 9 5
      arch/arm64/mm/proc.S
  53. 1 18
      arch/blackfin/include/uapi/asm/poll.h
  54. 1 1
      arch/cris/arch-v10/drivers/gpio.c
  55. 4 4
      arch/cris/arch-v10/drivers/sync_serial.c
  56. 5 5
      arch/cris/arch-v32/drivers/sync_serial.c
  57. 1 18
      arch/frv/include/uapi/asm/poll.h
  58. 0 1
      arch/ia64/kernel/Makefile
  59. 1 1
      arch/ia64/kernel/perfmon.c
  60. 1 18
      arch/m68k/include/uapi/asm/poll.h
  61. 0 1
      arch/mips/Kconfig
  62. 0 1
      arch/mips/bcm63xx/boards/Kconfig
  63. 1 18
      arch/mips/include/uapi/asm/poll.h
  64. 13 0
      arch/mips/kernel/mips-cpc.c
  65. 2 2
      arch/mips/kernel/rtlx.c
  66. 12 4
      arch/mips/kernel/setup.c
  67. 1 1
      arch/mips/kernel/smp-bmips.c
  68. 1 0
      arch/mips/kvm/Kconfig
  69. 47 20
      arch/mips/kvm/mips.c
  70. 0 1
      arch/nios2/Kconfig
  71. 8 8
      arch/nios2/boot/dts/3c120_devboard.dts
  72. 0 1
      arch/nios2/configs/10m50_defconfig
  73. 0 1
      arch/nios2/configs/3c120_defconfig
  74. 1 0
      arch/powerpc/include/asm/book3s/32/pgtable.h
  75. 2 1
      arch/powerpc/include/asm/book3s/64/hash-4k.h
  76. 11 5
      arch/powerpc/include/asm/book3s/64/hash-64k.h
  77. 12 1
      arch/powerpc/include/asm/book3s/64/hash.h
  78. 11 5
      arch/powerpc/include/asm/book3s/64/pgalloc.h
  79. 3 1
      arch/powerpc/include/asm/book3s/64/pgtable.h
  80. 1 1
      arch/powerpc/include/asm/exception-64s.h
  81. 11 1
      arch/powerpc/include/asm/hw_irq.h
  82. 6 0
      arch/powerpc/include/asm/kexec.h
  83. 2 4
      arch/powerpc/include/asm/kvm_book3s.h
  84. 9 5
      arch/powerpc/include/asm/kvm_book3s_64.h
  85. 7 1
      arch/powerpc/include/asm/kvm_host.h
  86. 4 0
      arch/powerpc/include/asm/kvm_ppc.h
  87. 1 0
      arch/powerpc/include/asm/nohash/32/pgtable.h
  88. 1 0
      arch/powerpc/include/asm/nohash/64/pgtable.h
  89. 1 0
      arch/powerpc/include/asm/opal-api.h
  90. 6 0
      arch/powerpc/include/asm/ppc-opcode.h
  91. 13 0
      arch/powerpc/include/asm/topology.h
  92. 2 1
      arch/powerpc/include/asm/xive.h
  93. 2 0
      arch/powerpc/include/uapi/asm/kvm.h
  94. 4 0
      arch/powerpc/kernel/asm-offsets.c
  95. 2 0
      arch/powerpc/kernel/exceptions-64e.S
  96. 3 3
      arch/powerpc/kernel/exceptions-64s.S
  97. 3 2
      arch/powerpc/kernel/pci-common.c
  98. 1 1
      arch/powerpc/kernel/rtasd.c
  99. 4 2
      arch/powerpc/kernel/sysfs.c
  100. 2 1
      arch/powerpc/kvm/Kconfig

+ 39 - 0
Documentation/ABI/testing/sysfs-devices-platform-dock

@@ -0,0 +1,39 @@
+What:		/sys/devices/platform/dock.N/docked
+Date:		Dec, 2006
+KernelVersion:	2.6.19
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) Value 1 or 0 indicates whether the software believes the
+		laptop is docked in a docking station.
+
+What:		/sys/devices/platform/dock.N/undock
+Date:		Dec, 2006
+KernelVersion:	2.6.19
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(WO) Writing to this file causes the software to initiate an
+		undock request to the firmware.
+
+What:		/sys/devices/platform/dock.N/uid
+Date:		Feb, 2007
+KernelVersion:	v2.6.21
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) Displays the docking station the laptop is docked to.
+
+What:		/sys/devices/platform/dock.N/flags
+Date:		May, 2007
+KernelVersion:	v2.6.21
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) Show dock station flags, useful for checking if undock
+		request has been made by the user (from the immediate_undock
+		option).
+
+What:		/sys/devices/platform/dock.N/type
+Date:		Aug, 2008
+KernelVersion:	v2.6.27
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) Display the dock station type- dock_station, ata_bay or
+		battery_bay.

+ 75 - 2
Documentation/ABI/testing/sysfs-devices-system-cpu

@@ -108,6 +108,8 @@ Description:	CPU topology files that describe a logical CPU's relationship
 
 What:		/sys/devices/system/cpu/cpuidle/current_driver
 		/sys/devices/system/cpu/cpuidle/current_governer_ro
+		/sys/devices/system/cpu/cpuidle/available_governors
+		/sys/devices/system/cpu/cpuidle/current_governor
 Date:		September 2007
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Discover cpuidle policy and mechanism
@@ -119,13 +121,84 @@ Description:	Discover cpuidle policy and mechanism
 		Idle policy (governor) is differentiated from idle mechanism
 		(driver)
 
-		current_driver: displays current idle mechanism
+		current_driver: (RO) displays current idle mechanism
 
-		current_governor_ro: displays current idle policy
+		current_governor_ro: (RO) displays current idle policy
+
+		With the cpuidle_sysfs_switch boot option enabled (meant for
+		developer testing), the following three attributes are visible
+		instead:
+
+		current_driver: same as described above
+
+		available_governors: (RO) displays a space separated list of
+		available governors
+
+		current_governor: (RW) displays current idle policy. Users can
+		switch the governor at runtime by writing to this file.
 
 		See files in Documentation/cpuidle/ for more information.
 
 
+What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/name
+		/sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
+		/sys/devices/system/cpu/cpuX/cpuidle/stateN/power
+		/sys/devices/system/cpu/cpuX/cpuidle/stateN/time
+		/sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+Date:		September 2007
+KernelVersion:	v2.6.24
+Contact:	Linux power management list <linux-pm@vger.kernel.org>
+Description:
+		The directory /sys/devices/system/cpu/cpuX/cpuidle contains per
+		logical CPU specific cpuidle information for each online cpu X.
+		The processor idle states which are available for use have the
+		following attributes:
+
+		name: (RO) Name of the idle state (string).
+
+		latency: (RO) The latency to exit out of this idle state (in
+		microseconds).
+
+		power: (RO) The power consumed while in this idle state (in
+		milliwatts).
+
+		time: (RO) The total time spent in this idle state (in microseconds).
+
+		usage: (RO) Number of times this state was entered (a count).
+
+
+What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
+Date:		February 2008
+KernelVersion:	v2.6.25
+Contact:	Linux power management list <linux-pm@vger.kernel.org>
+Description:
+		(RO) A small description about the idle state (string).
+
+
+What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
+Date:		March 2012
+KernelVersion:	v3.10
+Contact:	Linux power management list <linux-pm@vger.kernel.org>
+Description:
+		(RW) Option to disable this idle state (bool). The behavior and
+		the effect of the disable variable depends on the implementation
+		of a particular governor. In the ladder governor, for example,
+		it is not coherent, i.e. if one is disabling a light state, then
+		all deeper states are disabled as well, but the disable variable
+		does not reflect it. Likewise, if one enables a deep state but a
+		lighter state still is disabled, then this has no effect.
+
+
+What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
+Date:		March 2014
+KernelVersion:	v3.15
+Contact:	Linux power management list <linux-pm@vger.kernel.org>
+Description:
+		(RO) Display the target residency i.e. the minimum amount of
+		time (in microseconds) this cpu should spend in this idle state
+		to make the transition worth the effort.
+
+
 What:		/sys/devices/system/cpu/cpu#/cpufreq/*
 Date:		pre-git history
 Contact:	linux-pm@vger.kernel.org

+ 40 - 0
Documentation/ABI/testing/sysfs-platform-dptf

@@ -0,0 +1,40 @@
+What:		/sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
+Date:		Jul, 2016
+KernelVersion:	v4.10
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) The charger type - Traditional, Hybrid or NVDC.
+
+What:		/sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
+Date:		Jul, 2016
+KernelVersion:	v4.10
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) Adapter rating in milliwatts (the maximum Adapter power).
+		Must be 0 if no AC Adaptor is plugged in.
+
+What:		/sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
+Date:		Jul, 2016
+KernelVersion:	v4.10
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) Maximum platform power that can be supported by the battery
+		in milliwatts.
+
+What:		/sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
+Date:		Jul, 2016
+KernelVersion:	v4.10
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) Display the platform power source
+		0x00 = DC
+		0x01 = AC
+		0x02 = USB
+		0x03 = Wireless Charger
+
+What:		/sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
+Date:		Jul, 2016
+KernelVersion:	v4.10
+Contact:	linux-acpi@vger.kernel.org
+Description:
+		(RO) The maximum sustained power for battery in milliwatts.

+ 6 - 3
Documentation/admin-guide/kernel-parameters.txt

@@ -931,9 +931,12 @@
 
 	earlycon=	[KNL] Output early console device and options.
 
-			When used with no options, the early console is
-			determined by the stdout-path property in device
-			tree's chosen node.
+			[ARM64] The early console is determined by the
+			stdout-path property in device tree's chosen node,
+			or determined by the ACPI SPCR table.
+
+			[X86] When used with no options the early console is
+			determined by the ACPI SPCR table.
 
 		cdns,<addr>[,options]
 			Start an early, polled-mode console on a Cadence

+ 6 - 1
Documentation/atomic_bitops.txt

@@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
 
  - RMW operations that have a return value are fully ordered.
 
-Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+ - RMW operations that are conditional are unordered on FAILURE,
+   otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+   if the bit in memory is unchanged by the operation then it is deemed to have
+   failed.
+
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
 clear_bit_unlock() which has RELEASE semantics.
 
 Since a platform only has a single means of achieving atomic operations

+ 4 - 0
Documentation/cpu-freq/cpu-drivers.txt

@@ -291,3 +291,7 @@ For example:
 		/* Do something with pos */
 		pos->frequency = ...
 	}
+
+If you need to work with the position of pos within driver_freq_table,
+do not subtract the pointers, as it is quite costly. Instead, use the
+macros cpufreq_for_each_entry_idx() and cpufreq_for_each_valid_entry_idx().

+ 8 - 0
Documentation/devicetree/bindings/power/mti,mips-cpc.txt

@@ -0,0 +1,8 @@
+Binding for MIPS Cluster Power Controller (CPC).
+
+This binding allows a system to specify where the CPC registers are
+located.
+
+Required properties:
+compatible : Should be "mti,mips-cpc".
+regs: Should describe the address & size of the CPC register region.

+ 28 - 4
Documentation/driver-api/s390-drivers.rst

@@ -22,9 +22,28 @@ While most I/O devices on a s390 system are typically driven through the
 channel I/O mechanism described here, there are various other methods
 (like the diag interface). These are out of the scope of this document.
 
+The s390 common I/O layer also provides access to some devices that are
+not strictly considered I/O devices. They are considered here as well,
+although they are not the focus of this document.
+
 Some additional information can also be found in the kernel source under
 Documentation/s390/driver-model.txt.
 
+The css bus
+===========
+
+The css bus contains the subchannels available on the system. They fall
+into several categories:
+
+* Standard I/O subchannels, for use by the system. They have a child
+  device on the ccw bus and are described below.
+* I/O subchannels bound to the vfio-ccw driver. See
+  Documentation/s390/vfio-ccw.txt.
+* Message subchannels. No Linux driver currently exists.
+* CHSC subchannels (at most one). The chsc subchannel driver can be used
+  to send asynchronous chsc commands.
+* eADM subchannels. Used for talking to storage class memory.
+
 The ccw bus
 ===========
 
@@ -102,10 +121,15 @@ ccw group devices
 Generic interfaces
 ==================
 
-Some interfaces are available to other drivers that do not necessarily
-have anything to do with the busses described above, but still are
-indirectly using basic infrastructure in the common I/O layer. One
-example is the support for adapter interrupts.
+The following section contains interfaces in use not only by drivers
+dealing with ccw devices, but drivers for various other s390 hardware
+as well.
+
+Adapter interrupts
+------------------
+
+The common I/O layer provides helper functions for dealing with adapter
+interrupts and interrupt vectors.
 
 .. kernel-doc:: drivers/s390/cio/airq.c
    :export:

+ 17 - 32
Documentation/locking/mutex-design.txt

@@ -21,37 +21,23 @@ Implementation
 --------------
 
 Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-	  1: unlocked
-	  0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime.  Field owner
+actually contains 'struct task_struct *' to the current lock owner and it is
+therefore NULL if not currently owned. Since task_struct pointers are aligned
+at at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
+if waiter list is non-empty).  In its most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
 
 When acquiring a mutex, there are three possible paths that can be
 taken, depending on the state of the lock:
 
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b                lock decl (%rbx)
-    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
-
-   the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07                lock incl (%rdi)
-    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
-
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+    the current task. This only works in the uncontended case (cmpxchg() checks
+    against 0UL, so all 3 state bits above have to be 0). If the lock is
+    contended it goes to the next possible path.
 
 (ii) midpath: aka optimistic spinning, tries to spin for acquisition
      while the lock owner is running and there are no other tasks ready
@@ -143,11 +129,10 @@ Test if the mutex is taken:
 Disadvantages
 -------------
 
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
-for the largest lock in the kernel. Larger structure sizes mean more
-CPU cache and memory footprint.
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
 
 When to use mutexes
 -------------------

+ 3 - 0
Documentation/virtual/kvm/00-INDEX

@@ -26,3 +26,6 @@ s390-diag.txt
 	- Diagnose hypercall description (for IBM S/390)
 timekeeping.txt
 	- timekeeping virtualization for x86-based architectures.
+amd-memory-encryption.txt
+	- notes on AMD Secure Encrypted Virtualization feature and SEV firmware
+	  command description

+ 247 - 0
Documentation/virtual/kvm/amd-memory-encryption.rst

@@ -0,0 +1,247 @@
+======================================
+Secure Encrypted Virtualization (SEV)
+======================================
+
+Overview
+========
+
+Secure Encrypted Virtualization (SEV) is a feature found on AMD processors.
+
+SEV is an extension to the AMD-V architecture which supports running
+virtual machines (VMs) under the control of a hypervisor. When enabled,
+the memory contents of a VM will be transparently encrypted with a key
+unique to that VM.
+
+The hypervisor can determine the SEV support through the CPUID
+instruction. The CPUID function 0x8000001f reports information related
+to SEV::
+
+	0x8000001f[eax]:
+			Bit[1] 	indicates support for SEV
+	    ...
+		  [ecx]:
+			Bits[31:0]  Number of encrypted guests supported simultaneously
+
+If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015
+(MSR_K7_HWCR) can be used to determine if it can be enabled::
+
+	0xc001_0010:
+		Bit[23]	   1 = memory encryption can be enabled
+			   0 = memory encryption can not be enabled
+
+	0xc001_0015:
+		Bit[0]	   1 = memory encryption can be enabled
+			   0 = memory encryption can not be enabled
+
+When SEV support is available, it can be enabled in a specific VM by
+setting the SEV bit before executing VMRUN.::
+
+	VMCB[0x90]:
+		Bit[1]	    1 = SEV is enabled
+			    0 = SEV is disabled
+
+SEV hardware uses ASIDs to associate a memory encryption key with a VM.
+Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
+defined in the CPUID 0x8000001f[ecx] field.
+
+SEV Key Management
+==================
+
+The SEV guest key management is handled by a separate processor called the AMD
+Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
+key management interface to perform common hypervisor activities such as
+encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
+information, see the SEV Key Management spec [api-spec]_
+
+KVM implements the following commands to support common lifecycle events of SEV
+guests, such as launching, running, snapshotting, migrating and decommissioning.
+
+1. KVM_SEV_INIT
+---------------
+
+The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
+context. In a typical workflow, this command should be the first command issued.
+
+Returns: 0 on success, -negative on error
+
+2. KVM_SEV_LAUNCH_START
+-----------------------
+
+The KVM_SEV_LAUNCH_START command is used for creating the memory encryption
+context. To create the encryption context, user must provide a guest policy,
+the owner's public Diffie-Hellman (PDH) key and session information.
+
+Parameters: struct  kvm_sev_launch_start (in/out)
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_start {
+                __u32 handle;           /* if zero then firmware creates a new handle */
+                __u32 policy;           /* guest's policy */
+
+                __u64 dh_uaddr;         /* userspace address pointing to the guest owner's PDH key */
+                __u32 dh_len;
+
+                __u64 session_addr;     /* userspace address which points to the guest session information */
+                __u32 session_len;
+        };
+
+On success, the 'handle' field contains a new handle and on error, a negative value.
+
+For more details, see SEV spec Section 6.2.
+
+3. KVM_SEV_LAUNCH_UPDATE_DATA
+-----------------------------
+
+The KVM_SEV_LAUNCH_UPDATE_DATA is used for encrypting a memory region. It also
+calculates a measurement of the memory contents. The measurement is a signature
+of the memory contents that can be sent to the guest owner as an attestation
+that the memory was encrypted correctly by the firmware.
+
+Parameters (in): struct  kvm_sev_launch_update_data
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_update {
+                __u64 uaddr;    /* userspace address to be encrypted (must be 16-byte aligned) */
+                __u32 len;      /* length of the data to be encrypted (must be 16-byte aligned) */
+        };
+
+For more details, see SEV spec Section 6.3.
+
+4. KVM_SEV_LAUNCH_MEASURE
+-------------------------
+
+The KVM_SEV_LAUNCH_MEASURE command is used to retrieve the measurement of the
+data encrypted by the KVM_SEV_LAUNCH_UPDATE_DATA command. The guest owner may
+wait to provide the guest with confidential information until it can verify the
+measurement. Since the guest owner knows the initial contents of the guest at
+boot, the measurement can be verified by comparing it to what the guest owner
+expects.
+
+Parameters (in): struct  kvm_sev_launch_measure
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_measure {
+                __u64 uaddr;    /* where to copy the measurement */
+                __u32 len;      /* length of measurement blob */
+        };
+
+For more details on the measurement verification flow, see SEV spec Section 6.4.
+
+5. KVM_SEV_LAUNCH_FINISH
+------------------------
+
+After completion of the launch flow, the KVM_SEV_LAUNCH_FINISH command can be
+issued to make the guest ready for the execution.
+
+Returns: 0 on success, -negative on error
+
+6. KVM_SEV_GUEST_STATUS
+-----------------------
+
+The KVM_SEV_GUEST_STATUS command is used to retrieve status information about a
+SEV-enabled guest.
+
+Parameters (out): struct kvm_sev_guest_status
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_guest_status {
+                __u32 handle;   /* guest handle */
+                __u32 policy;   /* guest policy */
+                __u8 state;     /* guest state (see enum below) */
+        };
+
+SEV guest state:
+
+::
+
+        enum {
+        SEV_STATE_INVALID = 0;
+        SEV_STATE_LAUNCHING,    /* guest is currently being launched */
+        SEV_STATE_SECRET,       /* guest is being launched and ready to accept the ciphertext data */
+        SEV_STATE_RUNNING,      /* guest is fully launched and running */
+        SEV_STATE_RECEIVING,    /* guest is being migrated in from another SEV machine */
+        SEV_STATE_SENDING       /* guest is getting migrated out to another SEV machine */
+        };
+
+7. KVM_SEV_DBG_DECRYPT
+----------------------
+
+The KVM_SEV_DEBUG_DECRYPT command can be used by the hypervisor to request the
+firmware to decrypt the data at the given memory region.
+
+Parameters (in): struct kvm_sev_dbg
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_dbg {
+                __u64 src_uaddr;        /* userspace address of data to decrypt */
+                __u64 dst_uaddr;        /* userspace address of destination */
+                __u32 len;              /* length of memory region to decrypt */
+        };
+
+The command returns an error if the guest policy does not allow debugging.
+
+8. KVM_SEV_DBG_ENCRYPT
+----------------------
+
+The KVM_SEV_DEBUG_ENCRYPT command can be used by the hypervisor to request the
+firmware to encrypt the data at the given memory region.
+
+Parameters (in): struct kvm_sev_dbg
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_dbg {
+                __u64 src_uaddr;        /* userspace address of data to encrypt */
+                __u64 dst_uaddr;        /* userspace address of destination */
+                __u32 len;              /* length of memory region to encrypt */
+        };
+
+The command returns an error if the guest policy does not allow debugging.
+
+9. KVM_SEV_LAUNCH_SECRET
+------------------------
+
+The KVM_SEV_LAUNCH_SECRET command can be used by the hypervisor to inject secret
+data after the measurement has been validated by the guest owner.
+
+Parameters (in): struct kvm_sev_launch_secret
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_secret {
+                __u64 hdr_uaddr;        /* userspace address containing the packet header */
+                __u32 hdr_len;
+
+                __u64 guest_uaddr;      /* the guest memory region where the secret should be injected */
+                __u32 guest_len;
+
+                __u64 trans_uaddr;      /* the hypervisor memory region which contains the secret */
+                __u32 trans_len;
+        };
+
+References
+==========
+
+.. [white-paper] http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
+.. [api-spec] http://support.amd.com/TechDocs/55766_SEV-KM%20API_Specification.pdf
+.. [amd-apm] http://support.amd.com/TechDocs/24593.pdf (section 15.34)
+.. [kvm-forum]  http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf

+ 53 - 1
Documentation/virtual/kvm/api.txt

@@ -1841,6 +1841,7 @@ registers, find a list below:
   PPC	| KVM_REG_PPC_DBSR              | 32
   PPC   | KVM_REG_PPC_TIDR              | 64
   PPC   | KVM_REG_PPC_PSSCR             | 64
+  PPC   | KVM_REG_PPC_DEC_EXPIRY        | 64
   PPC   | KVM_REG_PPC_TM_GPR0           | 64
           ...
   PPC   | KVM_REG_PPC_TM_GPR31          | 64
@@ -3403,7 +3404,7 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
 or if no page table is present for the addresses (e.g. when using
 hugepages).
 
-4.108 KVM_PPC_GET_CPU_CHAR
+4.109 KVM_PPC_GET_CPU_CHAR
 
 Capability: KVM_CAP_PPC_GET_CPU_CHAR
 Architectures: powerpc
@@ -3449,6 +3450,57 @@ array bounds check and the array access.
 These fields use the same bit definitions as the new
 H_GET_CPU_CHARACTERISTICS hypercall.
 
+4.110 KVM_MEMORY_ENCRYPT_OP
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: an opaque platform specific structure (in/out)
+Returns: 0 on success; -1 on error
+
+If the platform supports creating encrypted VMs then this ioctl can be used
+for issuing platform-specific memory encryption commands to manage those
+encrypted VMs.
+
+Currently, this ioctl is used for issuing Secure Encrypted Virtualization
+(SEV) commands on AMD Processors. The SEV commands are defined in
+Documentation/virtual/kvm/amd-memory-encryption.txt.
+
+4.111 KVM_MEMORY_ENCRYPT_REG_REGION
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_enc_region (in)
+Returns: 0 on success; -1 on error
+
+This ioctl can be used to register a guest memory region which may
+contain encrypted data (e.g. guest RAM, SMRAM etc).
+
+It is used in the SEV-enabled guest. When encryption is enabled, a guest
+memory region may contain encrypted data. The SEV memory encryption
+engine uses a tweak such that two identical plaintext pages, each at
+different locations will have differing ciphertexts. So swapping or
+moving ciphertext of those pages will not result in plaintext being
+swapped. So relocating (or migrating) physical backing pages for the SEV
+guest will require some additional steps.
+
+Note: The current SEV key management spec does not provide commands to
+swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
+memory region registered with the ioctl.
+
+4.112 KVM_MEMORY_ENCRYPT_UNREG_REGION
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_enc_region (in)
+Returns: 0 on success; -1 on error
+
+This ioctl can be used to unregister the guest memory region registered
+with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
+
+
 5. The kvm_run structure
 ------------------------
 

+ 0 - 187
Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt

@@ -1,187 +0,0 @@
-KVM/ARM VGIC Forwarded Physical Interrupts
-==========================================
-
-The KVM/ARM code implements software support for the ARM Generic
-Interrupt Controller's (GIC's) hardware support for virtualization by
-allowing software to inject virtual interrupts to a VM, which the guest
-OS sees as regular interrupts.  The code is famously known as the VGIC.
-
-Some of these virtual interrupts, however, correspond to physical
-interrupts from real physical devices.  One example could be the
-architected timer, which itself supports virtualization, and therefore
-lets a guest OS program the hardware device directly to raise an
-interrupt at some point in time.  When such an interrupt is raised, the
-host OS initially handles the interrupt and must somehow signal this
-event as a virtual interrupt to the guest.  Another example could be a
-passthrough device, where the physical interrupts are initially handled
-by the host, but the device driver for the device lives in the guest OS
-and KVM must therefore somehow inject a virtual interrupt on behalf of
-the physical one to the guest OS.
-
-These virtual interrupts corresponding to a physical interrupt on the
-host are called forwarded physical interrupts, but are also sometimes
-referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
-
-Forwarded physical interrupts are handled slightly differently compared
-to virtual interrupts generated purely by a software emulated device.
-
-
-The HW bit
-----------
-Virtual interrupts are signalled to the guest by programming the List
-Registers (LRs) on the GIC before running a VCPU.  The LR is programmed
-with the virtual IRQ number and the state of the interrupt (Pending,
-Active, or Pending+Active).  When the guest ACKs and EOIs a virtual
-interrupt, the LR state moves from Pending to Active, and finally to
-inactive.
-
-The LRs include an extra bit, called the HW bit.  When this bit is set,
-KVM must also program an additional field in the LR, the physical IRQ
-number, to link the virtual with the physical IRQ.
-
-When the HW bit is set, KVM must EITHER set the Pending OR the Active
-bit, never both at the same time.
-
-Setting the HW bit causes the hardware to deactivate the physical
-interrupt on the physical distributor when the guest deactivates the
-corresponding virtual interrupt.
-
-
-Forwarded Physical Interrupts Life Cycle
-----------------------------------------
-
-The state of forwarded physical interrupts is managed in the following way:
-
-  - The physical interrupt is acked by the host, and becomes active on
-    the physical distributor (*).
-  - KVM sets the LR.Pending bit, because this is the only way the GICV
-    interface is going to present it to the guest.
-  - LR.Pending will stay set as long as the guest has not acked the interrupt.
-  - LR.Pending transitions to LR.Active on the guest read of the IAR, as
-    expected.
-  - On guest EOI, the *physical distributor* active bit gets cleared,
-    but the LR.Active is left untouched (set).
-  - KVM clears the LR on VM exits when the physical distributor
-    active state has been cleared.
-
-(*): The host handling is slightly more complicated.  For some forwarded
-interrupts (shared), KVM directly sets the active state on the physical
-distributor before entering the guest, because the interrupt is never actually
-handled on the host (see details on the timer as an example below).  For other
-forwarded interrupts (non-shared) the host does not deactivate the interrupt
-when the host ISR completes, but leaves the interrupt active until the guest
-deactivates it.  Leaving the interrupt active is allowed, because Linux
-configures the physical GIC with EOIMode=1, which causes EOI operations to
-perform a priority drop allowing the GIC to receive other interrupts of the
-default priority.
-
-
-Forwarded Edge and Level Triggered PPIs and SPIs
-------------------------------------------------
-Forwarded physical interrupts injected should always be active on the
-physical distributor when injected to a guest.
-
-Level-triggered interrupts will keep the interrupt line to the GIC
-asserted, typically until the guest programs the device to deassert the
-line.  This means that the interrupt will remain pending on the physical
-distributor until the guest has reprogrammed the device.  Since we
-always run the VM with interrupts enabled on the CPU, a pending
-interrupt will exit the guest as soon as we switch into the guest,
-preventing the guest from ever making progress as the process repeats
-over and over.  Therefore, the active state on the physical distributor
-must be set when entering the guest, preventing the GIC from forwarding
-the pending interrupt to the CPU.  As soon as the guest deactivates the
-interrupt, the physical line is sampled by the hardware again and the host
-takes a new interrupt if and only if the physical line is still asserted.
-
-Edge-triggered interrupts do not exhibit the same problem with
-preventing guest execution that level-triggered interrupts do.  One
-option is to not use HW bit at all, and inject edge-triggered interrupts
-from a physical device as pure virtual interrupts.  But that would
-potentially slow down handling of the interrupt in the guest, because a
-physical interrupt occurring in the middle of the guest ISR would
-preempt the guest for the host to handle the interrupt.  Additionally,
-if you configure the system to handle interrupts on a separate physical
-core from that running your VCPU, you still have to interrupt the VCPU
-to queue the pending state onto the LR, even though the guest won't use
-this information until the guest ISR completes.  Therefore, the HW
-bit should always be set for forwarded edge-triggered interrupts.  With
-the HW bit set, the virtual interrupt is injected and additional
-physical interrupts occurring before the guest deactivates the interrupt
-simply mark the state on the physical distributor as Pending+Active.  As
-soon as the guest deactivates the interrupt, the host takes another
-interrupt if and only if there was a physical interrupt between injecting
-the forwarded interrupt to the guest and the guest deactivating the
-interrupt.
-
-Consequently, whenever we schedule a VCPU with one or more LRs with the
-HW bit set, the interrupt must also be active on the physical
-distributor.
-
-
-Forwarded LPIs
---------------
-LPIs, introduced in GICv3, are always edge-triggered and do not have an
-active state.  They become pending when a device signal them, and as
-soon as they are acked by the CPU, they are inactive again.
-
-It therefore doesn't make sense, and is not supported, to set the HW bit
-for physical LPIs that are forwarded to a VM as virtual interrupts,
-typically virtual SPIs.
-
-For LPIs, there is no other choice than to preempt the VCPU thread if
-necessary, and queue the pending state onto the LR.
-
-
-Putting It Together: The Architected Timer
-------------------------------------------
-The architected timer is a device that signals interrupts with level
-triggered semantics.  The timer hardware is directly accessed by VCPUs
-which program the timer to fire at some point in time.  Each VCPU on a
-system programs the timer to fire at different times, and therefore the
-hardware is multiplexed between multiple VCPUs.  This is implemented by
-context-switching the timer state along with each VCPU thread.
-
-However, this means that a scenario like the following is entirely
-possible, and in fact, typical:
-
-1.  KVM runs the VCPU
-2.  The guest programs the time to fire in T+100
-3.  The guest is idle and calls WFI (wait-for-interrupts)
-4.  The hardware traps to the host
-5.  KVM stores the timer state to memory and disables the hardware timer
-6.  KVM schedules a soft timer to fire in T+(100 - time since step 2)
-7.  KVM puts the VCPU thread to sleep (on a waitqueue)
-8.  The soft timer fires, waking up the VCPU thread
-9.  KVM reprograms the timer hardware with the VCPU's values
-10. KVM marks the timer interrupt as active on the physical distributor
-11. KVM injects a forwarded physical interrupt to the guest
-12. KVM runs the VCPU
-
-Notice that KVM injects a forwarded physical interrupt in step 11 without
-the corresponding interrupt having actually fired on the host.  That is
-exactly why we mark the timer interrupt as active in step 10, because
-the active state on the physical distributor is part of the state
-belonging to the timer hardware, which is context-switched along with
-the VCPU thread.
-
-If the guest does not idle because it is busy, the flow looks like this
-instead:
-
-1.  KVM runs the VCPU
-2.  The guest programs the time to fire in T+100
-4.  At T+100 the timer fires and a physical IRQ causes the VM to exit
-    (note that this initially only traps to EL2 and does not run the host ISR
-    until KVM has returned to the host).
-5.  With interrupts still disabled on the CPU coming back from the guest, KVM
-    stores the virtual timer state to memory and disables the virtual hw timer.
-6.  KVM looks at the timer state (in memory) and injects a forwarded physical
-    interrupt because it concludes the timer has expired.
-7.  KVM marks the timer interrupt as active on the physical distributor
-7.  KVM enables the timer, enables interrupts, and runs the VCPU
-
-Notice that again the forwarded physical interrupt is injected to the
-guest without having actually been handled on the host.  In this case it
-is because the physical interrupt is never actually seen by the host because the
-timer is disabled upon guest return, and the virtual forwarded interrupt is
-injected on the KVM guest entry path.

+ 4 - 0
Documentation/virtual/kvm/cpuid.txt

@@ -54,6 +54,10 @@ KVM_FEATURE_PV_UNHALT              ||     7 || guest checks this feature bit
                                    ||       || before enabling paravirtualized
                                    ||       || spinlock support.
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_TLB_FLUSH           ||     9 || guest checks this feature bit
+                                   ||       || before enabling paravirtualized
+                                   ||       || tlb flush.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.

+ 6 - 1
MAINTAINERS

@@ -7748,7 +7748,9 @@ F:	arch/powerpc/kernel/kvm*
 
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M:	Christian Borntraeger <borntraeger@de.ibm.com>
-M:	Cornelia Huck <cohuck@redhat.com>
+M:	Janosch Frank <frankja@linux.vnet.ibm.com>
+R:	David Hildenbrand <david@redhat.com>
+R:	Cornelia Huck <cohuck@redhat.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
@@ -9204,6 +9206,7 @@ MIPS GENERIC PLATFORM
 M:	Paul Burton <paul.burton@mips.com>
 L:	linux-mips@linux-mips.org
 S:	Supported
+F:	Documentation/devicetree/bindings/power/mti,mips-cpc.txt
 F:	arch/mips/generic/
 F:	arch/mips/tools/generic-board-config.sh
 
@@ -9943,6 +9946,7 @@ F:	drivers/nfc/nxp-nci
 
 OBJTOOL
 M:	Josh Poimboeuf <jpoimboe@redhat.com>
+M:	Peter Zijlstra <peterz@infradead.org>
 S:	Supported
 F:	tools/objtool/
 
@@ -12026,6 +12030,7 @@ F:	drivers/pci/hotplug/s390_pci_hpc.c
 S390 VFIO-CCW DRIVER
 M:	Cornelia Huck <cohuck@redhat.com>
 M:	Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+M:	Halil Pasic <pasic@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	kvm@vger.kernel.org
 S:	Supported

+ 3 - 4
Makefile

@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
-PATCHLEVEL = 15
+PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -729,7 +729,6 @@ endif
 
 ifeq ($(cc-name),clang)
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
 KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
@@ -747,9 +746,9 @@ else
 # These warnings generated too much noise in a regular build.
 # Use make W=1 to enable them (see scripts/Makefile.extrawarn)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif
 
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else

+ 1 - 1
arch/arm/include/asm/kvm_emulate.h

@@ -131,7 +131,7 @@ static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
 static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 {
 	unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
-	return cpsr_mode > USR_MODE;;
+	return cpsr_mode > USR_MODE;
 }
 
 static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)

+ 2 - 0
arch/arm/include/asm/kvm_host.h

@@ -48,6 +48,8 @@
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);

+ 2 - 1
arch/arm/include/asm/kvm_hyp.h

@@ -21,7 +21,6 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <asm/cp15.h>
-#include <asm/kvm_mmu.h>
 #include <asm/vfp.h>
 
 #define __hyp_text __section(.hyp.text) notrace
@@ -69,6 +68,8 @@
 #define HIFAR		__ACCESS_CP15(c6, 4, c0, 2)
 #define HPFAR		__ACCESS_CP15(c6, 4, c0, 4)
 #define ICIALLUIS	__ACCESS_CP15(c7, 0, c1, 0)
+#define BPIALLIS	__ACCESS_CP15(c7, 0, c1, 6)
+#define ICIMVAU		__ACCESS_CP15(c7, 0, c5, 1)
 #define ATS1CPR		__ACCESS_CP15(c7, 0, c8, 0)
 #define TLBIALLIS	__ACCESS_CP15(c8, 0, c3, 0)
 #define TLBIALL		__ACCESS_CP15(c8, 0, c7, 0)

+ 84 - 15
arch/arm/include/asm/kvm_mmu.h

@@ -37,6 +37,8 @@
 
 #include <linux/highmem.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/kvm_hyp.h>
 #include <asm/pgalloc.h>
 #include <asm/stage2_pgtable.h>
 
@@ -83,6 +85,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
 	return pmd;
 }
 
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+	pte_val(pte) &= ~L_PTE_XN;
+	return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~PMD_SECT_XN;
+	return pmd;
+}
+
 static inline void kvm_set_s2pte_readonly(pte_t *pte)
 {
 	pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
@@ -93,6 +107,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
 	return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
 }
 
+static inline bool kvm_s2pte_exec(pte_t *pte)
+{
+	return !(pte_val(*pte) & L_PTE_XN);
+}
+
 static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
 {
 	pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
@@ -103,6 +122,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 	return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
 }
 
+static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+{
+	return !(pmd_val(*pmd) & PMD_SECT_XN);
+}
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
@@ -126,10 +150,36 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
 }
 
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
-					       kvm_pfn_t pfn,
-					       unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+	/*
+	 * Clean the dcache to the Point of Coherency.
+	 *
+	 * We need to do this through a kernel mapping (using the
+	 * user-space mapping has proved to be the wrong
+	 * solution). For that, we need to kmap one page at a time,
+	 * and iterate over the range.
+	 */
+
+	VM_BUG_ON(size & ~PAGE_MASK);
+
+	while (size) {
+		void *va = kmap_atomic_pfn(pfn);
+
+		kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+		size -= PAGE_SIZE;
+		pfn++;
+
+		kunmap_atomic(va);
+	}
+}
+
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+						  unsigned long size)
 {
+	u32 iclsz;
+
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -141,23 +191,40 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
 	 *
 	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
-	 *
-	 * We need to do this through a kernel mapping (using the
-	 * user-space mapping has proved to be the wrong
-	 * solution). For that, we need to kmap one page at a time,
-	 * and iterate over the range.
 	 */
 
 	VM_BUG_ON(size & ~PAGE_MASK);
 
+	if (icache_is_vivt_asid_tagged())
+		return;
+
+	if (!icache_is_pipt()) {
+		/* any kind of VIPT cache */
+		__flush_icache_all();
+		return;
+	}
+
+	/*
+	 * CTR IminLine contains Log2 of the number of words in the
+	 * cache line, so we can get the number of words as
+	 * 2 << (IminLine - 1).  To get the number of bytes, we
+	 * multiply by 4 (the number of bytes in a 32-bit word), and
+	 * get 4 << (IminLine).
+	 */
+	iclsz = 4 << (read_cpuid(CPUID_CACHETYPE) & 0xf);
+
 	while (size) {
 		void *va = kmap_atomic_pfn(pfn);
+		void *end = va + PAGE_SIZE;
+		void *addr = va;
 
-		kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+		do {
+			write_sysreg(addr, ICIMVAU);
+			addr += iclsz;
+		} while (addr < end);
 
-		if (icache_is_pipt())
-			__cpuc_coherent_user_range((unsigned long)va,
-						   (unsigned long)va + PAGE_SIZE);
+		dsb(ishst);
+		isb();
 
 		size -= PAGE_SIZE;
 		pfn++;
@@ -165,9 +232,11 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
 		kunmap_atomic(va);
 	}
 
-	if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
-		/* any kind of VIPT cache */
-		__flush_icache_all();
+	/* Check if we need to invalidate the BTB */
+	if ((read_cpuid_ext(CPUID_EXT_MMFR1) >> 28) != 4) {
+		write_sysreg(0, BPIALLIS);
+		dsb(ishst);
+		isb();
 	}
 }
 

+ 2 - 2
arch/arm/include/asm/pgtable.h

@@ -102,8 +102,8 @@ extern pgprot_t		pgprot_s2_device;
 #define PAGE_HYP_EXEC		_MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY)
 #define PAGE_HYP_RO		_MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN)
 #define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
-#define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
+#define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY | L_PTE_XN)
+#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY | L_PTE_XN)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)

+ 1 - 0
arch/arm/kvm/hyp/switch.c

@@ -18,6 +18,7 @@
 
 #include <asm/kvm_asm.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 __asm__(".arch_extension     virt");
 

+ 1 - 0
arch/arm/kvm/hyp/tlb.c

@@ -19,6 +19,7 @@
  */
 
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 /**
  * Flush per-VMID TLBs

+ 0 - 1
arch/arm/mach-vt8500/Kconfig

@@ -13,7 +13,6 @@ config ARCH_WM8505
  	depends on ARCH_MULTI_V5
  	select ARCH_VT8500
  	select CPU_ARM926T
- 	help
 
 config ARCH_WM8750
 	bool "WonderMedia WM8750"

+ 21 - 0
arch/arm64/include/asm/assembler.h

@@ -435,6 +435,27 @@ alternative_endif
 	dsb	\domain
 	.endm
 
+/*
+ * Macro to perform an instruction cache maintenance for the interval
+ * [start, end)
+ *
+ * 	start, end:	virtual addresses describing the region
+ *	label:		A label to branch to on user fault.
+ * 	Corrupts:	tmp1, tmp2
+ */
+	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+	icache_line_size \tmp1, \tmp2
+	sub	\tmp2, \tmp1, #1
+	bic	\tmp2, \start, \tmp2
+9997:
+USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
+	add	\tmp2, \tmp2, \tmp1
+	cmp	\tmp2, \end
+	b.lo	9997b
+	dsb	ish
+	isb
+	.endm
+
 /*
  * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
  */

+ 7 - 0
arch/arm64/include/asm/cacheflush.h

@@ -52,6 +52,12 @@
  *		- start  - virtual start address
  *		- end    - virtual end address
  *
+ *	invalidate_icache_range(start, end)
+ *
+ *		Invalidate the I-cache in the region described by start, end.
+ *		- start  - virtual start address
+ *		- end    - virtual end address
+ *
  *	__flush_cache_user_range(start, end)
  *
  *		Ensure coherency between the I-cache and the D-cache in the
@@ -66,6 +72,7 @@
  *		- size   - region size
  */
 extern void flush_icache_range(unsigned long start, unsigned long end);
+extern int  invalidate_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
 extern void __inval_dcache_area(void *addr, size_t len);
 extern void __clean_dcache_area_poc(void *addr, size_t len);

+ 1 - 1
arch/arm64/include/asm/cputype.h

@@ -20,7 +20,7 @@
 
 #define MPIDR_UP_BITMASK	(0x1 << 30)
 #define MPIDR_MT_BITMASK	(0x1 << 24)
-#define MPIDR_HWID_BITMASK	0xff00ffffff
+#define MPIDR_HWID_BITMASK	0xff00ffffffUL
 
 #define MPIDR_LEVEL_BITS_SHIFT	3
 #define MPIDR_LEVEL_BITS	(1 << MPIDR_LEVEL_BITS_SHIFT)

+ 1 - 1
arch/arm64/include/asm/hugetlb.h

@@ -22,7 +22,7 @@
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
-	return *ptep;
+	return READ_ONCE(*ptep);
 }
 
 

+ 2 - 0
arch/arm64/include/asm/kvm_host.h

@@ -48,6 +48,8 @@
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);

+ 0 - 1
arch/arm64/include/asm/kvm_hyp.h

@@ -20,7 +20,6 @@
 
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
 #include <asm/sysreg.h>
 
 #define __hyp_text __section(.hyp.text) notrace

+ 40 - 14
arch/arm64/include/asm/kvm_mmu.h

@@ -173,32 +173,54 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
 	return pmd;
 }
 
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+	pte_val(pte) &= ~PTE_S2_XN;
+	return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~PMD_S2_XN;
+	return pmd;
+}
+
+static inline void kvm_set_s2pte_readonly(pte_t *ptep)
 {
 	pteval_t old_pteval, pteval;
 
-	pteval = READ_ONCE(pte_val(*pte));
+	pteval = READ_ONCE(pte_val(*ptep));
 	do {
 		old_pteval = pteval;
 		pteval &= ~PTE_S2_RDWR;
 		pteval |= PTE_S2_RDONLY;
-		pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
 	} while (pteval != old_pteval);
 }
 
-static inline bool kvm_s2pte_readonly(pte_t *pte)
+static inline bool kvm_s2pte_readonly(pte_t *ptep)
 {
-	return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+	return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
 }
 
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+static inline bool kvm_s2pte_exec(pte_t *ptep)
 {
-	kvm_set_s2pte_readonly((pte_t *)pmd);
+	return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
 }
 
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
 {
-	return kvm_s2pte_readonly((pte_t *)pmd);
+	kvm_set_s2pte_readonly((pte_t *)pmdp);
+}
+
+static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
+{
+	return kvm_s2pte_readonly((pte_t *)pmdp);
+}
+
+static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
+{
+	return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
 }
 
 static inline bool kvm_page_empty(void *ptr)
@@ -230,21 +252,25 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
-					       kvm_pfn_t pfn,
-					       unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 {
 	void *va = page_address(pfn_to_page(pfn));
 
 	kvm_flush_dcache_to_poc(va, size);
+}
 
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+						  unsigned long size)
+{
 	if (icache_is_aliasing()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
 		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
-		flush_icache_range((unsigned long)va,
-				   (unsigned long)va + size);
+		void *va = page_address(pfn_to_page(pfn));
+
+		invalidate_icache_range((unsigned long)va,
+					(unsigned long)va + size);
 	}
 }
 

+ 2 - 2
arch/arm64/include/asm/mmu_context.h

@@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void cpu_replace_ttbr1(pgd_t *pgd)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
 {
 	typedef void (ttbr_replace_func)(phys_addr_t);
 	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
 	ttbr_replace_func *replace_phys;
 
-	phys_addr_t pgd_phys = virt_to_phys(pgd);
+	phys_addr_t pgd_phys = virt_to_phys(pgdp);
 
 	replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
 

+ 22 - 22
arch/arm64/include/asm/pgalloc.h

@@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return (pmd_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
 {
-	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-	free_page((unsigned long)pmd);
+	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+	free_page((unsigned long)pmdp);
 }
 
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
-	set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
+	set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
-	__pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+	__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
 }
 #else
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
 	BUILD_BUG();
 }
@@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 	return (pud_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
 {
-	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-	free_page((unsigned long)pud);
+	BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
+	free_page((unsigned long)pudp);
 }
 
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
-	set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
+	set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
 {
-	__pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+	__pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
 }
 #else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
 	BUILD_BUG();
 }
 #endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 /*
  * Free a PTE table.
  */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
 {
-	if (pte)
-		free_page((unsigned long)pte);
+	if (ptep)
+		free_page((unsigned long)ptep);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 	__free_page(pte);
 }
 
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
 				  pmdval_t prot)
 {
-	set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
+	set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
 }
 
 /*

+ 2 - 0
arch/arm64/include/asm/pgtable-hwdef.h

@@ -187,9 +187,11 @@
  */
 #define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
+#define PTE_S2_XN		(_AT(pteval_t, 2) << 53)  /* XN[1:0] */
 
 #define PMD_S2_RDONLY		(_AT(pmdval_t, 1) << 6)   /* HAP[2:1] */
 #define PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+#define PMD_S2_XN		(_AT(pmdval_t, 2) << 53)  /* XN[1:0] */
 
 /*
  * Memory Attribute override for Stage-2 (MemAttr[3:0])

+ 2 - 2
arch/arm64/include/asm/pgtable-prot.h

@@ -67,8 +67,8 @@
 #define PAGE_HYP_RO		__pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 
-#define PAGE_S2			__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+#define PAGE_S2			__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
 
 #define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)

+ 13 - 10
arch/arm64/include/asm/pgtable.h

@@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
 
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
-	*ptep = pte;
+	WRITE_ONCE(*ptep, pte);
 
 	/*
 	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
+	pte_t old_pte;
+
 	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
 		__sync_icache_dcache(pte, addr);
 
@@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * hardware updates of the pte (ptep_set_access_flags safely changes
 	 * valid ptes without going through an invalid entry).
 	 */
-	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+	old_pte = READ_ONCE(*ptep);
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
 	   (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
 		VM_WARN_ONCE(!pte_young(pte),
 			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
-			     __func__, pte_val(*ptep), pte_val(pte));
-		VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+			     __func__, pte_val(old_pte), pte_val(pte));
+		VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
 			     "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
-			     __func__, pte_val(*ptep), pte_val(pte));
+			     __func__, pte_val(old_pte), pte_val(pte));
 	}
 
 	set_pte(ptep, pte);
@@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-	*pmdp = pmd;
+	WRITE_ONCE(*pmdp, pmd);
 	dsb(ishst);
 	isb();
 }
@@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
 {
-	*pudp = pud;
+	WRITE_ONCE(*pudp, pud);
 	dsb(ishst);
 	isb();
 }
@@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
-#define pmd_offset_phys(dir, addr)	(pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
 #define pmd_offset(dir, addr)		((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
 #define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-	*pgdp = pgd;
+	WRITE_ONCE(*pgdp, pgd);
 	dsb(ishst);
 }
 
@@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 /* Find an entry in the frst-level page table. */
 #define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-#define pud_offset_phys(dir, addr)	(pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset_phys(dir, addr)	(pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
 #define pud_offset(dir, addr)		((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
 #define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr))

+ 2 - 2
arch/arm64/kernel/acpi.c

@@ -230,10 +230,10 @@ void __init acpi_boot_table_init(void)
 
 done:
 	if (acpi_disabled) {
-		if (earlycon_init_is_deferred)
+		if (earlycon_acpi_spcr_enable)
 			early_init_dt_scan_chosen_stdout();
 	} else {
-		parse_spcr(earlycon_init_is_deferred);
+		acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
 		if (IS_ENABLED(CONFIG_ACPI_BGRT))
 			acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
 	}

+ 9 - 0
arch/arm64/kernel/cpu_errata.c

@@ -406,6 +406,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
 		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
 	},
+	{
+		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+		.enable = qcom_enable_link_stack_sanitization,
+	},
+	{
+		.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+	},
 	{
 		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
 		MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),

+ 1 - 1
arch/arm64/kernel/efi.c

@@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
 				  unsigned long addr, void *data)
 {
 	efi_memory_desc_t *md = data;
-	pte_t pte = *ptep;
+	pte_t pte = READ_ONCE(*ptep);
 
 	if (md->attribute & EFI_MEMORY_RO)
 		pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));

+ 76 - 72
arch/arm64/kernel/hibernate.c

@@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
 				 gfp_t mask)
 {
 	int rc = 0;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
 	unsigned long dst = (unsigned long)allocator(mask);
 
 	if (!dst) {
@@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
 	memcpy((void *)dst, src_start, length);
 	flush_icache_range(dst, dst + length);
 
-	pgd = pgd_offset_raw(allocator(mask), dst_addr);
-	if (pgd_none(*pgd)) {
-		pud = allocator(mask);
-		if (!pud) {
+	pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+	if (pgd_none(READ_ONCE(*pgdp))) {
+		pudp = allocator(mask);
+		if (!pudp) {
 			rc = -ENOMEM;
 			goto out;
 		}
-		pgd_populate(&init_mm, pgd, pud);
+		pgd_populate(&init_mm, pgdp, pudp);
 	}
 
-	pud = pud_offset(pgd, dst_addr);
-	if (pud_none(*pud)) {
-		pmd = allocator(mask);
-		if (!pmd) {
+	pudp = pud_offset(pgdp, dst_addr);
+	if (pud_none(READ_ONCE(*pudp))) {
+		pmdp = allocator(mask);
+		if (!pmdp) {
 			rc = -ENOMEM;
 			goto out;
 		}
-		pud_populate(&init_mm, pud, pmd);
+		pud_populate(&init_mm, pudp, pmdp);
 	}
 
-	pmd = pmd_offset(pud, dst_addr);
-	if (pmd_none(*pmd)) {
-		pte = allocator(mask);
-		if (!pte) {
+	pmdp = pmd_offset(pudp, dst_addr);
+	if (pmd_none(READ_ONCE(*pmdp))) {
+		ptep = allocator(mask);
+		if (!ptep) {
 			rc = -ENOMEM;
 			goto out;
 		}
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmdp, ptep);
 	}
 
-	pte = pte_offset_kernel(pmd, dst_addr);
-	set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+	ptep = pte_offset_kernel(pmdp, dst_addr);
+	set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
 
 	/*
 	 * Load our new page tables. A strict BBM approach requires that we
@@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
 	 */
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
-	write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
+	write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
 	isb();
 
 	*phys_dst_addr = virt_to_phys((void *)dst);
@@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
 	return ret;
 }
 
-static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 {
-	pte_t pte = *src_pte;
+	pte_t pte = READ_ONCE(*src_ptep);
 
 	if (pte_valid(pte)) {
 		/*
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
 		 * read only (code, rodata). Clear the RDONLY bit from
 		 * the temporary mappings we use during restore.
 		 */
-		set_pte(dst_pte, pte_mkwrite(pte));
+		set_pte(dst_ptep, pte_mkwrite(pte));
 	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
 		/*
 		 * debug_pagealloc will removed the PTE_VALID bit if
@@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
 		 */
 		BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-		set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
+		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
 	}
 }
 
-static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
 		    unsigned long end)
 {
-	pte_t *src_pte;
-	pte_t *dst_pte;
+	pte_t *src_ptep;
+	pte_t *dst_ptep;
 	unsigned long addr = start;
 
-	dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
-	if (!dst_pte)
+	dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+	if (!dst_ptep)
 		return -ENOMEM;
-	pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
-	dst_pte = pte_offset_kernel(dst_pmd, start);
+	pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+	dst_ptep = pte_offset_kernel(dst_pmdp, start);
 
-	src_pte = pte_offset_kernel(src_pmd, start);
+	src_ptep = pte_offset_kernel(src_pmdp, start);
 	do {
-		_copy_pte(dst_pte, src_pte, addr);
-	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+		_copy_pte(dst_ptep, src_ptep, addr);
+	} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
 
 	return 0;
 }
 
-static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
 		    unsigned long end)
 {
-	pmd_t *src_pmd;
-	pmd_t *dst_pmd;
+	pmd_t *src_pmdp;
+	pmd_t *dst_pmdp;
 	unsigned long next;
 	unsigned long addr = start;
 
-	if (pud_none(*dst_pud)) {
-		dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-		if (!dst_pmd)
+	if (pud_none(READ_ONCE(*dst_pudp))) {
+		dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+		if (!dst_pmdp)
 			return -ENOMEM;
-		pud_populate(&init_mm, dst_pud, dst_pmd);
+		pud_populate(&init_mm, dst_pudp, dst_pmdp);
 	}
-	dst_pmd = pmd_offset(dst_pud, start);
+	dst_pmdp = pmd_offset(dst_pudp, start);
 
-	src_pmd = pmd_offset(src_pud, start);
+	src_pmdp = pmd_offset(src_pudp, start);
 	do {
+		pmd_t pmd = READ_ONCE(*src_pmdp);
+
 		next = pmd_addr_end(addr, end);
-		if (pmd_none(*src_pmd))
+		if (pmd_none(pmd))
 			continue;
-		if (pmd_table(*src_pmd)) {
-			if (copy_pte(dst_pmd, src_pmd, addr, next))
+		if (pmd_table(pmd)) {
+			if (copy_pte(dst_pmdp, src_pmdp, addr, next))
 				return -ENOMEM;
 		} else {
-			set_pmd(dst_pmd,
-				__pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+			set_pmd(dst_pmdp,
+				__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
 		}
-	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
+	} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
 
 	return 0;
 }
 
-static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
 		    unsigned long end)
 {
-	pud_t *dst_pud;
-	pud_t *src_pud;
+	pud_t *dst_pudp;
+	pud_t *src_pudp;
 	unsigned long next;
 	unsigned long addr = start;
 
-	if (pgd_none(*dst_pgd)) {
-		dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-		if (!dst_pud)
+	if (pgd_none(READ_ONCE(*dst_pgdp))) {
+		dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+		if (!dst_pudp)
 			return -ENOMEM;
-		pgd_populate(&init_mm, dst_pgd, dst_pud);
+		pgd_populate(&init_mm, dst_pgdp, dst_pudp);
 	}
-	dst_pud = pud_offset(dst_pgd, start);
+	dst_pudp = pud_offset(dst_pgdp, start);
 
-	src_pud = pud_offset(src_pgd, start);
+	src_pudp = pud_offset(src_pgdp, start);
 	do {
+		pud_t pud = READ_ONCE(*src_pudp);
+
 		next = pud_addr_end(addr, end);
-		if (pud_none(*src_pud))
+		if (pud_none(pud))
 			continue;
-		if (pud_table(*(src_pud))) {
-			if (copy_pmd(dst_pud, src_pud, addr, next))
+		if (pud_table(pud)) {
+			if (copy_pmd(dst_pudp, src_pudp, addr, next))
 				return -ENOMEM;
 		} else {
-			set_pud(dst_pud,
-				__pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+			set_pud(dst_pudp,
+				__pud(pud_val(pud) & ~PMD_SECT_RDONLY));
 		}
-	} while (dst_pud++, src_pud++, addr = next, addr != end);
+	} while (dst_pudp++, src_pudp++, addr = next, addr != end);
 
 	return 0;
 }
 
-static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
 			    unsigned long end)
 {
 	unsigned long next;
 	unsigned long addr = start;
-	pgd_t *src_pgd = pgd_offset_k(start);
+	pgd_t *src_pgdp = pgd_offset_k(start);
 
-	dst_pgd = pgd_offset_raw(dst_pgd, start);
+	dst_pgdp = pgd_offset_raw(dst_pgdp, start);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none(*src_pgd))
+		if (pgd_none(READ_ONCE(*src_pgdp)))
 			continue;
-		if (copy_pud(dst_pgd, src_pgd, addr, next))
+		if (copy_pud(dst_pgdp, src_pgdp, addr, next))
 			return -ENOMEM;
-	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
+	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
 
 	return 0;
 }

+ 12 - 3
arch/arm64/kvm/guest.c

@@ -361,10 +361,16 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
+	int ret = 0;
+
+	vcpu_load(vcpu);
+
 	trace_kvm_set_guest_debug(vcpu, dbg->control);
 
-	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
-		return -EINVAL;
+	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
 		vcpu->guest_debug = dbg->control;
@@ -378,7 +384,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 		/* If not enabled clear all flags */
 		vcpu->guest_debug = 0;
 	}
-	return 0;
+
+out:
+	vcpu_put(vcpu);
+	return ret;
 }
 
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,

+ 1 - 0
arch/arm64/kvm/hyp/debug-sr.c

@@ -21,6 +21,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 #define read_debug(r,n)		read_sysreg(r##n##_el1)
 #define write_debug(v,r,n)	write_sysreg(v, r##n##_el1)

+ 4 - 1
arch/arm64/kvm/hyp/switch.c

@@ -24,6 +24,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 #include <asm/fpsimd.h>
 #include <asm/debug-monitors.h>
 
@@ -406,8 +407,10 @@ again:
 		u32 midr = read_cpuid_id();
 
 		/* Apply BTAC predictors mitigation to all Falkor chips */
-		if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+		if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+		    ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
 			__qcom_hyp_sanitize_btac_predictors();
+		}
 	}
 
 	fp_enabled = __fpsimd_enabled();

+ 1 - 0
arch/arm64/kvm/hyp/tlb.c

@@ -16,6 +16,7 @@
  */
 
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 #include <asm/tlbflush.h>
 
 static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)

+ 22 - 10
arch/arm64/mm/cache.S

@@ -60,16 +60,7 @@ user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	b.lo	1b
 	dsb	ish
 
-	icache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x4, x0, x3
-1:
-USER(9f, ic	ivau, x4	)		// invalidate I line PoU
-	add	x4, x4, x2
-	cmp	x4, x1
-	b.lo	1b
-	dsb	ish
-	isb
+	invalidate_icache_by_line x0, x1, x2, x3, 9f
 	mov	x0, #0
 1:
 	uaccess_ttbr0_disable x1, x2
@@ -80,6 +71,27 @@ USER(9f, ic	ivau, x4	)		// invalidate I line PoU
 ENDPROC(flush_icache_range)
 ENDPROC(__flush_cache_user_range)
 
+/*
+ *	invalidate_icache_range(start,end)
+ *
+ *	Ensure that the I cache is invalid within specified region.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(invalidate_icache_range)
+	uaccess_ttbr0_enable x2, x3, x4
+
+	invalidate_icache_by_line x0, x1, x2, x3, 2f
+	mov	x0, xzr
+1:
+	uaccess_ttbr0_disable x1, x2
+	ret
+2:
+	mov	x0, #-EFAULT
+	b	1b
+ENDPROC(invalidate_icache_range)
+
 /*
  *	__flush_dcache_area(kaddr, size)
  *

+ 30 - 24
arch/arm64/mm/dump.c

@@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
 {
-	pte_t *pte = pte_offset_kernel(pmd, 0UL);
+	pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
 	unsigned long addr;
 	unsigned i;
 
-	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+	for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
 		addr = start + i * PAGE_SIZE;
-		note_page(st, addr, 4, pte_val(*pte));
+		note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
 	}
 }
 
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
 {
-	pmd_t *pmd = pmd_offset(pud, 0UL);
+	pmd_t *pmdp = pmd_offset(pudp, 0UL);
 	unsigned long addr;
 	unsigned i;
 
-	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+	for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+		pmd_t pmd = READ_ONCE(*pmdp);
+
 		addr = start + i * PMD_SIZE;
-		if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-			note_page(st, addr, 3, pmd_val(*pmd));
+		if (pmd_none(pmd) || pmd_sect(pmd)) {
+			note_page(st, addr, 3, pmd_val(pmd));
 		} else {
-			BUG_ON(pmd_bad(*pmd));
-			walk_pte(st, pmd, addr);
+			BUG_ON(pmd_bad(pmd));
+			walk_pte(st, pmdp, addr);
 		}
 	}
 }
 
-static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
 {
-	pud_t *pud = pud_offset(pgd, 0UL);
+	pud_t *pudp = pud_offset(pgdp, 0UL);
 	unsigned long addr;
 	unsigned i;
 
-	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+	for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+		pud_t pud = READ_ONCE(*pudp);
+
 		addr = start + i * PUD_SIZE;
-		if (pud_none(*pud) || pud_sect(*pud)) {
-			note_page(st, addr, 2, pud_val(*pud));
+		if (pud_none(pud) || pud_sect(pud)) {
+			note_page(st, addr, 2, pud_val(pud));
 		} else {
-			BUG_ON(pud_bad(*pud));
-			walk_pmd(st, pud, addr);
+			BUG_ON(pud_bad(pud));
+			walk_pmd(st, pudp, addr);
 		}
 	}
 }
@@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
 		     unsigned long start)
 {
-	pgd_t *pgd = pgd_offset(mm, 0UL);
+	pgd_t *pgdp = pgd_offset(mm, 0UL);
 	unsigned i;
 	unsigned long addr;
 
-	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+	for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+		pgd_t pgd = READ_ONCE(*pgdp);
+
 		addr = start + i * PGDIR_SIZE;
-		if (pgd_none(*pgd)) {
-			note_page(st, addr, 1, pgd_val(*pgd));
+		if (pgd_none(pgd)) {
+			note_page(st, addr, 1, pgd_val(pgd));
 		} else {
-			BUG_ON(pgd_bad(*pgd));
-			walk_pud(st, pgd, addr);
+			BUG_ON(pgd_bad(pgd));
+			walk_pud(st, pgdp, addr);
 		}
 	}
 }

+ 25 - 19
arch/arm64/mm/fault.c

@@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
 void show_pte(unsigned long addr)
 {
 	struct mm_struct *mm;
-	pgd_t *pgd;
+	pgd_t *pgdp;
+	pgd_t pgd;
 
 	if (addr < TASK_SIZE) {
 		/* TTBR0 */
@@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
 		return;
 	}
 
-	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
 		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
 		 VA_BITS, mm->pgd);
-	pgd = pgd_offset(mm, addr);
-	pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
+	pgdp = pgd_offset(mm, addr);
+	pgd = READ_ONCE(*pgdp);
+	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
 
 	do {
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *pte;
+		pud_t *pudp, pud;
+		pmd_t *pmdp, pmd;
+		pte_t *ptep, pte;
 
-		if (pgd_none(*pgd) || pgd_bad(*pgd))
+		if (pgd_none(pgd) || pgd_bad(pgd))
 			break;
 
-		pud = pud_offset(pgd, addr);
-		pr_cont(", *pud=%016llx", pud_val(*pud));
-		if (pud_none(*pud) || pud_bad(*pud))
+		pudp = pud_offset(pgdp, addr);
+		pud = READ_ONCE(*pudp);
+		pr_cont(", pud=%016llx", pud_val(pud));
+		if (pud_none(pud) || pud_bad(pud))
 			break;
 
-		pmd = pmd_offset(pud, addr);
-		pr_cont(", *pmd=%016llx", pmd_val(*pmd));
-		if (pmd_none(*pmd) || pmd_bad(*pmd))
+		pmdp = pmd_offset(pudp, addr);
+		pmd = READ_ONCE(*pmdp);
+		pr_cont(", pmd=%016llx", pmd_val(pmd));
+		if (pmd_none(pmd) || pmd_bad(pmd))
 			break;
 
-		pte = pte_offset_map(pmd, addr);
-		pr_cont(", *pte=%016llx", pte_val(*pte));
-		pte_unmap(pte);
+		ptep = pte_offset_map(pmdp, addr);
+		pte = READ_ONCE(*ptep);
+		pr_cont(", pte=%016llx", pte_val(pte));
+		pte_unmap(ptep);
 	} while(0);
 
 	pr_cont("\n");
@@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 			  pte_t entry, int dirty)
 {
 	pteval_t old_pteval, pteval;
+	pte_t pte = READ_ONCE(*ptep);
 
-	if (pte_same(*ptep, entry))
+	if (pte_same(pte, entry))
 		return 0;
 
 	/* only preserve the access flags and write permission */
@@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	 * (calculated as: a & b == ~(~a | ~b)).
 	 */
 	pte_val(entry) ^= PTE_RDONLY;
-	pteval = READ_ONCE(pte_val(*ptep));
+	pteval = pte_val(pte);
 	do {
 		old_pteval = pteval;
 		pteval ^= PTE_RDONLY;

+ 43 - 51
arch/arm64/mm/hugetlbpage.c

@@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, size_t *pgsize)
 {
-	pgd_t *pgd = pgd_offset(mm, addr);
-	pud_t *pud;
-	pmd_t *pmd;
+	pgd_t *pgdp = pgd_offset(mm, addr);
+	pud_t *pudp;
+	pmd_t *pmdp;
 
 	*pgsize = PAGE_SIZE;
-	pud = pud_offset(pgd, addr);
-	pmd = pmd_offset(pud, addr);
-	if ((pte_t *)pmd == ptep) {
+	pudp = pud_offset(pgdp, addr);
+	pmdp = pmd_offset(pudp, addr);
+	if ((pte_t *)pmdp == ptep) {
 		*pgsize = PMD_SIZE;
 		return CONT_PMDS;
 	}
@@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 
 	clear_flush(mm, addr, ptep, pgsize, ncontig);
 
-	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
-		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
-			 pte_val(pfn_pte(pfn, hugeprot)));
+	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
 		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
-	}
 }
 
 void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *huge_pte_alloc(struct mm_struct *mm,
 		      unsigned long addr, unsigned long sz)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pte_t *pte = NULL;
-
-	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
-	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
-	if (!pud)
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	pudp = pud_alloc(mm, pgdp, addr);
+	if (!pudp)
 		return NULL;
 
 	if (sz == PUD_SIZE) {
-		pte = (pte_t *)pud;
+		ptep = (pte_t *)pudp;
 	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
-		pmd_t *pmd = pmd_alloc(mm, pud, addr);
+		pmdp = pmd_alloc(mm, pudp, addr);
 
 		WARN_ON(addr & (sz - 1));
 		/*
@@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 		 * will be no pte_unmap() to correspond with this
 		 * pte_alloc_map().
 		 */
-		pte = pte_alloc_map(mm, pmd, addr);
+		ptep = pte_alloc_map(mm, pmdp, addr);
 	} else if (sz == PMD_SIZE) {
 		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
-		    pud_none(*pud))
-			pte = huge_pmd_share(mm, addr, pud);
+		    pud_none(READ_ONCE(*pudp)))
+			ptep = huge_pmd_share(mm, addr, pudp);
 		else
-			pte = (pte_t *)pmd_alloc(mm, pud, addr);
+			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
 	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
-		pmd_t *pmd;
-
-		pmd = pmd_alloc(mm, pud, addr);
+		pmdp = pmd_alloc(mm, pudp, addr);
 		WARN_ON(addr & (sz - 1));
-		return (pte_t *)pmd;
+		return (pte_t *)pmdp;
 	}
 
-	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
-	       sz, pte, pte_val(*pte));
-	return pte;
+	return ptep;
 }
 
 pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
+	pgd_t *pgdp;
+	pud_t *pudp, pud;
+	pmd_t *pmdp, pmd;
 
-	pgd = pgd_offset(mm, addr);
-	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
-	if (!pgd_present(*pgd))
+	pgdp = pgd_offset(mm, addr);
+	if (!pgd_present(READ_ONCE(*pgdp)))
 		return NULL;
 
-	pud = pud_offset(pgd, addr);
-	if (sz != PUD_SIZE && pud_none(*pud))
+	pudp = pud_offset(pgdp, addr);
+	pud = READ_ONCE(*pudp);
+	if (sz != PUD_SIZE && pud_none(pud))
 		return NULL;
 	/* hugepage or swap? */
-	if (pud_huge(*pud) || !pud_present(*pud))
-		return (pte_t *)pud;
+	if (pud_huge(pud) || !pud_present(pud))
+		return (pte_t *)pudp;
 	/* table; check the next level */
 
 	if (sz == CONT_PMD_SIZE)
 		addr &= CONT_PMD_MASK;
 
-	pmd = pmd_offset(pud, addr);
+	pmdp = pmd_offset(pudp, addr);
+	pmd = READ_ONCE(*pmdp);
 	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
-	    pmd_none(*pmd))
+	    pmd_none(pmd))
 		return NULL;
-	if (pmd_huge(*pmd) || !pmd_present(*pmd))
-		return (pte_t *)pmd;
+	if (pmd_huge(pmd) || !pmd_present(pmd))
+		return (pte_t *)pmdp;
 
-	if (sz == CONT_PTE_SIZE) {
-		pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
-		return pte;
-	}
+	if (sz == CONT_PTE_SIZE)
+		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
 
 	return NULL;
 }
@@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	size_t pgsize;
 	pte_t pte;
 
-	if (!pte_cont(*ptep)) {
+	if (!pte_cont(READ_ONCE(*ptep))) {
 		ptep_set_wrprotect(mm, addr, ptep);
 		return;
 	}
@@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
 	size_t pgsize;
 	int ncontig;
 
-	if (!pte_cont(*ptep)) {
+	if (!pte_cont(READ_ONCE(*ptep))) {
 		ptep_clear_flush(vma, addr, ptep);
 		return;
 	}

+ 35 - 35
arch/arm64/mm/kasan_init.c

@@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
 	return __pa(p);
 }
 
-static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
 				      bool early)
 {
-	if (pmd_none(*pmd)) {
+	if (pmd_none(READ_ONCE(*pmdp))) {
 		phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
 					     : kasan_alloc_zeroed_page(node);
-		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
 	}
 
-	return early ? pte_offset_kimg(pmd, addr)
-		     : pte_offset_kernel(pmd, addr);
+	return early ? pte_offset_kimg(pmdp, addr)
+		     : pte_offset_kernel(pmdp, addr);
 }
 
-static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
 				      bool early)
 {
-	if (pud_none(*pud)) {
+	if (pud_none(READ_ONCE(*pudp))) {
 		phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
 					     : kasan_alloc_zeroed_page(node);
-		__pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
+		__pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
 	}
 
-	return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
+	return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
 }
 
-static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
+static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
 				      bool early)
 {
-	if (pgd_none(*pgd)) {
+	if (pgd_none(READ_ONCE(*pgdp))) {
 		phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
 					     : kasan_alloc_zeroed_page(node);
-		__pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
+		__pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
 	}
 
-	return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
+	return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
 }
 
-static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
 				      unsigned long end, int node, bool early)
 {
 	unsigned long next;
-	pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
+	pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
 
 	do {
 		phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
 					      : kasan_alloc_zeroed_page(node);
 		next = addr + PAGE_SIZE;
-		set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
-	} while (pte++, addr = next, addr != end && pte_none(*pte));
+		set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+	} while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
 }
 
-static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
 				      unsigned long end, int node, bool early)
 {
 	unsigned long next;
-	pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
+	pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
 
 	do {
 		next = pmd_addr_end(addr, end);
-		kasan_pte_populate(pmd, addr, next, node, early);
-	} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+		kasan_pte_populate(pmdp, addr, next, node, early);
+	} while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
 }
 
-static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
 				      unsigned long end, int node, bool early)
 {
 	unsigned long next;
-	pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
+	pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
 
 	do {
 		next = pud_addr_end(addr, end);
-		kasan_pmd_populate(pud, addr, next, node, early);
-	} while (pud++, addr = next, addr != end && pud_none(*pud));
+		kasan_pmd_populate(pudp, addr, next, node, early);
+	} while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
 }
 
 static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
 				      int node, bool early)
 {
 	unsigned long next;
-	pgd_t *pgd;
+	pgd_t *pgdp;
 
-	pgd = pgd_offset_k(addr);
+	pgdp = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		kasan_pud_populate(pgd, addr, next, node, early);
-	} while (pgd++, addr = next, addr != end);
+		kasan_pud_populate(pgdp, addr, next, node, early);
+	} while (pgdp++, addr = next, addr != end);
 }
 
 /* The early shadow maps everything to a single page of zeroes */
@@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
  */
 void __init kasan_copy_shadow(pgd_t *pgdir)
 {
-	pgd_t *pgd, *pgd_new, *pgd_end;
+	pgd_t *pgdp, *pgdp_new, *pgdp_end;
 
-	pgd = pgd_offset_k(KASAN_SHADOW_START);
-	pgd_end = pgd_offset_k(KASAN_SHADOW_END);
-	pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+	pgdp = pgd_offset_k(KASAN_SHADOW_START);
+	pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
+	pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
 	do {
-		set_pgd(pgd_new, *pgd);
-	} while (pgd++, pgd_new++, pgd != pgd_end);
+		set_pgd(pgdp_new, READ_ONCE(*pgdp));
+	} while (pgdp++, pgdp_new++, pgdp != pgdp_end);
 }
 
 static void __init clear_pgds(unsigned long start,

+ 148 - 134
arch/arm64/mm/mmu.c

@@ -125,45 +125,48 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
 	return ((old ^ new) & ~mask) == 0;
 }
 
-static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
 		     phys_addr_t phys, pgprot_t prot)
 {
-	pte_t *pte;
+	pte_t *ptep;
 
-	pte = pte_set_fixmap_offset(pmd, addr);
+	ptep = pte_set_fixmap_offset(pmdp, addr);
 	do {
-		pte_t old_pte = *pte;
+		pte_t old_pte = READ_ONCE(*ptep);
 
-		set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
+		set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
 
 		/*
 		 * After the PTE entry has been populated once, we
 		 * only allow updates to the permission attributes.
 		 */
-		BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+		BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
+					      READ_ONCE(pte_val(*ptep))));
 
 		phys += PAGE_SIZE;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
 
 	pte_clear_fixmap();
 }
 
-static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 				unsigned long end, phys_addr_t phys,
 				pgprot_t prot,
 				phys_addr_t (*pgtable_alloc)(void),
 				int flags)
 {
 	unsigned long next;
+	pmd_t pmd = READ_ONCE(*pmdp);
 
-	BUG_ON(pmd_sect(*pmd));
-	if (pmd_none(*pmd)) {
+	BUG_ON(pmd_sect(pmd));
+	if (pmd_none(pmd)) {
 		phys_addr_t pte_phys;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc();
-		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+		pmd = READ_ONCE(*pmdp);
 	}
-	BUG_ON(pmd_bad(*pmd));
+	BUG_ON(pmd_bad(pmd));
 
 	do {
 		pgprot_t __prot = prot;
@@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
 		    (flags & NO_CONT_MAPPINGS) == 0)
 			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-		init_pte(pmd, addr, next, phys, __prot);
+		init_pte(pmdp, addr, next, phys, __prot);
 
 		phys += next - addr;
 	} while (addr = next, addr != end);
 }
 
-static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
 		     phys_addr_t phys, pgprot_t prot,
 		     phys_addr_t (*pgtable_alloc)(void), int flags)
 {
 	unsigned long next;
-	pmd_t *pmd;
+	pmd_t *pmdp;
 
-	pmd = pmd_set_fixmap_offset(pud, addr);
+	pmdp = pmd_set_fixmap_offset(pudp, addr);
 	do {
-		pmd_t old_pmd = *pmd;
+		pmd_t old_pmd = READ_ONCE(*pmdp);
 
 		next = pmd_addr_end(addr, end);
 
 		/* try section mapping first */
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
 		    (flags & NO_BLOCK_MAPPINGS) == 0) {
-			pmd_set_huge(pmd, phys, prot);
+			pmd_set_huge(pmdp, phys, prot);
 
 			/*
 			 * After the PMD entry has been populated once, we
 			 * only allow updates to the permission attributes.
 			 */
 			BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
-						      pmd_val(*pmd)));
+						      READ_ONCE(pmd_val(*pmdp))));
 		} else {
-			alloc_init_cont_pte(pmd, addr, next, phys, prot,
+			alloc_init_cont_pte(pmdp, addr, next, phys, prot,
 					    pgtable_alloc, flags);
 
 			BUG_ON(pmd_val(old_pmd) != 0 &&
-			       pmd_val(old_pmd) != pmd_val(*pmd));
+			       pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
 		}
 		phys += next - addr;
-	} while (pmd++, addr = next, addr != end);
+	} while (pmdp++, addr = next, addr != end);
 
 	pmd_clear_fixmap();
 }
 
-static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 				unsigned long end, phys_addr_t phys,
 				pgprot_t prot,
 				phys_addr_t (*pgtable_alloc)(void), int flags)
 {
 	unsigned long next;
+	pud_t pud = READ_ONCE(*pudp);
 
 	/*
 	 * Check for initial section mappings in the pgd/pud.
 	 */
-	BUG_ON(pud_sect(*pud));
-	if (pud_none(*pud)) {
+	BUG_ON(pud_sect(pud));
+	if (pud_none(pud)) {
 		phys_addr_t pmd_phys;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc();
-		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+		__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+		pud = READ_ONCE(*pudp);
 	}
-	BUG_ON(pud_bad(*pud));
+	BUG_ON(pud_bad(pud));
 
 	do {
 		pgprot_t __prot = prot;
@@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
 		    (flags & NO_CONT_MAPPINGS) == 0)
 			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-		init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+		init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
 
 		phys += next - addr;
 	} while (addr = next, addr != end);
@@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 	return true;
 }
 
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
-				  phys_addr_t phys, pgprot_t prot,
-				  phys_addr_t (*pgtable_alloc)(void),
-				  int flags)
+static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+			   phys_addr_t phys, pgprot_t prot,
+			   phys_addr_t (*pgtable_alloc)(void),
+			   int flags)
 {
-	pud_t *pud;
 	unsigned long next;
+	pud_t *pudp;
+	pgd_t pgd = READ_ONCE(*pgdp);
 
-	if (pgd_none(*pgd)) {
+	if (pgd_none(pgd)) {
 		phys_addr_t pud_phys;
 		BUG_ON(!pgtable_alloc);
 		pud_phys = pgtable_alloc();
-		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+		__pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
+		pgd = READ_ONCE(*pgdp);
 	}
-	BUG_ON(pgd_bad(*pgd));
+	BUG_ON(pgd_bad(pgd));
 
-	pud = pud_set_fixmap_offset(pgd, addr);
+	pudp = pud_set_fixmap_offset(pgdp, addr);
 	do {
-		pud_t old_pud = *pud;
+		pud_t old_pud = READ_ONCE(*pudp);
 
 		next = pud_addr_end(addr, end);
 
@@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		 */
 		if (use_1G_block(addr, next, phys) &&
 		    (flags & NO_BLOCK_MAPPINGS) == 0) {
-			pud_set_huge(pud, phys, prot);
+			pud_set_huge(pudp, phys, prot);
 
 			/*
 			 * After the PUD entry has been populated once, we
 			 * only allow updates to the permission attributes.
 			 */
 			BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
-						      pud_val(*pud)));
+						      READ_ONCE(pud_val(*pudp))));
 		} else {
-			alloc_init_cont_pmd(pud, addr, next, phys, prot,
+			alloc_init_cont_pmd(pudp, addr, next, phys, prot,
 					    pgtable_alloc, flags);
 
 			BUG_ON(pud_val(old_pud) != 0 &&
-			       pud_val(old_pud) != pud_val(*pud));
+			       pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
 		}
 		phys += next - addr;
-	} while (pud++, addr = next, addr != end);
+	} while (pudp++, addr = next, addr != end);
 
 	pud_clear_fixmap();
 }
@@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 				 int flags)
 {
 	unsigned long addr, length, end, next;
-	pgd_t *pgd = pgd_offset_raw(pgdir, virt);
+	pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
 
 	/*
 	 * If the virtual and physical address don't have the same offset
@@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+		alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
 			       flags);
 		phys += next - addr;
-	} while (pgd++, addr = next, addr != end);
+	} while (pgdp++, addr = next, addr != end);
 }
 
 static phys_addr_t pgd_pgtable_alloc(void)
@@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
 	flush_tlb_kernel_range(virt, virt + size);
 }
 
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
 				  phys_addr_t end, pgprot_t prot, int flags)
 {
-	__create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+	__create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
 			     prot, early_pgtable_alloc, flags);
 }
 
@@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
 			    PAGE_KERNEL_RO);
 }
 
-static void __init map_mem(pgd_t *pgd)
+static void __init map_mem(pgd_t *pgdp)
 {
 	phys_addr_t kernel_start = __pa_symbol(_text);
 	phys_addr_t kernel_end = __pa_symbol(__init_begin);
@@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
 		if (memblock_is_nomap(reg))
 			continue;
 
-		__map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+		__map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
 	}
 
 	/*
@@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
 	 * Note that contiguous mappings cannot be remapped in this way,
 	 * so we should avoid them here.
 	 */
-	__map_memblock(pgd, kernel_start, kernel_end,
+	__map_memblock(pgdp, kernel_start, kernel_end,
 		       PAGE_KERNEL, NO_CONT_MAPPINGS);
 	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
 
@@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
 	 * through /sys/kernel/kexec_crash_size interface.
 	 */
 	if (crashk_res.end) {
-		__map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+		__map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
 			       PAGE_KERNEL,
 			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
 		memblock_clear_nomap(crashk_res.start,
@@ -499,7 +506,7 @@ void mark_rodata_ro(void)
 	debug_checkwx();
 }
 
-static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
 				      pgprot_t prot, struct vm_struct *vma,
 				      int flags, unsigned long vm_flags)
 {
@@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
 	BUG_ON(!PAGE_ALIGNED(pa_start));
 	BUG_ON(!PAGE_ALIGNED(size));
 
-	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+	__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
 			     early_pgtable_alloc, flags);
 
 	if (!(vm_flags & VM_NO_GUARD))
@@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
 /*
  * Create fine-grained mappings for the kernel.
  */
-static void __init map_kernel(pgd_t *pgd)
+static void __init map_kernel(pgd_t *pgdp)
 {
 	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
 				vmlinux_initdata, vmlinux_data;
@@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
 	 * Only rodata will be remapped with different permissions later on,
 	 * all other segments are allowed to use contiguous mappings.
 	 */
-	map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
+	map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
 			   VM_NO_GUARD);
-	map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+	map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
 			   &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
-	map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+	map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
 			   &vmlinux_inittext, 0, VM_NO_GUARD);
-	map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+	map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
 			   &vmlinux_initdata, 0, VM_NO_GUARD);
-	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
+	map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
 
-	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+	if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
 		/*
 		 * The fixmap falls in a separate pgd to the kernel, and doesn't
 		 * live in the carveout for the swapper_pg_dir. We can simply
 		 * re-use the existing dir for the fixmap.
 		 */
-		set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
-			*pgd_offset_k(FIXADDR_START));
+		set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
+			READ_ONCE(*pgd_offset_k(FIXADDR_START)));
 	} else if (CONFIG_PGTABLE_LEVELS > 3) {
 		/*
 		 * The fixmap shares its top level pgd entry with the kernel
@@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
 		 * entry instead.
 		 */
 		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-		pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+		pud_populate(&init_mm,
+			     pud_set_fixmap_offset(pgdp, FIXADDR_START),
 			     lm_alias(bm_pmd));
 		pud_clear_fixmap();
 	} else {
 		BUG();
 	}
 
-	kasan_copy_shadow(pgd);
+	kasan_copy_shadow(pgdp);
 }
 
 /*
@@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
 void __init paging_init(void)
 {
 	phys_addr_t pgd_phys = early_pgtable_alloc();
-	pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+	pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
 
-	map_kernel(pgd);
-	map_mem(pgd);
+	map_kernel(pgdp);
+	map_mem(pgdp);
 
 	/*
 	 * We want to reuse the original swapper_pg_dir so we don't have to
@@ -635,7 +643,7 @@ void __init paging_init(void)
 	 * To do this we need to go via a temporary pgd.
 	 */
 	cpu_replace_ttbr1(__va(pgd_phys));
-	memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+	memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
 	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 
 	pgd_clear_fixmap();
@@ -655,37 +663,40 @@ void __init paging_init(void)
  */
 int kern_addr_valid(unsigned long addr)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
+	pgd_t *pgdp;
+	pud_t *pudp, pud;
+	pmd_t *pmdp, pmd;
+	pte_t *ptep, pte;
 
 	if ((((long)addr) >> VA_BITS) != -1UL)
 		return 0;
 
-	pgd = pgd_offset_k(addr);
-	if (pgd_none(*pgd))
+	pgdp = pgd_offset_k(addr);
+	if (pgd_none(READ_ONCE(*pgdp)))
 		return 0;
 
-	pud = pud_offset(pgd, addr);
-	if (pud_none(*pud))
+	pudp = pud_offset(pgdp, addr);
+	pud = READ_ONCE(*pudp);
+	if (pud_none(pud))
 		return 0;
 
-	if (pud_sect(*pud))
-		return pfn_valid(pud_pfn(*pud));
+	if (pud_sect(pud))
+		return pfn_valid(pud_pfn(pud));
 
-	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	pmdp = pmd_offset(pudp, addr);
+	pmd = READ_ONCE(*pmdp);
+	if (pmd_none(pmd))
 		return 0;
 
-	if (pmd_sect(*pmd))
-		return pfn_valid(pmd_pfn(*pmd));
+	if (pmd_sect(pmd))
+		return pfn_valid(pmd_pfn(pmd));
 
-	pte = pte_offset_kernel(pmd, addr);
-	if (pte_none(*pte))
+	ptep = pte_offset_kernel(pmdp, addr);
+	pte = READ_ONCE(*ptep);
+	if (pte_none(pte))
 		return 0;
 
-	return pfn_valid(pte_pfn(*pte));
+	return pfn_valid(pte_pfn(pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 #if !ARM64_SWAPPER_USES_SECTION_MAPS
@@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 {
 	unsigned long addr = start;
 	unsigned long next;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
 
 	do {
 		next = pmd_addr_end(addr, end);
 
-		pgd = vmemmap_pgd_populate(addr, node);
-		if (!pgd)
+		pgdp = vmemmap_pgd_populate(addr, node);
+		if (!pgdp)
 			return -ENOMEM;
 
-		pud = vmemmap_pud_populate(pgd, addr, node);
-		if (!pud)
+		pudp = vmemmap_pud_populate(pgdp, addr, node);
+		if (!pudp)
 			return -ENOMEM;
 
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
+		pmdp = pmd_offset(pudp, addr);
+		if (pmd_none(READ_ONCE(*pmdp))) {
 			void *p = NULL;
 
 			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
 			if (!p)
 				return -ENOMEM;
 
-			pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
 		} else
-			vmemmap_verify((pte_t *)pmd, node, addr, next);
+			vmemmap_verify((pte_t *)pmdp, node, addr, next);
 	} while (addr = next, addr != end);
 
 	return 0;
@@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
 
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
-	pgd_t *pgd = pgd_offset_k(addr);
+	pgd_t *pgdp = pgd_offset_k(addr);
+	pgd_t pgd = READ_ONCE(*pgdp);
 
-	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+	BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
 
-	return pud_offset_kimg(pgd, addr);
+	return pud_offset_kimg(pgdp, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
 {
-	pud_t *pud = fixmap_pud(addr);
+	pud_t *pudp = fixmap_pud(addr);
+	pud_t pud = READ_ONCE(*pudp);
 
-	BUG_ON(pud_none(*pud) || pud_bad(*pud));
+	BUG_ON(pud_none(pud) || pud_bad(pud));
 
-	return pmd_offset_kimg(pud, addr);
+	return pmd_offset_kimg(pudp, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
@@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
  */
 void __init early_fixmap_init(void)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
+	pgd_t *pgdp, pgd;
+	pud_t *pudp;
+	pmd_t *pmdp;
 	unsigned long addr = FIXADDR_START;
 
-	pgd = pgd_offset_k(addr);
+	pgdp = pgd_offset_k(addr);
+	pgd = READ_ONCE(*pgdp);
 	if (CONFIG_PGTABLE_LEVELS > 3 &&
-	    !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
+	    !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
 		/*
 		 * We only end up here if the kernel mapping and the fixmap
 		 * share the top level pgd entry, which should only happen on
 		 * 16k/4 levels configurations.
 		 */
 		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-		pud = pud_offset_kimg(pgd, addr);
+		pudp = pud_offset_kimg(pgdp, addr);
 	} else {
-		if (pgd_none(*pgd))
-			__pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
-		pud = fixmap_pud(addr);
+		if (pgd_none(pgd))
+			__pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+		pudp = fixmap_pud(addr);
 	}
-	if (pud_none(*pud))
-		__pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
-	pmd = fixmap_pmd(addr);
-	__pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
+	if (pud_none(READ_ONCE(*pudp)))
+		__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+	pmdp = fixmap_pmd(addr);
+	__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
@@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
 
-	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
-	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+	if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+	     || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
-		pr_warn("pmd %p != %p, %p\n",
-			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+		pr_warn("pmdp %p != %p, %p\n",
+			pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
 			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
 		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
 			fix_to_virt(FIX_BTMAP_BEGIN));
@@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
 			       phys_addr_t phys, pgprot_t flags)
 {
 	unsigned long addr = __fix_to_virt(idx);
-	pte_t *pte;
+	pte_t *ptep;
 
 	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-	pte = fixmap_pte(addr);
+	ptep = fixmap_pte(addr);
 
 	if (pgprot_val(flags)) {
-		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
 	} else {
-		pte_clear(&init_mm, addr, pte);
+		pte_clear(&init_mm, addr, ptep);
 		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
 	}
 }
@@ -915,36 +929,36 @@ int __init arch_ioremap_pmd_supported(void)
 	return 1;
 }
 
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 {
 	pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
 					pgprot_val(mk_sect_prot(prot)));
 	BUG_ON(phys & ~PUD_MASK);
-	set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
+	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 {
 	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
 					pgprot_val(mk_sect_prot(prot)));
 	BUG_ON(phys & ~PMD_MASK);
-	set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+	set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 
-int pud_clear_huge(pud_t *pud)
+int pud_clear_huge(pud_t *pudp)
 {
-	if (!pud_sect(*pud))
+	if (!pud_sect(READ_ONCE(*pudp)))
 		return 0;
-	pud_clear(pud);
+	pud_clear(pudp);
 	return 1;
 }
 
-int pmd_clear_huge(pmd_t *pmd)
+int pmd_clear_huge(pmd_t *pmdp)
 {
-	if (!pmd_sect(*pmd))
+	if (!pmd_sect(READ_ONCE(*pmdp)))
 		return 0;
-	pmd_clear(pmd);
+	pmd_clear(pmdp);
 	return 1;
 }

+ 17 - 15
arch/arm64/mm/pageattr.c

@@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
 			void *data)
 {
 	struct page_change_data *cdata = data;
-	pte_t pte = *ptep;
+	pte_t pte = READ_ONCE(*ptep);
 
 	pte = clear_pte_bit(pte, cdata->clear_mask);
 	pte = set_pte_bit(pte, cdata->set_mask);
@@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
  */
 bool kernel_page_present(struct page *page)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
+	pgd_t *pgdp;
+	pud_t *pudp, pud;
+	pmd_t *pmdp, pmd;
+	pte_t *ptep;
 	unsigned long addr = (unsigned long)page_address(page);
 
-	pgd = pgd_offset_k(addr);
-	if (pgd_none(*pgd))
+	pgdp = pgd_offset_k(addr);
+	if (pgd_none(READ_ONCE(*pgdp)))
 		return false;
 
-	pud = pud_offset(pgd, addr);
-	if (pud_none(*pud))
+	pudp = pud_offset(pgdp, addr);
+	pud = READ_ONCE(*pudp);
+	if (pud_none(pud))
 		return false;
-	if (pud_sect(*pud))
+	if (pud_sect(pud))
 		return true;
 
-	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	pmdp = pmd_offset(pudp, addr);
+	pmd = READ_ONCE(*pmdp);
+	if (pmd_none(pmd))
 		return false;
-	if (pmd_sect(*pmd))
+	if (pmd_sect(pmd))
 		return true;
 
-	pte = pte_offset_kernel(pmd, addr);
-	return pte_valid(*pte);
+	ptep = pte_offset_kernel(pmdp, addr);
+	return pte_valid(READ_ONCE(*ptep));
 }
 #endif /* CONFIG_HIBERNATION */
 #endif /* CONFIG_DEBUG_PAGEALLOC */

+ 9 - 5
arch/arm64/mm/proc.S

@@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
 	dc	cvac, cur_\()\type\()p		// Ensure any existing dirty
 	dmb	sy				// lines are written back before
 	ldr	\type, [cur_\()\type\()p]	// loading the entry
-	tbz	\type, #0, next_\()\type	// Skip invalid entries
+	tbz	\type, #0, skip_\()\type	// Skip invalid and
+	tbnz	\type, #11, skip_\()\type	// non-global entries
 	.endm
 
 	.macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
 	add	end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
 do_pgd:	__idmap_kpti_get_pgtable_ent	pgd
 	tbnz	pgd, #1, walk_puds
-	__idmap_kpti_put_pgtable_ent_ng	pgd
 next_pgd:
+	__idmap_kpti_put_pgtable_ent_ng	pgd
+skip_pgd:
 	add	cur_pgdp, cur_pgdp, #8
 	cmp	cur_pgdp, end_pgdp
 	b.ne	do_pgd
@@ -294,8 +296,9 @@ walk_puds:
 	add	end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
 do_pud:	__idmap_kpti_get_pgtable_ent	pud
 	tbnz	pud, #1, walk_pmds
-	__idmap_kpti_put_pgtable_ent_ng	pud
 next_pud:
+	__idmap_kpti_put_pgtable_ent_ng	pud
+skip_pud:
 	add	cur_pudp, cur_pudp, 8
 	cmp	cur_pudp, end_pudp
 	b.ne	do_pud
@@ -314,8 +317,9 @@ walk_pmds:
 	add	end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
 do_pmd:	__idmap_kpti_get_pgtable_ent	pmd
 	tbnz	pmd, #1, walk_ptes
-	__idmap_kpti_put_pgtable_ent_ng	pmd
 next_pmd:
+	__idmap_kpti_put_pgtable_ent_ng	pmd
+skip_pmd:
 	add	cur_pmdp, cur_pmdp, #8
 	cmp	cur_pmdp, end_pmdp
 	b.ne	do_pmd
@@ -333,7 +337,7 @@ walk_ptes:
 	add	end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
 do_pte:	__idmap_kpti_get_pgtable_ent	pte
 	__idmap_kpti_put_pgtable_ent_ng	pte
-next_pte:
+skip_pte:
 	add	cur_ptep, cur_ptep, #8
 	cmp	cur_ptep, end_ptep
 	b.ne	do_pte

+ 1 - 18
arch/blackfin/include/uapi/asm/poll.h

@@ -9,25 +9,8 @@
 #ifndef _UAPI__BFIN_POLL_H
 #define _UAPI__BFIN_POLL_H
 
-#ifndef __KERNEL__
 #define POLLWRNORM	POLLOUT
-#define POLLWRBAND	(__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
-	__u16 v = (__force __u16)val;
-	/* bit 9 -> bit 8, bit 8 -> bit 2 */
-	return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
-        /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
-	return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
-				((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND	256
 
 #include <asm-generic/poll.h>
 

+ 1 - 1
arch/cris/arch-v10/drivers/gpio.c

@@ -173,7 +173,7 @@ static __poll_t gpio_poll(struct file *file, poll_table *wait)
 
 	if ((data & priv->highalarm) ||
 	    (~data & priv->lowalarm)) {
-		mask = POLLIN|POLLRDNORM;
+		mask = EPOLLIN|EPOLLRDNORM;
 	}
 
 out:

+ 4 - 4
arch/cris/arch-v10/drivers/sync_serial.c

@@ -666,16 +666,16 @@ static __poll_t sync_serial_poll(struct file *file, poll_table *wait)
 	poll_wait(file, &port->in_wait_q, wait);
 	/* Some room to write */
 	if (port->out_count < OUT_BUFFER_SIZE)
-		mask |=  POLLOUT | POLLWRNORM;
+		mask |=  EPOLLOUT | EPOLLWRNORM;
 	/* At least an inbufchunk of data */
 	if (sync_data_avail(port) >= port->inbufchunk)
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	DEBUGPOLL(if (mask != prev_mask)
 		printk(KERN_DEBUG "sync_serial_poll: mask 0x%08X %s %s\n",
 			mask,
-			mask & POLLOUT ? "POLLOUT" : "",
-			mask & POLLIN ? "POLLIN" : "");
+			mask & EPOLLOUT ? "POLLOUT" : "",
+			mask & EPOLLIN ? "POLLIN" : "");
 		prev_mask = mask;
 	);
 	return mask;

+ 5 - 5
arch/cris/arch-v32/drivers/sync_serial.c

@@ -574,24 +574,24 @@ static __poll_t sync_serial_poll(struct file *file, poll_table *wait)
 
 	/* No active transfer, descriptors are available */
 	if (port->output && !port->tr_running)
-		mask |= POLLOUT | POLLWRNORM;
+		mask |= EPOLLOUT | EPOLLWRNORM;
 
 	/* Descriptor and buffer space available. */
 	if (port->output &&
 	    port->active_tr_descr != port->catch_tr_descr &&
 	    port->out_buf_count < OUT_BUFFER_SIZE)
-		mask |=  POLLOUT | POLLWRNORM;
+		mask |=  EPOLLOUT | EPOLLWRNORM;
 
 	/* At least an inbufchunk of data */
 	if (port->input && sync_data_avail(port) >= port->inbufchunk)
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	DEBUGPOLL(
 	if (mask != prev_mask)
 		pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
 			mask,
-			mask & POLLOUT ? "POLLOUT" : "",
-			mask & POLLIN ? "POLLIN" : "");
+			mask & EPOLLOUT ? "POLLOUT" : "",
+			mask & EPOLLIN ? "POLLIN" : "");
 		prev_mask = mask;
 	);
 	return mask;

+ 1 - 18
arch/frv/include/uapi/asm/poll.h

@@ -2,25 +2,8 @@
 #ifndef _ASM_POLL_H
 #define _ASM_POLL_H
 
-#ifndef __KERNEL__
 #define POLLWRNORM	POLLOUT
-#define POLLWRBAND	(__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
-	__u16 v = (__force __u16)val;
-	/* bit 9 -> bit 8, bit 8 -> bit 2 */
-	return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
-        /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
-	return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
-				((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND	256
 
 #include <asm-generic/poll.h>
 #undef POLLREMOVE

+ 0 - 1
arch/ia64/kernel/Makefile

@@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
 obj-y				+= esi_stub.o	# must be in kernel proper
 endif
 obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o
-obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 
 obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
 

+ 1 - 1
arch/ia64/kernel/perfmon.c

@@ -1670,7 +1670,7 @@ pfm_poll(struct file *filp, poll_table * wait)
 	PROTECT_CTX(ctx, flags);
 
 	if (PFM_CTXQ_EMPTY(ctx) == 0)
-		mask =  POLLIN | POLLRDNORM;
+		mask =  EPOLLIN | EPOLLRDNORM;
 
 	UNPROTECT_CTX(ctx, flags);
 

+ 1 - 18
arch/m68k/include/uapi/asm/poll.h

@@ -2,25 +2,8 @@
 #ifndef __m68k_POLL_H
 #define __m68k_POLL_H
 
-#ifndef __KERNEL__
 #define POLLWRNORM	POLLOUT
-#define POLLWRBAND	(__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
-	__u16 v = (__force __u16)val;
-	/* bit 9 -> bit 8, bit 8 -> bit 2 */
-	return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
-        /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
-	return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
-				((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND	256
 
 #include <asm-generic/poll.h>
 

+ 0 - 1
arch/mips/Kconfig

@@ -2333,7 +2333,6 @@ config MIPS_VPE_LOADER_TOM
 config MIPS_VPE_APSP_API
 	bool "Enable support for AP/SP API (RTLX)"
 	depends on MIPS_VPE_LOADER
-	help
 
 config MIPS_VPE_APSP_API_CMP
 	bool

+ 0 - 1
arch/mips/bcm63xx/boards/Kconfig

@@ -7,6 +7,5 @@ choice
 config BOARD_BCM963XX
        bool "Generic Broadcom 963xx boards"
 	select SSB
-       help
 
 endchoice

+ 1 - 18
arch/mips/include/uapi/asm/poll.h

@@ -2,25 +2,8 @@
 #ifndef __ASM_POLL_H
 #define __ASM_POLL_H
 
-#ifndef __KERNEL__
 #define POLLWRNORM	POLLOUT
-#define POLLWRBAND	(__force __poll_t)0x0100
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
-	__u16 v = (__force __u16)val;
-	/* bit 9 -> bit 8, bit 8 -> bit 2 */
-	return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
-        /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
-	return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
-				((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND	0x0100
 
 #include <asm-generic/poll.h>
 

+ 13 - 0
arch/mips/kernel/mips-cpc.c

@@ -10,6 +10,8 @@
 
 #include <linux/errno.h>
 #include <linux/percpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/spinlock.h>
 
 #include <asm/mips-cps.h>
@@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
 
 phys_addr_t __weak mips_cpc_default_phys_base(void)
 {
+	struct device_node *cpc_node;
+	struct resource res;
+	int err;
+
+	cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+	if (cpc_node) {
+		err = of_address_to_resource(cpc_node, 0, &res);
+		if (!err)
+			return res.start;
+	}
+
 	return 0;
 }
 

+ 2 - 2
arch/mips/kernel/rtlx.c

@@ -349,11 +349,11 @@ static __poll_t file_poll(struct file *file, poll_table *wait)
 
 	/* data available to read? */
 	if (rtlx_read_poll(minor, 0))
-		mask |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* space to write */
 	if (rtlx_write_poll(minor))
-		mask |= POLLOUT | POLLWRNORM;
+		mask |= EPOLLOUT | EPOLLWRNORM;
 
 	return mask;
 }

+ 12 - 4
arch/mips/kernel/setup.c

@@ -375,6 +375,7 @@ static void __init bootmem_init(void)
 	unsigned long reserved_end;
 	unsigned long mapstart = ~0UL;
 	unsigned long bootmap_size;
+	phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
 	bool bootmap_valid = false;
 	int i;
 
@@ -395,7 +396,8 @@ static void __init bootmem_init(void)
 	max_low_pfn = 0;
 
 	/*
-	 * Find the highest page frame number we have available.
+	 * Find the highest page frame number we have available
+	 * and the lowest used RAM address
 	 */
 	for (i = 0; i < boot_mem_map.nr_map; i++) {
 		unsigned long start, end;
@@ -407,6 +409,8 @@ static void __init bootmem_init(void)
 		end = PFN_DOWN(boot_mem_map.map[i].addr
 				+ boot_mem_map.map[i].size);
 
+		ramstart = min(ramstart, boot_mem_map.map[i].addr);
+
 #ifndef CONFIG_HIGHMEM
 		/*
 		 * Skip highmem here so we get an accurate max_low_pfn if low
@@ -436,6 +440,13 @@ static void __init bootmem_init(void)
 		mapstart = max(reserved_end, start);
 	}
 
+	/*
+	 * Reserve any memory between the start of RAM and PHYS_OFFSET
+	 */
+	if (ramstart > PHYS_OFFSET)
+		add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
+				  BOOT_MEM_RESERVED);
+
 	if (min_low_pfn >= max_low_pfn)
 		panic("Incorrect memory mapping !!!");
 	if (min_low_pfn > ARCH_PFN_OFFSET) {
@@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
 
 	add_memory_region(start, size, BOOT_MEM_RAM);
 
-	if (start && start > PHYS_OFFSET)
-		add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
-				BOOT_MEM_RESERVED);
 	return 0;
 }
 early_param("mem", early_parse_mem);

+ 1 - 1
arch/mips/kernel/smp-bmips.c

@@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
 	 */
 }
 
-void __init bmips_cpu_setup(void)
+void bmips_cpu_setup(void)
 {
 	void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
 	u32 __maybe_unused cfg;

+ 1 - 0
arch/mips/kvm/Kconfig

@@ -22,6 +22,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	select HAVE_KVM_VCPU_ASYNC_IOCTL
 	select KVM_MMIO
 	select MMU_NOTIFIER
 	select SRCU

+ 47 - 20
arch/mips/kvm/mips.c

@@ -446,6 +446,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	int r = -EINTR;
 
+	vcpu_load(vcpu);
+
 	kvm_sigset_activate(vcpu);
 
 	if (vcpu->mmio_needed) {
@@ -480,6 +482,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 out:
 	kvm_sigset_deactivate(vcpu);
 
+	vcpu_put(vcpu);
 	return r;
 }
 
@@ -900,6 +903,26 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	return r;
 }
 
+long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl,
+			       unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	if (ioctl == KVM_INTERRUPT) {
+		struct kvm_mips_interrupt irq;
+
+		if (copy_from_user(&irq, argp, sizeof(irq)))
+			return -EFAULT;
+		kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
+			  irq.irq);
+
+		return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+	}
+
+	return -ENOIOCTLCMD;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 			 unsigned long arg)
 {
@@ -907,56 +930,54 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 	void __user *argp = (void __user *)arg;
 	long r;
 
+	vcpu_load(vcpu);
+
 	switch (ioctl) {
 	case KVM_SET_ONE_REG:
 	case KVM_GET_ONE_REG: {
 		struct kvm_one_reg reg;
 
+		r = -EFAULT;
 		if (copy_from_user(&reg, argp, sizeof(reg)))
-			return -EFAULT;
+			break;
 		if (ioctl == KVM_SET_ONE_REG)
-			return kvm_mips_set_reg(vcpu, &reg);
+			r = kvm_mips_set_reg(vcpu, &reg);
 		else
-			return kvm_mips_get_reg(vcpu, &reg);
+			r = kvm_mips_get_reg(vcpu, &reg);
+		break;
 	}
 	case KVM_GET_REG_LIST: {
 		struct kvm_reg_list __user *user_list = argp;
 		struct kvm_reg_list reg_list;
 		unsigned n;
 
+		r = -EFAULT;
 		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
-			return -EFAULT;
+			break;
 		n = reg_list.n;
 		reg_list.n = kvm_mips_num_regs(vcpu);
 		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
-			return -EFAULT;
+			break;
+		r = -E2BIG;
 		if (n < reg_list.n)
-			return -E2BIG;
-		return kvm_mips_copy_reg_indices(vcpu, user_list->reg);
-	}
-	case KVM_INTERRUPT:
-		{
-			struct kvm_mips_interrupt irq;
-
-			if (copy_from_user(&irq, argp, sizeof(irq)))
-				return -EFAULT;
-			kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
-				  irq.irq);
-
-			r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
 			break;
-		}
+		r = kvm_mips_copy_reg_indices(vcpu, user_list->reg);
+		break;
+	}
 	case KVM_ENABLE_CAP: {
 		struct kvm_enable_cap cap;
 
+		r = -EFAULT;
 		if (copy_from_user(&cap, argp, sizeof(cap)))
-			return -EFAULT;
+			break;
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
 	default:
 		r = -ENOIOCTLCMD;
 	}
+
+	vcpu_put(vcpu);
 	return r;
 }
 
@@ -1145,6 +1166,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
+	vcpu_load(vcpu);
+
 	for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
 		vcpu->arch.gprs[i] = regs->gpr[i];
 	vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */
@@ -1152,6 +1175,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.lo = regs->lo;
 	vcpu->arch.pc = regs->pc;
 
+	vcpu_put(vcpu);
 	return 0;
 }
 
@@ -1159,6 +1183,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
+	vcpu_load(vcpu);
+
 	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
 		regs->gpr[i] = vcpu->arch.gprs[i];
 
@@ -1166,6 +1192,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	regs->lo = vcpu->arch.lo;
 	regs->pc = vcpu->arch.pc;
 
+	vcpu_put(vcpu);
 	return 0;
 }
 

+ 0 - 1
arch/nios2/Kconfig

@@ -152,7 +152,6 @@ menu "Advanced setup"
 
 config ADVANCED_OPTIONS
 	bool "Prompt for advanced kernel configuration options"
-	help
 
 comment "Default settings for advanced configuration options are used"
 	depends on !ADVANCED_OPTIONS

+ 8 - 8
arch/nios2/boot/dts/3c120_devboard.dts

@@ -29,7 +29,7 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu: cpu@0x0 {
+		cpu: cpu@0 {
 			device_type = "cpu";
 			compatible = "altr,nios2-1.0";
 			reg = <0x00000000>;
@@ -69,7 +69,7 @@
 		compatible = "altr,avalon", "simple-bus";
 		bus-frequency = <125000000>;
 
-		pb_cpu_to_io: bridge@0x8000000 {
+		pb_cpu_to_io: bridge@8000000 {
 			compatible = "simple-bus";
 			reg = <0x08000000 0x00800000>;
 			#address-cells = <1>;
@@ -83,7 +83,7 @@
 				<0x00008000 0x08008000 0x00000020>,
 				<0x00400000 0x08400000 0x00000020>;
 
-			timer_1ms: timer@0x400000 {
+			timer_1ms: timer@400000 {
 				compatible = "altr,timer-1.0";
 				reg = <0x00400000 0x00000020>;
 				interrupt-parent = <&cpu>;
@@ -91,7 +91,7 @@
 				clock-frequency = <125000000>;
 			};
 
-			timer_0: timer@0x8000 {
+			timer_0: timer@8000 {
 				compatible = "altr,timer-1.0";
 				reg = < 0x00008000 0x00000020 >;
 				interrupt-parent = < &cpu >;
@@ -99,14 +99,14 @@
 				clock-frequency = < 125000000 >;
 			};
 
-			jtag_uart: serial@0x4d50 {
+			jtag_uart: serial@4d50 {
 				compatible = "altr,juart-1.0";
 				reg = <0x00004d50 0x00000008>;
 				interrupt-parent = <&cpu>;
 				interrupts = <1>;
 			};
 
-			tse_mac: ethernet@0x4000 {
+			tse_mac: ethernet@4000 {
 				compatible = "altr,tse-1.0";
 				reg = <0x00004000 0x00000400>,
 					<0x00004400 0x00000040>,
@@ -133,7 +133,7 @@
 				};
 			};
 
-			uart: serial@0x4c80 {
+			uart: serial@4c80 {
 				compatible = "altr,uart-1.0";
 				reg = <0x00004c80 0x00000020>;
 				interrupt-parent = <&cpu>;
@@ -143,7 +143,7 @@
 			};
 		};
 
-		cfi_flash_64m: flash@0x0 {
+		cfi_flash_64m: flash@0 {
 			compatible = "cfi-flash";
 			reg = <0x00000000 0x04000000>;
 			bank-width = <2>;

+ 0 - 1
arch/nios2/configs/10m50_defconfig

@@ -33,7 +33,6 @@ CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"

+ 0 - 1
arch/nios2/configs/3c120_defconfig

@@ -35,7 +35,6 @@ CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"

+ 1 - 0
arch/powerpc/include/asm/book3s/32/pgtable.h

@@ -16,6 +16,7 @@
 #define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX	PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)

+ 2 - 1
arch/powerpc/include/asm/book3s/64/hash-4k.h

@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
  * keeping the prototype consistent across the two formats.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-			unsigned int subpg_index, unsigned long hidx)
+					 unsigned int subpg_index, unsigned long hidx,
+					 int offset)
 {
 	return (hidx << H_PAGE_F_GIX_SHIFT) &
 		(H_PAGE_F_SECOND | H_PAGE_F_GIX);

+ 11 - 5
arch/powerpc/include/asm/book3s/64/hash-64k.h

@@ -45,7 +45,7 @@
  * generic accessors and iterators here
  */
 #define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
 {
 	real_pte_t rpte;
 	unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
 	 */
 	smp_rmb();
 
-	hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+	hidxp = (unsigned long *)(ptep + offset);
 	rpte.hidx = *hidxp;
 	return rpte;
 }
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
  * expected to modify the PTE bits accordingly and commit the PTE to memory.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-		unsigned int subpg_index, unsigned long hidx)
+					 unsigned int subpg_index,
+					 unsigned long hidx, int offset)
 {
-	unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+	unsigned long *hidxp = (unsigned long *)(ptep + offset);
 
 	rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
 	*hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
 }
 
 #define H_PTE_TABLE_SIZE	PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
 #define H_PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
 				 (sizeof(unsigned long) << PMD_INDEX_SIZE))
 #else
 #define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE	((sizeof(pud_t) << PUD_INDEX_SIZE) +	\
+				 (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
 #define H_PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
 #define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE

+ 12 - 1
arch/powerpc/include/asm/book3s/64/hash.h

@@ -23,7 +23,8 @@
 				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+	defined(CONFIG_PPC_64K_PAGES)
 /*
  * only with hash 64k we need to use the second half of pmd page table
  * to store pointer to deposited pgtable_t
@@ -32,6 +33,16 @@
 #else
 #define H_PMD_CACHE_INDEX	H_PMD_INDEX_SIZE
 #endif
+/*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE)
+#endif
 /*
  * Define the address range of the kernel non-linear virtual area
  */

+ 11 - 5
arch/powerpc/include/asm/book3s/64/pgalloc.h

@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+	pgd_t *pgd;
+
 	if (radix_enabled())
 		return radix__pgd_alloc(mm);
-	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-		pgtable_gfp_flags(mm, GFP_KERNEL));
+
+	pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+			       pgtable_gfp_flags(mm, GFP_KERNEL));
+	memset(pgd, 0, PGD_TABLE_SIZE);
+
+	return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+	return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
 		pgtable_gfp_flags(mm, GFP_KERNEL));
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+	kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 	 * ahead and flush the page walk cache
 	 */
 	flush_tlb_pgtable(tlb, address);
-        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+	pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)

+ 3 - 1
arch/powerpc/include/asm/book3s/64/pgtable.h

@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
 extern unsigned long __pud_index_size;
 extern unsigned long __pgd_index_size;
 extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
 #define PTE_INDEX_SIZE  __pte_index_size
 #define PMD_INDEX_SIZE  __pmd_index_size
 #define PUD_INDEX_SIZE  __pud_index_size
 #define PGD_INDEX_SIZE  __pgd_index_size
 #define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
 /*
  * Because of use of pte fragments and THP, size of page table
  * are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
  */
 #ifndef __real_pte
 
-#define __real_pte(e,p)		((real_pte_t){(e)})
+#define __real_pte(e, p, o)		((real_pte_t){(e)})
 #define __rpte_to_pte(r)	((r).pte)
 #define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
 

+ 1 - 1
arch/powerpc/include/asm/exception-64s.h

@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 					  EXC_HV, SOFTEN_TEST_HV, bitmask)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask)		\
-	MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+	MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*

+ 11 - 1
arch/powerpc/include/asm/hw_irq.h

@@ -29,6 +29,16 @@
 #define PACA_IRQ_HMI		0x20
 #define PACA_IRQ_PMI		0x40
 
+/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE)
+#endif
+
 /*
  * flags for paca->irq_soft_mask
  */
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
 static inline void may_hard_irq_enable(void)
 {
 	get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-	if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+	if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
 		__hard_irq_enable();
 }
 

+ 6 - 0
arch/powerpc/include/asm/kexec.h

@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
 	return false;
 }
 
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
 #endif /* CONFIG_KEXEC_CORE */
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */

+ 2 - 4
arch/powerpc/include/asm/kvm_book3s.h

@@ -249,10 +249,8 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
 extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
 extern int kvmppc_hcall_impl_pr(unsigned long cmd);
 extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
-extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
-				 struct kvm_vcpu *vcpu);
-extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
-				   struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
 extern int kvm_irq_bypass;
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)

+ 9 - 5
arch/powerpc/include/asm/kvm_book3s_64.h

@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
 	lphi = (l >> 16) & 0xf;
 	switch ((l >> 12) & 0xf) {
 	case 0:
-		return !lphi ? 24 : -1;		/* 16MB */
+		return !lphi ? 24 : 0;		/* 16MB */
 		break;
 	case 1:
 		return 16;			/* 64kB */
 		break;
 	case 3:
-		return !lphi ? 34 : -1;		/* 16GB */
+		return !lphi ? 34 : 0;		/* 16GB */
 		break;
 	case 7:
 		return (16 << 8) + 12;		/* 64kB in 4kB */
@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
 			return (24 << 8) + 12;	/* 16MB in 4kB */
 		break;
 	}
-	return -1;
+	return 0;
 }
 
 static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
 
 static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
 {
-	return 1ul << kvmppc_hpte_actual_page_shift(v, r);
+	int shift = kvmppc_hpte_actual_page_shift(v, r);
+
+	if (shift)
+		return 1ul << shift;
+	return 0;
 }
 
 static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 		va_low ^= v >> (SID_SHIFT_1T - 16);
 	va_low &= 0x7ff;
 
-	if (b_pgshift == 12) {
+	if (b_pgshift <= 12) {
 		if (a_pgshift > 12) {
 			sllp = (a_pgshift == 16) ? 5 : 4;
 			rb |= sllp << 5;	/*  AP field */

+ 7 - 1
arch/powerpc/include/asm/kvm_host.h

@@ -690,6 +690,7 @@ struct kvm_vcpu_arch {
 	u8 mmio_vsx_offset;
 	u8 mmio_vsx_copy_type;
 	u8 mmio_vsx_tx_sx_enabled;
+	u8 mmio_vmx_copy_nums;
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
@@ -709,6 +710,7 @@ struct kvm_vcpu_arch {
 	u8 ceded;
 	u8 prodded;
 	u8 doorbell_request;
+	u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
 	u32 last_inst;
 
 	struct swait_queue_head *wqp;
@@ -738,8 +740,11 @@ struct kvm_vcpu_arch {
 	struct kvmppc_icp *icp; /* XICS presentation controller */
 	struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
 	__be32 xive_cam_word;    /* Cooked W2 in proper endian with valid bit */
-	u32 xive_pushed;	 /* Is the VP pushed on the physical CPU ? */
+	u8 xive_pushed;		 /* Is the VP pushed on the physical CPU ? */
+	u8 xive_esc_on;		 /* Is the escalation irq enabled ? */
 	union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+	u64 xive_esc_raddr;	 /* Escalation interrupt ESB real addr */
+	u64 xive_esc_vaddr;	 /* Escalation interrupt ESB virt addr */
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -800,6 +805,7 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_QPR	0x0040
 #define KVM_MMIO_REG_FQPR	0x0060
 #define KVM_MMIO_REG_VSX	0x0080
+#define KVM_MMIO_REG_VMX	0x00c0
 
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE

+ 4 - 0
arch/powerpc/include/asm/kvm_ppc.h

@@ -81,6 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				unsigned int rt, unsigned int bytes,
 			int is_default_endian, int mmio_sign_extend);
+extern int kvmppc_handle_load128_by2x64(struct kvm_run *run,
+		struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian);
+extern int kvmppc_handle_store128_by2x64(struct kvm_run *run,
+		struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian);
 extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			       u64 val, unsigned int bytes,
 			       int is_default_endian);

+ 1 - 0
arch/powerpc/include/asm/nohash/32/pgtable.h

@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
 #define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX	PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)

+ 1 - 0
arch/powerpc/include/asm/nohash/64/pgtable.h

@@ -27,6 +27,7 @@
 #else
 #define PMD_CACHE_INDEX	PMD_INDEX_SIZE
 #endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
 
 /*
  * Define the address range of the kernel non-linear virtual area

+ 1 - 0
arch/powerpc/include/asm/opal-api.h

@@ -1076,6 +1076,7 @@ enum {
 /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
 enum {
 	OPAL_XIVE_VP_ENABLED		= 0x00000001,
+	OPAL_XIVE_VP_SINGLE_ESCALATION	= 0x00000002,
 };
 
 /* "Any chip" replacement for chip ID for allocation functions */

+ 6 - 0
arch/powerpc/include/asm/ppc-opcode.h

@@ -156,6 +156,12 @@
 #define OP_31_XOP_LFDX          599
 #define OP_31_XOP_LFDUX		631
 
+/* VMX Vector Load Instructions */
+#define OP_31_XOP_LVX           103
+
+/* VMX Vector Store Instructions */
+#define OP_31_XOP_STVX          231
+
 #define OP_LWZ  32
 #define OP_STFS 52
 #define OP_STFSU 53

+ 13 - 0
arch/powerpc/include/asm/topology.h

@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
 
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+	numa_cpu_lookup_table[cpu] = node;
+}
+
 static inline int early_cpu_to_node(int cpu)
 {
 	int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
 {
 	return 0;
 }
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
 extern int start_topology_update(void);
 extern int stop_topology_update(void);
 extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
 #else
 static inline int start_topology_update(void)
 {
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
 {
 	return 0;
 }
+static inline int find_and_online_cpu_nid(int cpu)
+{
+	return 0;
+}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)

+ 2 - 1
arch/powerpc/include/asm/xive.h

@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
 
 extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
 extern int xive_native_disable_vp(u32 vp_id);
 extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
 
 #else
 

+ 2 - 0
arch/powerpc/include/uapi/asm/kvm.h

@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_TIDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
 #define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
+#define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */

+ 4 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -520,6 +520,7 @@ int main(void)
 	OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
 	OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
 	OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+	OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
 	OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
 	OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
 	OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@ -739,6 +740,9 @@ int main(void)
 	DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
 					    arch.xive_cam_word));
 	DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+	DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+	DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+	DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING

+ 2 - 0
arch/powerpc/kernel/exceptions-64e.S

@@ -943,6 +943,8 @@ kernel_dbg_exc:
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
  * accordingly and if the interrupt is level sensitive, we hard disable
+ * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
+ * keep these in synch.
  */
 
 .macro masked_interrupt_book3e paca_irq full_mask

+ 3 - 3
arch/powerpc/kernel/exceptions-64s.S

@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
  *   triggered and won't automatically refire.
  * - If it was a HMI we return immediately since we handled it in realmode
  *   and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
  * This is called with r10 containing the value to OR to the paca field.
  */
 #define MASKED_INTERRUPT(_H)				\
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt:					\
 	ori	r10,r10,0xffff;				\
 	mtspr	SPRN_DEC,r10;				\
 	b	MASKED_DEC_HANDLER_LABEL;		\
-1:	andi.	r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI);	\
-	bne	2f;					\
+1:	andi.	r10,r10,PACA_IRQ_MUST_HARD_MASK;	\
+	beq	2f;					\
 	mfspr	r10,SPRN_##_H##SRR1;			\
 	xori	r10,r10,MSR_EE; /* clear MSR_EE */	\
 	mtspr	SPRN_##_H##SRR1,r10;			\

+ 3 - 2
arch/powerpc/kernel/pci-common.c

@@ -362,7 +362,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
  */
 static int pci_read_irq_line(struct pci_dev *pci_dev)
 {
-	unsigned int virq = 0;
+	int virq;
 
 	pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
 
@@ -370,7 +370,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 	memset(&oirq, 0xff, sizeof(oirq));
 #endif
 	/* Try to get a mapping from the device-tree */
-	if (!of_irq_parse_and_map_pci(pci_dev, 0, 0)) {
+	virq = of_irq_parse_and_map_pci(pci_dev, 0, 0);
+	if (virq <= 0) {
 		u8 line, pin;
 
 		/* If that fails, lets fallback to what is in the config

+ 1 - 1
arch/powerpc/kernel/rtasd.c

@@ -392,7 +392,7 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
 {
 	poll_wait(file, &rtas_log_wait, wait);
 	if (rtas_log_size)
-		return POLLIN | POLLRDNORM;
+		return EPOLLIN | EPOLLRDNORM;
 	return 0;
 }
 

+ 4 - 2
arch/powerpc/kernel/sysfs.c

@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
 	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
 		device_create_file(s, &dev_attr_pir);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		!firmware_has_feature(FW_FEATURE_LPAR))
 		device_create_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
 	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
 		device_remove_file(s, &dev_attr_pir);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		!firmware_has_feature(FW_FEATURE_LPAR))
 		device_remove_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 

+ 2 - 1
arch/powerpc/kvm/Kconfig

@@ -22,6 +22,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_VCPU_ASYNC_IOCTL
 	select SRCU
 	select KVM_VFIO
 	select IRQ_BYPASS_MANAGER
@@ -68,7 +69,7 @@ config KVM_BOOK3S_64
 	select KVM_BOOK3S_64_HANDLER
 	select KVM
 	select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
-	select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV)
+	select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
 	---help---
 	  Support running unmodified book3s_64 and book3s_32 guest kernels
 	  in virtual machines on book3s_64 host processors.

Some files were not shown because too many files changed in this diff