@@ -418,6 +418,12 @@ struct __packed vmcs12 {
 	 */
 #define VMCS12_SIZE 0x1000
 
+/*
+ * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
+ * supported VMCS12 field encoding.
+ */
+#define VMCS12_MAX_FIELD_INDEX 0x17
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -441,6 +447,7 @@ struct nested_vmx {
 	 * data hold by vmcs12
 	 */
 	bool sync_shadow_vmcs;
+	bool dirty_vmcs12;
 
 	bool change_vmcs01_virtual_x2apic_mode;
 	/* L2 must run next, and mustn't decide to exit to L1. */
@@ -664,6 +671,8 @@ struct vcpu_vmx {
 
 	u32 host_pkru;
 
+	unsigned long host_debugctlmsr;
+
 	/*
 	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
 	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@@ -692,67 +701,24 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 	return &(to_vmx(vcpu)->pi_desc);
 }
 
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
-#define FIELD(number, name)	[number] = VMCS12_OFFSET(name)
-#define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
-				[number##_HIGH] = VMCS12_OFFSET(name)+4
+#define FIELD(number, name)	[ROL16(number, 6)] = VMCS12_OFFSET(name)
+#define FIELD64(number, name)						\
+	FIELD(number, name),						\
+	[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
 
-static unsigned long shadow_read_only_fields[] = {
-	/*
-	 * We do NOT shadow fields that are modified when L0
-	 * traps and emulates any vmx instruction (e.g. VMPTRLD,
-	 * VMXON...) executed by L1.
-	 * For example, VM_INSTRUCTION_ERROR is read
-	 * by L1 if a vmx instruction fails (part of the error path).
-	 * Note the code assumes this logic. If for some reason
-	 * we start shadowing these fields then we need to
-	 * force a shadow sync when L0 emulates vmx instructions
-	 * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
-	 * by nested_vmx_failValid)
-	 */
-	VM_EXIT_REASON,
-	VM_EXIT_INTR_INFO,
-	VM_EXIT_INSTRUCTION_LEN,
-	IDT_VECTORING_INFO_FIELD,
-	IDT_VECTORING_ERROR_CODE,
-	VM_EXIT_INTR_ERROR_CODE,
-	EXIT_QUALIFICATION,
-	GUEST_LINEAR_ADDRESS,
-	GUEST_PHYSICAL_ADDRESS
+static u16 shadow_read_only_fields[] = {
+#define SHADOW_FIELD_RO(x) x,
+#include "vmx_shadow_fields.h"
 };
 static int max_shadow_read_only_fields =
 	ARRAY_SIZE(shadow_read_only_fields);
 
-static unsigned long shadow_read_write_fields[] = {
-	TPR_THRESHOLD,
-	GUEST_RIP,
-	GUEST_RSP,
-	GUEST_CR0,
-	GUEST_CR3,
-	GUEST_CR4,
-	GUEST_INTERRUPTIBILITY_INFO,
-	GUEST_RFLAGS,
-	GUEST_CS_SELECTOR,
-	GUEST_CS_AR_BYTES,
-	GUEST_CS_LIMIT,
-	GUEST_CS_BASE,
-	GUEST_ES_BASE,
-	GUEST_BNDCFGS,
-	CR0_GUEST_HOST_MASK,
-	CR0_READ_SHADOW,
-	CR4_READ_SHADOW,
-	TSC_OFFSET,
-	EXCEPTION_BITMAP,
-	CPU_BASED_VM_EXEC_CONTROL,
-	VM_ENTRY_EXCEPTION_ERROR_CODE,
-	VM_ENTRY_INTR_INFO_FIELD,
-	VM_ENTRY_INSTRUCTION_LEN,
-	VM_ENTRY_EXCEPTION_ERROR_CODE,
-	HOST_FS_BASE,
-	HOST_GS_BASE,
-	HOST_FS_SELECTOR,
-	HOST_GS_SELECTOR
+static u16 shadow_read_write_fields[] = {
+#define SHADOW_FIELD_RW(x) x,
+#include "vmx_shadow_fields.h"
 };
 static int max_shadow_read_write_fields =
 	ARRAY_SIZE(shadow_read_write_fields);
 
@@ -905,13 +871,17 @@ static inline short vmcs_field_to_offset(unsigned long field)
 {
 	const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
 	unsigned short offset;
+	unsigned index;
+
+	if (field >> 15)
+		return -ENOENT;
 
-	BUILD_BUG_ON(size > SHRT_MAX);
-	if (field >= size)
+	index = ROL16(field, 6);
+	if (index >= size)
 		return -ENOENT;
 
-	field = array_index_nospec(field, size);
-	offset = vmcs_field_to_offset_table[field];
+	index = array_index_nospec(index, size);
+	offset = vmcs_field_to_offset_table[index];
 	if (offset == 0)
 		return -ENOENT;
 	return offset;
@@ -957,8 +927,6 @@ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
 static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
 
 enum {
-	VMX_IO_BITMAP_A,
-	VMX_IO_BITMAP_B,
 	VMX_VMREAD_BITMAP,
 	VMX_VMWRITE_BITMAP,
 	VMX_BITMAP_NR
@@ -966,8 +934,6 @@ enum {
 
 static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
 
-#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
-#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
 #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
 #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
 
@@ -2373,6 +2339,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	vmx_vcpu_pi_load(vcpu, cpu);
 	vmx->host_pkru = read_pkru();
+	vmx->host_debugctlmsr = get_debugctlmsr();
 }
 
 static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -2930,7 +2897,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
 
 	/* highest index: VMX_PREEMPTION_TIMER_VALUE */
-	vmx->nested.nested_vmx_vmcs_enum = 0x2e;
+	vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
 }
 
 /*
@@ -3266,6 +3233,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
  */
 static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct shared_msr_entry *msr;
 
 	switch (msr_info->index) {
@@ -3277,8 +3245,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vmcs_readl(GUEST_GS_BASE);
 		break;
 	case MSR_KERNEL_GS_BASE:
-		vmx_load_host_state(to_vmx(vcpu));
-		msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+		vmx_load_host_state(vmx);
+		msr_info->data = vmx->msr_guest_kernel_gs_base;
 		break;
 #endif
 	case MSR_EFER:
@@ -3318,13 +3286,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_MCG_EXT_CTL:
 		if (!msr_info->host_initiated &&
-		    !(to_vmx(vcpu)->msr_ia32_feature_control &
+		    !(vmx->msr_ia32_feature_control &
 		      FEATURE_CONTROL_LMCE))
 			return 1;
 		msr_info->data = vcpu->arch.mcg_ext_ctl;
 		break;
 	case MSR_IA32_FEATURE_CONTROL:
-		msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+		msr_info->data = vmx->msr_ia32_feature_control;
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!nested_vmx_allowed(vcpu))
@@ -3341,7 +3309,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		/* Otherwise falls through */
 	default:
-		msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+		msr = find_msr_entry(vmx, msr_info->index);
 		if (msr) {
 			msr_info->data = msr->data;
 			break;
@@ -3727,7 +3695,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #endif
 	      CPU_BASED_CR3_LOAD_EXITING |
 	      CPU_BASED_CR3_STORE_EXITING |
-	      CPU_BASED_USE_IO_BITMAPS |
+	      CPU_BASED_UNCOND_IO_EXITING |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING |
 	      CPU_BASED_INVLPG_EXITING |
@@ -3757,6 +3725,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_ENABLE_EPT |
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
 			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
+			SECONDARY_EXEC_DESC |
 			SECONDARY_EXEC_RDTSCP |
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -3982,17 +3951,17 @@ static void free_kvm_area(void)
 	}
 }
 
-enum vmcs_field_type {
-	VMCS_FIELD_TYPE_U16 = 0,
-	VMCS_FIELD_TYPE_U64 = 1,
-	VMCS_FIELD_TYPE_U32 = 2,
-	VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+enum vmcs_field_width {
+	VMCS_FIELD_WIDTH_U16 = 0,
+	VMCS_FIELD_WIDTH_U64 = 1,
+	VMCS_FIELD_WIDTH_U32 = 2,
+	VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
 };
 
-static inline int vmcs_field_type(unsigned long field)
+static inline int vmcs_field_width(unsigned long field)
 {
 	if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
-		return VMCS_FIELD_TYPE_U32;
+		return VMCS_FIELD_WIDTH_U32;
 	return (field >> 13) & 0x3 ;
 }
 
@@ -4005,43 +3974,66 @@ static void init_vmcs_shadow_fields(void)
 {
 	int i, j;
 
-	/* No checks for read only fields yet */
+	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
+		u16 field = shadow_read_only_fields[i];
+		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+		    (i + 1 == max_shadow_read_only_fields ||
+		     shadow_read_only_fields[i + 1] != field + 1))
+			pr_err("Missing field from shadow_read_only_field %x\n",
+			       field + 1);
+
+		clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+		if (field & 1)
+			continue;
+#endif
+		if (j < i)
+			shadow_read_only_fields[j] = field;
+		j++;
+	}
+	max_shadow_read_only_fields = j;
 
 	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
-		switch (shadow_read_write_fields[i]) {
-		case GUEST_BNDCFGS:
-			if (!kvm_mpx_supported())
+		u16 field = shadow_read_write_fields[i];
+		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+		    (i + 1 == max_shadow_read_write_fields ||
+		     shadow_read_write_fields[i + 1] != field + 1))
+			pr_err("Missing field from shadow_read_write_field %x\n",
+			       field + 1);
+
+		/*
+		 * PML and the preemption timer can be emulated, but the
+		 * processor cannot vmwrite to fields that don't exist
+		 * on bare metal.
+		 */
+		switch (field) {
+		case GUEST_PML_INDEX:
+			if (!cpu_has_vmx_pml())
+				continue;
+			break;
+		case VMX_PREEMPTION_TIMER_VALUE:
+			if (!cpu_has_vmx_preemption_timer())
+				continue;
+			break;
+		case GUEST_INTR_STATUS:
+			if (!cpu_has_vmx_apicv())
 				continue;
 			break;
 		default:
 			break;
 		}
 
+		clear_bit(field, vmx_vmwrite_bitmap);
+		clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+		if (field & 1)
+			continue;
+#endif
 		if (j < i)
-			shadow_read_write_fields[j] =
-				shadow_read_write_fields[i];
+			shadow_read_write_fields[j] = field;
 		j++;
 	}
 	max_shadow_read_write_fields = j;
-
-	/* shadowed fields guest access without vmexit */
-	for (i = 0; i < max_shadow_read_write_fields; i++) {
-		unsigned long field = shadow_read_write_fields[i];
-
-		clear_bit(field, vmx_vmwrite_bitmap);
-		clear_bit(field, vmx_vmread_bitmap);
-		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
-			clear_bit(field + 1, vmx_vmwrite_bitmap);
-			clear_bit(field + 1, vmx_vmread_bitmap);
-		}
-	}
-	for (i = 0; i < max_shadow_read_only_fields; i++) {
-		unsigned long field = shadow_read_only_fields[i];
-
-		clear_bit(field, vmx_vmread_bitmap);
-		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
-			clear_bit(field + 1, vmx_vmread_bitmap);
-	}
 }
 
 static __init int alloc_kvm_area(void)
@@ -4254,9 +4246,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
+				bool invalidate_gpa)
 {
-	if (enable_ept) {
+	if (enable_ept && (invalidate_gpa || !enable_vpid)) {
 		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 			return;
 		ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
@@ -4265,15 +4258,15 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
 	}
 }
 
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 {
-	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
 }
 
 static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
 {
 	if (enable_ept)
-		vmx_flush_tlb(vcpu);
+		vmx_flush_tlb(vcpu, true);
 }
 
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -4471,7 +4464,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		ept_load_pdptrs(vcpu);
 	}
 
-	vmx_flush_tlb(vcpu);
+	vmx_flush_tlb(vcpu, true);
 	vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
@@ -4488,6 +4481,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		    (to_vmx(vcpu)->rmode.vm86_active ?
 		     KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
+	if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
+		vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+			      SECONDARY_EXEC_DESC);
+		hw_cr4 &= ~X86_CR4_UMIP;
+	} else
+		vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+				SECONDARY_EXEC_DESC);
+
 	if (cr4 & X86_CR4_VMXE) {
 		/*
 		 * To use VMXON (and later other VMX instructions), a guest
@@ -5119,11 +5120,6 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
 {
 	int f = sizeof(unsigned long);
 
-	if (!cpu_has_vmx_msr_bitmap()) {
-		WARN_ON(1);
-		return;
-	}
-
 	/*
 	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
 	 * have the write-low and read-high bitmap offsets the wrong way round.
@@ -5263,7 +5259,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
 	if (max_irr != 256) {
 		vapic_page = kmap(vmx->nested.virtual_apic_page);
-		__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
+			vapic_page, &max_irr);
 		kunmap(vmx->nested.virtual_apic_page);
 
 		status = vmcs_read16(GUEST_INTR_STATUS);
@@ -5323,14 +5320,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
 
 	if (is_guest_mode(vcpu) &&
 	    vector == vmx->nested.posted_intr_nv) {
-		/* the PIR and ON have been set by L1. */
-		kvm_vcpu_trigger_posted_interrupt(vcpu, true);
 		/*
 		 * If a posted intr is not recognized by hardware,
 		 * we will accomplish it in the next vmentry.
 		 */
 		vmx->nested.pi_pending = true;
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		/* the PIR and ON have been set by L1. */
+		if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
+			kvm_vcpu_kick(vcpu);
 		return 0;
 	}
 	return -1;
@@ -5509,6 +5507,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	struct kvm_vcpu *vcpu = &vmx->vcpu;
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
 
+
 	if (!cpu_need_virtualize_apic_accesses(vcpu))
 		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 	if (vmx->vpid == 0)
@@ -5527,6 +5526,11 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+
+	/* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
+	 * in vmx_set_cr4. */
+	exec_control &= ~SECONDARY_EXEC_DESC;
+
 	/* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
 	   (handle_vmptrld).
 	   We can NOT enable shadow_vmcs here because we don't have yet
@@ -5646,10 +5650,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 #endif
 	int i;
 
-	/* I/O */
-	vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
-	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
-
 	if (enable_shadow_vmcs) {
 		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
 		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
@@ -6304,6 +6304,12 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 	return kvm_set_cr4(vcpu, val);
 }
 
+static int handle_desc(struct kvm_vcpu *vcpu)
+{
+	WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
+	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+}
+
 static int handle_cr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification, val;
@@ -6760,7 +6766,21 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	if (!is_guest_mode(vcpu) &&
 	    !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		trace_kvm_fast_mmio(gpa);
-		return kvm_skip_emulated_instruction(vcpu);
+		/*
+		 * Doing kvm_skip_emulated_instruction() depends on undefined
+		 * behavior: Intel's manual doesn't mandate
+		 * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
+		 * occurs and while on real hardware it was observed to be set,
+		 * other hypervisors (namely Hyper-V) don't set it, we end up
+		 * advancing IP with some random value. Disable fast mmio when
+		 * running nested and keep it for real hardware in hope that
+		 * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
+		 */
+		if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+			return kvm_skip_emulated_instruction(vcpu);
+		else
+			return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
+						       NULL, 0) == EMULATE_DONE;
 	}
 
 	ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -6957,10 +6977,6 @@ static __init int hardware_setup(void)
 	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
 
-	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-
-	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
 	if (setup_vmcs_config(&vmcs_config) < 0) {
 		r = -EIO;
 		goto out;
@@ -6973,11 +6989,6 @@ static __init int hardware_setup(void)
 	    !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
 		enable_vpid = 0;
 
-	if (!cpu_has_vmx_shadow_vmcs())
-		enable_shadow_vmcs = 0;
-	if (enable_shadow_vmcs)
-		init_vmcs_shadow_fields();
-
 	if (!cpu_has_vmx_ept() ||
 	    !cpu_has_vmx_ept_4levels() ||
 	    !cpu_has_vmx_ept_mt_wb() ||
@@ -7063,6 +7074,11 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->cancel_hv_timer = NULL;
 	}
 
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
+	if (enable_shadow_vmcs)
+		init_vmcs_shadow_fields();
+
 	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
 
 	kvm_mce_cap_supported |= MCG_LMCE_P;
|
|
|
|
|
p = ((char *)(get_vmcs12(vcpu))) + offset;
|
|
p = ((char *)(get_vmcs12(vcpu))) + offset;
|
|
|
|
|
|
- switch (vmcs_field_type(field)) {
|
|
|
|
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
|
|
|
+ switch (vmcs_field_width(field)) {
|
|
|
|
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
|
|
*ret = *((natural_width *)p);
|
|
*ret = *((natural_width *)p);
|
|
return 0;
|
|
return 0;
|
|
- case VMCS_FIELD_TYPE_U16:
|
|
|
|
|
|
+ case VMCS_FIELD_WIDTH_U16:
|
|
*ret = *((u16 *)p);
|
|
*ret = *((u16 *)p);
|
|
return 0;
|
|
return 0;
|
|
- case VMCS_FIELD_TYPE_U32:
|
|
|
|
|
|
+ case VMCS_FIELD_WIDTH_U32:
|
|
*ret = *((u32 *)p);
|
|
*ret = *((u32 *)p);
|
|
return 0;
|
|
return 0;
|
|
- case VMCS_FIELD_TYPE_U64:
|
|
|
|
|
|
+ case VMCS_FIELD_WIDTH_U64:
|
|
*ret = *((u64 *)p);
|
|
*ret = *((u64 *)p);
|
|
return 0;
|
|
return 0;
|
|
default:
|
|
default:
|
|
@@ -7620,17 +7636,17 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
|
|
if (offset < 0)
|
|
if (offset < 0)
|
|
return offset;
|
|
return offset;
|
|
|
|
|
|
- switch (vmcs_field_type(field)) {
|
|
|
|
- case VMCS_FIELD_TYPE_U16:
|
|
|
|
|
|
+ switch (vmcs_field_width(field)) {
|
|
|
|
+ case VMCS_FIELD_WIDTH_U16:
|
|
*(u16 *)p = field_value;
|
|
*(u16 *)p = field_value;
|
|
return 0;
|
|
return 0;
|
|
- case VMCS_FIELD_TYPE_U32:
|
|
|
|
|
|
+ case VMCS_FIELD_WIDTH_U32:
|
|
*(u32 *)p = field_value;
|
|
*(u32 *)p = field_value;
|
|
return 0;
|
|
return 0;
|
|
- case VMCS_FIELD_TYPE_U64:
|
|
|
|
|
|
+ case VMCS_FIELD_WIDTH_U64:
|
|
*(u64 *)p = field_value;
|
|
*(u64 *)p = field_value;
|
|
return 0;
|
|
return 0;
|
|
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
|
|
|
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
|
|
*(natural_width *)p = field_value;
|
|
*(natural_width *)p = field_value;
|
|
return 0;
|
|
return 0;
|
|
default:
|
|
default:
|
|
@@ -7646,7 +7662,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
|
|
unsigned long field;
|
|
unsigned long field;
|
|
u64 field_value;
|
|
u64 field_value;
|
|
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
|
|
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
|
|
- const unsigned long *fields = shadow_read_write_fields;
|
|
|
|
|
|
+ const u16 *fields = shadow_read_write_fields;
|
|
const int num_fields = max_shadow_read_write_fields;
|
|
const int num_fields = max_shadow_read_write_fields;
|
|
|
|
|
|
preempt_disable();
|
|
preempt_disable();
|
|
@@ -7655,23 +7671,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
|
|
|
|
|
|
for (i = 0; i < num_fields; i++) {
|
|
for (i = 0; i < num_fields; i++) {
|
|
field = fields[i];
|
|
field = fields[i];
|
|
- switch (vmcs_field_type(field)) {
|
|
|
|
- case VMCS_FIELD_TYPE_U16:
|
|
|
|
- field_value = vmcs_read16(field);
|
|
|
|
- break;
|
|
|
|
- case VMCS_FIELD_TYPE_U32:
|
|
|
|
- field_value = vmcs_read32(field);
|
|
|
|
- break;
|
|
|
|
- case VMCS_FIELD_TYPE_U64:
|
|
|
|
- field_value = vmcs_read64(field);
|
|
|
|
- break;
|
|
|
|
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
|
- field_value = vmcs_readl(field);
|
|
|
|
- break;
|
|
|
|
- default:
|
|
|
|
- WARN_ON(1);
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
|
|
+ field_value = __vmcs_readl(field);
|
|
vmcs12_write_any(&vmx->vcpu, field, field_value);
|
|
vmcs12_write_any(&vmx->vcpu, field, field_value);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -7683,7 +7683,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
|
|
|
|
|
|
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
|
|
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
|
|
{
|
|
{
|
|
- const unsigned long *fields[] = {
|
|
|
|
|
|
+ const u16 *fields[] = {
|
|
shadow_read_write_fields,
|
|
shadow_read_write_fields,
|
|
shadow_read_only_fields
|
|
shadow_read_only_fields
|
|
};
|
|
};
|
|
@@ -7702,24 +7702,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
|
|
for (i = 0; i < max_fields[q]; i++) {
|
|
for (i = 0; i < max_fields[q]; i++) {
|
|
field = fields[q][i];
|
|
field = fields[q][i];
|
|
vmcs12_read_any(&vmx->vcpu, field, &field_value);
|
|
vmcs12_read_any(&vmx->vcpu, field, &field_value);
|
|
-
|
|
|
|
- switch (vmcs_field_type(field)) {
|
|
|
|
- case VMCS_FIELD_TYPE_U16:
|
|
|
|
- vmcs_write16(field, (u16)field_value);
|
|
|
|
- break;
|
|
|
|
- case VMCS_FIELD_TYPE_U32:
|
|
|
|
- vmcs_write32(field, (u32)field_value);
|
|
|
|
- break;
|
|
|
|
- case VMCS_FIELD_TYPE_U64:
|
|
|
|
- vmcs_write64(field, (u64)field_value);
|
|
|
|
- break;
|
|
|
|
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
|
|
|
|
- vmcs_writel(field, (long)field_value);
|
|
|
|
- break;
|
|
|
|
- default:
|
|
|
|
- WARN_ON(1);
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
|
|
+ __vmcs_writel(field, field_value);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -7788,8 +7771,10 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
|
|
{
|
|
{
|
|
unsigned long field;
|
|
unsigned long field;
|
|
gva_t gva;
|
|
gva_t gva;
|
|
|
|
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
|
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
|
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
|
|
+
|
|
/* The value to write might be 32 or 64 bits, depending on L1's long
|
|
/* The value to write might be 32 or 64 bits, depending on L1's long
|
|
* mode, and eventually we need to write that into a field of several
|
|
* mode, and eventually we need to write that into a field of several
|
|
* possible lengths. The code below first zero-extends the value to 64
|
|
* possible lengths. The code below first zero-extends the value to 64
|
|
@@ -7832,6 +7817,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ switch (field) {
|
|
|
|
+#define SHADOW_FIELD_RW(x) case x:
|
|
|
|
+#include "vmx_shadow_fields.h"
|
|
|
|
+ /*
|
|
|
|
+ * The fields that can be updated by L1 without a vmexit are
|
|
|
|
+ * always updated in the vmcs02, the others go down the slow
|
|
|
|
+ * path of prepare_vmcs02.
|
|
|
|
+ */
|
|
|
|
+ break;
|
|
|
|
+ default:
|
|
|
|
+ vmx->nested.dirty_vmcs12 = true;
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+
|
|
nested_vmx_succeed(vcpu);
|
|
nested_vmx_succeed(vcpu);
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
}
|
|
}
|
|
@@ -7846,6 +7845,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
|
|
__pa(vmx->vmcs01.shadow_vmcs));
|
|
__pa(vmx->vmcs01.shadow_vmcs));
|
|
vmx->nested.sync_shadow_vmcs = true;
|
|
vmx->nested.sync_shadow_vmcs = true;
|
|
}
|
|
}
|
|
|
|
+ vmx->nested.dirty_vmcs12 = true;
|
|
}
|
|
}
|
|
|
|
|
|
/* Emulate the VMPTRLD instruction */
|
|
/* Emulate the VMPTRLD instruction */
|
|
@@ -8066,7 +8066,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
}
|
|
}
|
|
|
|
|
|
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
|
|
|
|
|
|
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
|
|
nested_vmx_succeed(vcpu);
|
|
nested_vmx_succeed(vcpu);
|
|
|
|
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
@@ -8260,6 +8260,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
|
[EXIT_REASON_XSETBV] = handle_xsetbv,
|
|
[EXIT_REASON_XSETBV] = handle_xsetbv,
|
|
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
|
|
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
|
|
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
|
|
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
|
|
|
|
+ [EXIT_REASON_GDTR_IDTR] = handle_desc,
|
|
|
|
+ [EXIT_REASON_LDTR_TR] = handle_desc,
|
|
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
|
|
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
|
|
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
|
|
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
|
|
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
|
|
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
|
|
@@ -9069,36 +9071,23 @@ static void vmx_set_rvi(int vector)
|
|
|
|
|
|
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
|
|
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
|
|
{
|
|
{
|
|
- if (!is_guest_mode(vcpu)) {
|
|
|
|
- vmx_set_rvi(max_irr);
|
|
|
|
- return;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (max_irr == -1)
|
|
|
|
- return;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * In guest mode. If a vmexit is needed, vmx_check_nested_events
|
|
|
|
- * handles it.
|
|
|
|
- */
|
|
|
|
- if (nested_exit_on_intr(vcpu))
|
|
|
|
- return;
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
- * Else, fall back to pre-APICv interrupt injection since L2
|
|
|
|
- * is run without virtual interrupt delivery.
|
|
|
|
|
|
+ * When running L2, updating RVI is only relevant when
|
|
|
|
+ * vmcs12 virtual-interrupt-delivery enabled.
|
|
|
|
+ * However, it can be enabled only when L1 also
|
|
|
|
+ * intercepts external-interrupts and in that case
|
|
|
|
+ * we should not update vmcs02 RVI but instead intercept
|
|
|
|
+ * interrupt. Therefore, do nothing when running L2.
|
|
*/
|
|
*/
|
|
- if (!kvm_event_needs_reinjection(vcpu) &&
|
|
|
|
- vmx_interrupt_allowed(vcpu)) {
|
|
|
|
- kvm_queue_interrupt(vcpu, max_irr, false);
|
|
|
|
- vmx_inject_irq(vcpu);
|
|
|
|
- }
|
|
|
|
|
|
+ if (!is_guest_mode(vcpu))
|
|
|
|
+ vmx_set_rvi(max_irr);
|
|
}
|
|
}
|
|
|
|
|
|
static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
|
|
static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
|
|
{
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
int max_irr;
|
|
int max_irr;
|
|
|
|
+ bool max_irr_updated;
|
|
|
|
|
|
WARN_ON(!vcpu->arch.apicv_active);
|
|
WARN_ON(!vcpu->arch.apicv_active);
|
|
if (pi_test_on(&vmx->pi_desc)) {
|
|
if (pi_test_on(&vmx->pi_desc)) {
|
|
@@ -9108,7 +9097,23 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
|
|
* But on x86 this is just a compiler barrier anyway.
|
|
* But on x86 this is just a compiler barrier anyway.
|
|
*/
|
|
*/
|
|
smp_mb__after_atomic();
|
|
smp_mb__after_atomic();
|
|
- max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
|
|
|
|
|
|
+ max_irr_updated =
|
|
|
|
+ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * If we are running L2 and L1 has a new pending interrupt
|
|
|
|
+ * which can be injected, we should re-evaluate
|
|
|
|
+ * what should be done with this new L1 interrupt.
|
|
|
|
+ * If L1 intercepts external-interrupts, we should
|
|
|
|
+ * exit from L2 to L1. Otherwise, interrupt should be
|
|
|
|
+ * delivered directly to L2.
|
|
|
|
+ */
|
|
|
|
+ if (is_guest_mode(vcpu) && max_irr_updated) {
|
|
|
|
+ if (nested_exit_on_intr(vcpu))
|
|
|
|
+ kvm_vcpu_exiting_guest_mode(vcpu);
|
|
|
|
+ else
|
|
|
|
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
|
|
|
|
+ }
|
|
} else {
|
|
} else {
|
|
max_irr = kvm_lapic_find_highest_irr(vcpu);
|
|
max_irr = kvm_lapic_find_highest_irr(vcpu);
|
|
}
|
|
}
|
|
@@ -9223,6 +9228,12 @@ static bool vmx_xsaves_supported(void)
|
|
SECONDARY_EXEC_XSAVES;
|
|
SECONDARY_EXEC_XSAVES;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static bool vmx_umip_emulated(void)
|
|
|
|
+{
|
|
|
|
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
|
|
|
|
+ SECONDARY_EXEC_DESC;
|
|
|
|
+}
|
|
|
|
+
|
|
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
|
|
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
|
|
{
|
|
{
|
|
u32 exit_intr_info;
|
|
u32 exit_intr_info;
|
|
@@ -9378,7 +9389,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
|
|
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
{
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
- unsigned long debugctlmsr, cr3, cr4;
|
|
|
|
|
|
+ unsigned long cr3, cr4;
|
|
|
|
|
|
/* Record the guest's net vcpu time for enforced NMI injections. */
|
|
/* Record the guest's net vcpu time for enforced NMI injections. */
|
|
if (unlikely(!enable_vnmi &&
|
|
if (unlikely(!enable_vnmi &&
|
|
@@ -9431,7 +9442,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
__write_pkru(vcpu->arch.pkru);
|
|
__write_pkru(vcpu->arch.pkru);
|
|
|
|
|
|
atomic_switch_perf_msrs(vmx);
|
|
atomic_switch_perf_msrs(vmx);
|
|
- debugctlmsr = get_debugctlmsr();
|
|
|
|
|
|
|
|
vmx_arm_hv_timer(vcpu);
|
|
vmx_arm_hv_timer(vcpu);
|
|
|
|
|
|
@@ -9587,8 +9597,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
vmexit_fill_RSB();
|
|
vmexit_fill_RSB();
|
|
|
|
|
|
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
|
|
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
|
|
- if (debugctlmsr)
|
|
|
|
- update_debugctlmsr(debugctlmsr);
|
|
|
|
|
|
+ if (vmx->host_debugctlmsr)
|
|
|
|
+ update_debugctlmsr(vmx->host_debugctlmsr);
|
|
|
|
|
|
#ifndef CONFIG_X86_64
|
|
#ifndef CONFIG_X86_64
|
|
/*
|
|
/*
|
|
@@ -9668,10 +9678,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
|
|
static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
|
|
static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
|
|
{
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
- int r;
|
|
|
|
|
|
|
|
- r = vcpu_load(vcpu);
|
|
|
|
- BUG_ON(r);
|
|
|
|
|
|
+ vcpu_load(vcpu);
|
|
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
|
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
|
free_nested(vmx);
|
|
free_nested(vmx);
|
|
vcpu_put(vcpu);
|
|
vcpu_put(vcpu);
|
|
@@ -9871,7 +9879,8 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
|
|
u32 mask =
|
|
u32 mask =
|
|
SECONDARY_EXEC_SHADOW_VMCS |
|
|
SECONDARY_EXEC_SHADOW_VMCS |
|
|
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
|
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
|
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
|
|
|
|
|
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
|
|
|
+ SECONDARY_EXEC_DESC;
|
|
|
|
|
|
u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
|
u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
|
|
|
|
|
@@ -10037,8 +10046,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
|
|
|
|
- struct vmcs12 *vmcs12);
|
|
|
|
|
|
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
|
|
|
|
+ struct vmcs12 *vmcs12);
|
|
|
|
|
|
static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
|
|
static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
struct vmcs12 *vmcs12)
|
|
@@ -10127,11 +10136,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
|
|
(unsigned long)(vmcs12->posted_intr_desc_addr &
|
|
(unsigned long)(vmcs12->posted_intr_desc_addr &
|
|
(PAGE_SIZE - 1)));
|
|
(PAGE_SIZE - 1)));
|
|
}
|
|
}
|
|
- if (cpu_has_vmx_msr_bitmap() &&
|
|
|
|
- nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
|
|
|
|
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
|
|
|
|
- ;
|
|
|
|
- else
|
|
|
|
|
|
+ if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
|
|
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
|
|
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
|
|
CPU_BASED_USE_MSR_BITMAPS);
|
|
CPU_BASED_USE_MSR_BITMAPS);
|
|
}
|
|
}
|
|
@@ -10199,8 +10204,8 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
|
|
* Merge L0's and L1's MSR bitmap, return false to indicate that
|
|
* Merge L0's and L1's MSR bitmap, return false to indicate that
|
|
* we do not use the hardware.
|
|
* we do not use the hardware.
|
|
*/
|
|
*/
|
|
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
|
|
|
|
- struct vmcs12 *vmcs12)
|
|
|
|
|
|
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
|
|
|
|
+ struct vmcs12 *vmcs12)
|
|
{
|
|
{
|
|
int msr;
|
|
int msr;
|
|
struct page *page;
|
|
struct page *page;
|
|
@@ -10222,6 +10227,11 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
|
|
bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
|
|
bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
|
|
bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
|
|
bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
|
|
|
|
|
|
|
|
+ /* Nothing to do if the MSR bitmap is not in use. */
|
|
|
|
+ if (!cpu_has_vmx_msr_bitmap() ||
|
|
|
|
+ !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
|
|
if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
|
|
!pred_cmd && !spec_ctrl)
|
|
!pred_cmd && !spec_ctrl)
|
|
return false;
|
|
return false;
|
|
@@ -10229,32 +10239,41 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
|
|
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
|
|
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
|
|
if (is_error_page(page))
|
|
if (is_error_page(page))
|
|
return false;
|
|
return false;
|
|
- msr_bitmap_l1 = (unsigned long *)kmap(page);
|
|
|
|
|
|
|
|
- memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
|
|
|
|
|
|
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
|
|
|
|
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
|
|
|
|
+ /*
|
|
|
|
+ * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
|
|
|
|
+ * just lets the processor take the value from the virtual-APIC page;
|
|
|
|
+ * take those 256 bits directly from the L1 bitmap.
|
|
|
|
+ */
|
|
|
|
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
|
|
|
|
+ unsigned word = msr / BITS_PER_LONG;
|
|
|
|
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
|
|
|
|
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
|
|
|
|
+ unsigned word = msr / BITS_PER_LONG;
|
|
|
|
+ msr_bitmap_l0[word] = ~0;
|
|
|
|
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
- if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
|
|
|
|
- if (nested_cpu_has_apic_reg_virt(vmcs12))
|
|
|
|
- for (msr = 0x800; msr <= 0x8ff; msr++)
|
|
|
|
- nested_vmx_disable_intercept_for_msr(
|
|
|
|
- msr_bitmap_l1, msr_bitmap_l0,
|
|
|
|
- msr, MSR_TYPE_R);
|
|
|
|
|
|
+ nested_vmx_disable_intercept_for_msr(
|
|
|
|
+ msr_bitmap_l1, msr_bitmap_l0,
|
|
|
|
+ X2APIC_MSR(APIC_TASKPRI),
|
|
|
|
+ MSR_TYPE_W);
|
|
|
|
|
|
|
|
+ if (nested_cpu_has_vid(vmcs12)) {
|
|
nested_vmx_disable_intercept_for_msr(
|
|
nested_vmx_disable_intercept_for_msr(
|
|
- msr_bitmap_l1, msr_bitmap_l0,
|
|
|
|
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
|
|
|
|
- MSR_TYPE_R | MSR_TYPE_W);
|
|
|
|
-
|
|
|
|
- if (nested_cpu_has_vid(vmcs12)) {
|
|
|
|
- nested_vmx_disable_intercept_for_msr(
|
|
|
|
- msr_bitmap_l1, msr_bitmap_l0,
|
|
|
|
- APIC_BASE_MSR + (APIC_EOI >> 4),
|
|
|
|
- MSR_TYPE_W);
|
|
|
|
- nested_vmx_disable_intercept_for_msr(
|
|
|
|
- msr_bitmap_l1, msr_bitmap_l0,
|
|
|
|
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
|
|
|
|
- MSR_TYPE_W);
|
|
|
|
- }
|
|
|
|
|
|
+ msr_bitmap_l1, msr_bitmap_l0,
|
|
|
|
+ X2APIC_MSR(APIC_EOI),
|
|
|
|
+ MSR_TYPE_W);
|
|
|
|
+ nested_vmx_disable_intercept_for_msr(
|
|
|
|
+ msr_bitmap_l1, msr_bitmap_l0,
|
|
|
|
+ X2APIC_MSR(APIC_SELF_IPI),
|
|
|
|
+ MSR_TYPE_W);
|
|
}
|
|
}
|
|
|
|
|
|
if (spec_ctrl)
|
|
if (spec_ctrl)
|
|
@@ -10534,25 +10553,12 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
|
|
|
|
- * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
|
|
|
|
- * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
|
|
|
|
- * guest in a way that will both be appropriate to L1's requests, and our
|
|
|
|
- * needs. In addition to modifying the active vmcs (which is vmcs02), this
|
|
|
|
- * function also has additional necessary side-effects, like setting various
|
|
|
|
- * vcpu->arch fields.
|
|
|
|
- * Returns 0 on success, 1 on failure. Invalid state exit qualification code
|
|
|
|
- * is assigned to entry_failure_code on failure.
|
|
|
|
- */
|
|
|
|
-static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
|
|
- bool from_vmentry, u32 *entry_failure_code)
|
|
|
|
|
|
+static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
|
|
+ bool from_vmentry)
|
|
{
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
- u32 exec_control, vmcs12_exec_ctrl;
|
|
|
|
|
|
|
|
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
|
|
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
|
|
- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
|
|
|
|
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
|
|
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
|
|
vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
|
|
vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
|
|
vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
|
|
vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
|
|
@@ -10560,7 +10566,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
|
|
vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
|
|
vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
|
|
vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
|
|
vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
|
|
vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
|
|
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
|
|
|
|
vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
|
|
vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
|
|
vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
|
|
vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
|
|
vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
|
|
vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
|
|
@@ -10570,15 +10575,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
|
|
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
|
|
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
|
|
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
|
|
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
|
|
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
|
|
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
|
|
|
|
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
|
|
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
|
|
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
|
|
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
|
|
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
|
|
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
|
|
vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
|
|
vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
|
|
vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
|
|
vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
|
|
vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
|
|
vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
|
|
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
|
|
|
|
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
|
|
|
|
vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
|
|
vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
|
|
vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
|
|
vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
|
|
vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
|
|
vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
|
|
@@ -10588,6 +10590,125 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
|
|
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
|
|
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
|
|
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
|
|
|
|
|
|
|
|
+ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
|
|
|
|
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
|
|
|
|
+ vmcs12->guest_pending_dbg_exceptions);
|
|
|
|
+ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
|
|
|
|
+ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
|
|
|
|
+
|
|
|
|
+ if (nested_cpu_has_xsaves(vmcs12))
|
|
|
|
+ vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
|
|
|
|
+ vmcs_write64(VMCS_LINK_POINTER, -1ull);
|
|
|
|
+
|
|
|
|
+ if (cpu_has_vmx_posted_intr())
|
|
|
|
+ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Whether page-faults are trapped is determined by a combination of
|
|
|
|
+ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
|
|
|
|
+ * If enable_ept, L0 doesn't care about page faults and we should
|
|
|
|
+ * set all of these to L1's desires. However, if !enable_ept, L0 does
|
|
|
|
+ * care about (at least some) page faults, and because it is not easy
|
|
|
|
+ * (if at all possible?) to merge L0 and L1's desires, we simply ask
|
|
|
|
+ * to exit on each and every L2 page fault. This is done by setting
|
|
|
|
+ * MASK=MATCH=0 and (see below) EB.PF=1.
|
|
|
|
+ * Note that below we don't need special code to set EB.PF beyond the
|
|
|
|
+ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
|
|
|
|
+ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
|
|
|
|
+ * !enable_ept, EB.PF is 1, so the "or" will always be 1.
|
|
|
|
+ */
|
|
|
|
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
|
|
|
|
+ enable_ept ? vmcs12->page_fault_error_code_mask : 0);
|
|
|
|
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
|
|
|
|
+ enable_ept ? vmcs12->page_fault_error_code_match : 0);
|
|
|
|
+
|
|
|
|
+ /* All VMFUNCs are currently emulated through L0 vmexits. */
|
|
|
|
+ if (cpu_has_vmx_vmfunc())
|
|
|
|
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
|
|
|
|
+
|
|
|
|
+ if (cpu_has_vmx_apicv()) {
|
|
|
|
+ vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
|
|
|
|
+ vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
|
|
|
|
+ vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
|
|
|
|
+ vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Set host-state according to L0's settings (vmcs12 is irrelevant here)
|
|
|
|
+ * Some constant fields are set here by vmx_set_constant_host_state().
|
|
|
|
+ * Other fields are different per CPU, and will be set later when
|
|
|
|
+ * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
|
|
|
|
+ */
|
|
|
|
+ vmx_set_constant_host_state(vmx);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Set the MSR load/store lists to match L0's settings.
|
|
|
|
+ */
|
|
|
|
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
|
|
|
|
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
|
|
|
|
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
|
|
|
|
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
|
|
|
|
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
|
|
|
|
+
|
|
|
|
+ set_cr4_guest_host_mask(vmx);
|
|
|
|
+
|
|
|
|
+ if (vmx_mpx_supported())
|
|
|
|
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
|
|
|
|
+
|
|
|
|
+ if (enable_vpid) {
|
|
|
|
+ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
|
|
|
|
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
|
|
|
|
+ else
|
|
|
|
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * L1 may access the L2's PDPTR, so save them to construct vmcs12
|
|
|
|
+ */
|
|
|
|
+ if (enable_ept) {
|
|
|
|
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
|
|
|
|
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
|
|
|
|
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
|
|
|
|
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (cpu_has_vmx_msr_bitmap())
|
|
|
|
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
|
|
|
|
+ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
|
|
|
|
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
|
|
|
|
+ * guest in a way that will both be appropriate to L1's requests, and our
|
|
|
|
+ * needs. In addition to modifying the active vmcs (which is vmcs02), this
|
|
|
|
+ * function also has additional necessary side-effects, like setting various
|
|
|
|
+ * vcpu->arch fields.
|
|
|
|
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
|
|
|
|
+ * is assigned to entry_failure_code on failure.
|
|
|
|
+ */
|
|
|
|
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
|
|
+ bool from_vmentry, u32 *entry_failure_code)
|
|
|
|
+{
|
|
|
|
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
+ u32 exec_control, vmcs12_exec_ctrl;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * First, the fields that are shadowed. This must be kept in sync
|
|
|
|
+ * with vmx_shadow_fields.h.
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
|
|
|
|
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
|
|
|
|
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
|
|
|
|
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
|
|
|
|
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
|
|
|
|
+ * HOST_FS_BASE, HOST_GS_BASE.
|
|
|
|
+ */
|
|
|
|
+
|
|
if (from_vmentry &&
|
|
if (from_vmentry &&
|
|
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
|
|
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
|
|
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
|
|
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
|
|
@@ -10610,16 +10731,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	} else {
 		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
 	}
-	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
 	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
-	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
-		vmcs12->guest_pending_dbg_exceptions);
-	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
-	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
-
-	if (nested_cpu_has_xsaves(vmcs12))
-		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
-	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
 
@@ -10633,7 +10745,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	if (nested_cpu_has_posted_intr(vmcs12)) {
 		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
 		vmx->nested.pi_pending = false;
-		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
 	} else {
 		exec_control &= ~PIN_BASED_POSTED_INTR;
 	}
@@ -10644,25 +10755,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	if (nested_cpu_has_preemption_timer(vmcs12))
 		vmx_start_preemption_timer(vcpu);
 
-	/*
-	 * Whether page-faults are trapped is determined by a combination of
-	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
-	 * If enable_ept, L0 doesn't care about page faults and we should
-	 * set all of these to L1's desires. However, if !enable_ept, L0 does
-	 * care about (at least some) page faults, and because it is not easy
-	 * (if at all possible?) to merge L0 and L1's desires, we simply ask
-	 * to exit on each and every L2 page fault. This is done by setting
-	 * MASK=MATCH=0 and (see below) EB.PF=1.
-	 * Note that below we don't need special code to set EB.PF beyond the
-	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
-	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
-	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
-	 */
-	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
-		enable_ept ? vmcs12->page_fault_error_code_mask : 0);
-	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
-		enable_ept ? vmcs12->page_fault_error_code_match : 0);
-
 	if (cpu_has_secondary_exec_ctrls()) {
 		exec_control = vmx->secondary_exec_control;
 
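The comment removed in this hunk reasons about when an L2 page fault must cause a vmexit: with MASK = MATCH = 0 and EB.PF = 1, every #PF exits. The standalone model below, with invented names, restates that decision using the hardware rule that a page fault exits when the masked error code matching PFEC_MATCH agrees with the exception-bitmap PF bit; it is a sketch, not the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EB_PF_BIT 14

/*
 * Model of the decision the removed comment describes: does a page fault
 * with error code @pfec, under the given exception bitmap and PFEC
 * mask/match, cause a vmexit?
 */
static bool l2_pf_causes_vmexit(uint32_t exception_bitmap,
                                uint32_t pfec_mask, uint32_t pfec_match,
                                uint32_t pfec)
{
        bool eb_pf = exception_bitmap & (1u << EB_PF_BIT);
        bool match = (pfec & pfec_mask) == pfec_match;

        return eb_pf == match;
}

int main(void)
{
        /* !EPT case: MASK = MATCH = 0 and EB.PF = 1 -> every #PF exits. */
        uint32_t eb = 1u << EB_PF_BIT;

        printf("exit on pfec=0x2? %d\n", l2_pf_causes_vmexit(eb, 0, 0, 0x2));
        printf("exit on pfec=0x0? %d\n", l2_pf_causes_vmexit(eb, 0, 0, 0x0));
        return 0;
}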
@@ -10681,22 +10773,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			exec_control |= vmcs12_exec_ctrl;
 		}
 
-		/* All VMFUNCs are currently emulated through L0 vmexits.  */
-		if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
-			vmcs_write64(VM_FUNCTION_CONTROL, 0);
-
-		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
-			vmcs_write64(EOI_EXIT_BITMAP0,
-				vmcs12->eoi_exit_bitmap0);
-			vmcs_write64(EOI_EXIT_BITMAP1,
-				vmcs12->eoi_exit_bitmap1);
-			vmcs_write64(EOI_EXIT_BITMAP2,
-				vmcs12->eoi_exit_bitmap2);
-			vmcs_write64(EOI_EXIT_BITMAP3,
-				vmcs12->eoi_exit_bitmap3);
+		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
 			vmcs_write16(GUEST_INTR_STATUS,
 				vmcs12->guest_intr_status);
-		}
 
 		/*
 		 * Write an illegal value to APIC_ACCESS_ADDR. Later,
@@ -10709,24 +10788,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 
-
-	/*
-	 * Set host-state according to L0's settings (vmcs12 is irrelevant here)
-	 * Some constant fields are set here by vmx_set_constant_host_state().
-	 * Other fields are different per CPU, and will be set later when
-	 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
-	 */
-	vmx_set_constant_host_state(vmx);
-
-	/*
-	 * Set the MSR load/store lists to match L0's settings.
-	 */
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
-	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
-
 	/*
 	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
 	 * entry, but only if the current (host) sp changed from the value
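The removed block programmed the VM-entry and VM-exit MSR-load lists from vmx->msr_autoload; the same VM_ENTRY_MSR_LOAD_ADDR write now appears in the prepare_vmcs02_full additions earlier in this patch. Each list is an in-memory array of entries plus a count. The sketch below only models that layout with hypothetical names, following the 16-byte VMX entry format (32-bit MSR index, reserved word, 64-bit value).

#include <stdint.h>
#include <stdio.h>

/* One entry of a VMX MSR-load/store area: index, reserved word, value. */
struct msr_autoload_entry {
        uint32_t index;
        uint32_t reserved;
        uint64_t value;
};

/* Hypothetical container mirroring a "count + array" autoload list. */
struct msr_autoload_list {
        unsigned int nr;
        struct msr_autoload_entry entries[8];
};

static void add_autoload_msr(struct msr_autoload_list *list,
                             uint32_t index, uint64_t value)
{
        if (list->nr >= 8)
                return; /* a real implementation would report the overflow */
        list->entries[list->nr].index = index;
        list->entries[list->nr].reserved = 0;
        list->entries[list->nr].value = value;
        list->nr++;
}

int main(void)
{
        struct msr_autoload_list guest = { 0 };

        /* IA32_PAT (0x277) with its reset value, purely for illustration. */
        add_autoload_msr(&guest, 0x277, 0x0007040600070406ULL);
        printf("entries: %u, first index: 0x%x\n",
               guest.nr, guest.entries[0].index);
        return 0;
}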
@@ -10758,8 +10819,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	}
 
 	/*
-	 * Merging of IO bitmap not currently supported.
-	 * Rather, exit every time.
+	 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
+	 * for I/O port accesses.
 	 */
 	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
 	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
@@ -10796,12 +10857,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 	}
 
-	set_cr4_guest_host_mask(vmx);
-
-	if (from_vmentry &&
-	    vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
-		vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
-
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vmcs_write64(TSC_OFFSET,
 			vcpu->arch.tsc_offset + vmcs12->tsc_offset);
@@ -10810,9 +10865,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	if (kvm_has_tsc_control)
 		decache_tsc_multiplier(vmx);
 
-	if (cpu_has_vmx_msr_bitmap())
-		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
-
 	if (enable_vpid) {
 		/*
 		 * There is no direct mapping between vpid02 and vpid12, the
@@ -10823,16 +10875,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		 * even if spawn a lot of nested vCPUs.
 		 */
 		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
-			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
 			if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
 				vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-				__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+				__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
 			}
 		} else {
-			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
-			vmx_flush_tlb(vcpu);
+			vmx_flush_tlb(vcpu, true);
 		}
-
 	}
 
 	if (enable_pml) {
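In the vpid02 path above, the VPID value L1 asked for is compared against a cached last_vpid and a flush is issued only when it changes; without a dedicated vpid02 the TLB is flushed unconditionally. A standalone sketch of that caching rule, with hypothetical names, follows.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical per-vCPU nested-TLB state, for illustration only. */
struct nested_tlb_state {
        uint16_t vpid02;     /* hardware tag reserved for L2, 0 if none */
        uint16_t last_vpid;  /* last VPID value the L1 hypervisor used */
};

/* Returns true when a TLB flush for L2 is required on this nested entry. */
static bool nested_entry_needs_flush(struct nested_tlb_state *s,
                                     uint16_t l1_requested_vpid)
{
        if (!s->vpid02)
                return true;            /* no dedicated tag: always flush */
        if (l1_requested_vpid != s->last_vpid) {
                s->last_vpid = l1_requested_vpid;
                return true;            /* tag reused for a new L1 VPID */
        }
        return false;                   /* same tag, cached mappings valid */
}

int main(void)
{
        struct nested_tlb_state s = { .vpid02 = 2, .last_vpid = 0 };

        printf("%d %d %d\n",
               nested_entry_needs_flush(&s, 5),   /* 1: new VPID */
               nested_entry_needs_flush(&s, 5),   /* 0: unchanged */
               nested_entry_needs_flush(&s, 7));  /* 1: changed again */
        return 0;
}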
@@ -10881,6 +10930,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
 	vmx_set_efer(vcpu, vcpu->arch.efer);
 
+	if (vmx->nested.dirty_vmcs12) {
+		prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+		vmx->nested.dirty_vmcs12 = false;
+	}
+
 	/* Shadow page tables on either EPT or shadow page tables. */
 	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
 				entry_failure_code))
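The new dirty_vmcs12 flag lets prepare_vmcs02 skip prepare_vmcs02_full, which rewrites the rarely-changing vmcs12 fields, unless something has marked the cached state stale. The fragment below is a generic model of that pattern, not the kernel code: a cheap fast path on every entry, plus a full resync gated on a dirty bit set by whatever mutates the cached state.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative cached-state structure, not the kernel's vmcs12. */
struct cached_state {
        int rarely_changing_field;
        bool dirty;
};

static void update_field(struct cached_state *s, int v)
{
        s->rarely_changing_field = v;
        s->dirty = true;                /* remember a full resync is needed */
}

static void prepare_full(const struct cached_state *s)
{
        printf("full resync: field=%d\n", s->rarely_changing_field);
}

static void prepare_fast(struct cached_state *s)
{
        /* hot per-entry work would go here */
        if (s->dirty) {
                prepare_full(s);
                s->dirty = false;
        }
}

int main(void)
{
        struct cached_state s = { .rarely_changing_field = 1, .dirty = true };

        prepare_fast(&s);   /* resyncs once */
        prepare_fast(&s);   /* fast path only */
        update_field(&s, 2);
        prepare_fast(&s);   /* resyncs again */
        return 0;
}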
@@ -10889,16 +10943,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	if (!enable_ept)
 		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
 
-	/*
-	 * L1 may access the L2's PDPTR, so save them to construct vmcs12
-	 */
-	if (enable_ept) {
-		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
-		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
-		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
-		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
-	}
-
 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
 	return 0;
@@ -11254,7 +11298,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-		vcpu->arch.exception.pending = false;
 		return 0;
 	}
 
@@ -11535,11 +11578,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 		 * L1's vpid. TODO: move to a more elaborate solution, giving
 		 * each L2 its own vpid and exposing the vpid feature to L1.
 		 */
-		vmx_flush_tlb(vcpu);
+		vmx_flush_tlb(vcpu, true);
 	}
-	/* Restore posted intr vector. */
-	if (nested_cpu_has_posted_intr(vmcs12))
-		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
 
 	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
 	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
@@ -11800,6 +11840,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage)
 {
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+
+	/*
+	 * RDPID causes #UD if disabled through secondary execution controls.
+	 * Because it is marked as EmulateOnUD, we need to intercept it here.
+	 */
+	if (info->intercept == x86_intercept_rdtscp &&
+	    !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+		ctxt->exception.vector = UD_VECTOR;
+		ctxt->exception.error_code_valid = false;
+		return X86EMUL_PROPAGATE_FAULT;
+	}
+
+	/* TODO: check more intercepts... */
 	return X86EMUL_CONTINUE;
 }
 
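The new vmx_check_intercept code injects #UD when the emulator reports RDPID through the rdtscp intercept slot while L1 has not enabled the matching secondary control for its guest. A standalone model of that decision, with invented enum and names, is sketched below.

#include <stdbool.h>
#include <stdio.h>

/* Invented names modelling the emulator's intercept callback contract. */
enum intercept_result { CONTINUE_EMULATION, INJECT_UD };

struct nested_ctrls {
        bool rdtscp_enabled;    /* stands in for SECONDARY_EXEC_RDTSCP */
};

static enum intercept_result check_rdpid_intercept(const struct nested_ctrls *l1,
                                                   bool is_rdtscp_intercept)
{
        /*
         * If L1 did not grant the control to its guest, the instruction
         * must raise #UD instead of being emulated on the guest's behalf.
         */
        if (is_rdtscp_intercept && !l1->rdtscp_enabled)
                return INJECT_UD;
        return CONTINUE_EMULATION;
}

int main(void)
{
        struct nested_ctrls l1 = { .rdtscp_enabled = false };

        printf("%s\n", check_rdpid_intercept(&l1, true) == INJECT_UD
                               ? "inject #UD" : "continue");
        return 0;
}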
@@ -12313,6 +12368,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
 	.xsaves_supported = vmx_xsaves_supported,
+	.umip_emulated = vmx_umip_emulated,
 
 	.check_nested_events = vmx_check_nested_events,
 