
Merge tag 'kvmarm-for-v4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm updates for 4.19

- Support for Group0 interrupts in guests
- Cache management optimizations for ARMv8.4 systems
- Userspace interface for RAS, allowing error retrieval and injection
- Fault path optimization
- Emulated physical timer fixes
- Random cleanups
Paolo Bonzini · 7 years ago · commit 631989303b
43 changed files with 834 additions and 173 deletions
  1. Documentation/virtual/kvm/api.txt (+76 -4)
  2. Documentation/virtual/kvm/devices/arm-vgic-v3.txt (+8 -0)
  3. Documentation/virtual/kvm/devices/arm-vgic.txt (+9 -6)
  4. arch/arm/include/asm/kvm_emulate.h (+11 -1)
  5. arch/arm/include/asm/kvm_host.h (+5 -0)
  6. arch/arm/include/asm/kvm_mmu.h (+3 -11)
  7. arch/arm/include/uapi/asm/kvm.h (+13 -0)
  8. arch/arm/kvm/coproc.c (+24 -1)
  9. arch/arm/kvm/guest.c (+23 -0)
  10. arch/arm64/include/asm/cpucaps.h (+2 -1)
  11. arch/arm64/include/asm/kvm_arm.h (+1 -0)
  12. arch/arm64/include/asm/kvm_emulate.h (+17 -0)
  13. arch/arm64/include/asm/kvm_host.h (+15 -13)
  14. arch/arm64/include/asm/kvm_mmu.h (+27 -8)
  15. arch/arm64/include/asm/memory.h (+7 -0)
  16. arch/arm64/include/asm/pgtable-prot.h (+22 -2)
  17. arch/arm64/include/asm/sysreg.h (+3 -0)
  18. arch/arm64/include/uapi/asm/kvm.h (+13 -0)
  19. arch/arm64/kernel/cpufeature.c (+20 -0)
  20. arch/arm64/kvm/guest.c (+33 -0)
  21. arch/arm64/kvm/hyp-init.S (+3 -3)
  22. arch/arm64/kvm/hyp/sysreg-sr.c (+0 -5)
  23. arch/arm64/kvm/inject_fault.c (+3 -3)
  24. arch/arm64/kvm/reset.c (+4 -0)
  25. arch/arm64/kvm/sys_regs.c (+49 -5)
  26. include/kvm/arm_vgic.h (+8 -1)
  27. include/linux/irqchip/arm-gic-v3.h (+10 -0)
  28. include/linux/irqchip/arm-gic.h (+11 -0)
  29. include/uapi/linux/kvm.h (+1 -0)
  30. virt/kvm/arm/arch_timer.c (+10 -5)
  31. virt/kvm/arm/arm.c (+51 -0)
  32. virt/kvm/arm/mmu.c (+68 -19)
  33. virt/kvm/arm/vgic/vgic-debug.c (+37 -13)
  34. virt/kvm/arm/vgic/vgic-init.c (+22 -2)
  35. virt/kvm/arm/vgic/vgic-its.c (+17 -10)
  36. virt/kvm/arm/vgic/vgic-mmio-v2.c (+59 -7)
  37. virt/kvm/arm/vgic/vgic-mmio-v3.c (+55 -17)
  38. virt/kvm/arm/vgic/vgic-mmio.c (+51 -5)
  39. virt/kvm/arm/vgic/vgic-mmio.h (+17 -8)
  40. virt/kvm/arm/vgic/vgic-v2.c (+7 -3)
  41. virt/kvm/arm/vgic/vgic-v3.c (+5 -8)
  42. virt/kvm/arm/vgic/vgic.c (+7 -12)
  43. virt/kvm/arm/vgic/vgic.h (+7 -0)

+ 76 - 4
Documentation/virtual/kvm/api.txt

@@ -835,11 +835,13 @@ struct kvm_clock_data {
 
 Capability: KVM_CAP_VCPU_EVENTS
 Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86
-Type: vm ioctl
+Architectures: x86, arm, arm64
+Type: vcpu ioctl
 Parameters: struct kvm_vcpu_event (out)
 Returns: 0 on success, -1 on error
 
+X86:
+
 Gets currently pending exceptions, interrupts, and NMIs as well as related
 states of the vcpu.
 
@@ -881,15 +883,64 @@ Only two fields are defined in the flags field:
 - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
   smi contains a valid state.
 
+ARM/ARM64:
+
+If the guest accesses a device that is being emulated by the host kernel in
+such a way that a real device would generate a physical SError, KVM may make
+a virtual SError pending for that VCPU. This system error interrupt remains
+pending until the guest takes the exception by unmasking PSTATE.A.
+
+Running the VCPU may cause it to take a pending SError, or make an access that
+causes an SError to become pending. The event's description is only valid while
+the VCPU is not running.
+
+This API provides a way to read and write the pending 'event' state that is not
+visible to the guest. To save, restore or migrate a VCPU the struct representing
+the state can be read then written using this GET/SET API, along with the other
+guest-visible registers. It is not possible to 'cancel' an SError that has been
+made pending.
+
+A device being emulated in user-space may also wish to generate an SError. To do
+this the events structure can be populated by user-space. The current state
+should be read first, to ensure no existing SError is pending. If an existing
+SError is pending, the architecture's 'Multiple SError interrupts' rules should
+be followed. (2.5.3 of DDI0587.a "ARM Reliability, Availability, and
+Serviceability (RAS) Specification").
+
+SError exceptions always have an ESR value. Some CPUs have the ability to
+specify what the virtual SError's ESR value should be. These systems will
+advertise KVM_CAP_ARM_SET_SERROR_ESR. In this case exception.has_esr will
+always have a non-zero value when read, and the agent making an SError pending
+should specify the ISS field in the lower 24 bits of exception.serror_esr. If
+the system supports KVM_CAP_ARM_SET_SERROR_ESR, but user-space sets the events
+with exception.has_esr as zero, KVM will choose an ESR.
+
+Specifying exception.has_esr on a system that does not support it will return
+-EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
+will return -EINVAL.
+
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 4.32 KVM_SET_VCPU_EVENTS
 
 Capability: KVM_CAP_VCPU_EVENTS
 Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86
-Type: vm ioctl
+Architectures: x86, arm, arm64
+Type: vcpu ioctl
 Parameters: struct kvm_vcpu_event (in)
 Returns: 0 on success, -1 on error
 
+X86:
+
 Set pending exceptions, interrupts, and NMIs as well as related states of the
 vcpu.
 
@@ -910,6 +961,13 @@ shall be written into the VCPU.
 
 KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
 
+ARM/ARM64:
+
+Set the pending SError exception state for this VCPU. It is not possible to
+'cancel' an SError that has been made pending.
+
+See KVM_GET_VCPU_EVENTS for the data structure.
+
 
 4.33 KVM_GET_DEBUGREGS
 
@@ -4690,3 +4748,17 @@ This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush
 hypercalls:
 HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx,
 HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
+
+8.19 KVM_CAP_ARM_SET_SERROR_ESR
+
+Architectures: arm, arm64
+
+This capability indicates that userspace can specify (via the
+KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
+takes a virtual SError interrupt exception.
+If KVM advertises this capability, userspace can only specify the ISS field for
+the ESR syndrome. Other parts of the ESR, such as the EC are generated by the
+CPU when the exception is taken. If this virtual SError is taken to EL1 using
+AArch64, this value will be reported in the ISS field of ESR_ELx.
+
+See KVM_CAP_VCPU_EVENTS for more details.
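
For illustration, the save/restore and injection flow described in this documentation reduces to a short userspace sequence. The sketch below is not part of this merge and makes assumptions: vm_fd/vcpu_fd come from KVM_CREATE_VM/KVM_CREATE_VCPU, error handling is trimmed, and the capability is checked under the name KVM_CAP_ARM_INJECT_SERROR_ESR as defined by include/uapi/linux/kvm.h in this series.

/*
 * Illustrative sketch only: make a virtual SError pending for a vCPU
 * using the KVM_GET/SET_VCPU_EVENTS interface documented above.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int inject_serror(int vm_fd, int vcpu_fd, __u64 iss)
{
	struct kvm_vcpu_events events;

	/* Read the current state first: an SError may already be pending. */
	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
		return -1;
	if (events.exception.serror_pending)
		return 0;	/* follow the 'Multiple SError interrupts' rules instead */

	memset(&events, 0, sizeof(events));
	events.exception.serror_pending = 1;

	/* Only supply an ESR when the host advertises the capability. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_INJECT_SERROR_ESR) > 0) {
		events.exception.serror_has_esr = 1;
		events.exception.serror_esr = iss & 0xffffff; /* lower 24 bits: ISS */
	}

	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}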

+ 8 - 0
Documentation/virtual/kvm/devices/arm-vgic-v3.txt

@@ -100,6 +100,14 @@ Groups:
     Note that distributor fields are not banked, but return the same value
     regardless of the mpidr used to access the register.
 
+    GICD_IIDR.Revision is updated when the KVM implementation is changed in a
+    way directly observable by the guest or userspace.  Userspace should read
+    GICD_IIDR from KVM and write back the read value to confirm its expected
+    behavior is aligned with the KVM implementation.  Userspace should set
+    GICD_IIDR before setting any other registers to ensure the expected
+    behavior.
+
+
     The GICD_STATUSR and GICR_STATUSR registers are architecturally defined such
     that a write of a clear bit has no effect, whereas a write with a set bit
     clears that value.  To allow userspace to freely set the values of these two
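
The read-then-write-back of GICD_IIDR recommended above can be done through the KVM_DEV_ARM_VGIC_GRP_DIST_REGS device attribute group. The sketch below is illustrative only and rests on assumptions: gic_fd is a KVM_DEV_TYPE_ARM_VGIC_V3 device fd, the attr field encodes (mpidr << 32) | offset with a 32-bit data transfer, and GICD_IIDR sits at its architectural offset 0x8.

/*
 * Illustrative sketch only: read GICD_IIDR from the in-kernel vGIC and
 * write the same value back, pinning the emulated revision before any
 * other distributor state is restored.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int confirm_gicd_iidr(int gic_fd)
{
	__u32 iidr;
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
		.attr  = 0x8,	/* assumed: GICD_IIDR offset, mpidr 0 */
		.addr  = (__u64)(unsigned long)&iidr,
	};

	if (ioctl(gic_fd, KVM_GET_DEVICE_ATTR, &attr) < 0)
		return -1;

	/* Writing back the value we read confirms we expect this revision. */
	return ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &attr);
}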

+ 9 - 6
Documentation/virtual/kvm/devices/arm-vgic.txt

@@ -49,9 +49,15 @@ Groups:
     index is specified with the vcpu_index field.  Note that most distributor
     fields are not banked, but return the same value regardless of the
     vcpu_index used to access the register.
-  Limitations:
-    - Priorities are not implemented, and registers are RAZ/WI
-    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
+
+    GICD_IIDR.Revision is updated when the KVM implementation of an emulated
+    GICv2 is changed in a way directly observable by the guest or userspace.
+    Userspace should read GICD_IIDR from KVM and write back the read value to
+    confirm its expected behavior is aligned with the KVM implementation.
+    Userspace should set GICD_IIDR before setting any other registers (both
+    KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
+    the expected behavior. Unless GICD_IIDR has been set from userspace, writes
+    to the interrupt group registers (GICD_IGROUPR) are ignored.
   Errors:
     -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
@@ -94,9 +100,6 @@ Groups:
     use the lower 5 bits to communicate with the KVM device and must shift the
     value left by 3 places to obtain the actual priority mask level.
 
-  Limitations:
-    - Priorities are not implemented, and registers are RAZ/WI
-    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
     -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running

+ 11 - 1
arch/arm/include/asm/kvm_emulate.h

@@ -107,9 +107,19 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr;
 	return (unsigned long *)&vcpu->arch.hcr;
 }
 }
 
 
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr |= HCR_TWE;
+}
+
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 {
 {
-	return 1;
+	return true;
 }
 }
 
 
 static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
 static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)

+ 5 - 0
arch/arm/include/asm/kvm_host.h

@@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 unsigned long kvm_call_hyp(void *hypfn, ...);
 unsigned long kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 void force_vm_exit(const cpumask_t *mask);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
 
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);

+ 3 - 11
arch/arm/include/asm/kvm_mmu.h

@@ -75,17 +75,9 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 void kvm_clear_hyp_idmap(void);
 
 
-static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
-{
-	*pmd = new_pmd;
-	dsb(ishst);
-}
-
-static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
-{
-	*pte = new_pte;
-	dsb(ishst);
-}
+#define kvm_mk_pmd(ptep)	__pmd(__pa(ptep) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp)	__pud(__pa(pmdp) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp)	({ BUILD_BUG(); 0; })
 
 
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
 {

+ 13 - 0
arch/arm/include/uapi/asm/kvm.h

@@ -27,6 +27,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 
@@ -125,6 +126,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 struct kvm_arch_memory_slot {
 };
 };
 
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
 #define KVM_REG_ARM_COPROC_SHIFT	16

+ 24 - 1
arch/arm/kvm/coproc.c

@@ -246,6 +246,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 			   const struct coproc_reg *r)
 			   const struct coproc_reg *r)
 {
 {
 	u64 reg;
 	u64 reg;
+	bool g1;
 
 
 	if (!p->is_write)
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p);
 		return read_from_write_only(vcpu, p);
@@ -253,7 +254,25 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 	reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
 	reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
 	reg |= *vcpu_reg(vcpu, p->Rt1) ;
 	reg |= *vcpu_reg(vcpu, p->Rt1) ;
 
 
-	vgic_v3_dispatch_sgi(vcpu, reg);
+	/*
+	 * In a system where GICD_CTLR.DS=1, a ICC_SGI0R access generates
+	 * Group0 SGIs only, while ICC_SGI1R can generate either group,
+	 * depending on the SGI configuration. ICC_ASGI1R is effectively
+	 * equivalent to ICC_SGI0R, as there is no "alternative" secure
+	 * group.
+	 */
+	switch (p->Op1) {
+	default:		/* Keep GCC quiet */
+	case 0:			/* ICC_SGI1R */
+		g1 = true;
+		break;
+	case 1:			/* ICC_ASGI1R */
+	case 2:			/* ICC_SGI0R */
+		g1 = false;
+		break;
+	}
+
+	vgic_v3_dispatch_sgi(vcpu, reg, g1);
 
 
 	return true;
 	return true;
 }
 }
@@ -459,6 +478,10 @@ static const struct coproc_reg cp15_regs[] = {
 
 
 	/* ICC_SGI1R */
 	/* ICC_SGI1R */
 	{ CRm64(12), Op1( 0), is64, access_gic_sgi},
 	{ CRm64(12), Op1( 0), is64, access_gic_sgi},
+	/* ICC_ASGI1R */
+	{ CRm64(12), Op1( 1), is64, access_gic_sgi},
+	/* ICC_SGI0R */
+	{ CRm64(12), Op1( 2), is64, access_gic_sgi},
 
 
 	/* VBAR: swapped by interrupt.S. */
 	/* VBAR: swapped by interrupt.S. */
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,

+ 23 - 0
arch/arm/kvm/guest.c

@@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 
+
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
+
+	return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	bool serror_pending = events->exception.serror_pending;
+	bool has_esr = events->exception.serror_has_esr;
+
+	if (serror_pending && has_esr)
+		return -EINVAL;
+	else if (serror_pending)
+		kvm_inject_vabt(vcpu);
+
+	return 0;
+}
+
 int __attribute_const__ kvm_target_cpu(void)
 int __attribute_const__ kvm_target_cpu(void)
 {
 {
 	switch (read_cpuid_part()) {
 	switch (read_cpuid_part()) {

+ 2 - 1
arch/arm64/include/asm/cpucaps.h

@@ -50,7 +50,8 @@
 #define ARM64_HW_DBM				29
 #define ARM64_HW_DBM				29
 #define ARM64_SSBD				30
 #define ARM64_SSBD				30
 #define ARM64_MISMATCHED_CACHE_TYPE		31
 #define ARM64_MISMATCHED_CACHE_TYPE		31
+#define ARM64_HAS_STAGE2_FWB			32
 
 
-#define ARM64_NCAPS				32
+#define ARM64_NCAPS				33
 
 
 #endif /* __ASM_CPUCAPS_H */
 #endif /* __ASM_CPUCAPS_H */

+ 1 - 0
arch/arm64/include/asm/kvm_arm.h

@@ -23,6 +23,7 @@
 #include <asm/types.h>
 #include <asm/types.h>
 
 
 /* Hyp Configuration Register (HCR) bits */
 /* Hyp Configuration Register (HCR) bits */
+#define HCR_FWB		(UL(1) << 46)
 #define HCR_TEA		(UL(1) << 37)
 #define HCR_TEA		(UL(1) << 37)
 #define HCR_TERR	(UL(1) << 36)
 #define HCR_TERR	(UL(1) << 36)
 #define HCR_TLOR	(UL(1) << 35)
 #define HCR_TLOR	(UL(1) << 35)

+ 17 - 0
arch/arm64/include/asm/kvm_emulate.h

@@ -63,6 +63,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 		/* trap error record accesses */
 		/* trap error record accesses */
 		vcpu->arch.hcr_el2 |= HCR_TERR;
 		vcpu->arch.hcr_el2 |= HCR_TERR;
 	}
 	}
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		vcpu->arch.hcr_el2 |= HCR_FWB;
 
 
 	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
 	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
 		vcpu->arch.hcr_el2 &= ~HCR_RW;
 		vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -81,6 +83,21 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr_el2;
 	return (unsigned long *)&vcpu->arch.hcr_el2;
 }
 }
 
 
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 |= HCR_TWE;
+}
+
+static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.vsesr_el2;
+}
+
 static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
 static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
 {
 {
 	vcpu->arch.vsesr_el2 = vsesr;
 	vcpu->arch.vsesr_el2 = vsesr;

+ 15 - 13
arch/arm64/include/asm/kvm_host.h

@@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
 
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
@@ -378,16 +383,23 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 int kvm_perf_teardown(void);
 
 
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
+
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
 
-void __kvm_set_tpidr_el2(u64 tpidr_el2);
 DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
 DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
 
 
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 				       unsigned long vector_ptr)
 {
 {
-	u64 tpidr_el2;
+	/*
+	 * Calculate the raw per-cpu offset without a translation from the
+	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
+	 * so that we can use adr_l to access per-cpu variables in EL2.
+	 */
+	u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
+			 (u64)kvm_ksym_ref(kvm_host_cpu_state));
 
 
 	/*
 	/*
 	 * Call initialization code, and switch to the full blown HYP code.
 	 * Call initialization code, and switch to the full blown HYP code.
@@ -396,17 +408,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 * cpus_have_const_cap() wrapper.
 	 * cpus_have_const_cap() wrapper.
 	 */
 	 */
 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
-	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
-
-	/*
-	 * Calculate the raw per-cpu offset without a translation from the
-	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
-	 * so that we can use adr_l to access per-cpu variables in EL2.
-	 */
-	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
-		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
-
-	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
+	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
 }
 }
 
 
 static inline bool kvm_arch_check_sve_has_vhe(void)
 static inline bool kvm_arch_check_sve_has_vhe(void)

+ 27 - 8
arch/arm64/include/asm/kvm_mmu.h

@@ -169,8 +169,12 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 void kvm_clear_hyp_idmap(void);
 
 
-#define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
-#define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
+#define kvm_mk_pmd(ptep)					\
+	__pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp)					\
+	__pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp)					\
+	__pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
 
 
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
 {
@@ -267,6 +271,15 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 {
 {
 	void *va = page_address(pfn_to_page(pfn));
 	void *va = page_address(pfn_to_page(pfn));
 
 
+	/*
+	 * With FWB, we ensure that the guest always accesses memory using
+	 * cacheable attributes, and we don't have to clean to PoC when
+	 * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
+	 * PoU is not required either in this case.
+	 */
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		return;
+
 	kvm_flush_dcache_to_poc(va, size);
 	kvm_flush_dcache_to_poc(va, size);
 }
 }
 
 
@@ -287,20 +300,26 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
 
 
 static inline void __kvm_flush_dcache_pte(pte_t pte)
 static inline void __kvm_flush_dcache_pte(pte_t pte)
 {
 {
-	struct page *page = pte_page(pte);
-	kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+		struct page *page = pte_page(pte);
+		kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+	}
 }
 }
 
 
 static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
 static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
 {
 {
-	struct page *page = pmd_page(pmd);
-	kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+		struct page *page = pmd_page(pmd);
+		kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+	}
 }
 }
 
 
 static inline void __kvm_flush_dcache_pud(pud_t pud)
 static inline void __kvm_flush_dcache_pud(pud_t pud)
 {
 {
-	struct page *page = pud_page(pud);
-	kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+		struct page *page = pud_page(pud);
+		kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+	}
 }
 }
 
 
 #define kvm_virt_to_phys(x)		__pa_symbol(x)
 #define kvm_virt_to_phys(x)		__pa_symbol(x)

+ 7 - 0
arch/arm64/include/asm/memory.h

@@ -155,6 +155,13 @@
 #define MT_S2_NORMAL		0xf
 #define MT_S2_NORMAL		0xf
 #define MT_S2_DEVICE_nGnRE	0x1
 #define MT_S2_DEVICE_nGnRE	0x1
 
 
+/*
+ * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
+ * Stage-2 enforces Normal-WB and Device-nGnRE
+ */
+#define MT_S2_FWB_NORMAL	6
+#define MT_S2_FWB_DEVICE_nGnRE	1
+
 #ifdef CONFIG_ARM64_4K_PAGES
 #ifdef CONFIG_ARM64_4K_PAGES
 #define IOREMAP_MAX_ORDER	(PUD_SHIFT)
 #define IOREMAP_MAX_ORDER	(PUD_SHIFT)
 #else
 #else

+ 22 - 2
arch/arm64/include/asm/pgtable-prot.h

@@ -67,8 +67,28 @@
 #define PAGE_HYP_RO		__pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 #define PAGE_HYP_RO		__pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 
 
-#define PAGE_S2			__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
-#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_MEMATTR(attr)						\
+	({								\
+		u64 __val;						\
+		if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))		\
+			__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);	\
+		else							\
+			__val = PTE_S2_MEMATTR(MT_S2_ ## attr);		\
+		__val;							\
+	 })
+
+#define PAGE_S2_XN							\
+	({								\
+		u64 __val;						\
+		if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))		\
+			__val = 0;					\
+		else							\
+			__val = PTE_S2_XN;				\
+		__val;							\
+	})
+
+#define PAGE_S2			__pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
+#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN)
 
 
 #define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)

+ 3 - 0
arch/arm64/include/asm/sysreg.h

@@ -314,6 +314,8 @@
 #define SYS_ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
 #define SYS_ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
 #define SYS_ICC_RPR_EL1			sys_reg(3, 0, 12, 11, 3)
 #define SYS_ICC_RPR_EL1			sys_reg(3, 0, 12, 11, 3)
 #define SYS_ICC_SGI1R_EL1		sys_reg(3, 0, 12, 11, 5)
 #define SYS_ICC_SGI1R_EL1		sys_reg(3, 0, 12, 11, 5)
+#define SYS_ICC_ASGI1R_EL1		sys_reg(3, 0, 12, 11, 6)
+#define SYS_ICC_SGI0R_EL1		sys_reg(3, 0, 12, 11, 7)
 #define SYS_ICC_IAR1_EL1		sys_reg(3, 0, 12, 12, 0)
 #define SYS_ICC_IAR1_EL1		sys_reg(3, 0, 12, 12, 0)
 #define SYS_ICC_EOIR1_EL1		sys_reg(3, 0, 12, 12, 1)
 #define SYS_ICC_EOIR1_EL1		sys_reg(3, 0, 12, 12, 1)
 #define SYS_ICC_HPPIR1_EL1		sys_reg(3, 0, 12, 12, 2)
 #define SYS_ICC_HPPIR1_EL1		sys_reg(3, 0, 12, 12, 2)
@@ -579,6 +581,7 @@
 #define ID_AA64MMFR1_VMIDBITS_16	2
 #define ID_AA64MMFR1_VMIDBITS_16	2
 
 
 /* id_aa64mmfr2 */
 /* id_aa64mmfr2 */
+#define ID_AA64MMFR2_FWB_SHIFT		40
 #define ID_AA64MMFR2_AT_SHIFT		32
 #define ID_AA64MMFR2_AT_SHIFT		32
 #define ID_AA64MMFR2_LVA_SHIFT		16
 #define ID_AA64MMFR2_LVA_SHIFT		16
 #define ID_AA64MMFR2_IESB_SHIFT		12
 #define ID_AA64MMFR2_IESB_SHIFT		12

+ 13 - 0
arch/arm64/include/uapi/asm/kvm.h

@@ -39,6 +39,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 
@@ -154,6 +155,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 struct kvm_arch_memory_slot {
 };
 };
 
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
 #define KVM_REG_ARM_COPROC_SHIFT	16

+ 20 - 0
arch/arm64/kernel/cpufeature.c

@@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 };
 };
 
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
@@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
 }
 }
 #endif
 #endif
 
 
+static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
+{
+	u64 val = read_sysreg_s(SYS_CLIDR_EL1);
+
+	/* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
+	WARN_ON(val & (7 << 27 | 7 << 21));
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 	{
 		.desc = "GIC system register CPU interface",
 		.desc = "GIC system register CPU interface",
@@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cache_dic,
 		.matches = has_cache_dic,
 	},
 	},
+	{
+		.desc = "Stage-2 Force Write-Back",
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.capability = ARM64_HAS_STAGE2_FWB,
+		.sys_reg = SYS_ID_AA64MMFR2_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR2_FWB_SHIFT,
+		.min_field_value = 1,
+		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_has_fwb,
+	},
 #ifdef CONFIG_ARM64_HW_AFDBM
 #ifdef CONFIG_ARM64_HW_AFDBM
 	{
 	{
 		/*
 		/*

+ 33 - 0
arch/arm64/kvm/guest.c

@@ -289,6 +289,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
+	events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+
+	if (events->exception.serror_pending && events->exception.serror_has_esr)
+		events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+
+	return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	bool serror_pending = events->exception.serror_pending;
+	bool has_esr = events->exception.serror_has_esr;
+
+	if (serror_pending && has_esr) {
+		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+			return -EINVAL;
+
+		if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
+			kvm_set_sei_esr(vcpu, events->exception.serror_esr);
+		else
+			return -EINVAL;
+	} else if (serror_pending) {
+		kvm_inject_vabt(vcpu);
+	}
+
+	return 0;
+}
+
 int __attribute_const__ kvm_target_cpu(void)
 int __attribute_const__ kvm_target_cpu(void)
 {
 {
 	unsigned long implementor = read_cpuid_implementor();
 	unsigned long implementor = read_cpuid_implementor();

+ 3 - 3
arch/arm64/kvm/hyp-init.S

@@ -57,6 +57,7 @@ __invalid:
 	 * x0: HYP pgd
 	 * x0: HYP pgd
 	 * x1: HYP stack
 	 * x1: HYP stack
 	 * x2: HYP vectors
 	 * x2: HYP vectors
+	 * x3: per-CPU offset
 	 */
 	 */
 __do_hyp_init:
 __do_hyp_init:
 	/* Check for a stub HVC call */
 	/* Check for a stub HVC call */
@@ -119,9 +120,8 @@ CPU_BE(	orr	x4, x4, #SCTLR_ELx_EE)
 	mov	sp, x1
 	mov	sp, x1
 	msr	vbar_el2, x2
 	msr	vbar_el2, x2
 
 
-	/* copy tpidr_el1 into tpidr_el2 for use by HYP */
-	mrs	x1, tpidr_el1
-	msr	tpidr_el2, x1
+	/* Set tpidr_el2 for use by HYP */
+	msr	tpidr_el2, x3
 
 
 	/* Hello, World! */
 	/* Hello, World! */
 	eret
 	eret

+ 0 - 5
arch/arm64/kvm/hyp/sysreg-sr.c

@@ -288,8 +288,3 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
 
 
 	vcpu->arch.sysregs_loaded_on_cpu = false;
 	vcpu->arch.sysregs_loaded_on_cpu = false;
 }
 }
-
-void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
-{
-	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
-}

+ 3 - 3
arch/arm64/kvm/inject_fault.c

@@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 		inject_undef64(vcpu);
 		inject_undef64(vcpu);
 }
 }
 
 
-static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
 {
 {
-	vcpu_set_vsesr(vcpu, esr);
+	vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
 	*vcpu_hcr(vcpu) |= HCR_VSE;
 	*vcpu_hcr(vcpu) |= HCR_VSE;
 }
 }
 
 
@@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
  */
  */
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 {
 {
-	pend_guest_serror(vcpu, ESR_ELx_ISV);
+	kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
 }
 }

+ 4 - 0
arch/arm64/kvm/reset.c

@@ -77,8 +77,12 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_PMU_V3:
 	case KVM_CAP_ARM_PMU_V3:
 		r = kvm_arm_support_pmu_v3();
 		r = kvm_arm_support_pmu_v3();
 		break;
 		break;
+	case KVM_CAP_ARM_INJECT_SERROR_ESR:
+		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+		break;
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_VCPU_ATTRIBUTES:
 	case KVM_CAP_VCPU_ATTRIBUTES:
+	case KVM_CAP_VCPU_EVENTS:
 		r = 1;
 		r = 1;
 		break;
 		break;
 	default:
 	default:

+ 49 - 5
arch/arm64/kvm/sys_regs.c

@@ -194,7 +194,16 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 	if (!p->is_write)
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p, r);
 		return read_from_write_only(vcpu, p, r);
 
 
-	kvm_set_way_flush(vcpu);
+	/*
+	 * Only track S/W ops if we don't have FWB. It still indicates
+	 * that the guest is a bit broken (S/W operations should only
+	 * be done by firmware, knowing that there is only a single
+	 * CPU left in the system, and certainly not from non-secure
+	 * software).
+	 */
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		kvm_set_way_flush(vcpu);
+
 	return true;
 	return true;
 }
 }
 
 
@@ -243,10 +252,43 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 			   struct sys_reg_params *p,
 			   struct sys_reg_params *p,
 			   const struct sys_reg_desc *r)
 			   const struct sys_reg_desc *r)
 {
 {
+	bool g1;
+
 	if (!p->is_write)
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p, r);
 		return read_from_write_only(vcpu, p, r);
 
 
-	vgic_v3_dispatch_sgi(vcpu, p->regval);
+	/*
+	 * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates
+	 * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
+	 * depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively
+	 * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
+	 * group.
+	 */
+	if (p->is_aarch32) {
+		switch (p->Op1) {
+		default:		/* Keep GCC quiet */
+		case 0:			/* ICC_SGI1R */
+			g1 = true;
+			break;
+		case 1:			/* ICC_ASGI1R */
+		case 2:			/* ICC_SGI0R */
+			g1 = false;
+			break;
+		}
+	} else {
+		switch (p->Op2) {
+		default:		/* Keep GCC quiet */
+		case 5:			/* ICC_SGI1R_EL1 */
+			g1 = true;
+			break;
+		case 6:			/* ICC_ASGI1R_EL1 */
+		case 7:			/* ICC_SGI0R_EL1 */
+			g1 = false;
+			break;
+		}
+	}
+
+	vgic_v3_dispatch_sgi(vcpu, p->regval, g1);
 
 
 	return true;
 	return true;
 }
 }
@@ -1303,6 +1345,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
 	{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+	{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
+	{ SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
 	{ SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
@@ -1613,8 +1657,6 @@ static const struct sys_reg_desc cp14_64_regs[] = {
  * register).
  * register).
  */
  */
 static const struct sys_reg_desc cp15_regs[] = {
 static const struct sys_reg_desc cp15_regs[] = {
-	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
-
 	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -1737,8 +1779,10 @@ static const struct sys_reg_desc cp15_regs[] = {
 static const struct sys_reg_desc cp15_64_regs[] = {
 static const struct sys_reg_desc cp15_64_regs[] = {
 	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
 	{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
-	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+	{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
+	{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
 	{ Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
 	{ Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
 };
 };
 
 

+ 8 - 1
include/kvm/arm_vgic.h

@@ -133,6 +133,7 @@ struct vgic_irq {
 	u8 source;			/* GICv2 SGIs only */
 	u8 source;			/* GICv2 SGIs only */
 	u8 active_source;		/* GICv2 SGIs only */
 	u8 active_source;		/* GICv2 SGIs only */
 	u8 priority;
 	u8 priority;
+	u8 group;			/* 0 == group 0, 1 == group 1 */
 	enum vgic_irq_config config;	/* Level or edge */
 	enum vgic_irq_config config;	/* Level or edge */
 
 
 	/*
 	/*
@@ -217,6 +218,12 @@ struct vgic_dist {
 	/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
 	/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
 	u32			vgic_model;
 	u32			vgic_model;
 
 
+	/* Implementation revision as reported in the GICD_IIDR */
+	u32			implementation_rev;
+
+	/* Userspace can write to GICv2 IGROUPR */
+	bool			v2_groups_user_writable;
+
 	/* Do injected MSIs require an additional device ID? */
 	/* Do injected MSIs require an additional device ID? */
 	bool			msis_require_devid;
 	bool			msis_require_devid;
 
 
@@ -366,7 +373,7 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
 
 
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);
 
 
 /**
 /**
  * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
  * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW

+ 10 - 0
include/linux/irqchip/arm-gic-v3.h

@@ -61,6 +61,16 @@
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 /*
 /*
  * In systems with a single security state (what we emulate in KVM)
  * In systems with a single security state (what we emulate in KVM)
  * the meaning of the interrupt group enable bits is slightly different
  * the meaning of the interrupt group enable bits is slightly different

+ 11 - 0
include/linux/irqchip/arm-gic.h

@@ -71,6 +71,16 @@
 					(GICD_INT_DEF_PRI << 8) |\
 					(GICD_INT_DEF_PRI << 8) |\
 					GICD_INT_DEF_PRI)
 					GICD_INT_DEF_PRI)
 
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 #define GICH_HCR			0x0
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
 #define GICH_VTR			0x4
 #define GICH_VMCR			0x8
 #define GICH_VMCR			0x8
@@ -94,6 +104,7 @@
 #define GICH_LR_PENDING_BIT		(1 << 28)
 #define GICH_LR_PENDING_BIT		(1 << 28)
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_EOI			(1 << 19)
 #define GICH_LR_EOI			(1 << 19)
+#define GICH_LR_GROUP1			(1 << 30)
 #define GICH_LR_HW			(1 << 31)
 #define GICH_LR_HW			(1 << 31)
 
 
 #define GICH_VMCR_ENABLE_GRP0_SHIFT	0
 #define GICH_VMCR_ENABLE_GRP0_SHIFT	0

+ 1 - 0
include/uapi/linux/kvm.h

@@ -951,6 +951,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_TLBFLUSH 155
 #define KVM_CAP_HYPERV_TLBFLUSH 155
 #define KVM_CAP_S390_HPAGE_1M 156
 #define KVM_CAP_S390_HPAGE_1M 156
 #define KVM_CAP_NESTED_STATE 157
 #define KVM_CAP_NESTED_STATE 157
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
 
 
 #ifdef KVM_CAP_IRQ_ROUTING
 #ifdef KVM_CAP_IRQ_ROUTING
 
 

+ 10 - 5
virt/kvm/arm/arch_timer.c

@@ -295,9 +295,9 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu)
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 
 	/*
 	/*
-	 * If the timer can fire now we have just raised the IRQ line and we
-	 * don't need to have a soft timer scheduled for the future.  If the
-	 * timer cannot fire at all, then we also don't need a soft timer.
+	 * If the timer can fire now, we don't need to have a soft timer
+	 * scheduled for the future.  If the timer cannot fire at all,
+	 * then we also don't need a soft timer.
 	 */
 	 */
 	if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
 	if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
 		soft_timer_cancel(&timer->phys_timer, NULL);
 		soft_timer_cancel(&timer->phys_timer, NULL);
@@ -332,10 +332,10 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
 	level = kvm_timer_should_fire(vtimer);
 	level = kvm_timer_should_fire(vtimer);
 	kvm_timer_update_irq(vcpu, level, vtimer);
 	kvm_timer_update_irq(vcpu, level, vtimer);
 
 
+	phys_timer_emulate(vcpu);
+
 	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
 	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
 		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
-
-	phys_timer_emulate(vcpu);
 }
 }
 
 
 static void vtimer_save_state(struct kvm_vcpu *vcpu)
 static void vtimer_save_state(struct kvm_vcpu *vcpu)
@@ -487,6 +487,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 {
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 
 	if (unlikely(!timer->enabled))
 	if (unlikely(!timer->enabled))
 		return;
 		return;
@@ -502,6 +503,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 
 
 	/* Set the background timer for the physical timer emulation. */
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu);
 	phys_timer_emulate(vcpu);
+
+	/* If the timer fired while we weren't running, inject it now */
+	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 }
 }
 
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)

+ 51 - 0
virt/kvm/arm/arm.c

@@ -30,6 +30,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm.h>
 #include <linux/kvm_irqfd.h>
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
 #include <linux/irqbypass.h>
+#include <linux/sched/stat.h>
 #include <trace/events/kvm.h>
 #include <trace/events/kvm.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_psci.h>
 #include <kvm/arm_psci.h>
@@ -380,6 +381,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvm_timer_vcpu_load(vcpu);
 	kvm_timer_vcpu_load(vcpu);
 	kvm_vcpu_load_sysregs(vcpu);
 	kvm_vcpu_load_sysregs(vcpu);
 	kvm_arch_vcpu_load_fp(vcpu);
 	kvm_arch_vcpu_load_fp(vcpu);
+
+	if (single_task_running())
+		vcpu_clear_wfe_traps(vcpu);
+	else
+		vcpu_set_wfe_traps(vcpu);
 }
 }
 
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1044,6 +1050,32 @@ static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
 	return ret;
 	return ret;
 }
 }
 
 
+static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+				   struct kvm_vcpu_events *events)
+{
+	memset(events, 0, sizeof(*events));
+
+	return __kvm_arm_vcpu_get_events(vcpu, events);
+}
+
+static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+				   struct kvm_vcpu_events *events)
+{
+	int i;
+
+	/* check whether the reserved field is zero */
+	for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
+		if (events->reserved[i])
+			return -EINVAL;
+
+	/* check whether the pad field is zero */
+	for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
+		if (events->exception.pad[i])
+			return -EINVAL;
+
+	return __kvm_arm_vcpu_set_events(vcpu, events);
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 			 unsigned int ioctl, unsigned long arg)
 {
 {
@@ -1124,6 +1156,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
 		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
 		break;
 		break;
 	}
 	}
+	case KVM_GET_VCPU_EVENTS: {
+		struct kvm_vcpu_events events;
+
+		if (kvm_arm_vcpu_get_events(vcpu, &events))
+			return -EINVAL;
+
+		if (copy_to_user(argp, &events, sizeof(events)))
+			return -EFAULT;
+
+		return 0;
+	}
+	case KVM_SET_VCPU_EVENTS: {
+		struct kvm_vcpu_events events;
+
+		if (copy_from_user(&events, argp, sizeof(events)))
+			return -EFAULT;
+
+		return kvm_arm_vcpu_set_events(vcpu, &events);
+	}
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 	}
 	}

+ 68 - 19
virt/kvm/arm/mmu.c

@@ -177,6 +177,35 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
 	put_page(virt_to_page(pmd));
 	put_page(virt_to_page(pmd));
 }
 }
 
 
+static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte)
+{
+	WRITE_ONCE(*ptep, new_pte);
+	dsb(ishst);
+}
+
+static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd)
+{
+	WRITE_ONCE(*pmdp, new_pmd);
+	dsb(ishst);
+}
+
+static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep)
+{
+	kvm_set_pmd(pmdp, kvm_mk_pmd(ptep));
+}
+
+static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp)
+{
+	WRITE_ONCE(*pudp, kvm_mk_pud(pmdp));
+	dsb(ishst);
+}
+
+static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp)
+{
+	WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp));
+	dsb(ishst);
+}
+
 /*
 /*
  * Unmapping vs dcache management:
  * Unmapping vs dcache management:
  *
  *
@@ -196,6 +225,10 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
  * This is why right after unmapping a page/section and invalidating
  * This is why right after unmapping a page/section and invalidating
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the IO subsystem will never hit in the cache.
  * the IO subsystem will never hit in the cache.
+ *
+ * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
+ * we then fully enforce cacheability of RAM, no matter what the guest
+ * does.
  */
  */
 static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 		       phys_addr_t addr, phys_addr_t end)
@@ -576,7 +609,6 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
 		pte = pte_offset_kernel(pmd, addr);
 		pte = pte_offset_kernel(pmd, addr);
 		kvm_set_pte(pte, pfn_pte(pfn, prot));
 		kvm_set_pte(pte, pfn_pte(pfn, prot));
 		get_page(virt_to_page(pte));
 		get_page(virt_to_page(pte));
-		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
 		pfn++;
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
 	} while (addr += PAGE_SIZE, addr != end);
 }
 }
@@ -601,9 +633,8 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 				kvm_err("Cannot allocate Hyp pte\n");
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
 				return -ENOMEM;
 			}
 			}
-			pmd_populate_kernel(NULL, pmd, pte);
+			kvm_pmd_populate(pmd, pte);
 			get_page(virt_to_page(pmd));
 			get_page(virt_to_page(pmd));
-			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
 		}
 		}
 
 
 		next = pmd_addr_end(addr, end);
 		next = pmd_addr_end(addr, end);
@@ -634,9 +665,8 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
 				kvm_err("Cannot allocate Hyp pmd\n");
 				kvm_err("Cannot allocate Hyp pmd\n");
 				return -ENOMEM;
 				return -ENOMEM;
 			}
 			}
-			pud_populate(NULL, pud, pmd);
+			kvm_pud_populate(pud, pmd);
 			get_page(virt_to_page(pud));
 			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
 		}
 		}
 
 
 		next = pud_addr_end(addr, end);
 		next = pud_addr_end(addr, end);
@@ -671,9 +701,8 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
 				err = -ENOMEM;
 				err = -ENOMEM;
 				goto out;
 				goto out;
 			}
 			}
-			pgd_populate(NULL, pgd, pud);
+			kvm_pgd_populate(pgd, pud);
 			get_page(virt_to_page(pgd));
 			get_page(virt_to_page(pgd));
-			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
 		}
 		}
 
 
 		next = pgd_addr_end(addr, end);
 		next = pgd_addr_end(addr, end);
@@ -1015,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	VM_BUG_ON(!pmd);
 	VM_BUG_ON(!pmd);
 
 
-	/*
-	 * Mapping in huge pages should only happen through a fault.  If a
-	 * page is merged into a transparent huge page, the individual
-	 * subpages of that huge page should be unmapped through MMU
-	 * notifiers before we get here.
-	 *
-	 * Merging of CompoundPages is not supported; they should become
-	 * splitting first, unmapped, merged, and mapped back in on-demand.
-	 */
-	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
 	old_pmd = *pmd;
 	old_pmd = *pmd;
 	if (pmd_present(old_pmd)) {
 	if (pmd_present(old_pmd)) {
+		/*
+		 * Multiple vcpus faulting on the same PMD entry, can
+		 * lead to them sequentially updating the PMD with the
+		 * same value. Following the break-before-make
+		 * (pmd_clear() followed by tlb_flush()) process can
+		 * hinder forward progress due to refaults generated
+		 * on missing translations.
+		 *
+		 * Skip updating the page table if the entry is
+		 * unchanged.
+		 */
+		if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+			return 0;
+
+		/*
+		 * Mapping in huge pages should only happen through a
+		 * fault.  If a page is merged into a transparent huge
+		 * page, the individual subpages of that huge page
+		 * should be unmapped through MMU notifiers before we
+		 * get here.
+		 *
+		 * Merging of CompoundPages is not supported; they
+		 * should become splitting first, unmapped, merged,
+		 * and mapped back in on-demand.
+		 */
+		VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
+
 		pmd_clear(pmd);
 		pmd_clear(pmd);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {
 	} else {
@@ -1090,7 +1135,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 		if (!cache)
 		if (!cache)
 			return 0; /* ignore calls from kvm_set_spte_hva */
 			return 0; /* ignore calls from kvm_set_spte_hva */
 		pte = mmu_memory_cache_alloc(cache);
 		pte = mmu_memory_cache_alloc(cache);
-		pmd_populate_kernel(NULL, pmd, pte);
+		kvm_pmd_populate(pmd, pte);
 		get_page(virt_to_page(pmd));
 		get_page(virt_to_page(pmd));
 	}
 	}
 
 
@@ -1102,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	/* Create 2nd stage page table mapping - Level 3 */
 	/* Create 2nd stage page table mapping - Level 3 */
 	old_pte = *pte;
 	old_pte = *pte;
 	if (pte_present(old_pte)) {
 	if (pte_present(old_pte)) {
+		/* Skip page table update if there is no change */
+		if (pte_val(old_pte) == pte_val(*new_pte))
+			return 0;
+
 		kvm_set_pte(pte, __pte(0));
 		kvm_set_pte(pte, __pte(0));
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	} else {
 	} else {

+ 37 - 13
virt/kvm/arm/vgic/vgic-debug.c

@@ -36,9 +36,12 @@
 struct vgic_state_iter {
 	int nr_cpus;
 	int nr_spis;
+	int nr_lpis;
 	int dist_id;
 	int vcpu_id;
 	int intid;
+	int lpi_idx;
+	u32 *lpi_array;
 };
 
 static void iter_next(struct vgic_state_iter *iter)
@@ -52,6 +55,12 @@ static void iter_next(struct vgic_state_iter *iter)
 	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
 	    ++iter->vcpu_id < iter->nr_cpus)
 		iter->intid = 0;
+
+	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
+		if (iter->lpi_idx < iter->nr_lpis)
+			iter->intid = iter->lpi_array[iter->lpi_idx];
+		iter->lpi_idx++;
+	}
 }
 
 static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
@@ -63,6 +72,11 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
 
 	iter->nr_cpus = nr_cpus;
 	iter->nr_spis = kvm->arch.vgic.nr_spis;
+	if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+		iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
+		if (iter->nr_lpis < 0)
+			iter->nr_lpis = 0;
+	}
 
 	/* Fast forward to the right position if needed */
 	while (pos--)
@@ -73,7 +87,8 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
 {
 	return iter->dist_id > 0 &&
 		iter->vcpu_id == iter->nr_cpus &&
-		(iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis;
+		iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
+		iter->lpi_idx > iter->nr_lpis;
 }
 
 static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
@@ -130,6 +145,7 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
 
 	mutex_lock(&kvm->lock);
 	iter = kvm->arch.vgic.iter;
+	kfree(iter->lpi_array);
 	kfree(iter);
 	kvm->arch.vgic.iter = NULL;
 	mutex_unlock(&kvm->lock);
@@ -137,17 +153,20 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
 
 static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
 {
+	bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
+
 	seq_printf(s, "Distributor\n");
 	seq_printf(s, "===========\n");
-	seq_printf(s, "vgic_model:\t%s\n",
-		   (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
-		   "GICv3" : "GICv2");
+	seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
 	seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
+	if (v3)
+		seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
 	seq_printf(s, "enabled:\t%d\n", dist->enabled);
 	seq_printf(s, "\n");
 
 	seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
 	seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
+	seq_printf(s, "G=group\n");
 }
 
 static void print_header(struct seq_file *s, struct vgic_irq *irq,
@@ -162,8 +181,8 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
 	}
 
 	seq_printf(s, "\n");
-	seq_printf(s, "%s%2d TYP   ID TGT_ID PLAEHC     HWID   TARGET SRC PRI VCPU_ID\n", hdr, id);
-	seq_printf(s, "---------------------------------------------------------------\n");
+	seq_printf(s, "%s%2d TYP   ID TGT_ID PLAEHCG     HWID   TARGET SRC PRI VCPU_ID\n", hdr, id);
+	seq_printf(s, "----------------------------------------------------------------\n");
 }
 
 static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
@@ -174,15 +193,17 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 		type = "SGI";
 	else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
 		type = "PPI";
-	else
+	else if (irq->intid < VGIC_MAX_SPI)
 		type = "SPI";
+	else
+		type = "LPI";
 
 	if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
 		print_header(s, irq, vcpu);
 
 	seq_printf(s, "       %s %4d "
 		      "    %2d "
-		      "%d%d%d%d%d%d "
+		      "%d%d%d%d%d%d%d "
 		      "%8d "
 		      "%8x "
 		      " %2x "
@@ -197,12 +218,12 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 			irq->enabled,
 			irq->hw,
 			irq->config == VGIC_CONFIG_LEVEL,
+			irq->group,
 			irq->hwintid,
 			irq->mpidr,
 			irq->source,
 			irq->priority,
 			(irq->vcpu) ? irq->vcpu->vcpu_id : -1);
-
 }
 
 static int vgic_debug_show(struct seq_file *s, void *v)
@@ -221,17 +242,20 @@ static int vgic_debug_show(struct seq_file *s, void *v)
 	if (!kvm->arch.vgic.initialized)
 		return 0;
 
-	if (iter->vcpu_id < iter->nr_cpus) {
+	if (iter->vcpu_id < iter->nr_cpus)
 		vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
-		irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid];
-	} else {
-		irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
+
+	irq = vgic_get_irq(kvm, vcpu, iter->intid);
+	if (!irq) {
+		seq_printf(s, "       LPI %4d freed\n", iter->intid);
+		return 0;
 	}
 
 	spin_lock_irqsave(&irq->irq_lock, flags);
 	print_irq_state(s, irq, vcpu);
 	spin_unlock_irqrestore(&irq->irq_lock, flags);
 
+	vgic_put_irq(kvm, irq);
 	return 0;
 }
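
With the changes above, the vgic-state debugfs dump walks each vCPU's private interrupts, then the SPIs, and finally the LPIs snapshotted by vgic_copy_lpi_list() when the seq_file was opened; an LPI freed after the snapshot is reported as "LPI <intid> freed". A toy model of that iteration order (standalone C, made-up counts and intids, not the kernel iterator):

	#include <stdio.h>

	#define NR_PRIVATE_IRQS	32

	int main(void)
	{
		int nr_cpus = 2, nr_spis = 32;
		int lpi_snapshot[] = { 8192, 8193, 8200 };	/* hypothetical LPI intids */
		int nr_lpis = 3;
		int cpu, intid, i;

		/* Private interrupts (SGIs 0-15, PPIs 16-31), once per vCPU. */
		for (cpu = 0; cpu < nr_cpus; cpu++)
			for (intid = 0; intid < NR_PRIVATE_IRQS; intid++)
				printf("vcpu%d intid %d\n", cpu, intid);

		/* Shared interrupts (SPIs), once. */
		for (intid = NR_PRIVATE_IRQS; intid < NR_PRIVATE_IRQS + nr_spis; intid++)
			printf("spi intid %d\n", intid);

		/* LPIs, in the order captured when the file was opened. */
		for (i = 0; i < nr_lpis; i++)
			printf("lpi intid %d\n", lpi_snapshot[i]);

		return 0;
	}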
 
 

+ 22 - 2
virt/kvm/arm/vgic/vgic-init.c

@@ -175,10 +175,13 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
 		irq->vcpu = NULL;
 		irq->target_vcpu = vcpu0;
 		kref_init(&irq->refcount);
-		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
 			irq->targets = 0;
-		else
+			irq->group = 0;
+		} else {
 			irq->mpidr = 0;
+			irq->group = 1;
+		}
 	}
 	return 0;
 }
@@ -227,6 +230,18 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 			/* PPIs */
 			irq->config = VGIC_CONFIG_LEVEL;
 		}
+
+		/*
+		 * GICv3 can only be created via the KVM_DEVICE_CREATE API and
+		 * so we always know the emulation type at this point as it's
+		 * either explicitly configured as GICv3, or explicitly
+		 * configured as GICv2, or not configured yet which also
+		 * implies GICv2.
+		 */
+		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+			irq->group = 1;
+		else
+			irq->group = 0;
 	}
 
 	if (!irqchip_in_kernel(vcpu->kvm))
@@ -271,6 +286,10 @@ int vgic_init(struct kvm *kvm)
 	if (vgic_initialized(kvm))
 		return 0;
 
+	/* Are we also in the middle of creating a VCPU? */
+	if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+		return -EBUSY;
+
 	/* freeze the number of spis */
 	if (!dist->nr_spis)
 		dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
@@ -294,6 +313,7 @@ int vgic_init(struct kvm *kvm)
 
 	vgic_debug_init(kvm);
 
+	dist->implementation_rev = 2;
 	dist->initialized = true;
 
 out:

+ 17 - 10
virt/kvm/arm/vgic/vgic-its.c

@@ -71,6 +71,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
 	kref_init(&irq->refcount);
 	irq->intid = intid;
 	irq->target_vcpu = vcpu;
+	irq->group = 1;
 
 	spin_lock_irqsave(&dist->lpi_list_lock, flags);
 
@@ -168,8 +169,14 @@ struct vgic_its_abi {
 	int (*commit)(struct vgic_its *its);
 };
 
+#define ABI_0_ESZ	8
+#define ESZ_MAX		ABI_0_ESZ
+
 static const struct vgic_its_abi its_table_abi_versions[] = {
-	[0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8,
+	[0] = {
+	 .cte_esz = ABI_0_ESZ,
+	 .dte_esz = ABI_0_ESZ,
+	 .ite_esz = ABI_0_ESZ,
 	 .save_tables = vgic_its_save_tables_v0,
 	 .restore_tables = vgic_its_restore_tables_v0,
 	 .commit = vgic_its_commit_v0,
@@ -183,7 +190,7 @@ inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
 	return &its_table_abi_versions[its->abi_rev];
 }
 
-int vgic_its_set_abi(struct vgic_its *its, int rev)
+static int vgic_its_set_abi(struct vgic_its *its, u32 rev)
 {
 	const struct vgic_its_abi *abi;
 
@@ -312,9 +319,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
  * enumerate those LPIs without holding any lock.
  * Returns their number and puts the kmalloc'ed array into intid_ptr.
  */
-static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
+int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
 {
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct vgic_irq *irq;
 	unsigned long flags;
 	u32 *intids;
@@ -337,7 +344,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
 		if (i == irq_count)
 			break;
 		/* We don't need to "get" the IRQ, as we hold the list lock. */
-		if (irq->target_vcpu != vcpu)
+		if (vcpu && irq->target_vcpu != vcpu)
 			continue;
 		intids[i++] = irq->intid;
 	}
@@ -429,7 +436,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 	u8 pendmask;
 
-	nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
+	nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
 	if (nr_irqs < 0)
 		return nr_irqs;
 
@@ -1154,7 +1161,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
 
 	vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
-	irq_count = vgic_copy_lpi_list(vcpu, &intids);
+	irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
 	if (irq_count < 0)
 		return irq_count;
 
@@ -1202,7 +1209,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
 	vcpu1 = kvm_get_vcpu(kvm, target1_addr);
 	vcpu2 = kvm_get_vcpu(kvm, target2_addr);
 
-	irq_count = vgic_copy_lpi_list(vcpu1, &intids);
+	irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
 	if (irq_count < 0)
 		return irq_count;
 
@@ -1881,14 +1888,14 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
 * Return: < 0 on error, 0 if last element was identified, 1 otherwise
 * (the last element may not be found on second level tables)
 */
-static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
+static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
 			  int start_id, entry_fn_t fn, void *opaque)
 {
 	struct kvm *kvm = its->dev->kvm;
 	unsigned long len = size;
 	int id = start_id;
 	gpa_t gpa = base;
-	char entry[esz];
+	char entry[ESZ_MAX];
 	int ret;
 
 	memset(entry, 0, esz);
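
The vgic_copy_lpi_list() change is what the debug code relies on: the function now takes the struct kvm directly and, when the vcpu argument is NULL, snapshots every mapped LPI instead of only those targeting one vCPU. A toy, standalone model of that filter-or-take-all convention (not the kernel function; the data and the "negative target means no filter" encoding are made up for illustration):

	#include <stdio.h>
	#include <stdlib.h>

	struct lpi { int intid; int target_vcpu; };

	/* Copy matching intids into a caller-freed array; return how many. */
	static int copy_lpi_list(const struct lpi *lpis, int n, int target, int **intid_ptr)
	{
		int *out = malloc(sizeof(*out) * n);
		int i, count = 0;

		if (!out)
			return -1;
		for (i = 0; i < n; i++) {
			if (target >= 0 && lpis[i].target_vcpu != target)
				continue;	/* per-vCPU callers (INVALL, MOVALL, ...) */
			out[count++] = lpis[i].intid;
		}
		*intid_ptr = out;
		return count;
	}

	int main(void)
	{
		struct lpi lpis[] = { {8192, 0}, {8193, 1}, {8200, 0} };
		int *ids, n, i;

		n = copy_lpi_list(lpis, 3, -1, &ids);	/* "NULL vcpu": all LPIs, as vgic-debug wants */
		for (i = 0; i < n; i++)
			printf("%d ", ids[i]);
		printf("\n");
		free(ids);
		return 0;
	}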

+ 59 - 7
virt/kvm/arm/vgic/vgic-mmio-v2.c

@@ -22,22 +22,33 @@
 #include "vgic.h"
 #include "vgic-mmio.h"
 
+/*
+ * The Revision field in the IIDR have the following meanings:
+ *
+ * Revision 1: Report GICv2 interrupts as group 0 instead of group 1
+ * Revision 2: Interrupt groups are guest-configurable and signaled using
+ * 	       their configured groups.
+ */
+
 static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
 					    gpa_t addr, unsigned int len)
 {
+	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
 	u32 value;
 
 	switch (addr & 0x0c) {
 	case GIC_DIST_CTRL:
-		value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
+		value = vgic->enabled ? GICD_ENABLE : 0;
 		break;
 	case GIC_DIST_CTR:
-		value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+		value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
 		value = (value >> 5) - 1;
 		value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 		break;
 	case GIC_DIST_IIDR:
-		value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
+			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
 		return 0;
@@ -66,6 +77,42 @@ static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
+					   gpa_t addr, unsigned int len,
+					   unsigned long val)
+{
+	switch (addr & 0x0c) {
+	case GIC_DIST_IIDR:
+		if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
+			return -EINVAL;
+
+		/*
+		 * If we observe a write to GICD_IIDR we know that userspace
+		 * has been updated and has had a chance to cope with older
+		 * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting
+		 * interrupts as group 1, and therefore we now allow groups to
+		 * be user writable.  Doing this by default would break
+		 * migration from old kernels to new kernels with legacy
+		 * userspace.
+		 */
+		vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
+		return 0;
+	}
+
+	vgic_mmio_write_v2_misc(vcpu, addr, len, val);
+	return 0;
+}
+
+static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu,
+					    gpa_t addr, unsigned int len,
+					    unsigned long val)
+{
+	if (vcpu->kvm->arch.vgic.v2_groups_user_writable)
+		vgic_mmio_write_group(vcpu, addr, len, val);
+
+	return 0;
+}
+
 static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
 				 gpa_t addr, unsigned int len,
 				 unsigned long val)
@@ -352,17 +399,22 @@ static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu,
 
 		if (n > vgic_v3_max_apr_idx(vcpu))
 			return;
+
+		n = array_index_nospec(n, 4);
+
 		/* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
 		vgicv3->vgic_ap1r[n] = val;
 	}
 }
 
 static const struct vgic_register_region vgic_v2_dist_registers[] = {
-	REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
-		vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
-		VGIC_ACCESS_32bit),
+	REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL,
+		vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc,
+		NULL, vgic_mmio_uaccess_write_v2_misc,
+		12, VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
-		vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
+		vgic_mmio_read_group, vgic_mmio_write_group,
+		NULL, vgic_mmio_uaccess_write_v2_group, 1,
		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
 		vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
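
The GICD_IIDR handshake above means a userspace that restores distributor state must write back the IIDR value it read (now carrying Revision 2) before its IGROUPR writes take effect; legacy userspace that never touches GICD_IIDR keeps the old RAO/WI behaviour. A rough sketch of that opt-in from userspace, assuming a GICv2 KVM device fd on an arm/arm64 host and the dist-register attr encoding described in Documentation/virtual/kvm/devices/arm-vgic.txt (vcpu_index 0, GICD_IIDR at offset 0x8); treat it as illustrative, not as an excerpt from any real VMM:

	#include <linux/kvm.h>
	#include <stdint.h>
	#include <sys/ioctl.h>

	/* GICD_IIDR lives at offset 0x8 in the distributor register map. */
	#define GICD_IIDR_OFFSET	0x8

	static int vgic_v2_opt_in_to_group_restore(int vgic_fd)
	{
		uint32_t iidr;
		struct kvm_device_attr attr = {
			.group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
			.attr  = GICD_IIDR_OFFSET,	/* vcpu_index 0, offset 0x8 */
			.addr  = (uint64_t)(unsigned long)&iidr,
		};

		/* Read the IIDR the kernel reports (Revision >= 2 here)... */
		if (ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr))
			return -1;
		/* ...and write the same value back; this is what flips
		 * v2_groups_user_writable, so later GICD_IGROUPR restores
		 * are applied instead of being ignored. */
		return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
	}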

+ 55 - 17
virt/kvm/arm/vgic/vgic-mmio-v3.c

@@ -59,19 +59,27 @@ bool vgic_supports_direct_msis(struct kvm *kvm)
 	return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
 }
 
+/*
+ * The Revision field in the IIDR have the following meanings:
+ *
+ * Revision 2: Interrupt groups are guest-configurable and signaled using
+ * 	       their configured groups.
+ */
+
 static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 					    gpa_t addr, unsigned int len)
 {
+	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
 	u32 value = 0;
 
 	switch (addr & 0x0c) {
 	case GICD_CTLR:
-		if (vcpu->kvm->arch.vgic.enabled)
+		if (vgic->enabled)
 			value |= GICD_CTLR_ENABLE_SS_G1;
 		value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
 		break;
 	case GICD_TYPER:
-		value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+		value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
 		value = (value >> 5) - 1;
 		if (vgic_has_its(vcpu->kvm)) {
 			value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
@@ -81,7 +89,9 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 		}
 		break;
 	case GICD_IIDR:
-		value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
+			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
 		return 0;
@@ -110,6 +120,20 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
+					   gpa_t addr, unsigned int len,
+					   unsigned long val)
+{
+	switch (addr & 0x0c) {
+	case GICD_IIDR:
+		if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
+			return -EINVAL;
+	}
+
+	vgic_mmio_write_v3_misc(vcpu, addr, len, val);
+	return 0;
+}
+
 static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
 					    gpa_t addr, unsigned int len)
 {
@@ -246,9 +270,9 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
 	return value;
 }
 
-static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
-					  gpa_t addr, unsigned int len,
-					  unsigned long val)
+static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
+					 gpa_t addr, unsigned int len,
+					 unsigned long val)
 {
 	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
 	int i;
@@ -273,6 +297,8 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
 
 		vgic_put_irq(vcpu->kvm, irq);
 	}
+
+	return 0;
 }
 
 /* We want to avoid outer shareable. */
@@ -444,14 +470,15 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
 	}
 
 static const struct vgic_register_region vgic_v3_dist_registers[] = {
-	REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
-		vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
-		VGIC_ACCESS_32bit),
+	REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR,
+		vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc,
+		NULL, vgic_mmio_uaccess_write_v3_misc,
+		16, VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GICD_STATUSR,
 		vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
-		vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
+		vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
 		vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
@@ -465,7 +492,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
 		vgic_mmio_read_pending, vgic_mmio_write_cpending,
-		vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
+		vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
 		vgic_mmio_read_active, vgic_mmio_write_sactive,
@@ -524,7 +551,7 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
 
 static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
 	REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
-		vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
+		vgic_mmio_read_group, vgic_mmio_write_group, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
 		vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
@@ -538,7 +565,7 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
 		vgic_mmio_read_pending, vgic_mmio_write_cpending,
-		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+		vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
 		vgic_mmio_read_active, vgic_mmio_write_sactive,
@@ -873,7 +900,8 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
 /**
  * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
  * @vcpu: The VCPU requesting a SGI
- * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
+ * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU
+ * @allow_group1: Does the sysreg access allow generation of G1 SGIs
 *
 * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
 * This will trap in sys_regs.c and call this function.
@@ -883,7 +911,7 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
 * check for matching ones. If this bit is set, we signal all, but not the
 * calling VCPU.
 */
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu *c_vcpu;
@@ -932,9 +960,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
 		irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
 
 		spin_lock_irqsave(&irq->irq_lock, flags);
-		irq->pending_latch = true;
 
-		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+		/*
+		 * An access targetting Group0 SGIs can only generate
+		 * those, while an access targetting Group1 SGIs can
+		 * generate interrupts of either group.
+		 */
+		if (!irq->group || allow_group1) {
+			irq->pending_latch = true;
+			vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+		} else {
+			spin_unlock_irqrestore(&irq->irq_lock, flags);
+		}
+
 		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
+ 51 - 5
virt/kvm/arm/vgic/vgic-mmio.c

@@ -40,6 +40,51 @@ void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
 	/* Ignore */
 	/* Ignore */
 }
 }
 
 
+int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+			       unsigned int len, unsigned long val)
+{
+	/* Ignore */
+	return 0;
+}
+
+unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu,
+				   gpa_t addr, unsigned int len)
+{
+	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+	u32 value = 0;
+	int i;
+
+	/* Loop over all IRQs affected by this read */
+	for (i = 0; i < len * 8; i++) {
+		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+		if (irq->group)
+			value |= BIT(i);
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+
+	return value;
+}
+
+void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
+			   unsigned int len, unsigned long val)
+{
+	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+	int i;
+	unsigned long flags;
+
+	for (i = 0; i < len * 8; i++) {
+		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+		spin_lock_irqsave(&irq->irq_lock, flags);
+		irq->group = !!(val & BIT(i));
+		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+}
+
 /*
 /*
  * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
  * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
  * of the enabled bit, so there is only one function for both here.
  * of the enabled bit, so there is only one function for both here.
@@ -363,11 +408,12 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
 	mutex_unlock(&vcpu->kvm->lock);
 	mutex_unlock(&vcpu->kvm->lock);
 }
 }
 
 
-void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
 				     gpa_t addr, unsigned int len,
 				     gpa_t addr, unsigned int len,
 				     unsigned long val)
 				     unsigned long val)
 {
 {
 	__vgic_mmio_write_cactive(vcpu, addr, len, val);
 	__vgic_mmio_write_cactive(vcpu, addr, len, val);
+	return 0;
 }
 }
 
 
 static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
 static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
@@ -399,11 +445,12 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
 	mutex_unlock(&vcpu->kvm->lock);
 	mutex_unlock(&vcpu->kvm->lock);
 }
 }
 
 
-void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
 				     gpa_t addr, unsigned int len,
 				     gpa_t addr, unsigned int len,
 				     unsigned long val)
 				     unsigned long val)
 {
 {
 	__vgic_mmio_write_sactive(vcpu, addr, len, val);
 	__vgic_mmio_write_sactive(vcpu, addr, len, val);
+	return 0;
 }
 }
 
 
 unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
 unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
@@ -735,10 +782,9 @@ static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 
 
 	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
 	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
 	if (region->uaccess_write)
 	if (region->uaccess_write)
-		region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
-	else
-		region->write(r_vcpu, addr, sizeof(u32), *val);
+		return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
 
 
+	region->write(r_vcpu, addr, sizeof(u32), *val);
 	return 0;
 	return 0;
 }
 }
 
 

+ 17 - 8
virt/kvm/arm/vgic/vgic-mmio.h

@@ -37,8 +37,8 @@ struct vgic_register_region {
 	unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
 				      unsigned int len);
 	union {
-		void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
-				      unsigned int len, unsigned long val);
+		int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
+				     unsigned int len, unsigned long val);
 		int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its,
 					 gpa_t addr, unsigned int len,
 					 unsigned long val);
@@ -134,6 +134,15 @@ unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
 void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
 			unsigned int len, unsigned long val);
 
+int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+			       unsigned int len, unsigned long val);
+
+unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr,
+				   unsigned int len);
+
+void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
+			   unsigned int len, unsigned long val);
+
 unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
 				    gpa_t addr, unsigned int len);
 
@@ -167,13 +176,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
 			     gpa_t addr, unsigned int len,
 			     unsigned long val);
 
-void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
-				     gpa_t addr, unsigned int len,
-				     unsigned long val);
+int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+				    gpa_t addr, unsigned int len,
+				    unsigned long val);
 
-void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
-				     gpa_t addr, unsigned int len,
-				     unsigned long val);
+int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+				    gpa_t addr, unsigned int len,
+				    unsigned long val);
 
 unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
 				      gpa_t addr, unsigned int len);

+ 7 - 3
virt/kvm/arm/vgic/vgic-v2.c

@@ -62,7 +62,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
 	int lr;
-	unsigned long flags;
+
+	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 
 	cpuif->vgic_hcr &= ~GICH_HCR_UIE;
 
@@ -83,7 +84,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 
 		irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
 
-		spin_lock_irqsave(&irq->irq_lock, flags);
+		spin_lock(&irq->irq_lock);
 
 		/* Always preserve the active bit */
 		irq->active = !!(val & GICH_LR_ACTIVE_BIT);
@@ -126,7 +127,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 				vgic_irq_set_phys_active(irq, false);
 		}
 
-		spin_unlock_irqrestore(&irq->irq_lock, flags);
+		spin_unlock(&irq->irq_lock);
 		vgic_put_irq(vcpu->kvm, irq);
 	}
 
@@ -159,6 +160,9 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 		}
 	}
 
+	if (irq->group)
+		val |= GICH_LR_GROUP1;
+
 	if (irq->hw) {
 		val |= GICH_LR_HW;
 		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;

+ 5 - 8
virt/kvm/arm/vgic/vgic-v3.c

@@ -46,7 +46,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
 	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	int lr;
-	unsigned long flags;
+
+	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 
 	cpuif->vgic_hcr &= ~ICH_HCR_UIE;
 
@@ -75,7 +76,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 		if (!irq)	/* An LPI could have been unmapped. */
 			continue;
 
-		spin_lock_irqsave(&irq->irq_lock, flags);
+		spin_lock(&irq->irq_lock);
 
 		/* Always preserve the active bit */
 		irq->active = !!(val & ICH_LR_ACTIVE_BIT);
@@ -118,7 +119,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 				vgic_irq_set_phys_active(irq, false);
 		}
 
-		spin_unlock_irqrestore(&irq->irq_lock, flags);
+		spin_unlock(&irq->irq_lock);
 		vgic_put_irq(vcpu->kvm, irq);
 	}
 
@@ -197,11 +198,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
 		irq->line_level = false;
 
-	/*
-	 * We currently only support Group1 interrupts, which is a
-	 * known defect. This needs to be addressed at some point.
-	 */
-	if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+	if (irq->group)
 		val |= ICH_LR_GROUP;
 
 	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;

+ 7 - 12
virt/kvm/arm/vgic/vgic.c

@@ -28,12 +28,6 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
-#else
-#define DEBUG_SPINLOCK_BUG_ON(p)
-#endif
-
 struct vgic_global kvm_vgic_global_state __ro_after_init = {
 	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
 };
@@ -599,10 +593,11 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_irq *irq, *tmp;
-	unsigned long flags;
+
+	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 
 retry:
-	spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
+	spin_lock(&vgic_cpu->ap_list_lock);
 
 	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
 		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
@@ -643,7 +638,7 @@ retry:
 		/* This interrupt looks like it has to be migrated. */
 
 		spin_unlock(&irq->irq_lock);
-		spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+		spin_unlock(&vgic_cpu->ap_list_lock);
 
 		/*
 		 * Ensure locking order by always locking the smallest
@@ -657,7 +652,7 @@ retry:
 			vcpuB = vcpu;
 		}
 
-		spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
+		spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
 		spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
 				 SINGLE_DEPTH_NESTING);
 		spin_lock(&irq->irq_lock);
@@ -682,7 +677,7 @@ retry:
 
 		spin_unlock(&irq->irq_lock);
 		spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
-		spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
+		spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
 
 		if (target_vcpu_needs_kick) {
 			kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
@@ -692,7 +687,7 @@ retry:
 		goto retry;
 	}
 
-	spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+	spin_unlock(&vgic_cpu->ap_list_lock);
 }
 
 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
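
The spin_lock_irqsave() to spin_lock() conversions above (and in vgic-v2.c/vgic-v3.c) rely on the callers already running with interrupts disabled around the world switch, so each function now documents and enforces that requirement with DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()) instead of saving and restoring flags on every lock acquisition. A toy, userspace-only model of that division of responsibility (made-up helpers, not kernel primitives):

	#include <assert.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Toy model: the entry point masks "interrupts" once, leaf code only asserts. */
	static bool irqs_enabled = true;

	static void local_irq_disable_model(void) { irqs_enabled = false; }
	static void local_irq_enable_model(void)  { irqs_enabled = true; }

	static void leaf_that_used_to_save_flags(void)
	{
		/* Equivalent of DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()). */
		assert(!irqs_enabled);
		/* ...take plain (non-irqsave) locks here... */
	}

	int main(void)
	{
		local_irq_disable_model();	/* done once, at the entry point */
		leaf_that_used_to_save_flags();
		local_irq_enable_model();
		printf("ok\n");
		return 0;
	}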

+ 7 - 0
virt/kvm/arm/vgic/vgic.h

@@ -103,6 +103,12 @@
 #define KVM_VGIC_V3_RDIST_COUNT_MASK	GENMASK_ULL(63, 52)
 #define KVM_VGIC_V3_RDIST_COUNT_SHIFT	52
 
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
+#else
+#define DEBUG_SPINLOCK_BUG_ON(p)
+#endif
+
 /* Requires the irq_lock to be held by the caller. */
 static inline bool irq_is_pending(struct vgic_irq *irq)
 {
@@ -305,6 +311,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
 		(base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
 }
 
+int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
 int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
 			 u32 devid, u32 eventid, struct vgic_irq **irq);
 struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);