9 years ago · e28e909c36
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6491,6 +6491,7 @@ F:	arch/*/include/asm/kvm*
 
				 F:	include/linux/kvm*
			
 
				 F:	include/uapi/linux/kvm*
			
 
				 F:	virt/kvm/
			
 
				+F:	tools/kvm/
			
 
				 
			
 
				 KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
			
 
				 M:	Joerg Roedel <joro@8bytes.org>
			
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -41,6 +41,8 @@
 
				 
			
 
				 #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
			
 
				 
			
 
				+#define KVM_REQ_VCPU_EXIT	8
			
 
				+
			
 
				 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
			
 
				 int __attribute_const__ kvm_target_cpu(void);
			
 
				 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
			
@@ -226,6 +228,10 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 
				 
			
 
				 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
			
 
				 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
			
 
				+void kvm_arm_halt_guest(struct kvm *kvm);
			
 
				+void kvm_arm_resume_guest(struct kvm *kvm);
			
 
				+void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
			
 
				+void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
			
 
				 
			
 
				 int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
			
 
				 unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
			
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -28,6 +28,9 @@ struct kvm_decode {
 
				 	bool sign_extend;
			
 
				 };
			
 
				 
			
 
				+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
			
 
				+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
			
 
				+
			
 
				 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
			
 
				 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
			
 
				 		 phys_addr_t fault_ipa);
			
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -46,6 +46,13 @@ config KVM_ARM_HOST
 
				 	---help---
			
 
				 	  Provides host support for ARM processors.
			
 
				 
			
 
				+config KVM_NEW_VGIC
			
 
				+	bool "New VGIC implementation"
			
 
				+	depends on KVM
			
 
				+	default y
			
 
				+	---help---
			
 
				+	  uses the new VGIC implementation
			
 
				+
			
 
				 source drivers/vhost/Kconfig
			
 
				 
			
 
				 endif # VIRTUALIZATION
			
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,7 +21,18 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 
				 obj-y += kvm-arm.o init.o interrupts.o
			
 
				 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
			
 
				 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
			
 
				+
			
 
				+ifeq ($(CONFIG_KVM_NEW_VGIC),y)
			
 
				+obj-y += $(KVM)/arm/vgic/vgic.o
			
 
				+obj-y += $(KVM)/arm/vgic/vgic-init.o
			
 
				+obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
			
 
				+obj-y += $(KVM)/arm/vgic/vgic-v2.o
			
 
				+obj-y += $(KVM)/arm/vgic/vgic-mmio.o
			
 
				+obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
			
 
				+obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
			
 
				+else
			
 
				 obj-y += $(KVM)/arm/vgic.o
			
 
				 obj-y += $(KVM)/arm/vgic-v2.o
			
 
				 obj-y += $(KVM)/arm/vgic-v2-emul.o
			
 
				+endif
			
 
				 obj-y += $(KVM)/arm/arch_timer.o
			
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -455,7 +455,7 @@ static void update_vttbr(struct kvm *kvm)
 
				 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				 	struct kvm *kvm = vcpu->kvm;
			
 
				-	int ret;
			
 
				+	int ret = 0;
			
 
				 
			
 
				 	if (likely(vcpu->arch.has_run_once))
			
 
				 		return 0;
			
@@ -478,9 +478,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 
				 	 * interrupts from the virtual timer with a userspace gic.
			
 
				 	 */
			
 
				 	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
			
 
				-		kvm_timer_enable(kvm);
			
 
				+		ret = kvm_timer_enable(vcpu);
			
 
				 
			
 
				-	return 0;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 bool kvm_arch_intc_initialized(struct kvm *kvm)
			
@@ -488,30 +488,37 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
 
				 	return vgic_initialized(kvm);
			
 
				 }
			
 
				 
			
 
				-static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
			
 
				-static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
			
 
				-
			
 
				-static void kvm_arm_halt_guest(struct kvm *kvm)
			
 
				+void kvm_arm_halt_guest(struct kvm *kvm)
			
 
				 {
			
 
				 	int i;
			
 
				 	struct kvm_vcpu *vcpu;
			
 
				 
			
 
				 	kvm_for_each_vcpu(i, vcpu, kvm)
			
 
				 		vcpu->arch.pause = true;
			
 
				-	force_vm_exit(cpu_all_mask);
			
 
				+	kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
			
 
				+}
			
 
				+
			
 
				+void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	vcpu->arch.pause = true;
			
 
				+	kvm_vcpu_kick(vcpu);
			
 
				 }
			
 
				 
			
 
				-static void kvm_arm_resume_guest(struct kvm *kvm)
			
 
				+void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
			
 
				+
			
 
				+	vcpu->arch.pause = false;
			
 
				+	swake_up(wq);
			
 
				+}
			
 
				+
			
 
				+void kvm_arm_resume_guest(struct kvm *kvm)
			
 
				 {
			
 
				 	int i;
			
 
				 	struct kvm_vcpu *vcpu;
			
 
				 
			
 
				-	kvm_for_each_vcpu(i, vcpu, kvm) {
			
 
				-		struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
			
 
				-
			
 
				-		vcpu->arch.pause = false;
			
 
				-		swake_up(wq);
			
 
				-	}
			
 
				+	kvm_for_each_vcpu(i, vcpu, kvm)
			
 
				+		kvm_arm_resume_vcpu(vcpu);
			
 
				 }
			
 
				 
			
 
				 static void vcpu_sleep(struct kvm_vcpu *vcpu)
			
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,7 +23,7 @@
 
				 
			
 
				 #include "trace.h"
			
 
				 
			
 
				-static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
			
 
				+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data)
			
 
				 {
			
 
				 	void *datap = NULL;
			
 
				 	union {
			
@@ -55,7 +55,7 @@ static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
 
				 	memcpy(buf, datap, len);
			
 
				 }
			
 
				 
			
 
				-static unsigned long mmio_read_buf(char *buf, unsigned int len)
			
 
				+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
			
 
				 {
			
 
				 	unsigned long data = 0;
			
 
				 	union {
			
@@ -66,7 +66,7 @@ static unsigned long mmio_read_buf(char *buf, unsigned int len)
 
				 
			
 
				 	switch (len) {
			
 
				 	case 1:
			
 
				-		data = buf[0];
			
 
				+		data = *(u8 *)buf;
			
 
				 		break;
			
 
				 	case 2:
			
 
				 		memcpy(&tmp.hword, buf, len);
			
@@ -87,11 +87,10 @@ static unsigned long mmio_read_buf(char *buf, unsigned int len)
 
				 
			
 
				 /**
			
 
				  * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
			
 
				+ *			     or in-kernel IO emulation
			
 
				+ *
			
 
				  * @vcpu: The VCPU pointer
			
 
				  * @run:  The VCPU run struct containing the mmio data
			
 
				- *
			
 
				- * This should only be called after returning from userspace for MMIO load
			
 
				- * emulation.
			
 
				  */
			
 
				 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
			
 
				 {
			
@@ -104,7 +103,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
				 		if (len > sizeof(unsigned long))
			
 
				 			return -EINVAL;
			
 
				 
			
 
				-		data = mmio_read_buf(run->mmio.data, len);
			
 
				+		data = kvm_mmio_read_buf(run->mmio.data, len);
			
 
				 
			
 
				 		if (vcpu->arch.mmio_decode.sign_extend &&
			
 
				 		    len < sizeof(unsigned long)) {
			
@@ -190,7 +189,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
				 					       len);
			
 
				 
			
 
				 		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
			
 
				-		mmio_write_buf(data_buf, len, data);
			
 
				+		kvm_mmio_write_buf(data_buf, len, data);
			
 
				 
			
 
				 		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
			
 
				 				       data_buf);
			
@@ -206,18 +205,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
				 	run->mmio.is_write	= is_write;
			
 
				 	run->mmio.phys_addr	= fault_ipa;
			
 
				 	run->mmio.len		= len;
			
 
				-	if (is_write)
			
 
				-		memcpy(run->mmio.data, data_buf, len);
			
 
				 
			
 
				 	if (!ret) {
			
 
				 		/* We handled the access successfully in the kernel. */
			
 
				+		if (!is_write)
			
 
				+			memcpy(run->mmio.data, data_buf, len);
			
 
				 		vcpu->stat.mmio_exit_kernel++;
			
 
				 		kvm_handle_mmio_return(vcpu, run);
			
 
				 		return 1;
			
 
				-	} else {
			
 
				-		vcpu->stat.mmio_exit_user++;
			
 
				 	}
			
 
				 
			
 
				+	if (is_write)
			
 
				+		memcpy(run->mmio.data, data_buf, len);
			
 
				+	vcpu->stat.mmio_exit_user++;
			
 
				 	run->exit_reason	= KVM_EXIT_MMIO;
			
 
				 	return 0;
			
 
				 }
			
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -43,6 +43,8 @@
 
				 
			
 
				 #define KVM_VCPU_MAX_FEATURES 4
			
 
				 
			
 
				+#define KVM_REQ_VCPU_EXIT	8
			
 
				+
			
 
				 int __attribute_const__ kvm_target_cpu(void);
			
 
				 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
			
 
				 int kvm_arch_dev_ioctl_check_extension(long ext);
			
@@ -327,6 +329,10 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 
				 
			
 
				 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
			
 
				 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
			
 
				+void kvm_arm_halt_guest(struct kvm *kvm);
			
 
				+void kvm_arm_resume_guest(struct kvm *kvm);
			
 
				+void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
			
 
				+void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
			
 
				 
			
 
				 u64 __kvm_call_hyp(void *hypfn, ...);
			
 
				 #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
			
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -30,6 +30,9 @@ struct kvm_decode {
 
				 	bool sign_extend;
			
 
				 };
			
 
				 
			
 
				+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
			
 
				+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
			
 
				+
			
 
				 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
			
 
				 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
			
 
				 		 phys_addr_t fault_ipa);
			
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -54,6 +54,13 @@ config KVM_ARM_PMU
 
				 	  Adds support for a virtual Performance Monitoring Unit (PMU) in
			
 
				 	  virtual machines.
			
 
				 
			
 
				+config KVM_NEW_VGIC
			
 
				+	bool "New VGIC implementation"
			
 
				+	depends on KVM
			
 
				+	default y
			
 
				+	---help---
			
 
				+	  uses the new VGIC implementation
			
 
				+
			
 
				 source drivers/vhost/Kconfig
			
 
				 
			
 
				 endif # VIRTUALIZATION
			
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -20,10 +20,22 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
			
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
			
 
				 
			
 
				+ifeq ($(CONFIG_KVM_NEW_VGIC),y)
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
			
 
				+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
			
 
				+else
			
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
			
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
			
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
			
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
			
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
			
 
				+endif
			
 
				 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
			
 
				 kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
			
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -162,7 +162,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 
				 		esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
			
 
				 
			
 
				 	if (!is_iabt)
			
 
				-		esr |= ESR_ELx_EC_DABT_LOW;
			
 
				+		esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
			
 
				 
			
 
				 	vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
			
 
				 }
			
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -2,10 +2,12 @@
 
				 #define _UAPI__SVM_H
			
 
				 
			
 
				 #define SVM_EXIT_READ_CR0      0x000
			
 
				+#define SVM_EXIT_READ_CR2      0x002
			
 
				 #define SVM_EXIT_READ_CR3      0x003
			
 
				 #define SVM_EXIT_READ_CR4      0x004
			
 
				 #define SVM_EXIT_READ_CR8      0x008
			
 
				 #define SVM_EXIT_WRITE_CR0     0x010
			
 
				+#define SVM_EXIT_WRITE_CR2     0x012
			
 
				 #define SVM_EXIT_WRITE_CR3     0x013
			
 
				 #define SVM_EXIT_WRITE_CR4     0x014
			
 
				 #define SVM_EXIT_WRITE_CR8     0x018
			
@@ -80,10 +82,12 @@
 
				 
			
 
				 #define SVM_EXIT_REASONS \
			
 
				 	{ SVM_EXIT_READ_CR0,    "read_cr0" }, \
			
 
				+	{ SVM_EXIT_READ_CR2,    "read_cr2" }, \
			
 
				 	{ SVM_EXIT_READ_CR3,    "read_cr3" }, \
			
 
				 	{ SVM_EXIT_READ_CR4,    "read_cr4" }, \
			
 
				 	{ SVM_EXIT_READ_CR8,    "read_cr8" }, \
			
 
				 	{ SVM_EXIT_WRITE_CR0,   "write_cr0" }, \
			
 
				+	{ SVM_EXIT_WRITE_CR2,   "write_cr2" }, \
			
 
				 	{ SVM_EXIT_WRITE_CR3,   "write_cr3" }, \
			
 
				 	{ SVM_EXIT_WRITE_CR4,   "write_cr4" }, \
			
 
				 	{ SVM_EXIT_WRITE_CR8,   "write_cr8" }, \
			
@@ -91,26 +95,57 @@
 
				 	{ SVM_EXIT_READ_DR1,    "read_dr1" }, \
			
 
				 	{ SVM_EXIT_READ_DR2,    "read_dr2" }, \
			
 
				 	{ SVM_EXIT_READ_DR3,    "read_dr3" }, \
			
 
				+	{ SVM_EXIT_READ_DR4,    "read_dr4" }, \
			
 
				+	{ SVM_EXIT_READ_DR5,    "read_dr5" }, \
			
 
				+	{ SVM_EXIT_READ_DR6,    "read_dr6" }, \
			
 
				+	{ SVM_EXIT_READ_DR7,    "read_dr7" }, \
			
 
				 	{ SVM_EXIT_WRITE_DR0,   "write_dr0" }, \
			
 
				 	{ SVM_EXIT_WRITE_DR1,   "write_dr1" }, \
			
 
				 	{ SVM_EXIT_WRITE_DR2,   "write_dr2" }, \
			
 
				 	{ SVM_EXIT_WRITE_DR3,   "write_dr3" }, \
			
 
				+	{ SVM_EXIT_WRITE_DR4,   "write_dr4" }, \
			
 
				 	{ SVM_EXIT_WRITE_DR5,   "write_dr5" }, \
			
 
				+	{ SVM_EXIT_WRITE_DR6,   "write_dr6" }, \
			
 
				 	{ SVM_EXIT_WRITE_DR7,   "write_dr7" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + DE_VECTOR,       "DE excp" }, \
			
 
				 	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,       "DB excp" }, \
			
 
				 	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,       "BP excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + OF_VECTOR,       "OF excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + BR_VECTOR,       "BR excp" }, \
			
 
				 	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,       "UD excp" }, \
			
 
				-	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,       "PF excp" }, \
			
 
				 	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,       "NM excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + DF_VECTOR,       "DF excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + TS_VECTOR,       "TS excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + NP_VECTOR,       "NP excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + SS_VECTOR,       "SS excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + GP_VECTOR,       "GP excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,       "PF excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + MF_VECTOR,       "MF excp" }, \
			
 
				 	{ SVM_EXIT_EXCP_BASE + AC_VECTOR,       "AC excp" }, \
			
 
				 	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,       "MC excp" }, \
			
 
				+	{ SVM_EXIT_EXCP_BASE + XM_VECTOR,       "XF excp" }, \
			
 
				 	{ SVM_EXIT_INTR,        "interrupt" }, \
			
 
				 	{ SVM_EXIT_NMI,         "nmi" }, \
			
 
				 	{ SVM_EXIT_SMI,         "smi" }, \
			
 
				 	{ SVM_EXIT_INIT,        "init" }, \
			
 
				 	{ SVM_EXIT_VINTR,       "vintr" }, \
			
 
				 	{ SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
			
 
				+	{ SVM_EXIT_IDTR_READ,   "read_idtr" }, \
			
 
				+	{ SVM_EXIT_GDTR_READ,   "read_gdtr" }, \
			
 
				+	{ SVM_EXIT_LDTR_READ,   "read_ldtr" }, \
			
 
				+	{ SVM_EXIT_TR_READ,     "read_rt" }, \
			
 
				+	{ SVM_EXIT_IDTR_WRITE,  "write_idtr" }, \
			
 
				+	{ SVM_EXIT_GDTR_WRITE,  "write_gdtr" }, \
			
 
				+	{ SVM_EXIT_LDTR_WRITE,  "write_ldtr" }, \
			
 
				+	{ SVM_EXIT_TR_WRITE,    "write_rt" }, \
			
 
				+	{ SVM_EXIT_RDTSC,       "rdtsc" }, \
			
 
				+	{ SVM_EXIT_RDPMC,       "rdpmc" }, \
			
 
				+	{ SVM_EXIT_PUSHF,       "pushf" }, \
			
 
				+	{ SVM_EXIT_POPF,        "popf" }, \
			
 
				 	{ SVM_EXIT_CPUID,       "cpuid" }, \
			
 
				+	{ SVM_EXIT_RSM,         "rsm" }, \
			
 
				+	{ SVM_EXIT_IRET,        "iret" }, \
			
 
				+	{ SVM_EXIT_SWINT,       "swint" }, \
			
 
				 	{ SVM_EXIT_INVD,        "invd" }, \
			
 
				 	{ SVM_EXIT_PAUSE,       "pause" }, \
			
 
				 	{ SVM_EXIT_HLT,         "hlt" }, \
			
@@ -119,6 +154,7 @@
 
				 	{ SVM_EXIT_IOIO,        "io" }, \
			
 
				 	{ SVM_EXIT_MSR,         "msr" }, \
			
 
				 	{ SVM_EXIT_TASK_SWITCH, "task_switch" }, \
			
 
				+	{ SVM_EXIT_FERR_FREEZE, "ferr_freeze" }, \
			
 
				 	{ SVM_EXIT_SHUTDOWN,    "shutdown" }, \
			
 
				 	{ SVM_EXIT_VMRUN,       "vmrun" }, \
			
 
				 	{ SVM_EXIT_VMMCALL,     "hypercall" }, \
			
@@ -127,14 +163,16 @@
 
				 	{ SVM_EXIT_STGI,        "stgi" }, \
			
 
				 	{ SVM_EXIT_CLGI,        "clgi" }, \
			
 
				 	{ SVM_EXIT_SKINIT,      "skinit" }, \
			
 
				+	{ SVM_EXIT_RDTSCP,      "rdtscp" }, \
			
 
				+	{ SVM_EXIT_ICEBP,       "icebp" }, \
			
 
				 	{ SVM_EXIT_WBINVD,      "wbinvd" }, \
			
 
				 	{ SVM_EXIT_MONITOR,     "monitor" }, \
			
 
				 	{ SVM_EXIT_MWAIT,       "mwait" }, \
			
 
				 	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
			
 
				 	{ SVM_EXIT_NPF,         "npf" }, \
			
 
				-	{ SVM_EXIT_RSM,         "rsm" }, \
			
 
				 	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
			
 
				-	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }
			
 
				+	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
			
 
				+	{ SVM_EXIT_ERR,         "invalid_guest_state" }
			
 
				 
			
 
				 
			
 
				 #endif /* _UAPI__SVM_H */
			
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -84,7 +84,7 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
 
				 #define TSC_RATIO_MIN		0x0000000000000001ULL
			
 
				 #define TSC_RATIO_MAX		0x000000ffffffffffULL
			
 
				 
			
 
				-#define AVIC_HPA_MASK	~((0xFFFULL << 52) || 0xFFF)
			
 
				+#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
			
 
				 
			
 
				 /*
			
 
				  * 0xff is broadcast, so the max index allowed for physical APIC ID
			
@@ -3597,7 +3597,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
 
				 	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
			
 
				 	u32 icrl = svm->vmcb->control.exit_info_1;
			
 
				 	u32 id = svm->vmcb->control.exit_info_2 >> 32;
			
 
				-	u32 index = svm->vmcb->control.exit_info_2 && 0xFF;
			
 
				+	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
			
 
				 	struct kvm_lapic *apic = svm->vcpu.arch.apic;
			
 
				 
			
 
				 	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
			
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
 
				 
			
 
				 	if (is_guest_mode(vcpu))
			
 
				 		msr_bitmap = vmx_msr_bitmap_nested;
			
 
				-	else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
			
 
				+	else if (cpu_has_secondary_exec_ctrls() &&
			
 
				+		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
			
 
				+		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
			
 
				 		if (is_long_mode(vcpu))
			
 
				 			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
			
 
				 		else
			
@@ -4787,6 +4789,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 
				 	struct vcpu_vmx *vmx = to_vmx(vcpu);
			
 
				 
			
 
				 	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
			
 
				+	if (cpu_has_secondary_exec_ctrls()) {
			
 
				+		if (kvm_vcpu_apicv_active(vcpu))
			
 
				+			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
			
 
				+				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
			
 
				+				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
			
 
				+		else
			
 
				+			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
			
 
				+					SECONDARY_EXEC_APIC_REGISTER_VIRT |
			
 
				+					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
			
 
				+	}
			
 
				+
			
 
				+	if (cpu_has_vmx_msr_bitmap())
			
 
				+		vmx_set_msr_bitmap(vcpu);
			
 
				 }
			
 
				 
			
 
				 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
			
@@ -6333,23 +6348,20 @@ static __init int hardware_setup(void)
 
				 
			
 
				 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
			
 
				 
			
 
				-	if (enable_apicv) {
			
 
				-		for (msr = 0x800; msr <= 0x8ff; msr++)
			
 
				-			vmx_disable_intercept_msr_read_x2apic(msr);
			
 
				-
			
 
				-		/* According SDM, in x2apic mode, the whole id reg is used.
			
 
				-		 * But in KVM, it only use the highest eight bits. Need to
			
 
				-		 * intercept it */
			
 
				-		vmx_enable_intercept_msr_read_x2apic(0x802);
			
 
				-		/* TMCCT */
			
 
				-		vmx_enable_intercept_msr_read_x2apic(0x839);
			
 
				-		/* TPR */
			
 
				-		vmx_disable_intercept_msr_write_x2apic(0x808);
			
 
				-		/* EOI */
			
 
				-		vmx_disable_intercept_msr_write_x2apic(0x80b);
			
 
				-		/* SELF-IPI */
			
 
				-		vmx_disable_intercept_msr_write_x2apic(0x83f);
			
 
				-	}
			
 
				+	for (msr = 0x800; msr <= 0x8ff; msr++)
			
 
				+		vmx_disable_intercept_msr_read_x2apic(msr);
			
 
				+
			
 
				+	/* According to the SDM, in x2apic mode the whole id reg is used. But in
			
 
				+	 * KVM, it only uses the highest eight bits. Need to intercept it. */
			
 
				+	vmx_enable_intercept_msr_read_x2apic(0x802);
			
 
				+	/* TMCCT */
			
 
				+	vmx_enable_intercept_msr_read_x2apic(0x839);
			
 
				+	/* TPR */
			
 
				+	vmx_disable_intercept_msr_write_x2apic(0x808);
			
 
				+	/* EOI */
			
 
				+	vmx_disable_intercept_msr_write_x2apic(0x80b);
			
 
				+	/* SELF-IPI */
			
 
				+	vmx_disable_intercept_msr_write_x2apic(0x83f);
			
 
				 
			
 
				 	if (enable_ept) {
			
 
				 		kvm_mmu_set_mask_ptes(0ull,
			
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -24,9 +24,6 @@
 
				 #include <linux/workqueue.h>
			
 
				 
			
 
				 struct arch_timer_kvm {
			
 
				-	/* Is the timer enabled */
			
 
				-	bool			enabled;
			
 
				-
			
 
				 	/* Virtual offset */
			
 
				 	cycle_t			cntvoff;
			
 
				 };
			
@@ -53,15 +50,15 @@ struct arch_timer_cpu {
 
				 	/* Timer IRQ */
			
 
				 	struct kvm_irq_level		irq;
			
 
				 
			
 
				-	/* VGIC mapping */
			
 
				-	struct irq_phys_map		*map;
			
 
				-
			
 
				 	/* Active IRQ state caching */
			
 
				 	bool				active_cleared_last;
			
 
				+
			
 
				+	/* Is the timer enabled */
			
 
				+	bool			enabled;
			
 
				 };
			
 
				 
			
 
				 int kvm_timer_hyp_init(void);
			
 
				-void kvm_timer_enable(struct kvm *kvm);
			
 
				+int kvm_timer_enable(struct kvm_vcpu *vcpu);
			
 
				 void kvm_timer_init(struct kvm *kvm);
			
 
				 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
			
 
				 			 const struct kvm_irq_level *irq);
			
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -19,6 +19,10 @@
 
				 #ifndef __ASM_ARM_KVM_VGIC_H
			
 
				 #define __ASM_ARM_KVM_VGIC_H
			
 
				 
			
 
				+#ifdef CONFIG_KVM_NEW_VGIC
			
 
				+#include <kvm/vgic/vgic.h>
			
 
				+#else
			
 
				+
			
 
				 #include <linux/kernel.h>
			
 
				 #include <linux/kvm.h>
			
 
				 #include <linux/irqreturn.h>
			
@@ -158,7 +162,6 @@ struct vgic_io_device {
 
				 struct irq_phys_map {
			
 
				 	u32			virt_irq;
			
 
				 	u32			phys_irq;
			
 
				-	u32			irq;
			
 
				 };
			
 
				 
			
 
				 struct irq_phys_map_entry {
			
@@ -305,9 +308,6 @@ struct vgic_cpu {
 
				 	unsigned long   *active_shared;
			
 
				 	unsigned long   *pend_act_shared;
			
 
				 
			
 
				-	/* Number of list registers on this CPU */
			
 
				-	int		nr_lr;
			
 
				-
			
 
				 	/* CPU vif control registers for world switch */
			
 
				 	union {
			
 
				 		struct vgic_v2_cpu_if	vgic_v2;
			
@@ -342,17 +342,18 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 
				 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
			
 
				 			bool level);
			
 
				 int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
			
 
				-			       struct irq_phys_map *map, bool level);
			
 
				+			       unsigned int virt_irq, bool level);
			
 
				 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
			
 
				 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
			
 
				-struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
			
 
				-					   int virt_irq, int irq);
			
 
				-int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
			
 
				-bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
			
 
				+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq);
			
 
				+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
			
 
				+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
			
 
				 
			
 
				 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
			
 
				 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
			
 
				 #define vgic_ready(k)		((k)->arch.vgic.ready)
			
 
				+#define vgic_valid_spi(k, i)	(((i) >= VGIC_NR_PRIVATE_IRQS) && \
			
 
				+				 ((i) < (k)->arch.vgic.nr_irqs))
			
 
				 
			
 
				 int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
			
 
				 		  const struct vgic_ops **ops,
			
@@ -370,4 +371,5 @@ static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+#endif	/* old VGIC include */
			
 
				 #endif
			
--- a/include/kvm/vgic/vgic.h
+++ b/include/kvm/vgic/vgic.h
@@ -0,0 +1,246 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, 2016 ARM Ltd.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+#ifndef __ASM_ARM_KVM_VGIC_VGIC_H
			
 
				+#define __ASM_ARM_KVM_VGIC_VGIC_H
			
 
				+
			
 
				+#include <linux/kernel.h>
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/irqreturn.h>
			
 
				+#include <linux/spinlock.h>
			
 
				+#include <linux/types.h>
			
 
				+#include <kvm/iodev.h>
			
 
				+
			
 
				+#define VGIC_V3_MAX_CPUS	255
			
 
				+#define VGIC_V2_MAX_CPUS	8
			
 
				+#define VGIC_NR_IRQS_LEGACY     256
			
 
				+#define VGIC_NR_SGIS		16
			
 
				+#define VGIC_NR_PPIS		16
			
 
				+#define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
			
 
				+#define VGIC_MAX_PRIVATE	(VGIC_NR_PRIVATE_IRQS - 1)
			
 
				+#define VGIC_MAX_SPI		1019
			
 
				+#define VGIC_MAX_RESERVED	1023
			
 
				+#define VGIC_MIN_LPI		8192
			
 
				+
			
 
				+enum vgic_type {
			
 
				+	VGIC_V2,		/* Good ol' GICv2 */
			
 
				+	VGIC_V3,		/* New fancy GICv3 */
			
 
				+};
			
 
				+
			
 
				+/* Same for all guests, as it depends only on the _host's_ GIC model */
			
 
				+struct vgic_global {
			
 
				+	/* type of the host GIC */
			
 
				+	enum vgic_type		type;
			
 
				+
			
 
				+	/* Physical address of vgic virtual cpu interface */
			
 
				+	phys_addr_t		vcpu_base;
			
 
				+
			
 
				+	/* virtual control interface mapping */
			
 
				+	void __iomem		*vctrl_base;
			
 
				+
			
 
				+	/* Number of implemented list registers */
			
 
				+	int			nr_lr;
			
 
				+
			
 
				+	/* Maintenance IRQ number */
			
 
				+	unsigned int		maint_irq;
			
 
				+
			
 
				+	/* maximum number of VCPUs allowed (GICv2 limits us to 8) */
			
 
				+	int			max_gic_vcpus;
			
 
				+
			
 
				+	/* Only needed for the legacy KVM_CREATE_IRQCHIP */
			
 
				+	bool			can_emulate_gicv2;
			
 
				+};
			
 
				+
			
 
				+extern struct vgic_global kvm_vgic_global_state;
			
 
				+
			
 
				+#define VGIC_V2_MAX_LRS		(1 << 6)
			
 
				+#define VGIC_V3_MAX_LRS		16
			
 
				+#define VGIC_V3_LR_INDEX(lr)	(VGIC_V3_MAX_LRS - 1 - (lr))
			
 
				+
			
 
				+enum vgic_irq_config {
			
 
				+	VGIC_CONFIG_EDGE = 0,
			
 
				+	VGIC_CONFIG_LEVEL
			
 
				+};
			
 
				+
			
 
				+struct vgic_irq {
			
 
				+	spinlock_t irq_lock;		/* Protects the content of the struct */
			
 
				+	struct list_head ap_list;
			
 
				+
			
 
				+	struct kvm_vcpu *vcpu;		/* SGIs and PPIs: The VCPU
			
 
				+					 * SPIs and LPIs: The VCPU whose ap_list
			
 
				+					 * this is queued on.
			
 
				+					 */
			
 
				+
			
 
				+	struct kvm_vcpu *target_vcpu;	/* The VCPU that this interrupt should
			
 
				+					 * be sent to, as a result of the
			
 
				+					 * targets reg (v2) or the
			
 
				+					 * affinity reg (v3).
			
 
				+					 */
			
 
				+
			
 
				+	u32 intid;			/* Guest visible INTID */
			
 
				+	bool pending;
			
 
				+	bool line_level;		/* Level only */
			
 
				+	bool soft_pending;		/* Level only */
			
 
				+	bool active;			/* not used for LPIs */
			
 
				+	bool enabled;
			
 
				+	bool hw;			/* Tied to HW IRQ */
			
 
				+	u32 hwintid;			/* HW INTID number */
			
 
				+	union {
			
 
				+		u8 targets;			/* GICv2 target VCPUs mask */
			
 
				+		u32 mpidr;			/* GICv3 target VCPU */
			
 
				+	};
			
 
				+	u8 source;			/* GICv2 SGIs only */
			
 
				+	u8 priority;
			
 
				+	enum vgic_irq_config config;	/* Level or edge */
			
 
				+};
			
 
				+
			
 
				+struct vgic_register_region;
			
 
				+
			
 
				+struct vgic_io_device {
			
 
				+	gpa_t base_addr;
			
 
				+	struct kvm_vcpu *redist_vcpu;
			
 
				+	const struct vgic_register_region *regions;
			
 
				+	int nr_regions;
			
 
				+	struct kvm_io_device dev;
			
 
				+};
			
 
				+
			
 
				+struct vgic_dist {
			
 
				+	bool			in_kernel;
			
 
				+	bool			ready;
			
 
				+	bool			initialized;
			
 
				+
			
 
				+	/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
			
 
				+	u32			vgic_model;
			
 
				+
			
 
				+	int			nr_spis;
			
 
				+
			
 
				+	/* TODO: Consider moving to global state */
			
 
				+	/* Virtual control interface mapping */
			
 
				+	void __iomem		*vctrl_base;
			
 
				+
			
 
				+	/* base addresses in guest physical address space: */
			
 
				+	gpa_t			vgic_dist_base;		/* distributor */
			
 
				+	union {
			
 
				+		/* either a GICv2 CPU interface */
			
 
				+		gpa_t			vgic_cpu_base;
			
 
				+		/* or a number of GICv3 redistributor regions */
			
 
				+		gpa_t			vgic_redist_base;
			
 
				+	};
			
 
				+
			
 
				+	/* distributor enabled */
			
 
				+	bool			enabled;
			
 
				+
			
 
				+	struct vgic_irq		*spis;
			
 
				+
			
 
				+	struct vgic_io_device	dist_iodev;
			
 
				+	struct vgic_io_device	*redist_iodevs;
			
 
				+};
			
 
				+
			
 
				+struct vgic_v2_cpu_if {
			
 
				+	u32		vgic_hcr;
			
 
				+	u32		vgic_vmcr;
			
 
				+	u32		vgic_misr;	/* Saved only */
			
 
				+	u64		vgic_eisr;	/* Saved only */
			
 
				+	u64		vgic_elrsr;	/* Saved only */
			
 
				+	u32		vgic_apr;
			
 
				+	u32		vgic_lr[VGIC_V2_MAX_LRS];
			
 
				+};
			
 
				+
			
 
				+struct vgic_v3_cpu_if {
			
 
				+#ifdef CONFIG_KVM_ARM_VGIC_V3
			
 
				+	u32		vgic_hcr;
			
 
				+	u32		vgic_vmcr;
			
 
				+	u32		vgic_sre;	/* Restored only, change ignored */
			
 
				+	u32		vgic_misr;	/* Saved only */
			
 
				+	u32		vgic_eisr;	/* Saved only */
			
 
				+	u32		vgic_elrsr;	/* Saved only */
			
 
				+	u32		vgic_ap0r[4];
			
 
				+	u32		vgic_ap1r[4];
			
 
				+	u64		vgic_lr[VGIC_V3_MAX_LRS];
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+struct vgic_cpu {
			
 
				+	/* CPU vif control registers for world switch */
			
 
				+	union {
			
 
				+		struct vgic_v2_cpu_if	vgic_v2;
			
 
				+		struct vgic_v3_cpu_if	vgic_v3;
			
 
				+	};
			
 
				+
			
 
				+	unsigned int used_lrs;
			
 
				+	struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
			
 
				+
			
 
				+	spinlock_t ap_list_lock;	/* Protects the ap_list */
			
 
				+
			
 
				+	/*
			
 
				+	 * List of IRQs that this VCPU should consider because they are either
			
 
				+	 * Active or Pending (hence the name; AP list), or because they recently
			
 
				+	 * were one of the two and need to be migrated off this list to another
			
 
				+	 * VCPU.
			
 
				+	 */
			
 
				+	struct list_head ap_list_head;
			
 
				+
			
 
				+	u64 live_lrs;
			
 
				+};
			
 
				+
			
 
				+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
			
 
				+void kvm_vgic_early_init(struct kvm *kvm);
			
 
				+int kvm_vgic_create(struct kvm *kvm, u32 type);
			
 
				+void kvm_vgic_destroy(struct kvm *kvm);
			
 
				+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
			
 
				+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
			
 
				+int kvm_vgic_map_resources(struct kvm *kvm);
			
 
				+int kvm_vgic_hyp_init(void);
			
 
				+
			
 
				+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
			
 
				+			bool level);
			
 
				+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
			
 
				+			       bool level);
			
 
				+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq);
			
 
				+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
			
 
				+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
			
 
				+
			
 
				+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
			
 
				+
			
 
				+#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
			
 
				+#define vgic_initialized(k)	((k)->arch.vgic.initialized)
			
 
				+#define vgic_ready(k)		((k)->arch.vgic.ready)
			
 
				+#define vgic_valid_spi(k, i)	(((i) >= VGIC_NR_PRIVATE_IRQS) && \
			
 
				+			((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))
			
 
				+
			
 
				+bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
			
 
				+void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
			
 
				+void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
			
 
				+
			
 
				+#ifdef CONFIG_KVM_ARM_VGIC_V3
			
 
				+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
			
 
				+#else
			
 
				+static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
			
 
				+{
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+ * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
			
 
				+ *
			
 
				+ * The host's GIC naturally limits the maximum amount of VCPUs a guest
			
 
				+ * can use.
			
 
				+ */
			
 
				+static inline int kvm_vgic_get_max_vcpus(void)
			
 
				+{
			
 
				+	return kvm_vgic_global_state.max_gic_vcpus;
			
 
				+}
			
 
				+
			
 
				+#endif /* __ASM_ARM_KVM_VGIC_VGIC_H */
			
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -273,6 +273,12 @@
 
				 #define ICH_LR_ACTIVE_BIT		(1ULL << 63)
			
 
				 #define ICH_LR_PHYS_ID_SHIFT		32
			
 
				 #define ICH_LR_PHYS_ID_MASK		(0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
			
 
				+#define ICH_LR_PRIORITY_SHIFT		48
			
 
				+
			
 
				+/* These are for GICv2 emulation only */
			
 
				+#define GICH_LR_VIRTUALID		(0x3ffUL << 0)
			
 
				+#define GICH_LR_PHYSID_CPUID_SHIFT	(10)
			
 
				+#define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)
			
 
				 
			
 
				 #define ICH_MISR_EOI			(1 << 0)
			
 
				 #define ICH_MISR_U			(1 << 1)
			
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -33,6 +33,7 @@
 
				 
			
 
				 #define GIC_DIST_CTRL			0x000
			
 
				 #define GIC_DIST_CTR			0x004
			
 
				+#define GIC_DIST_IIDR			0x008
			
 
				 #define GIC_DIST_IGROUP			0x080
			
 
				 #define GIC_DIST_ENABLE_SET		0x100
			
 
				 #define GIC_DIST_ENABLE_CLEAR		0x180
			
@@ -76,6 +77,7 @@
 
				 #define GICH_LR_VIRTUALID		(0x3ff << 0)
			
 
				 #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
			
 
				 #define GICH_LR_PHYSID_CPUID		(0x3ff << GICH_LR_PHYSID_CPUID_SHIFT)
			
 
				+#define GICH_LR_PRIORITY_SHIFT		23
			
 
				 #define GICH_LR_STATE			(3 << 28)
			
 
				 #define GICH_LR_PENDING_BIT		(1 << 28)
			
 
				 #define GICH_LR_ACTIVE_BIT		(1 << 29)
			
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -412,6 +412,8 @@ struct kvm {
 
				 #endif
			
 
				 	long tlbs_dirty;
			
 
				 	struct list_head devices;
			
 
				+	struct dentry *debugfs_dentry;
			
 
				+	struct kvm_stat_data **debugfs_stat_data;
			
 
				 };
			
 
				 
			
 
				 #define kvm_err(fmt, ...) \
			
@@ -991,6 +993,11 @@ enum kvm_stat_kind {
 
				 	KVM_STAT_VCPU,
			
 
				 };
			
 
				 
			
 
				+struct kvm_stat_data {
			
 
				+	int offset;
			
 
				+	struct kvm *kvm;
			
 
				+};
			
 
				+
			
 
				 struct kvm_stats_debugfs_item {
			
 
				 	const char *name;
			
 
				 	int offset;
			
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -108,7 +108,7 @@ TRACE_EVENT(kvm_ioapic_set_irq,
 
				 		__entry->coalesced	= coalesced;
			
 
				 	),
			
 
				 
			
 
				-	TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
			
 
				+	TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s",
			
 
				 		  __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
			
 
				 		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
			
 
				 		  (__entry->e & (1<<11)) ? "logical" : "physical",
			
@@ -129,7 +129,7 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
 
				 		__entry->e		= e;
			
 
				 	),
			
 
				 
			
 
				-	TP_printk("dst %x vec=%u (%s|%s|%s%s)",
			
 
				+	TP_printk("dst %x vec %u (%s|%s|%s%s)",
			
 
				 		  (u8)(__entry->e >> 56), (u8)__entry->e,
			
 
				 		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
			
 
				 		  (__entry->e & (1<<11)) ? "logical" : "physical",
			
@@ -151,7 +151,7 @@ TRACE_EVENT(kvm_msi_set_irq,
 
				 		__entry->data		= data;
			
 
				 	),
			
 
				 
			
 
				-	TP_printk("dst %u vec %x (%s|%s|%s%s)",
			
 
				+	TP_printk("dst %u vec %u (%s|%s|%s%s)",
			
 
				 		  (u8)(__entry->address >> 12), (u8)__entry->data,
			
 
				 		  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
			
 
				 		  (__entry->address & (1<<2)) ? "logical" : "physical",
			
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -16,6 +16,7 @@ help:
 
				 	@echo '  gpio                   - GPIO tools'
			
 
				 	@echo '  hv                     - tools used when in Hyper-V clients'
			
 
				 	@echo '  iio                    - IIO tools'
			
 
				+	@echo '  kvm_stat               - top-like utility for displaying kvm statistics'
			
 
				 	@echo '  lguest                 - a minimal 32-bit x86 hypervisor'
			
 
				 	@echo '  net                    - misc networking tools'
			
 
				 	@echo '  perf                   - Linux performance measurement and analysis tool'
			
@@ -110,10 +111,13 @@ tmon_install:
 
				 freefall_install:
			
 
				 	$(call descend,laptop/$(@:_install=),install)
			
 
				 
			
 
				+kvm_stat_install:
			
 
				+	$(call descend,kvm/$(@:_install=),install)
			
 
				+
			
 
				 install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
			
 
				 		perf_install selftests_install turbostat_install usb_install \
			
 
				 		virtio_install vm_install net_install x86_energy_perf_policy_install \
			
 
				-		tmon_install freefall_install objtool_install
			
 
				+		tmon_install freefall_install objtool_install kvm_stat_install
			
 
				 
			
 
				 acpi_clean:
			
 
				 	$(call descend,power/acpi,clean)
			
--- a/tools/kvm/kvm_stat/Makefile
+++ b/tools/kvm/kvm_stat/Makefile
@@ -0,0 +1,41 @@
 
				+include ../../scripts/Makefile.include
			
 
				+include ../../scripts/utilities.mak
			
 
				+BINDIR=usr/bin
			
 
				+MANDIR=usr/share/man
			
 
				+MAN1DIR=$(MANDIR)/man1
			
 
				+
			
 
				+MAN1=kvm_stat.1
			
 
				+
			
 
				+A2X=a2x
			
 
				+a2x_path := $(call get-executable,$(A2X))
			
 
				+
			
 
				+all: man
			
 
				+
			
 
				+ifneq ($(findstring $(MAKEFLAGS),s),s)
			
 
				+  ifneq ($(V),1)
			
 
				+     QUIET_A2X = @echo '  A2X     '$@;
			
 
				+  endif
			
 
				+endif
			
 
				+
			
 
				+%.1: %.txt
			
 
				+ifeq ($(a2x_path),)
			
 
				+	$(error "You need to install asciidoc for man pages")
			
 
				+else
			
 
				+	$(QUIET_A2X)$(A2X) --doctype manpage --format manpage $<
			
 
				+endif
			
 
				+
			
 
				+clean:
			
 
				+	rm -f $(MAN1)
			
 
				+
			
 
				+man: $(MAN1)
			
 
				+
			
 
				+install-man: man
			
 
				+	install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR)
			
 
				+	install -m 644 kvm_stat.1 $(INSTALL_ROOT)/$(MAN1DIR)
			
 
				+
			
 
				+install-tools:
			
 
				+	install -d -m 755 $(INSTALL_ROOT)/$(BINDIR)
			
 
				+	install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
			
 
				+
			
 
				+install: install-tools install-man
			
 
				+.PHONY: all clean man install-tools install-man install
			
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -0,0 +1,1127 @@
 
				+#!/usr/bin/python
			
 
				+#
			
 
				+# top-like utility for displaying kvm statistics
			
 
				+#
			
 
				+# Copyright 2006-2008 Qumranet Technologies
			
 
				+# Copyright 2008-2011 Red Hat, Inc.
			
 
				+#
			
 
				+# Authors:
			
 
				+#  Avi Kivity <avi@redhat.com>
			
 
				+#
			
 
				+# This work is licensed under the terms of the GNU GPL, version 2.  See
			
 
				+# the COPYING file in the top-level directory.
			
 
				+"""The kvm_stat module outputs statistics about running KVM VMs
			
 
				+
			
 
				+Three different ways of output formatting are available:
			
 
				+- as a top-like text ui
			
 
				+- in a key -> value format
			
 
				+- in an all keys, all values format
			
 
				+
			
 
				+The data is sampled from the KVM's debugfs entries and its perf events.
			
 
				+"""
			
 
				+
			
 
				+import curses
			
 
				+import sys
			
 
				+import os
			
 
				+import time
			
 
				+import optparse
			
 
				+import ctypes
			
 
				+import fcntl
			
 
				+import resource
			
 
				+import struct
			
 
				+import re
			
 
				+from collections import defaultdict
			
 
				+from time import sleep
			
 
				+
			
 
				+VMX_EXIT_REASONS = {
			
 
				+    'EXCEPTION_NMI':        0,
			
 
				+    'EXTERNAL_INTERRUPT':   1,
			
 
				+    'TRIPLE_FAULT':         2,
			
 
				+    'PENDING_INTERRUPT':    7,
			
 
				+    'NMI_WINDOW':           8,
			
 
				+    'TASK_SWITCH':          9,
			
 
				+    'CPUID':                10,
			
 
				+    'HLT':                  12,
			
 
				+    'INVLPG':               14,
			
 
				+    'RDPMC':                15,
			
 
				+    'RDTSC':                16,
			
 
				+    'VMCALL':               18,
			
 
				+    'VMCLEAR':              19,
			
 
				+    'VMLAUNCH':             20,
			
 
				+    'VMPTRLD':              21,
			
 
				+    'VMPTRST':              22,
			
 
				+    'VMREAD':               23,
			
 
				+    'VMRESUME':             24,
			
 
				+    'VMWRITE':              25,
			
 
				+    'VMOFF':                26,
			
 
				+    'VMON':                 27,
			
 
				+    'CR_ACCESS':            28,
			
 
				+    'DR_ACCESS':            29,
			
 
				+    'IO_INSTRUCTION':       30,
			
 
				+    'MSR_READ':             31,
			
 
				+    'MSR_WRITE':            32,
			
 
				+    'INVALID_STATE':        33,
			
 
				+    'MWAIT_INSTRUCTION':    36,
			
 
				+    'MONITOR_INSTRUCTION':  39,
			
 
				+    'PAUSE_INSTRUCTION':    40,
			
 
				+    'MCE_DURING_VMENTRY':   41,
			
 
				+    'TPR_BELOW_THRESHOLD':  43,
			
 
				+    'APIC_ACCESS':          44,
			
 
				+    'EPT_VIOLATION':        48,
			
 
				+    'EPT_MISCONFIG':        49,
			
 
				+    'WBINVD':               54,
			
 
				+    'XSETBV':               55,
			
 
				+    'APIC_WRITE':           56,
			
 
				+    'INVPCID':              58,
			
 
				+}
			
 
				+
			
 
				+SVM_EXIT_REASONS = {
			
 
				+    'READ_CR0':       0x000,
			
 
				+    'READ_CR3':       0x003,
			
 
				+    'READ_CR4':       0x004,
			
 
				+    'READ_CR8':       0x008,
			
 
				+    'WRITE_CR0':      0x010,
			
 
				+    'WRITE_CR3':      0x013,
			
 
				+    'WRITE_CR4':      0x014,
			
 
				+    'WRITE_CR8':      0x018,
			
 
				+    'READ_DR0':       0x020,
			
 
				+    'READ_DR1':       0x021,
			
 
				+    'READ_DR2':       0x022,
			
 
				+    'READ_DR3':       0x023,
			
 
				+    'READ_DR4':       0x024,
			
 
				+    'READ_DR5':       0x025,
			
 
				+    'READ_DR6':       0x026,
			
 
				+    'READ_DR7':       0x027,
			
 
				+    'WRITE_DR0':      0x030,
			
 
				+    'WRITE_DR1':      0x031,
			
 
				+    'WRITE_DR2':      0x032,
			
 
				+    'WRITE_DR3':      0x033,
			
 
				+    'WRITE_DR4':      0x034,
			
 
				+    'WRITE_DR5':      0x035,
			
 
				+    'WRITE_DR6':      0x036,
			
 
				+    'WRITE_DR7':      0x037,
			
 
				+    'EXCP_BASE':      0x040,
			
 
				+    'INTR':           0x060,
			
 
				+    'NMI':            0x061,
			
 
				+    'SMI':            0x062,
			
 
				+    'INIT':           0x063,
			
 
				+    'VINTR':          0x064,
			
 
				+    'CR0_SEL_WRITE':  0x065,
			
 
				+    'IDTR_READ':      0x066,
			
 
				+    'GDTR_READ':      0x067,
			
 
				+    'LDTR_READ':      0x068,
			
 
				+    'TR_READ':        0x069,
			
 
				+    'IDTR_WRITE':     0x06a,
			
 
				+    'GDTR_WRITE':     0x06b,
			
 
				+    'LDTR_WRITE':     0x06c,
			
 
				+    'TR_WRITE':       0x06d,
			
 
				+    'RDTSC':          0x06e,
			
 
				+    'RDPMC':          0x06f,
			
 
				+    'PUSHF':          0x070,
			
 
				+    'POPF':           0x071,
			
 
				+    'CPUID':          0x072,
			
 
				+    'RSM':            0x073,
			
 
				+    'IRET':           0x074,
			
 
				+    'SWINT':          0x075,
			
 
				+    'INVD':           0x076,
			
 
				+    'PAUSE':          0x077,
			
 
				+    'HLT':            0x078,
			
 
				+    'INVLPG':         0x079,
			
 
				+    'INVLPGA':        0x07a,
			
 
				+    'IOIO':           0x07b,
			
 
				+    'MSR':            0x07c,
			
 
				+    'TASK_SWITCH':    0x07d,
			
 
				+    'FERR_FREEZE':    0x07e,
			
 
				+    'SHUTDOWN':       0x07f,
			
 
				+    'VMRUN':          0x080,
			
 
				+    'VMMCALL':        0x081,
			
 
				+    'VMLOAD':         0x082,
			
 
				+    'VMSAVE':         0x083,
			
 
				+    'STGI':           0x084,
			
 
				+    'CLGI':           0x085,
			
 
				+    'SKINIT':         0x086,
			
 
				+    'RDTSCP':         0x087,
			
 
				+    'ICEBP':          0x088,
			
 
				+    'WBINVD':         0x089,
			
 
				+    'MONITOR':        0x08a,
			
 
				+    'MWAIT':          0x08b,
			
 
				+    'MWAIT_COND':     0x08c,
			
 
				+    'XSETBV':         0x08d,
			
 
				+    'NPF':            0x400,
			
 
				+}
			
 
				+
			
 
				+# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
			
 
				+AARCH64_EXIT_REASONS = {
			
 
				+    'UNKNOWN':      0x00,
			
 
				+    'WFI':          0x01,
			
 
				+    'CP15_32':      0x03,
			
 
				+    'CP15_64':      0x04,
			
 
				+    'CP14_MR':      0x05,
			
 
				+    'CP14_LS':      0x06,
			
 
				+    'FP_ASIMD':     0x07,
			
 
				+    'CP10_ID':      0x08,
			
 
				+    'CP14_64':      0x0C,
			
 
				+    'ILL_ISS':      0x0E,
			
 
				+    'SVC32':        0x11,
			
 
				+    'HVC32':        0x12,
			
 
				+    'SMC32':        0x13,
			
 
				+    'SVC64':        0x15,
			
 
				+    'HVC64':        0x16,
			
 
				+    'SMC64':        0x17,
			
 
				+    'SYS64':        0x18,
			
 
				+    'IABT':         0x20,
			
 
				+    'IABT_HYP':     0x21,
			
 
				+    'PC_ALIGN':     0x22,
			
 
				+    'DABT':         0x24,
			
 
				+    'DABT_HYP':     0x25,
			
 
				+    'SP_ALIGN':     0x26,
			
 
				+    'FP_EXC32':     0x28,
			
 
				+    'FP_EXC64':     0x2C,
			
 
				+    'SERROR':       0x2F,
			
 
				+    'BREAKPT':      0x30,
			
 
				+    'BREAKPT_HYP':  0x31,
			
 
				+    'SOFTSTP':      0x32,
			
 
				+    'SOFTSTP_HYP':  0x33,
			
 
				+    'WATCHPT':      0x34,
			
 
				+    'WATCHPT_HYP':  0x35,
			
 
				+    'BKPT32':       0x38,
			
 
				+    'VECTOR32':     0x3A,
			
 
				+    'BRK64':        0x3C,
			
 
				+}
			
 
				+
			
 
				+# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
			
 
				+USERSPACE_EXIT_REASONS = {
			
 
				+    'UNKNOWN':          0,
			
 
				+    'EXCEPTION':        1,
			
 
				+    'IO':               2,
			
 
				+    'HYPERCALL':        3,
			
 
				+    'DEBUG':            4,
			
 
				+    'HLT':              5,
			
 
				+    'MMIO':             6,
			
 
				+    'IRQ_WINDOW_OPEN':  7,
			
 
				+    'SHUTDOWN':         8,
			
 
				+    'FAIL_ENTRY':       9,
			
 
				+    'INTR':             10,
			
 
				+    'SET_TPR':          11,
			
 
				+    'TPR_ACCESS':       12,
			
 
				+    'S390_SIEIC':       13,
			
 
				+    'S390_RESET':       14,
			
 
				+    'DCR':              15,
			
 
				+    'NMI':              16,
			
 
				+    'INTERNAL_ERROR':   17,
			
 
				+    'OSI':              18,
			
 
				+    'PAPR_HCALL':       19,
			
 
				+    'S390_UCONTROL':    20,
			
 
				+    'WATCHDOG':         21,
			
 
				+    'S390_TSCH':        22,
			
 
				+    'EPR':              23,
			
 
				+    'SYSTEM_EVENT':     24,
			
 
				+}
			
 
				+
			
 
				+IOCTL_NUMBERS = {
			
 
				+    'SET_FILTER':  0x40082406,
			
 
				+    'ENABLE':      0x00002400,
			
 
				+    'DISABLE':     0x00002401,
			
 
				+    'RESET':       0x00002403,
			
 
				+}
			
 
				+
			
 
				+class Arch(object):
			
 
				+    """Encapsulates global architecture specific data.
			
 
				+
			
 
				+    Contains the performance event open syscall and ioctl numbers, as
			
 
				+    well as the VM exit reasons for the architecture it runs on.
			
 
				+
			
 
				+    """
			
 
				+    @staticmethod
			
 
				+    def get_arch():
			
 
				+        machine = os.uname()[4]
			
 
				+
			
 
				+        if machine.startswith('ppc'):
			
 
				+            return ArchPPC()
			
 
				+        elif machine.startswith('aarch64'):
			
 
				+            return ArchA64()
			
 
				+        elif machine.startswith('s390'):
			
 
				+            return ArchS390()
			
 
				+        else:
			
 
				+            # X86_64
			
 
				+            for line in open('/proc/cpuinfo'):
			
 
				+                if not line.startswith('flags'):
			
 
				+                    continue
			
 
				+
			
 
				+                flags = line.split()
			
 
				+                if 'vmx' in flags:
			
 
				+                    return ArchX86(VMX_EXIT_REASONS)
			
 
				+                if 'svm' in flags:
			
 
				+                    return ArchX86(SVM_EXIT_REASONS)
			
 
				+                return
			
 
				+
			
 
				+class ArchX86(Arch):
			
 
				+    def __init__(self, exit_reasons):
			
 
				+        self.sc_perf_evt_open = 298
			
 
				+        self.ioctl_numbers = IOCTL_NUMBERS
			
 
				+        self.exit_reasons = exit_reasons
			
 
				+
			
 
				+class ArchPPC(Arch):
			
 
				+    def __init__(self):
			
 
				+        self.sc_perf_evt_open = 319
			
 
				+        self.ioctl_numbers = IOCTL_NUMBERS
			
 
				+        self.ioctl_numbers['ENABLE'] = 0x20002400
			
 
				+        self.ioctl_numbers['DISABLE'] = 0x20002401
			
 
				+        self.ioctl_numbers['RESET'] = 0x20002403
			
 
				+
			
 
				+        # PPC comes in 32 and 64 bit and some generated ioctl
			
 
				+        # numbers depend on the wordsize.
			
 
				+        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
			
 
				+        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
			
 
				+        self.exit_reasons = {}
			
 
				+
			
 
				+class ArchA64(Arch):
			
 
				+    def __init__(self):
			
 
				+        self.sc_perf_evt_open = 241
			
 
				+        self.ioctl_numbers = IOCTL_NUMBERS
			
 
				+        self.exit_reasons = AARCH64_EXIT_REASONS
			
 
				+
			
 
				+class ArchS390(Arch):
			
 
				+    def __init__(self):
			
 
				+        self.sc_perf_evt_open = 331
			
 
				+        self.ioctl_numbers = IOCTL_NUMBERS
			
 
				+        self.exit_reasons = None
			
 
				+
			
 
				+ARCH = Arch.get_arch()
			
 
				+
			
 
				+
			
 
				+def walkdir(path):
			
 
				+    """Returns os.walk() data for specified directory.
			
 
				+
			
 
				+    As it is only a wrapper it returns the same 3-tuple of (dirpath,
			
 
				+    dirnames, filenames).
			
 
				+    """
			
 
				+    return next(os.walk(path))
			
 
				+
			
 
				+
			
 
				+def parse_int_list(list_string):
			
 
				+    """Returns an int list from a string of comma separated integers and
			
 
				+    integer ranges."""
			
 
				+    integers = []
			
 
				+    members = list_string.split(',')
			
 
				+
			
 
				+    for member in members:
			
 
				+        if '-' not in member:
			
 
				+            integers.append(int(member))
			
 
				+        else:
			
 
				+            int_range = member.split('-')
			
 
				+            integers.extend(range(int(int_range[0]),
			
 
				+                                  int(int_range[1]) + 1))
			
 
				+
			
 
				+    return integers
			
 
				+
			
 
				+
			
 
				+def get_online_cpus():
			
 
				+    """Returns a list of cpu id integers."""
			
 
				+    with open('/sys/devices/system/cpu/online') as cpu_list:
			
 
				+        cpu_string = cpu_list.readline()
			
 
				+        return parse_int_list(cpu_string)
			
 
				+
			
 
				+
			
 
				+def get_filters():
			
 
				+    """Returns a dict of trace events, their filter ids and
			
 
				+    the values that can be filtered.
			
 
				+
			
 
				+    Trace events can be filtered for special values by setting a
			
 
				+    filter string via an ioctl. The string normally has the format
			
 
				+    identifier==value. For each filter a new event will be created, to
			
 
				+    be able to distinguish the events.
			
 
				+
			
 
				+    """
			
 
				+    filters = {}
			
 
				+    filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
			
 
				+    if ARCH.exit_reasons:
			
 
				+        filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
			
 
				+    return filters
			
 
				+
			
 
				+libc = ctypes.CDLL('libc.so.6', use_errno=True)
			
 
				+syscall = libc.syscall
			
 
				+
			
 
				+class perf_event_attr(ctypes.Structure):
			
 
				+    """Struct that holds the necessary data to set up a trace event.
			
 
				+
			
 
				+    For an extensive explanation see perf_event_open(2) and
			
 
				+    include/uapi/linux/perf_event.h, struct perf_event_attr
			
 
				+
			
 
				+    All fields that are not initialized in the constructor are 0.
			
 
				+
			
 
				+    """
			
 
				+    _fields_ = [('type', ctypes.c_uint32),
			
 
				+                ('size', ctypes.c_uint32),
			
 
				+                ('config', ctypes.c_uint64),
			
 
				+                ('sample_freq', ctypes.c_uint64),
			
 
				+                ('sample_type', ctypes.c_uint64),
			
 
				+                ('read_format', ctypes.c_uint64),
			
 
				+                ('flags', ctypes.c_uint64),
			
 
				+                ('wakeup_events', ctypes.c_uint32),
			
 
				+                ('bp_type', ctypes.c_uint32),
			
 
				+                ('bp_addr', ctypes.c_uint64),
			
 
				+                ('bp_len', ctypes.c_uint64),
			
 
				+                ]
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(self.__class__, self).__init__()
			
 
				+        self.type = PERF_TYPE_TRACEPOINT
			
 
				+        self.size = ctypes.sizeof(self)
			
 
				+        self.read_format = PERF_FORMAT_GROUP
			
 
				+
			
 
				+def perf_event_open(attr, pid, cpu, group_fd, flags):
			
 
				+    """Wrapper for the sys_perf_evt_open() syscall.
			
 
				+
			
 
				+    Used to set up performance events, returns a file descriptor or -1
			
 
				+    on error.
			
 
				+
			
 
				+    Attributes are:
			
 
				+    - syscall number
			
 
				+    - struct perf_event_attr *
			
 
				+    - pid or -1 to monitor all pids
			
 
				+    - cpu number or -1 to monitor all cpus
			
 
				+    - The file descriptor of the group leader or -1 to create a group.
			
 
				+    - flags
			
 
				+
			
 
				+    """
			
 
				+    return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
			
 
				+                   ctypes.c_int(pid), ctypes.c_int(cpu),
			
 
				+                   ctypes.c_int(group_fd), ctypes.c_long(flags))
			
 
				+
			
 
				+PERF_TYPE_TRACEPOINT = 2
			
 
				+PERF_FORMAT_GROUP = 1 << 3
			
 
				+
			
 
				+PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
			
 
				+PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
			
 
				+
			
 
				+class Group(object):
			
 
				+    """Represents a perf event group."""
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        self.events = []
			
 
				+
			
 
				+    def add_event(self, event):
			
 
				+        self.events.append(event)
			
 
				+
			
 
				+    def read(self):
			
 
				+        """Returns a dict with 'event name: value' for all events in the
			
 
				+        group.
			
 
				+
			
 
				+        Values are read by reading from the file descriptor of the
			
 
				+        event that is the group leader. See perf_event_open(2) for
			
 
				+        details.
			
 
				+
			
 
				+        Read format for the used event configuration is:
			
 
				+        struct read_format {
			
 
				+            u64 nr; /* The number of events */
			
 
				+            struct {
			
 
				+                u64 value; /* The value of the event */
			
 
				+            } values[nr];
			
 
				+        };
			
 
				+
			
 
				+        """
			
 
				+        length = 8 * (1 + len(self.events))
			
 
				+        read_format = 'xxxxxxxx' + 'Q' * len(self.events)
			
 
				+        return dict(zip([event.name for event in self.events],
			
 
				+                        struct.unpack(read_format,
			
 
				+                                      os.read(self.events[0].fd, length))))
			
 
				+
			
 
				+class Event(object):
			
 
				+    """Represents a performance event and manages its life cycle."""
			
 
				+    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
			
 
				+                 trace_filter, trace_set='kvm'):
			
 
				+        self.name = name
			
 
				+        self.fd = None
			
 
				+        self.setup_event(group, trace_cpu, trace_pid, trace_point,
			
 
				+                         trace_filter, trace_set)
			
 
				+
			
 
				+    def __del__(self):
			
 
				+        """Closes the event's file descriptor.
			
 
				+
			
 
				+        As no python file object was created for the file descriptor,
			
 
				+        python will not reference count the descriptor and will not
			
 
				+        close it itself automatically, so we do it.
			
 
				+
			
 
				+        """
			
 
				+        if self.fd:
			
 
				+            os.close(self.fd)
			
 
				+
			
 
				+    def setup_event_attribute(self, trace_set, trace_point):
			
 
				+        """Returns an initialized ctype perf_event_attr struct."""
			
 
				+
			
 
				+        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
			
 
				+                               trace_point, 'id')
			
 
				+
			
 
				+        event_attr = perf_event_attr()
			
 
				+        event_attr.config = int(open(id_path).read())
			
 
				+        return event_attr
			
 
				+
			
 
				+    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
			
 
				+                    trace_filter, trace_set):
			
 
				+        """Sets up the perf event in Linux.
			
 
				+
			
 
				+        Issues the syscall to register the event in the kernel and
			
 
				+        then sets the optional filter.
			
 
				+
			
 
				+        """
			
 
				+
			
 
				+        event_attr = self.setup_event_attribute(trace_set, trace_point)
			
 
				+
			
 
				+        # First event will be group leader.
			
 
				+        group_leader = -1
			
 
				+
			
 
				+        # All others have to pass the leader's descriptor instead.
			
 
				+        if group.events:
			
 
				+            group_leader = group.events[0].fd
			
 
				+
			
 
				+        fd = perf_event_open(event_attr, trace_pid,
			
 
				+                             trace_cpu, group_leader, 0)
			
 
				+        if fd == -1:
			
 
				+            err = ctypes.get_errno()
			
 
				+            raise OSError(err, os.strerror(err),
			
 
				+                          'while calling sys_perf_event_open().')
			
 
				+
			
 
				+        if trace_filter:
			
 
				+            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
			
 
				+                        trace_filter)
			
 
				+
			
 
				+        self.fd = fd
			
 
				+
			
 
				+    def enable(self):
			
 
				+        """Enables the trace event in the kernel.
			
 
				+
			
 
				+        Enabling the group leader makes reading counters from it and the
			
 
				+        events under it possible.
			
 
				+
			
 
				+        """
			
 
				+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
			
 
				+
			
 
				+    def disable(self):
			
 
				+        """Disables the trace event in the kernel.
			
 
				+
			
 
				+        Disabling the group leader makes reading all counters under it
			
 
				+        impossible.
			
 
				+
			
 
				+        """
			
 
				+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
			
 
				+
			
 
				+    def reset(self):
			
 
				+        """Resets the count of the trace event in the kernel."""
			
 
				+        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
			
 
				+
			
 
				+class TracepointProvider(object):
			
 
				+    """Data provider for the stats class.
			
 
				+
			
 
				+    Manages the events/groups from which it acquires its data.
			
 
				+
			
 
				+    """
			
 
    def __init__(self):
        # A pid of 0 makes setup_traces() use the online cpus instead
        # of the threads of a monitored process.
        self._pid = 0
        self.group_leaders = []
        # The filters must exist before the field list is built, as
        # get_available_fields() looks them up.
        self.filters = get_filters()
        self._fields = self.get_available_fields()
			
 
				+
			
 
				+    def get_available_fields(self):
			
 
				+        """Returns a list of available event's of format 'event name(filter
			
 
				+        name)'.
			
 
				+
			
 
				+        All available events have directories under
			
 
				+        /sys/kernel/debug/tracing/events/ which export information
			
 
				+        about the specific event. Therefore, listing the dirs gives us
			
 
				+        a list of all available events.
			
 
				+
			
 
				+        Some events like the vm exit reasons can be filtered for
			
 
				+        specific values. To account for that, the routine below
			
 
				+        creates special fields with the following format:
			
 
				+        event name(filter name)
			
 
				+
			
 
				+        """
			
 
				+        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
			
 
				+        fields = walkdir(path)[1]
			
 
				+        extra = []
			
 
				+        for field in fields:
			
 
				+            if field in self.filters:
			
 
				+                filter_name_, filter_dicts = self.filters[field]
			
 
				+                for name in filter_dicts:
			
 
				+                    extra.append(field + '(' + name + ')')
			
 
				+        fields += extra
			
 
				+        return fields
			
 
				+
			
 
				+    def setup_traces(self):
			
 
				+        """Creates all event and group objects needed to be able to retrieve
			
 
				+        data."""
			
 
				+        if self._pid > 0:
			
 
				+            # Fetch list of all threads of the monitored pid, as qemu
			
 
				+            # starts a thread for each vcpu.
			
 
				+            path = os.path.join('/proc', str(self._pid), 'task')
			
 
				+            groupids = walkdir(path)[1]
			
 
				+        else:
			
 
				+            groupids = get_online_cpus()
			
 
				+
			
 
				+        # The constant is needed as a buffer for python libs, std
			
 
				+        # streams and other files that the script opens.
			
 
				+        newlim = len(groupids) * len(self._fields) + 50
			
 
				+        try:
			
 
				+            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
			
 
				+
			
 
				+            if hardlim < newlim:
			
 
				+                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
			
 
				+                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
			
 
				+            else:
			
 
				+                # Raising the soft limit is sufficient.
			
 
				+                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
			
 
				+
			
 
				+        except ValueError:
			
 
				+            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
			
 
				+
			
 
				+        for groupid in groupids:
			
 
				+            group = Group()
			
 
				+            for name in self._fields:
			
 
				+                tracepoint = name
			
 
				+                tracefilter = None
			
 
				+                match = re.match(r'(.*)\((.*)\)', name)
			
 
				+                if match:
			
 
				+                    tracepoint, sub = match.groups()
			
 
				+                    tracefilter = ('%s==%d\0' %
			
 
				+                                   (self.filters[tracepoint][0],
			
 
				+                                    self.filters[tracepoint][1][sub]))
			
 
				+
			
 
				+                # From perf_event_open(2):
			
 
				+                # pid > 0 and cpu == -1
			
 
				+                # This measures the specified process/thread on any CPU.
			
 
				+                #
			
 
				+                # pid == -1 and cpu >= 0
			
 
				+                # This measures all processes/threads on the specified CPU.
			
 
				+                trace_cpu = groupid if self._pid == 0 else -1
			
 
				+                trace_pid = int(groupid) if self._pid != 0 else -1
			
 
				+
			
 
				+                group.add_event(Event(name=name,
			
 
				+                                      group=group,
			
 
				+                                      trace_cpu=trace_cpu,
			
 
				+                                      trace_pid=trace_pid,
			
 
				+                                      trace_point=tracepoint,
			
 
				+                                      trace_filter=tracefilter))
			
 
				+
			
 
				+            self.group_leaders.append(group)
			
 
				+
			
 
				+    def available_fields(self):
			
 
				+        return self.get_available_fields()
			
 
				+
			
 
				+    @property
			
 
				+    def fields(self):
			
 
				+        return self._fields
			
 
				+
			
 
				+    @fields.setter
			
 
				+    def fields(self, fields):
			
 
				+        """Enables/disables the (un)wanted events"""
			
 
				+        self._fields = fields
			
 
				+        for group in self.group_leaders:
			
 
				+            for index, event in enumerate(group.events):
			
 
				+                if event.name in fields:
			
 
				+                    event.reset()
			
 
				+                    event.enable()
			
 
				+                else:
			
 
				+                    # Do not disable the group leader.
			
 
				+                    # It would disable all of its events.
			
 
				+                    if index != 0:
			
 
				+                        event.disable()
			
 
				+
			
 
				+    @property
			
 
				+    def pid(self):
			
 
				+        return self._pid
			
 
				+
			
 
				+    @pid.setter
			
 
				+    def pid(self, pid):
			
 
				+        """Changes the monitored pid by setting new traces."""
			
 
				+        self._pid = pid
			
 
				+        # The garbage collector will get rid of all Event/Group
			
 
				+        # objects and open files after removing the references.
			
 
				+        self.group_leaders = []
			
 
				+        self.setup_traces()
			
 
				+        self.fields = self._fields
			
 
				+
			
 
				+    def read(self):
			
 
				+        """Returns 'event name: current value' for all enabled events."""
			
 
				+        ret = defaultdict(int)
			
 
				+        for group in self.group_leaders:
			
 
				+            for name, val in group.read().iteritems():
			
 
				+                if name in self._fields:
			
 
				+                    ret[name] += val
			
 
				+        return ret
			
 
				+
			
 
				+class DebugfsProvider(object):
			
 
				+    """Provides data from the files that KVM creates in the kvm debugfs
			
 
				+    folder."""
			
 
				+    def __init__(self):
			
 
				+        self._fields = self.get_available_fields()
			
 
				+        self._pid = 0
			
 
				+        self.do_read = True
			
 
				+
			
 
				+    def get_available_fields(self):
			
 
				+        """Returns a list of available fields.
			
 
				+
			
 
				+        The fields are all available KVM debugfs files
			
 
				+
			
 
				+        """
			
 
				+        return walkdir(PATH_DEBUGFS_KVM)[2]
			
 
				+
			
 
				+    @property
			
 
				+    def fields(self):
			
 
				+        return self._fields
			
 
				+
			
 
				+    @fields.setter
			
 
				+    def fields(self, fields):
			
 
				+        self._fields = fields
			
 
				+
			
 
				+    @property
			
 
				+    def pid(self):
			
 
				+        return self._pid
			
 
				+
			
 
				+    @pid.setter
			
 
				+    def pid(self, pid):
			
 
				+        if pid != 0:
			
 
				+            self._pid = pid
			
 
				+
			
 
				+            vms = walkdir(PATH_DEBUGFS_KVM)[1]
			
 
				+            if len(vms) == 0:
			
 
				+                self.do_read = False
			
 
				+
			
 
				+            self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
			
 
				+
			
 
				+        else:
			
 
				+            self.paths = ['']
			
 
				+            self.do_read = True
			
 
				+
			
 
				+    def read(self):
			
 
				+        """Returns a dict with format:'file name / field -> current value'."""
			
 
				+        results = {}
			
 
				+
			
 
				+        # If no debugfs filtering support is available, then don't read.
			
 
				+        if not self.do_read:
			
 
				+            return results
			
 
				+
			
 
				+        for path in self.paths:
			
 
				+            for field in self._fields:
			
 
				+                results[field] = results.get(field, 0) \
			
 
				+                                 + self.read_field(field, path)
			
 
				+
			
 
				+        return results
			
 
				+
			
 
				+    def read_field(self, field, path):
			
 
				+        """Returns the value of a single field from a specific VM."""
			
 
				+        try:
			
 
				+            return int(open(os.path.join(PATH_DEBUGFS_KVM,
			
 
				+                                         path,
			
 
				+                                         field))
			
 
				+                       .read())
			
 
				+        except IOError:
			
 
				+            return 0
			
 
				+
			
 
				+class Stats(object):
			
 
				+    """Manages the data providers and the data they provide.
			
 
				+
			
 
				+    It is used to set filters on the provider's data and collect all
			
 
				+    provider data.
			
 
				+
			
 
				+    """
			
 
				+    def __init__(self, providers, pid, fields=None):
			
 
				+        self.providers = providers
			
 
				+        self._pid_filter = pid
			
 
				+        self._fields_filter = fields
			
 
				+        self.values = {}
			
 
				+        self.update_provider_pid()
			
 
				+        self.update_provider_filters()
			
 
				+
			
 
				+    def update_provider_filters(self):
			
 
				+        """Propagates fields filters to providers."""
			
 
				+        def wanted(key):
			
 
				+            if not self._fields_filter:
			
 
				+                return True
			
 
				+            return re.match(self._fields_filter, key) is not None
			
 
				+
			
 
				+        # As we reset the counters when updating the fields we can
			
 
				+        # also clear the cache of old values.
			
 
				+        self.values = {}
			
 
				+        for provider in self.providers:
			
 
				+            provider_fields = [key for key in provider.get_available_fields()
			
 
				+                               if wanted(key)]
			
 
				+            provider.fields = provider_fields
			
 
				+
			
 
				+    def update_provider_pid(self):
			
 
				+        """Propagates pid filters to providers."""
			
 
				+        for provider in self.providers:
			
 
				+            provider.pid = self._pid_filter
			
 
				+
			
 
				+    @property
			
 
				+    def fields_filter(self):
			
 
				+        return self._fields_filter
			
 
				+
			
 
				+    @fields_filter.setter
			
 
				+    def fields_filter(self, fields_filter):
			
 
				+        self._fields_filter = fields_filter
			
 
				+        self.update_provider_filters()
			
 
				+
			
 
				+    @property
			
 
				+    def pid_filter(self):
			
 
				+        return self._pid_filter
			
 
				+
			
 
				+    @pid_filter.setter
			
 
				+    def pid_filter(self, pid):
			
 
				+        self._pid_filter = pid
			
 
				+        self.values = {}
			
 
				+        self.update_provider_pid()
			
 
				+
			
 
				+    def get(self):
			
 
				+        """Returns a dict with field -> (value, delta to last value) of all
			
 
				+        provider data."""
			
 
				+        for provider in self.providers:
			
 
				+            new = provider.read()
			
 
				+            for key in provider.fields:
			
 
				+                oldval = self.values.get(key, (0, 0))
			
 
				+                newval = new.get(key, 0)
			
 
				+                newdelta = None
			
 
				+                if oldval is not None:
			
 
				+                    newdelta = newval - oldval[0]
			
 
				+                self.values[key] = (newval, newdelta)
			
 
				+        return self.values
			
 
				+
			
 
				+LABEL_WIDTH = 40
			
 
				+NUMBER_WIDTH = 10
			
 
				+
			
 
				+class Tui(object):
			
 
				+    """Instruments curses to draw a nice text ui."""
			
 
				+    def __init__(self, stats):
			
 
				+        self.stats = stats
			
 
				+        self.screen = None
			
 
				+        self.drilldown = False
			
 
				+        self.update_drilldown()
			
 
				+
			
 
				+    def __enter__(self):
			
 
				+        """Initialises curses for later use.  Based on curses.wrapper
			
 
				+           implementation from the Python standard library."""
			
 
				+        self.screen = curses.initscr()
			
 
				+        curses.noecho()
			
 
				+        curses.cbreak()
			
 
				+
			
 
				+        # The try/catch works around a minor bit of
			
 
				+        # over-conscientiousness in the curses module, the error
			
 
				+        # return from C start_color() is ignorable.
			
 
				+        try:
			
 
				+            curses.start_color()
			
 
				+        except:
			
 
				+            pass
			
 
				+
			
 
				+        curses.use_default_colors()
			
 
				+        return self
			
 
				+
			
 
				+    def __exit__(self, *exception):
			
 
				+        """Resets the terminal to its normal state.  Based on curses.wrapper
			
 
				+           implementation from the Python standard library."""
			
 
				+        if self.screen:
			
 
				+            self.screen.keypad(0)
			
 
				+            curses.echo()
			
 
				+            curses.nocbreak()
			
 
				+            curses.endwin()
			
 
				+
			
 
				+    def update_drilldown(self):
			
 
				+        """Sets or removes a filter that only allows fields without braces."""
			
 
				+        if not self.stats.fields_filter:
			
 
				+            self.stats.fields_filter = r'^[^\(]*$'
			
 
				+
			
 
				+        elif self.stats.fields_filter == r'^[^\(]*$':
			
 
				+            self.stats.fields_filter = None
			
 
				+
			
 
				+    def update_pid(self, pid):
			
 
				+        """Propagates pid selection to stats object."""
			
 
				+        self.stats.pid_filter = pid
			
 
				+
			
 
				+    def refresh(self, sleeptime):
			
 
				+        """Refreshes on-screen data."""
			
 
				+        self.screen.erase()
			
 
				+        if self.stats.pid_filter > 0:
			
 
				+            self.screen.addstr(0, 0, 'kvm statistics - pid {0}'
			
 
				+                               .format(self.stats.pid_filter),
			
 
				+                               curses.A_BOLD)
			
 
				+        else:
			
 
				+            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
			
 
				+        self.screen.addstr(2, 1, 'Event')
			
 
				+        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
			
 
				+                           len('Total'), 'Total')
			
 
				+        self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
			
 
				+                           len('Current'), 'Current')
			
 
				+        row = 3
			
 
				+        stats = self.stats.get()
			
 
				+        def sortkey(x):
			
 
				+            if stats[x][1]:
			
 
				+                return (-stats[x][1], -stats[x][0])
			
 
				+            else:
			
 
				+                return (0, -stats[x][0])
			
 
				+        for key in sorted(stats.keys(), key=sortkey):
			
 
				+
			
 
				+            if row >= self.screen.getmaxyx()[0]:
			
 
				+                break
			
 
				+            values = stats[key]
			
 
				+            if not values[0] and not values[1]:
			
 
				+                break
			
 
				+            col = 1
			
 
				+            self.screen.addstr(row, col, key)
			
 
				+            col += LABEL_WIDTH
			
 
				+            self.screen.addstr(row, col, '%10d' % (values[0],))
			
 
				+            col += NUMBER_WIDTH
			
 
				+            if values[1] is not None:
			
 
				+                self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
			
 
				+            row += 1
			
 
				+        self.screen.refresh()
			
 
				+
			
 
				+    def show_filter_selection(self):
			
 
				+        """Draws filter selection mask.
			
 
				+
			
 
				+        Asks for a valid regex and sets the fields filter accordingly.
			
 
				+
			
 
				+        """
			
 
				+        while True:
			
 
				+            self.screen.erase()
			
 
				+            self.screen.addstr(0, 0,
			
 
				+                               "Show statistics for events matching a regex.",
			
 
				+                               curses.A_BOLD)
			
 
				+            self.screen.addstr(2, 0,
			
 
				+                               "Current regex: {0}"
			
 
				+                               .format(self.stats.fields_filter))
			
 
				+            self.screen.addstr(3, 0, "New regex: ")
			
 
				+            curses.echo()
			
 
				+            regex = self.screen.getstr()
			
 
				+            curses.noecho()
			
 
				+            if len(regex) == 0:
			
 
				+                return
			
 
				+            try:
			
 
				+                re.compile(regex)
			
 
				+                self.stats.fields_filter = regex
			
 
				+                return
			
 
				+            except re.error:
			
 
				+                continue
			
 
				+
			
 
				+    def show_vm_selection(self):
			
 
				+        """Draws PID selection mask.
			
 
				+
			
 
				+        Asks for a pid until a valid pid or 0 has been entered.
			
 
				+
			
 
				+        """
			
 
				+        while True:
			
 
				+            self.screen.erase()
			
 
				+            self.screen.addstr(0, 0,
			
 
				+                               'Show statistics for specific pid.',
			
 
				+                               curses.A_BOLD)
			
 
				+            self.screen.addstr(1, 0,
			
 
				+                               'This might limit the shown data to the trace '
			
 
				+                               'statistics.')
			
 
				+
			
 
				+            curses.echo()
			
 
				+            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
			
 
				+            pid = self.screen.getstr()
			
 
				+            curses.noecho()
			
 
				+
			
 
				+            try:
			
 
				+                pid = int(pid)
			
 
				+
			
 
				+                if pid == 0:
			
 
				+                    self.update_pid(pid)
			
 
				+                    break
			
 
				+                else:
			
 
				+                    if not os.path.isdir(os.path.join('/proc/', str(pid))):
			
 
				+                        continue
			
 
				+                    else:
			
 
				+                        self.update_pid(pid)
			
 
				+                        break
			
 
				+
			
 
				+            except ValueError:
			
 
				+                continue
			
 
				+
			
 
				+    def show_stats(self):
			
 
				+        """Refreshes the screen and processes user input."""
			
 
				+        sleeptime = 0.25
			
 
				+        while True:
			
 
				+            self.refresh(sleeptime)
			
 
				+            curses.halfdelay(int(sleeptime * 10))
			
 
				+            sleeptime = 3
			
 
				+            try:
			
 
				+                char = self.screen.getkey()
			
 
				+                if char == 'x':
			
 
				+                    self.drilldown = not self.drilldown
			
 
				+                    self.update_drilldown()
			
 
				+                if char == 'q':
			
 
				+                    break
			
 
				+                if char == 'f':
			
 
				+                    self.show_filter_selection()
			
 
				+                if char == 'p':
			
 
				+                    self.show_vm_selection()
			
 
				+            except KeyboardInterrupt:
			
 
				+                break
			
 
				+            except curses.error:
			
 
				+                continue
			
 
				+
			
 
				+def batch(stats):
			
 
				+    """Prints statistics in a key, value format."""
			
 
				+    s = stats.get()
			
 
				+    time.sleep(1)
			
 
				+    s = stats.get()
			
 
				+    for key in sorted(s.keys()):
			
 
				+        values = s[key]
			
 
				+        print '%-42s%10d%10d' % (key, values[0], values[1])
			
 
				+
			
 
				+def log(stats):
			
 
				+    """Prints statistics as reiterating key block, multiple value blocks."""
			
 
				+    keys = sorted(stats.get().iterkeys())
			
 
				+    def banner():
			
 
				+        for k in keys:
			
 
				+            print '%s' % k,
			
 
				+        print
			
 
				+    def statline():
			
 
				+        s = stats.get()
			
 
				+        for k in keys:
			
 
				+            print ' %9d' % s[k][1],
			
 
				+        print
			
 
				+    line = 0
			
 
				+    banner_repeat = 20
			
 
				+    while True:
			
 
				+        time.sleep(1)
			
 
				+        if line % banner_repeat == 0:
			
 
				+            banner()
			
 
				+        statline()
			
 
				+        line += 1
			
 
				+
			
 
				+def get_options():
			
 
				+    """Returns processed program arguments."""
			
 
				+    description_text = """
			
 
				+This script displays various statistics about VMs running under KVM.
			
 
				+The statistics are gathered from the KVM debugfs entries and / or the
			
 
				+currently available perf traces.
			
 
				+
			
 
				+The monitoring takes additional cpu cycles and might affect the VM's
			
 
				+performance.
			
 
				+
			
 
				+Requirements:
			
 
				+- Access to:
			
 
				+    /sys/kernel/debug/kvm
			
 
				+    /sys/kernel/debug/trace/events/*
			
 
				+    /proc/pid/task
			
 
				+- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
			
 
				+  CAP_SYS_ADMIN and perf events are used.
			
 
				+- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
			
 
				+  the large number of files that are possibly opened.
			
 
				+"""
			
 
				+
			
 
				+    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
			
 
				+        def format_description(self, description):
			
 
				+            if description:
			
 
				+                return description + "\n"
			
 
				+            else:
			
 
				+                return ""
			
 
				+
			
 
				+    optparser = optparse.OptionParser(description=description_text,
			
 
				+                                      formatter=PlainHelpFormatter())
			
 
				+    optparser.add_option('-1', '--once', '--batch',
			
 
				+                         action='store_true',
			
 
				+                         default=False,
			
 
				+                         dest='once',
			
 
				+                         help='run in batch mode for one second',
			
 
				+                         )
			
 
				+    optparser.add_option('-l', '--log',
			
 
				+                         action='store_true',
			
 
				+                         default=False,
			
 
				+                         dest='log',
			
 
				+                         help='run in logging mode (like vmstat)',
			
 
				+                         )
			
 
				+    optparser.add_option('-t', '--tracepoints',
			
 
				+                         action='store_true',
			
 
				+                         default=False,
			
 
				+                         dest='tracepoints',
			
 
				+                         help='retrieve statistics from tracepoints',
			
 
				+                         )
			
 
				+    optparser.add_option('-d', '--debugfs',
			
 
				+                         action='store_true',
			
 
				+                         default=False,
			
 
				+                         dest='debugfs',
			
 
				+                         help='retrieve statistics from debugfs',
			
 
				+                         )
			
 
				+    optparser.add_option('-f', '--fields',
			
 
				+                         action='store',
			
 
				+                         default=None,
			
 
				+                         dest='fields',
			
 
				+                         help='fields to display (regex)',
			
 
				+                         )
			
 
				+    optparser.add_option('-p', '--pid',
			
 
				+                        action='store',
			
 
				+                        default=0,
			
 
				+                        type=int,
			
 
				+                        dest='pid',
			
 
				+                        help='restrict statistics to pid',
			
 
				+                        )
			
 
				+    (options, _) = optparser.parse_args(sys.argv)
			
 
				+    return options
			
 
				+
			
 
				+def get_providers(options):
			
 
				+    """Returns a list of data providers depending on the passed options."""
			
 
				+    providers = []
			
 
				+
			
 
				+    if options.tracepoints:
			
 
				+        providers.append(TracepointProvider())
			
 
				+    if options.debugfs:
			
 
				+        providers.append(DebugfsProvider())
			
 
				+    if len(providers) == 0:
			
 
				+        providers.append(TracepointProvider())
			
 
				+
			
 
				+    return providers
			
 
				+
			
 
				+def check_access(options):
			
 
				+    """Exits if the current user can't access all needed directories."""
			
 
				+    if not os.path.exists('/sys/kernel/debug'):
			
 
				+        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    if not os.path.exists(PATH_DEBUGFS_KVM):
			
 
				+        sys.stderr.write("Please make sure, that debugfs is mounted and "
			
 
				+                         "readable by the current user:\n"
			
 
				+                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
			
 
				+                         "Also ensure, that the kvm modules are loaded.\n")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
			
 
				+                                                     or not options.debugfs):
			
 
				+        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
			
 
				+                         "when using the option -t (default).\n"
			
 
				+                         "If it is enabled, make {0} readable by the "
			
 
				+                         "current user.\n"
			
 
				+                         .format(PATH_DEBUGFS_TRACING))
			
 
				+        if options.tracepoints:
			
 
				+            sys.exit(1)
			
 
				+
			
 
				+        sys.stderr.write("Falling back to debugfs statistics!\n")
			
 
				+        options.debugfs = True
			
 
				+        sleep(5)
			
 
				+
			
 
				+    return options
			
 
				+
			
 
				+def main():
			
 
				+    options = get_options()
			
 
				+    options = check_access(options)
			
 
				+
			
 
				+    if (options.pid > 0 and
			
 
				+        not os.path.isdir(os.path.join('/proc/',
			
 
				+                                       str(options.pid)))):
			
 
				+        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
			
 
				+        sys.exit('Specified pid does not exist.')
			
 
				+
			
 
				+    providers = get_providers(options)
			
 
				+    stats = Stats(providers, options.pid, fields=options.fields)
			
 
				+
			
 
				+    if options.log:
			
 
				+        log(stats)
			
 
				+    elif not options.once:
			
 
				+        with Tui(stats) as tui:
			
 
				+            tui.show_stats()
			
 
				+    else:
			
 
				+        batch(stats)
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -0,0 +1,63 @@
 
				+kvm_stat(1)
			
 
				+===========
			
 
				+
			
 
				+NAME
			
 
				+----
			
 
				+kvm_stat - Report KVM kernel module event counters
			
 
				+
			
 
				+SYNOPSIS
			
 
				+--------
			
 
				+[verse]
			
 
				+'kvm_stat' [OPTION]...
			
 
				+
			
 
				+DESCRIPTION
			
 
				+-----------
			
 
				+kvm_stat prints counts of KVM kernel module trace events.  These events signify
			
 
				+state transitions such as guest mode entry and exit.
			
 
				+
			
 
				+This tool is useful for observing guest behavior from the host perspective.
			
 
				+Often conclusions about performance or buggy behavior can be drawn from the
			
 
				+output.
			
 
				+
			
 
				+The set of KVM kernel module trace events may be specific to the kernel version
			
 
				+or architecture.  It is best to check the KVM kernel module source code for the
			
 
				+meaning of events.
			
 
				+
			
 
				+OPTIONS
			
 
				+-------
			
 
				+-1::
			
 
				+--once::
			
 
				+--batch::
			
 
				+	run in batch mode for one second
			
 
				+
			
 
				+-l::
			
 
				+--log::
			
 
				+	run in logging mode (like vmstat)
			
 
				+
			
 
				+-t::
			
 
				+--tracepoints::
			
 
				+	retrieve statistics from tracepoints
			
 
				+
			
 
				+-d::
			
 
				+--debugfs::
			
 
				+	retrieve statistics from debugfs
			
 
				+
			
 
				+-p<pid>::
			
 
				+--pid=<pid>::
			
 
				+	limit statistics to one virtual machine (pid)
			
 
				+
			
 
				+-f<fields>::
			
 
				+--fields=<fields>::
			
 
				+	fields to display (regex)
			
 
				+
			
 
				+-h::
			
 
				+--help::
			
 
				+	show help message
			
 
				+
			
 
				+SEE ALSO
			
 
				+--------
			
 
				+'perf'(1), 'trace-cmd'(1)
			
 
				+
			
 
				+AUTHOR
			
 
				+------
			
 
				+Stefan Hajnoczi <stefanha@redhat.com>
			
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -20,6 +20,7 @@
 
				 #include <linux/kvm.h>
			
 
				 #include <linux/kvm_host.h>
			
 
				 #include <linux/interrupt.h>
			
 
				+#include <linux/irq.h>
			
 
				 
			
 
				 #include <clocksource/arm_arch_timer.h>
			
 
				 #include <asm/arch_timer.h>
			
@@ -174,10 +175,10 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
 
				 
			
 
				 	timer->active_cleared_last = false;
			
 
				 	timer->irq.level = new_level;
			
 
				-	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
			
 
				+	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
			
 
				 				   timer->irq.level);
			
 
				 	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
			
 
				-					 timer->map,
			
 
				+					 timer->irq.irq,
			
 
				 					 timer->irq.level);
			
 
				 	WARN_ON(ret);
			
 
				 }
			
@@ -196,7 +197,7 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 
				 	 * because the guest would never see the interrupt.  Instead wait
			
 
				 	 * until we call this function from kvm_timer_flush_hwstate.
			
 
				 	 */
			
 
				-	if (!vgic_initialized(vcpu->kvm))
			
 
				+	if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
			
 
				 		return -ENODEV;
			
 
				 
			
 
				 	if (kvm_timer_should_fire(vcpu) != timer->irq.level)
			
@@ -274,10 +275,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 
				 	* to ensure that hardware interrupts from the timer triggers a guest
			
 
				 	* exit.
			
 
				 	*/
			
 
				-	if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map))
			
 
				-		phys_active = true;
			
 
				-	else
			
 
				-		phys_active = false;
			
 
				+	phys_active = timer->irq.level ||
			
 
				+			kvm_vgic_map_is_active(vcpu, timer->irq.irq);
			
 
				 
			
 
				 	/*
			
 
				 	 * We want to avoid hitting the (re)distributor as much as
			
@@ -302,7 +301,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 
				 	if (timer->active_cleared_last && !phys_active)
			
 
				 		return;
			
 
				 
			
 
				-	ret = irq_set_irqchip_state(timer->map->irq,
			
 
				+	ret = irq_set_irqchip_state(host_vtimer_irq,
			
 
				 				    IRQCHIP_STATE_ACTIVE,
			
 
				 				    phys_active);
			
 
				 	WARN_ON(ret);
			
@@ -334,7 +333,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 
				 			 const struct kvm_irq_level *irq)
			
 
				 {
			
 
				 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
			
 
				-	struct irq_phys_map *map;
			
 
				 
			
 
				 	/*
			
 
				 	 * The vcpu timer irq number cannot be determined in
			
@@ -353,15 +351,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 
				 	timer->cntv_ctl = 0;
			
 
				 	kvm_timer_update_state(vcpu);
			
 
				 
			
 
				-	/*
			
 
				-	 * Tell the VGIC that the virtual interrupt is tied to a
			
 
				-	 * physical interrupt. We do that once per VCPU.
			
 
				-	 */
			
 
				-	map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
			
 
				-	if (WARN_ON(IS_ERR(map)))
			
 
				-		return PTR_ERR(map);
			
 
				-
			
 
				-	timer->map = map;
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -487,14 +476,43 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 
				 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
			
 
				 
			
 
				 	timer_disarm(timer);
			
 
				-	if (timer->map)
			
 
				-		kvm_vgic_unmap_phys_irq(vcpu, timer->map);
			
 
				+	kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
			
 
				 }
			
 
				 
			
 
				-void kvm_timer_enable(struct kvm *kvm)
			
 
				+int kvm_timer_enable(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				-	if (kvm->arch.timer.enabled)
			
 
				-		return;
			
 
				+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
			
 
				+	struct irq_desc *desc;
			
 
				+	struct irq_data *data;
			
 
				+	int phys_irq;
			
 
				+	int ret;
			
 
				+
			
 
				+	if (timer->enabled)
			
 
				+		return 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Find the physical IRQ number corresponding to the host_vtimer_irq
			
 
				+	 */
			
 
				+	desc = irq_to_desc(host_vtimer_irq);
			
 
				+	if (!desc) {
			
 
				+		kvm_err("%s: no interrupt descriptor\n", __func__);
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				+	data = irq_desc_get_irq_data(desc);
			
 
				+	while (data->parent_data)
			
 
				+		data = data->parent_data;
			
 
				+
			
 
				+	phys_irq = data->hwirq;
			
 
				+
			
 
				+	/*
			
 
				+	 * Tell the VGIC that the virtual interrupt is tied to a
			
 
				+	 * physical interrupt. We do that once per VCPU.
			
 
				+	 */
			
 
				+	ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				 
			
 
				 	/*
			
 
				 	 * There is a potential race here between VCPUs starting for the first
			
@@ -505,7 +523,9 @@ void kvm_timer_enable(struct kvm *kvm)
 
				 	 * the arch timers are enabled.
			
 
				 	 */
			
 
				 	if (timecounter && wqueue)
			
 
				-		kvm->arch.timer.enabled = 1;
			
 
				+		timer->enabled = 1;
			
 
				+
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 void kvm_timer_init(struct kvm *kvm)
			
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -24,11 +24,10 @@
 
				 /* vcpu is already in the HYP VA space */
			
 
				 void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
			
 
				 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
			
 
				 	u64 val;
			
 
				 
			
 
				-	if (kvm->arch.timer.enabled) {
			
 
				+	if (timer->enabled) {
			
 
				 		timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
			
 
				 		timer->cntv_cval = read_sysreg_el0(cntv_cval);
			
 
				 	}
			
@@ -60,7 +59,7 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
 
				 	val |= CNTHCTL_EL1PCTEN;
			
 
				 	write_sysreg(val, cnthctl_el2);
			
 
				 
			
 
				-	if (kvm->arch.timer.enabled) {
			
 
				+	if (timer->enabled) {
			
 
				 		write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
			
 
				 		write_sysreg_el0(timer->cntv_cval, cntv_cval);
			
 
				 		isb();
			
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -21,11 +21,18 @@
 
				 
			
 
				 #include <asm/kvm_hyp.h>
			
 
				 
			
 
				+#ifdef CONFIG_KVM_NEW_VGIC
			
 
				+extern struct vgic_global kvm_vgic_global_state;
			
 
				+#define vgic_v2_params kvm_vgic_global_state
			
 
				+#else
			
 
				+extern struct vgic_params vgic_v2_params;
			
 
				+#endif
			
 
				+
			
 
				 static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
			
 
				 					    void __iomem *base)
			
 
				 {
			
 
				 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
			
 
				-	int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
			
 
				+	int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
			
 
				 	u32 eisr0, eisr1;
			
 
				 	int i;
			
 
				 	bool expect_mi;
			
@@ -67,7 +74,7 @@ static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
 
				 static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
			
 
				 {
			
 
				 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
			
 
				-	int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
			
 
				+	int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
			
 
				 	u32 elrsr0, elrsr1;
			
 
				 
			
 
				 	elrsr0 = readl_relaxed(base + GICH_ELRSR0);
			
@@ -86,7 +93,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
 
				 static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
			
 
				 {
			
 
				 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
			
 
				-	int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
			
 
				+	int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
			
 
				 	int i;
			
 
				 
			
 
				 	for (i = 0; i < nr_lr; i++) {
			
@@ -141,13 +148,13 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
 
				 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
			
 
				 	struct vgic_dist *vgic = &kvm->arch.vgic;
			
 
				 	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
			
 
				-	int i, nr_lr;
			
 
				+	int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
			
 
				+	int i;
			
 
				 	u64 live_lrs = 0;
			
 
				 
			
 
				 	if (!base)
			
 
				 		return;
			
 
				 
			
 
				-	nr_lr = vcpu->arch.vgic_cpu.nr_lr;
			
 
				 
			
 
				 	for (i = 0; i < nr_lr; i++)
			
 
				 		if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
			
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -436,7 +436,14 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
			
 
				+#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
			
 
				+
			
 
				+/*
			
 
				+ * For one VM the interrupt type must be same for each vcpu.
			
 
				+ * As a PPI, the interrupt number is the same for all vcpus,
			
 
				+ * while as an SPI it must be a separate number per vcpu.
			
 
				+ */
			
 
				+static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
			
 
				 {
			
 
				 	int i;
			
 
				 	struct kvm_vcpu *vcpu;
			
@@ -445,7 +452,7 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
 
				 		if (!kvm_arm_pmu_irq_initialized(vcpu))
			
 
				 			continue;
			
 
				 
			
 
				-		if (is_ppi) {
			
 
				+		if (irq_is_ppi(irq)) {
			
 
				 			if (vcpu->arch.pmu.irq_num != irq)
			
 
				 				return false;
			
 
				 		} else {
			
@@ -457,7 +464,6 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
			
 
				 {
			
 
				 	switch (attr->attr) {
			
@@ -471,14 +477,11 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 
				 		if (get_user(irq, uaddr))
			
 
				 			return -EFAULT;
			
 
				 
			
 
				-		/*
			
 
				-		 * The PMU overflow interrupt could be a PPI or SPI, but for one
			
 
				-		 * VM the interrupt type must be same for each vcpu. As a PPI,
			
 
				-		 * the interrupt number is the same for all vcpus, while as an
			
 
				-		 * SPI it must be a separate number per vcpu.
			
 
				-		 */
			
 
				-		if (irq < VGIC_NR_SGIS || irq >= vcpu->kvm->arch.vgic.nr_irqs ||
			
 
				-		    !irq_is_valid(vcpu->kvm, irq, irq < VGIC_NR_PRIVATE_IRQS))
			
 
				+		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
			
 
				+		if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
			
 
				+			return -EINVAL;
			
 
				+
			
 
				+		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			
 
				 			return -EINVAL;
			
 
				 
			
 
				 		if (kvm_arm_pmu_irq_initialized(vcpu))
			
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -171,7 +171,7 @@ static const struct vgic_ops vgic_v2_ops = {
 
				 	.enable			= vgic_v2_enable,
			
 
				 };
			
 
				 
			
 
				-static struct vgic_params vgic_v2_params;
			
 
				+struct vgic_params __section(.hyp.text) vgic_v2_params;
			
 
				 
			
 
				 static void vgic_cpu_init_lrs(void *params)
			
 
				 {
			
@@ -201,6 +201,8 @@ int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
 
				 	const struct resource *vctrl_res = &gic_kvm_info->vctrl;
			
 
				 	const struct resource *vcpu_res = &gic_kvm_info->vcpu;
			
 
				 
			
 
				+	memset(vgic, 0, sizeof(*vgic));
			
 
				+
			
 
				 	if (!gic_kvm_info->maint_irq) {
			
 
				 		kvm_err("error getting vgic maintenance irq\n");
			
 
				 		ret = -ENXIO;
			
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -29,12 +29,6 @@
 
				 #include <asm/kvm_asm.h>
			
 
				 #include <asm/kvm_mmu.h>
			
 
				 
			
 
				-/* These are for GICv2 emulation only */
			
 
				-#define GICH_LR_VIRTUALID		(0x3ffUL << 0)
			
 
				-#define GICH_LR_PHYSID_CPUID_SHIFT	(10)
			
 
				-#define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)
			
 
				-#define ICH_LR_VIRTUALID_MASK		(BIT_ULL(32) - 1)
			
 
				-
			
 
				 static u32 ich_vtr_el2;
			
 
				 
			
 
				 static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
			
@@ -43,7 +37,7 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
 
				 	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr];
			
 
				 
			
 
				 	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
			
 
				-		lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
			
 
				+		lr_desc.irq = val & ICH_LR_VIRTUAL_ID_MASK;
			
 
				 	else
			
 
				 		lr_desc.irq = val & GICH_LR_VIRTUALID;
			
 
				 
			
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -690,12 +690,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 
				  */
			
 
				 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
			
 
				 	u64 elrsr = vgic_get_elrsr(vcpu);
			
 
				 	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
			
 
				 	int i;
			
 
				 
			
 
				-	for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
			
 
				+	for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) {
			
 
				 		struct vgic_lr lr = vgic_get_lr(vcpu, i);
			
 
				 
			
 
				 		/*
			
@@ -820,7 +819,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
 
				 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
			
 
				 	struct vgic_io_device *iodev = container_of(this,
			
 
				 						    struct vgic_io_device, dev);
			
 
				-	struct kvm_run *run = vcpu->run;
			
 
				 	const struct vgic_io_range *range;
			
 
				 	struct kvm_exit_mmio mmio;
			
 
				 	bool updated_state;
			
@@ -849,12 +847,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
 
				 		updated_state = false;
			
 
				 	}
			
 
				 	spin_unlock(&dist->lock);
			
 
				-	run->mmio.is_write	= is_write;
			
 
				-	run->mmio.len		= len;
			
 
				-	run->mmio.phys_addr	= addr;
			
 
				-	memcpy(run->mmio.data, val, len);
			
 
				-
			
 
				-	kvm_handle_mmio_return(vcpu, run);
			
 
				 
			
 
				 	if (updated_state)
			
 
				 		vgic_kick_vcpus(vcpu->kvm);
			
@@ -1102,18 +1094,18 @@ static bool dist_active_irq(struct kvm_vcpu *vcpu)
 
				 	return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
			
 
				 }
			
 
				 
			
 
				-bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
			
 
				+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
			
 
				 {
			
 
				 	int i;
			
 
				 
			
 
				-	for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) {
			
 
				+	for (i = 0; i < vgic->nr_lr; i++) {
			
 
				 		struct vgic_lr vlr = vgic_get_lr(vcpu, i);
			
 
				 
			
 
				-		if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE)
			
 
				+		if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE)
			
 
				 			return true;
			
 
				 	}
			
 
				 
			
 
				-	return vgic_irq_is_active(vcpu, map->virt_irq);
			
 
				+	return vgic_irq_is_active(vcpu, virt_irq);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1521,7 +1513,6 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 
				 }
			
 
				 
			
 
				 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
			
 
				-				   struct irq_phys_map *map,
			
 
				 				   unsigned int irq_num, bool level)
			
 
				 {
			
 
				 	struct vgic_dist *dist = &kvm->arch.vgic;
			
@@ -1660,14 +1651,14 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 
				 	if (map)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				-	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
			
 
				+	return vgic_update_irq_pending(kvm, cpuid, irq_num, level);
			
 
				 }
			
 
				 
			
 
				 /**
			
 
				  * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
			
 
				  * @kvm:     The VM structure pointer
			
 
				  * @cpuid:   The CPU for PPIs
			
 
				- * @map:     Pointer to a irq_phys_map structure describing the mapping
			
 
				+ * @virt_irq: The virtual IRQ to be injected
			
 
				  * @level:   Edge-triggered:  true:  to trigger the interrupt
			
 
				  *			      false: to ignore the call
			
 
				  *	     Level-sensitive  true:  raise the input signal
			
@@ -1678,7 +1669,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 
				  * being HIGH and 0 being LOW and all devices being active-HIGH.
			
 
				  */
			
 
				 int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
			
 
				-			       struct irq_phys_map *map, bool level)
			
 
				+			       unsigned int virt_irq, bool level)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
@@ -1686,7 +1677,7 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				-	return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
			
 
				+	return vgic_update_irq_pending(kvm, cpuid, virt_irq, level);
			
 
				 }
			
 
				 
			
 
				 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
			
@@ -1712,43 +1703,28 @@ static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
 
				 /**
			
 
				  * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
			
 
				  * @vcpu: The VCPU pointer
			
 
				- * @virt_irq: The virtual irq number
			
 
				- * @irq: The Linux IRQ number
			
 
				+ * @virt_irq: The virtual IRQ number for the guest
			
 
				+ * @phys_irq: The hardware IRQ number of the host
			
 
				  *
			
 
				  * Establish a mapping between a guest visible irq (@virt_irq) and a
			
 
				- * Linux irq (@irq). On injection, @virt_irq will be associated with
			
 
				- * the physical interrupt represented by @irq. This mapping can be
			
 
				+ * hardware irq (@phys_irq). On injection, @virt_irq will be associated with
			
 
				+ * the physical interrupt represented by @phys_irq. This mapping can be
			
 
				  * established multiple times as long as the parameters are the same.
			
 
				  *
			
 
				- * Returns a valid pointer on success, and an error pointer otherwise
			
 
				+ * Returns 0 on success or an error value otherwise.
			
 
				  */
			
 
				-struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
			
 
				-					   int virt_irq, int irq)
			
 
				+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq)
			
 
				 {
			
 
				 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
			
 
				 	struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
			
 
				 	struct irq_phys_map *map;
			
 
				 	struct irq_phys_map_entry *entry;
			
 
				-	struct irq_desc *desc;
			
 
				-	struct irq_data *data;
			
 
				-	int phys_irq;
			
 
				-
			
 
				-	desc = irq_to_desc(irq);
			
 
				-	if (!desc) {
			
 
				-		kvm_err("%s: no interrupt descriptor\n", __func__);
			
 
				-		return ERR_PTR(-EINVAL);
			
 
				-	}
			
 
				-
			
 
				-	data = irq_desc_get_irq_data(desc);
			
 
				-	while (data->parent_data)
			
 
				-		data = data->parent_data;
			
 
				-
			
 
				-	phys_irq = data->hwirq;
			
 
				+	int ret = 0;
			
 
				 
			
 
				 	/* Create a new mapping */
			
 
				 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
			
 
				 	if (!entry)
			
 
				-		return ERR_PTR(-ENOMEM);
			
 
				+		return -ENOMEM;
			
 
				 
			
 
				 	spin_lock(&dist->irq_phys_map_lock);
			
 
				 
			
@@ -1756,9 +1732,8 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
 
				 	map = vgic_irq_map_search(vcpu, virt_irq);
			
 
				 	if (map) {
			
 
				 		/* Make sure this mapping matches */
			
 
				-		if (map->phys_irq != phys_irq	||
			
 
				-		    map->irq      != irq)
			
 
				-			map = ERR_PTR(-EINVAL);
			
 
				+		if (map->phys_irq != phys_irq)
			
 
				+			ret = -EINVAL;
			
 
				 
			
 
				 		/* Found an existing, valid mapping */
			
 
				 		goto out;
			
@@ -1767,7 +1742,6 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
 
				 	map           = &entry->map;
			
 
				 	map->virt_irq = virt_irq;
			
 
				 	map->phys_irq = phys_irq;
			
 
				-	map->irq      = irq;
			
 
				 
			
 
				 	list_add_tail_rcu(&entry->entry, root);
			
 
				 
			
@@ -1775,9 +1749,9 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
 
				 	spin_unlock(&dist->irq_phys_map_lock);
			
 
				 	/* If we've found a hit in the existing list, free the useless
			
 
				 	 * entry */
			
 
				-	if (IS_ERR(map) || map != &entry->map)
			
 
				+	if (ret || map != &entry->map)
			
 
				 		kfree(entry);
			
 
				-	return map;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
			
@@ -1813,25 +1787,22 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
 
				 /**
			
 
				  * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
			
 
				  * @vcpu: The VCPU pointer
			
 
				- * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
			
 
				+ * @virt_irq: The virtual IRQ number to be unmapped
			
 
				  *
			
 
				  * Remove an existing mapping between virtual and physical interrupts.
			
 
				  */
			
 
				-int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
			
 
				+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
			
 
				 {
			
 
				 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
			
 
				 	struct irq_phys_map_entry *entry;
			
 
				 	struct list_head *root;
			
 
				 
			
 
				-	if (!map)
			
 
				-		return -EINVAL;
			
 
				-
			
 
				-	root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
			
 
				+	root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
			
 
				 
			
 
				 	spin_lock(&dist->irq_phys_map_lock);
			
 
				 
			
 
				 	list_for_each_entry(entry, root, entry) {
			
 
				-		if (&entry->map == map) {
			
 
				+		if (entry->map.virt_irq == virt_irq) {
			
 
				 			list_del_rcu(&entry->entry);
			
 
				 			call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
			
 
				 			break;
			
@@ -1887,13 +1858,6 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 
				 		return -ENOMEM;
			
 
				 	}
			
 
				 
			
 
				-	/*
			
 
				-	 * Store the number of LRs per vcpu, so we don't have to go
			
 
				-	 * all the way to the distributor structure to find out. Only
			
 
				-	 * assembly code should use this one.
			
 
				-	 */
			
 
				-	vgic_cpu->nr_lr = vgic->nr_lr;
			
 
				-
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -0,0 +1,452 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, 2016 ARM Ltd.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/uaccess.h>
			
 
				+#include <linux/interrupt.h>
			
 
				+#include <linux/cpu.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <kvm/arm_vgic.h>
			
 
				+#include <asm/kvm_mmu.h>
			
 
				+#include "vgic.h"
			
 
				+
			
 
				+/*
			
 
				+ * Initialization rules: there are multiple stages to the vgic
			
 
				+ * initialization, both for the distributor and the CPU interfaces.
			
 
				+ *
			
 
				+ * Distributor:
			
 
				+ *
			
 
				+ * - kvm_vgic_early_init(): initialization of static data that doesn't
			
 
				+ *   depend on any sizing information or emulation type. No allocation
			
 
				+ *   is allowed there.
			
 
				+ *
			
 
				+ * - vgic_init(): allocation and initialization of the generic data
			
 
				+ *   structures that depend on sizing information (number of CPUs,
			
 
				+ *   number of interrupts). Also initializes the vcpu specific data
			
 
				+ *   structures. Can be executed lazily for GICv2.
			
 
				+ *
			
 
				+ * CPU Interface:
			
 
				+ *
			
 
				+ * - kvm_vgic_cpu_early_init(): initialization of static data that
			
 
				+ *   doesn't depend on any sizing information or emulation type. No
			
 
				+ *   allocation is allowed there.
			
 
				+ */
			
 
				+
			
 
				+/* EARLY INIT */
			
 
				+
			
 
				+/*
			
 
				+ * Those 2 functions should not be needed anymore but they
			
 
				+ * still are called from arm.c
			
 
				+ */
			
 
				+void kvm_vgic_early_init(struct kvm *kvm)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+/* CREATION */
			
 
				+
			
 
				+/**
			
 
				+ * kvm_vgic_create: triggered by the instantiation of the VGIC device by
			
 
				+ * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
			
 
				+ * or through the generic KVM_CREATE_DEVICE API ioctl.
			
 
				+ * irqchip_in_kernel() tells you if this function succeeded or not.
			
 
				+ * @kvm: kvm struct pointer
			
 
				+ * @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
			
 
				+ */
			
 
				+int kvm_vgic_create(struct kvm *kvm, u32 type)
			
 
				+{
			
 
				+	int i, vcpu_lock_idx = -1, ret;
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				+
			
 
				+	mutex_lock(&kvm->lock);
			
 
				+
			
 
				+	if (irqchip_in_kernel(kvm)) {
			
 
				+		ret = -EEXIST;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * This function is also called by the KVM_CREATE_IRQCHIP handler,
			
 
				+	 * which had no chance yet to check the availability of the GICv2
			
 
				+	 * emulation. So check this here again. KVM_CREATE_DEVICE does
			
 
				+	 * the proper checks already.
			
 
				+	 */
			
 
				+	if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
			
 
				+		!kvm_vgic_global_state.can_emulate_gicv2) {
			
 
				+		ret = -ENODEV;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
			
 
				+	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
			
 
				+	 * that no other VCPUs are run while we create the vgic.
			
 
				+	 */
			
 
				+	ret = -EBUSY;
			
 
				+	kvm_for_each_vcpu(i, vcpu, kvm) {
			
 
				+		if (!mutex_trylock(&vcpu->mutex))
			
 
				+			goto out_unlock;
			
 
				+		vcpu_lock_idx = i;
			
 
				+	}
			
 
				+
			
 
				+	kvm_for_each_vcpu(i, vcpu, kvm) {
			
 
				+		if (vcpu->arch.has_run_once)
			
 
				+			goto out_unlock;
			
 
				+	}
			
 
				+	ret = 0;
			
 
				+
			
 
				+	if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
			
 
				+		kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
			
 
				+	else
			
 
				+		kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
			
 
				+
			
 
				+	if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
			
 
				+		ret = -E2BIG;
			
 
				+		goto out_unlock;
			
 
				+	}
			
 
				+
			
 
				+	kvm->arch.vgic.in_kernel = true;
			
 
				+	kvm->arch.vgic.vgic_model = type;
			
 
				+
			
 
				+	/*
			
 
				+	 * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init)
			
 
				+	 * it is stored in distributor struct for asm save/restore purpose
			
 
				+	 */
			
 
				+	kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base;
			
 
				+
			
 
				+	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
			
 
				+	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
			
 
				+	kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
			
 
				+
			
 
				+out_unlock:
			
 
				+	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
			
 
				+		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
			
 
				+		mutex_unlock(&vcpu->mutex);
			
 
				+	}
			
 
				+
			
 
				+out:
			
 
				+	mutex_unlock(&kvm->lock);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/* INIT/DESTROY */
			
 
				+
			
 
				+/**
			
 
				+ * kvm_vgic_dist_init: initialize the dist data structures
			
 
				+ * @kvm: kvm struct pointer
			
 
				+ * @nr_spis: number of spis, frozen by caller
			
 
				+ */
			
 
				+static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
			
 
				+{
			
 
				+	struct vgic_dist *dist = &kvm->arch.vgic;
			
 
				+	struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
			
 
				+	int i;
			
 
				+
			
 
				+	dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
			
 
				+	if (!dist->spis)
			
 
				+		return  -ENOMEM;
			
 
				+
			
 
				+	/*
			
 
				+	 * In the following code we do not take the irq struct lock since
			
 
				+	 * no other action on irq structs can happen while the VGIC is
			
 
				+	 * not initialized yet:
			
 
				+	 * If someone wants to inject an interrupt or does a MMIO access, we
			
 
				+	 * require prior initialization in case of a virtual GICv3 or trigger
			
 
				+	 * initialization when using a virtual GICv2.
			
 
				+	 */
			
 
				+	for (i = 0; i < nr_spis; i++) {
			
 
				+		struct vgic_irq *irq = &dist->spis[i];
			
 
				+
			
 
				+		irq->intid = i + VGIC_NR_PRIVATE_IRQS;
			
 
				+		INIT_LIST_HEAD(&irq->ap_list);
			
 
				+		spin_lock_init(&irq->irq_lock);
			
 
				+		irq->vcpu = NULL;
			
 
				+		irq->target_vcpu = vcpu0;
			
 
				+		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
			
 
				+			irq->targets = 0;
			
 
				+		else
			
 
				+			irq->mpidr = 0;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * kvm_vgic_vcpu_init: initialize the vcpu data structures and
			
 
				+ * enable the VCPU interface
			
 
				+ * @vcpu: the VCPU whose VGIC should be initialized
			
 
				+ */
			
 
				+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
			
 
				+	int i;
			
 
				+
			
 
				+	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
			
 
				+	spin_lock_init(&vgic_cpu->ap_list_lock);
			
 
				+
			
 
				+	/*
			
 
				+	 * Enable and configure all SGIs to be edge-triggered and
			
 
				+	 * configure all PPIs as level-triggered.
			
 
				+	 */
			
 
				+	for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
			
 
				+		struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
			
 
				+
			
 
				+		INIT_LIST_HEAD(&irq->ap_list);
			
 
				+		spin_lock_init(&irq->irq_lock);
			
 
				+		irq->intid = i;
			
 
				+		irq->vcpu = NULL;
			
 
				+		irq->target_vcpu = vcpu;
			
 
				+		irq->targets = 1U << vcpu->vcpu_id;
			
 
				+		if (vgic_irq_is_sgi(i)) {
			
 
				+			/* SGIs */
			
 
				+			irq->enabled = 1;
			
 
				+			irq->config = VGIC_CONFIG_EDGE;
			
 
				+		} else {
			
 
				+			/* PPIs */
			
 
				+			irq->config = VGIC_CONFIG_LEVEL;
			
 
				+		}
			
 
				+	}
			
 
				+	if (kvm_vgic_global_state.type == VGIC_V2)
			
 
				+		vgic_v2_enable(vcpu);
			
 
				+	else
			
 
				+		vgic_v3_enable(vcpu);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * vgic_init: allocates and initializes dist and vcpu data structures
			
 
				+ * depending on two dimensioning parameters:
			
 
				+ * - the number of spis
			
 
				+ * - the number of vcpus
			
 
				+ * The function is generally called when nr_spis has been explicitly set
			
 
				+ * by the guest through the KVM DEVICE API. If not, nr_spis is set to 256.
			
 
				+ * vgic_initialized() returns true when this function has succeeded.
			
 
				+ * Must be called with kvm->lock held!
			
 
				+ */
			
 
				+int vgic_init(struct kvm *kvm)
			
 
				+{
			
 
				+	struct vgic_dist *dist = &kvm->arch.vgic;
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				+	int ret = 0, i;
			
 
				+
			
 
				+	if (vgic_initialized(kvm))
			
 
				+		return 0;
			
 
				+
			
 
				+	/* freeze the number of spis */
			
 
				+	if (!dist->nr_spis)
			
 
				+		dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
			
 
				+
			
 
				+	ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
			
 
				+	if (ret)
			
 
				+		goto out;
			
 
				+
			
 
				+	kvm_for_each_vcpu(i, vcpu, kvm)
			
 
				+		kvm_vgic_vcpu_init(vcpu);
			
 
				+
			
 
				+	dist->initialized = true;
			
 
				+out:
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static void kvm_vgic_dist_destroy(struct kvm *kvm)
			
 
				+{
			
 
				+	struct vgic_dist *dist = &kvm->arch.vgic;
			
 
				+
			
 
				+	mutex_lock(&kvm->lock);
			
 
				+
			
 
				+	dist->ready = false;
			
 
				+	dist->initialized = false;
			
 
				+
			
 
				+	kfree(dist->spis);
			
 
				+	kfree(dist->redist_iodevs);
			
 
				+	dist->nr_spis = 0;
			
 
				+
			
 
				+	mutex_unlock(&kvm->lock);
			
 
				+}
			
 
				+
			
 
				+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
			
 
				+
			
 
				+	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
			
 
				+}
			
 
				+
			
 
				+void kvm_vgic_destroy(struct kvm *kvm)
			
 
				+{
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				+	int i;
			
 
				+
			
 
				+	kvm_vgic_dist_destroy(kvm);
			
 
				+
			
 
				+	kvm_for_each_vcpu(i, vcpu, kvm)
			
 
				+		kvm_vgic_vcpu_destroy(vcpu);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
			
 
				+ * is a GICv2. A GICv3 must be explicitly initialized by the guest using the
			
 
				+ * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
			
 
				+ * @kvm: kvm struct pointer
			
 
				+ */
			
 
				+int vgic_lazy_init(struct kvm *kvm)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if (unlikely(!vgic_initialized(kvm))) {
			
 
				+		/*
			
 
				+		 * We only provide the automatic initialization of the VGIC
			
 
				+		 * for the legacy case of a GICv2. Any other type must
			
 
				+		 * be explicitly initialized once setup with the respective
			
 
				+		 * KVM device call.
			
 
				+		 */
			
 
				+		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
			
 
				+			return -EBUSY;
			
 
				+
			
 
				+		mutex_lock(&kvm->lock);
			
 
				+		ret = vgic_init(kvm);
			
 
				+		mutex_unlock(&kvm->lock);
			
 
				+	}
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/* RESOURCE MAPPING */
			
 
				+
			
 
				+/**
			
 
				+ * Map the MMIO regions depending on the VGIC model exposed to the guest;
			
 
				+ * called on the first VCPU run.
			
 
				+ * Also map the virtual CPU interface into the VM.
			
 
				+ * v2/v3 derivatives call vgic_init if not already done.
			
 
				+ * vgic_ready() returns true if this function has succeeded.
			
 
				+ * @kvm: kvm struct pointer
			
 
				+ */
			
 
				+int kvm_vgic_map_resources(struct kvm *kvm)
			
 
				+{
			
 
				+	struct vgic_dist *dist = &kvm->arch.vgic;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	mutex_lock(&kvm->lock);
			
 
				+	if (!irqchip_in_kernel(kvm))
			
 
				+		goto out;
			
 
				+
			
 
				+	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
			
 
				+		ret = vgic_v2_map_resources(kvm);
			
 
				+	else
			
 
				+		ret = vgic_v3_map_resources(kvm);
			
 
				+out:
			
 
				+	mutex_unlock(&kvm->lock);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/* GENERIC PROBE */
			
 
				+
			
 
				+static void vgic_init_maintenance_interrupt(void *info)
			
 
				+{
			
 
				+	enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
			
 
				+}
			
 
				+
			
 
				+static int vgic_cpu_notify(struct notifier_block *self,
			
 
				+			   unsigned long action, void *cpu)
			
 
				+{
			
 
				+	switch (action) {
			
 
				+	case CPU_STARTING:
			
 
				+	case CPU_STARTING_FROZEN:
			
 
				+		vgic_init_maintenance_interrupt(NULL);
			
 
				+		break;
			
 
				+	case CPU_DYING:
			
 
				+	case CPU_DYING_FROZEN:
			
 
				+		disable_percpu_irq(kvm_vgic_global_state.maint_irq);
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	return NOTIFY_OK;
			
 
				+}
			
 
				+
			
 
				+static struct notifier_block vgic_cpu_nb = {
			
 
				+	.notifier_call = vgic_cpu_notify,
			
 
				+};
			
 
				+
			
 
				+static irqreturn_t vgic_maintenance_handler(int irq, void *data)
			
 
				+{
			
 
				+	/*
			
 
				+	 * We cannot rely on the vgic maintenance interrupt to be
			
 
				+	 * delivered synchronously. This means we can only use it to
			
 
				+	 * exit the VM, and we perform the handling of EOIed
			
 
				+	 * interrupts on the exit path (see vgic_process_maintenance).
			
 
				+	 */
			
 
				+	return IRQ_HANDLED;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
			
 
				+ * according to the host GIC model. Accordingly calls either
			
 
				+ * vgic_v2/v3_probe which registers the KVM_DEVICE that can be
			
 
				+ * instantiated by a guest later on.
			
 
				+ */
			
 
				+int kvm_vgic_hyp_init(void)
			
 
				+{
			
 
				+	const struct gic_kvm_info *gic_kvm_info;
			
 
				+	int ret;
			
 
				+
			
 
				+	gic_kvm_info = gic_get_kvm_info();
			
 
				+	if (!gic_kvm_info)
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	if (!gic_kvm_info->maint_irq) {
			
 
				+		kvm_err("No vgic maintenance irq\n");
			
 
				+		return -ENXIO;
			
 
				+	}
			
 
				+
			
 
				+	switch (gic_kvm_info->type) {
			
 
				+	case GIC_V2:
			
 
				+		ret = vgic_v2_probe(gic_kvm_info);
			
 
				+		break;
			
 
				+	case GIC_V3:
			
 
				+		ret = vgic_v3_probe(gic_kvm_info);
			
 
				+		break;
			
 
				+	default:
			
 
				+		ret = -ENODEV;
			
 
				+	};
			
 
				+
			
 
				+	if (ret)
			
 
				+		return ret;
			
 
				+
			
 
				+	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
			
 
				+	ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
			
 
				+				 vgic_maintenance_handler,
			
 
				+				 "vgic", kvm_get_running_vcpus());
			
 
				+	if (ret) {
			
 
				+		kvm_err("Cannot register interrupt %d\n",
			
 
				+			kvm_vgic_global_state.maint_irq);
			
 
				+		return ret;
			
 
				+	}
			
 
				+
			
 
				+	ret = __register_cpu_notifier(&vgic_cpu_nb);
			
 
				+	if (ret) {
			
 
				+		kvm_err("Cannot register vgic CPU notifier\n");
			
 
				+		goto out_free_irq;
			
 
				+	}
			
 
				+
			
 
				+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
			
 
				+
			
 
				+	kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
			
 
				+	return 0;
			
 
				+
			
 
				+out_free_irq:
			
 
				+	free_percpu_irq(kvm_vgic_global_state.maint_irq,
			
 
				+			kvm_get_running_vcpus());
			
 
				+	return ret;
			
 
				+}
			
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -0,0 +1,52 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, 2016 ARM Ltd.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <trace/events/kvm.h>
			
 
				+
			
 
				+/*
				+ * Stub: always returns 0 and leaves @entries untouched, whatever @gsi is.
				+ */
				+int kvm_irq_map_gsi(struct kvm *kvm,
				+		    struct kvm_kernel_irq_routing_entry *entries,
				+		    int gsi)
				+{
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Identity mapping: the result is simply @pin; @irqchip is ignored.
				+ */
				+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip,
				+			 unsigned int pin)
				+{
				+	return pin;
				+}
			
 
				+
			
 
				+/*
				+ * Inject interrupt line @irq into the VM at level @level.
				+ *
				+ * @irq is offset by VGIC_NR_PRIVATE_IRQS before being handed to
				+ * kvm_vgic_inject_irq() (with cpuid 0).  @line_status is not used.
				+ * The vgic must already be initialized; otherwise this BUGs.
				+ */
				+int kvm_set_irq(struct kvm *kvm, int irq_source_id,
				+		u32 irq, int level, bool line_status)
				+{
				+	unsigned int intid;
				+
				+	trace_kvm_set_irq(irq, level, irq_source_id);
				+
				+	BUG_ON(!vgic_initialized(kvm));
				+
				+	intid = irq + VGIC_NR_PRIVATE_IRQS;
				+
				+	return kvm_vgic_inject_irq(kvm, 0, intid, level);
				+}
			
 
				+
			
 
				+/*
				+ * MSI injection is not implemented yet: every argument is ignored and
				+ * 0 is returned.
				+ */
				+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
				+		struct kvm *kvm, int irq_source_id,
				+		int level, bool line_status)
				+{
				+	return 0;
				+}
			
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -0,0 +1,431 @@
 
				+/*
			
 
				+ * VGIC: KVM DEVICE API
			
 
				+ *
			
 
				+ * Copyright (C) 2015 ARM Ltd.
			
 
				+ * Author: Marc Zyngier <marc.zyngier@arm.com>
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ */
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <kvm/arm_vgic.h>
			
 
				+#include <linux/uaccess.h>
			
 
				+#include <asm/kvm_mmu.h>
			
 
				+#include "vgic.h"
			
 
				+
			
 
				+/* common helpers */
			
 
				+
			
 
				+/*
				+ * Validate a base address that user space wants to assign.
				+ *
				+ * Returns -E2BIG if @addr has bits outside KVM_PHYS_MASK, -EINVAL if it
				+ * is not aligned to @alignment, -EEXIST if *@ioaddr has already been
				+ * set, and 0 if the address may be stored.  The checks are made in
				+ * that order.
				+ */
				+static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
				+			     phys_addr_t addr, phys_addr_t alignment)
				+{
				+	int ret = 0;
				+
				+	if (addr & ~KVM_PHYS_MASK)
				+		ret = -E2BIG;
				+	else if (!IS_ALIGNED(addr, alignment))
				+		ret = -EINVAL;
				+	else if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
				+		ret = -EEXIST;
				+
				+	return ret;
				+}
			
 
				+
			
 
				+/**
				+ * kvm_vgic_addr - set or get vgic VM base addresses
				+ * @kvm:   pointer to the vm struct
				+ * @type:  the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
				+ * @addr:  pointer to address value
				+ * @write: if true set the address in the VM address space, if false read the
				+ *          address
				+ *
				+ * The base addresses of the distributor, the virtual CPU interface and
				+ * (for GICv3) the redistributors live in the VM physical address space.
				+ * They are properties of the emulated core/SoC, so user space is the one
				+ * that knows them and hands them to us.
				+ *
				+ * On a write the value is checked for range, alignment and double
				+ * assignment.  Overlap between regions of a virtual GICv3 cannot be
				+ * checked here because the number of VCPUs is not known yet; that check
				+ * is deferred to map_resources().
				+ *
				+ * Returns 0 on success, -ENODEV if @type is unknown or does not belong
				+ * to the VM's vgic model, or the error from the address check.
				+ */
				+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
				+{
				+	struct vgic_dist *dist = &kvm->arch.vgic;
				+	phys_addr_t *base, align;
				+	int model;
				+	int ret = 0;
				+
				+	mutex_lock(&kvm->lock);
				+
				+	/* Pick the storage slot, alignment and required model for @type. */
				+	switch (type) {
				+	case KVM_VGIC_V2_ADDR_TYPE_DIST:
				+		model = KVM_DEV_TYPE_ARM_VGIC_V2;
				+		base = &dist->vgic_dist_base;
				+		align = SZ_4K;
				+		break;
				+	case KVM_VGIC_V2_ADDR_TYPE_CPU:
				+		model = KVM_DEV_TYPE_ARM_VGIC_V2;
				+		base = &dist->vgic_cpu_base;
				+		align = SZ_4K;
				+		break;
				+#ifdef CONFIG_KVM_ARM_VGIC_V3
				+	case KVM_VGIC_V3_ADDR_TYPE_DIST:
				+		model = KVM_DEV_TYPE_ARM_VGIC_V3;
				+		base = &dist->vgic_dist_base;
				+		align = SZ_64K;
				+		break;
				+	case KVM_VGIC_V3_ADDR_TYPE_REDIST:
				+		model = KVM_DEV_TYPE_ARM_VGIC_V3;
				+		base = &dist->vgic_redist_base;
				+		align = SZ_64K;
				+		break;
				+#endif
				+	default:
				+		ret = -ENODEV;
				+		goto out;
				+	}
				+
				+	if (dist->vgic_model != model) {
				+		ret = -ENODEV;
				+		goto out;
				+	}
				+
				+	/* Reads just report whatever is currently stored. */
				+	if (!write) {
				+		*addr = *base;
				+		goto out;
				+	}
				+
				+	ret = vgic_check_ioaddr(kvm, base, *addr, align);
				+	if (!ret)
				+		*base = *addr;
				+
				+out:
				+	mutex_unlock(&kvm->lock);
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Handle the "set" side of the attribute groups shared by the v2 and v3
				+ * devices: base addresses, number of IRQs and the init control.
				+ *
				+ * Returns -ENXIO for a group/attribute that is not handled here, so
				+ * callers can fall through to model-specific groups.
				+ */
				+static int vgic_set_common_attr(struct kvm_device *dev,
				+				struct kvm_device_attr *attr)
				+{
				+	struct kvm *kvm = dev->kvm;
				+	int ret;
				+
				+	switch (attr->group) {
				+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
				+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
				+		unsigned long type = (unsigned long)attr->attr;
				+		u64 base;
				+
				+		if (copy_from_user(&base, uaddr, sizeof(base)))
				+			return -EFAULT;
				+
				+		/* The device API reports an unknown address type as -ENXIO. */
				+		ret = kvm_vgic_addr(kvm, type, &base, true);
				+		if (ret == -ENODEV)
				+			ret = -ENXIO;
				+		return ret;
				+	}
				+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
				+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
				+		u32 nr_irqs;
				+
				+		if (get_user(nr_irqs, uaddr))
				+			return -EFAULT;
				+
				+		/*
				+		 * We require:
				+		 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
				+		 * - at most 1024 interrupts
				+		 * - a multiple of 32 interrupts
				+		 */
				+		if (nr_irqs < (VGIC_NR_PRIVATE_IRQS + 32) ||
				+		    nr_irqs > VGIC_MAX_RESERVED ||
				+		    (nr_irqs & 31))
				+			return -EINVAL;
				+
				+		mutex_lock(&kvm->lock);
				+
				+		/* The count can be set only once, and only before the vgic is ready. */
				+		if (vgic_ready(kvm) || kvm->arch.vgic.nr_spis) {
				+			ret = -EBUSY;
				+		} else {
				+			kvm->arch.vgic.nr_spis =
				+				nr_irqs - VGIC_NR_PRIVATE_IRQS;
				+			ret = 0;
				+		}
				+
				+		mutex_unlock(&kvm->lock);
				+
				+		return ret;
				+	}
				+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
				+		if (attr->attr != KVM_DEV_ARM_VGIC_CTRL_INIT)
				+			break;
				+
				+		mutex_lock(&kvm->lock);
				+		ret = vgic_init(kvm);
				+		mutex_unlock(&kvm->lock);
				+		return ret;
				+	}
				+
				+	return -ENXIO;
				+}
			
 
				+
			
 
				+/*
				+ * Handle the "get" side of the attribute groups shared by the v2 and v3
				+ * devices: base addresses and number of IRQs.
				+ *
				+ * Returns -ENXIO for a group that is not handled here.
				+ */
				+static int vgic_get_common_attr(struct kvm_device *dev,
				+				struct kvm_device_attr *attr)
				+{
				+	switch (attr->group) {
				+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
				+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
				+		unsigned long type = (unsigned long)attr->attr;
				+		u64 base;
				+		int ret;
				+
				+		ret = kvm_vgic_addr(dev->kvm, type, &base, false);
				+		if (ret == -ENODEV)
				+			return -ENXIO;
				+		if (ret)
				+			return ret;
				+
				+		if (copy_to_user(uaddr, &base, sizeof(base)))
				+			return -EFAULT;
				+
				+		return 0;
				+	}
				+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
				+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
				+
				+		/* Report the total: SPIs plus the private interrupts. */
				+		return put_user(dev->kvm->arch.vgic.nr_spis +
				+				VGIC_NR_PRIVATE_IRQS, uaddr);
				+	}
				+	}
				+
				+	return -ENXIO;
				+}
			
 
				+
			
 
				+/* Device ->create hook: forwards to kvm_vgic_create() for the owning VM. */
				+static int vgic_create(struct kvm_device *dev, u32 type)
				+{
				+	struct kvm *kvm = dev->kvm;
				+
				+	return kvm_vgic_create(kvm, type);
				+}
			
 
				+
			
 
				+/* Device ->destroy hook: frees the kvm_device structure itself. */
				+static void vgic_destroy(struct kvm_device *dev)
				+{
				+	kfree(dev);
				+}
			
 
				+
			
 
				+/*
				+ * Register the KVM device ops matching @type.  Unknown types (and the v3
				+ * type when CONFIG_KVM_ARM_VGIC_V3 is not set) are silently ignored.
				+ */
				+void kvm_register_vgic_device(unsigned long type)
				+{
				+	if (type == KVM_DEV_TYPE_ARM_VGIC_V2) {
				+		kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
				+					KVM_DEV_TYPE_ARM_VGIC_V2);
				+		return;
				+	}
				+
				+#ifdef CONFIG_KVM_ARM_VGIC_V3
				+	if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
				+		kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
				+					KVM_DEV_TYPE_ARM_VGIC_V3);
				+#endif
				+}
			
 
				+
			
 
				+/**
				+ * vgic_attr_regs_access - allows user space to read/write VGIC registers
				+ *
				+ * @dev: kvm device handle
				+ * @attr: kvm device attribute
				+ * @reg: address the value is read or written
				+ * @is_write: write flag
				+ *
				+ * The VCPU index and the register offset are both encoded in
				+ * @attr->attr.  Returns the error from vgic_init() if that fails,
				+ * -EINVAL if the VCPU index is not below the number of online VCPUs or
				+ * the group is not a register group, -EBUSY if the mutex of any VCPU
				+ * cannot be taken, and otherwise the result of the register accessor.
				+ */
				+static int vgic_attr_regs_access(struct kvm_device *dev,
				+				 struct kvm_device_attr *attr,
				+				 u32 *reg, bool is_write)
				+{
				+	gpa_t addr;
				+	int cpuid, ret, c;
				+	struct kvm_vcpu *vcpu, *tmp_vcpu;
				+	int vcpu_lock_idx = -1;
				+
				+	cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
				+		 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
				+	addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
				+
				+	mutex_lock(&dev->kvm->lock);
				+
				+	ret = vgic_init(dev->kvm);
				+	if (ret)
				+		goto out;
				+
				+	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
				+		ret = -EINVAL;
				+		goto out;
				+	}
				+
				+	/*
				+	 * Look the VCPU up only after cpuid has been validated: the index
				+	 * comes straight from user space and must not be used to index
				+	 * the VCPU array before the range check above.
				+	 */
				+	vcpu = kvm_get_vcpu(dev->kvm, cpuid);
				+
				+	/*
				+	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
				+	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
				+	 * that no other VCPUs are run and fiddle with the vgic state while we
				+	 * access it.
				+	 */
				+	ret = -EBUSY;
				+	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
				+		if (!mutex_trylock(&tmp_vcpu->mutex))
				+			goto out;
				+		/* Remember the highest index locked so far for the unwind. */
				+		vcpu_lock_idx = c;
				+	}
				+
				+	switch (attr->group) {
				+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
				+		ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg);
				+		break;
				+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
				+		ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg);
				+		break;
				+	default:
				+		ret = -EINVAL;
				+		break;
				+	}
				+
				+out:
				+	/* Release, in reverse order, exactly the VCPU mutexes we took. */
				+	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
				+		tmp_vcpu = kvm_get_vcpu(dev->kvm, vcpu_lock_idx);
				+		mutex_unlock(&tmp_vcpu->mutex);
				+	}
				+
				+	mutex_unlock(&dev->kvm->lock);
				+	return ret;
				+}
			
 
				+
			
 
				+/* V2 ops */
			
 
				+
			
 
				+/*
				+ * GICv2 ->set_attr: try the common groups first, then handle writes to
				+ * the distributor and CPU interface register groups.
				+ */
				+static int vgic_v2_set_attr(struct kvm_device *dev,
				+			    struct kvm_device_attr *attr)
				+{
				+	u32 __user *uaddr = (u32 __user *)(long)attr->addr;
				+	u32 reg;
				+	int ret;
				+
				+	/* Anything other than -ENXIO means the common code dealt with it. */
				+	ret = vgic_set_common_attr(dev, attr);
				+	if (ret != -ENXIO)
				+		return ret;
				+
				+	if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS &&
				+	    attr->group != KVM_DEV_ARM_VGIC_GRP_CPU_REGS)
				+		return -ENXIO;
				+
				+	if (get_user(reg, uaddr))
				+		return -EFAULT;
				+
				+	return vgic_attr_regs_access(dev, attr, &reg, true);
				+}
			
 
				+
			
 
				+/*
				+ * GICv2 ->get_attr: try the common groups first, then handle reads of
				+ * the distributor and CPU interface register groups.
				+ */
				+static int vgic_v2_get_attr(struct kvm_device *dev,
				+			    struct kvm_device_attr *attr)
				+{
				+	u32 __user *uaddr = (u32 __user *)(long)attr->addr;
				+	u32 reg = 0;
				+	int ret;
				+
				+	/* Anything other than -ENXIO means the common code dealt with it. */
				+	ret = vgic_get_common_attr(dev, attr);
				+	if (ret != -ENXIO)
				+		return ret;
				+
				+	if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS &&
				+	    attr->group != KVM_DEV_ARM_VGIC_GRP_CPU_REGS)
				+		return -ENXIO;
				+
				+	ret = vgic_attr_regs_access(dev, attr, &reg, false);
				+	if (ret)
				+		return ret;
				+
				+	return put_user(reg, uaddr);
				+}
			
 
				+
			
 
				+/*
				+ * GICv2 ->has_attr: returns 0 when the group/attribute pair is supported
				+ * and -ENXIO otherwise.  Register groups are delegated to
				+ * vgic_v2_has_attr_regs().
				+ */
				+static int vgic_v2_has_attr(struct kvm_device *dev,
				+			    struct kvm_device_attr *attr)
				+{
				+	switch (attr->group) {
				+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
				+		if (attr->attr == KVM_VGIC_V2_ADDR_TYPE_DIST ||
				+		    attr->attr == KVM_VGIC_V2_ADDR_TYPE_CPU)
				+			return 0;
				+		break;
				+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
				+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
				+		return vgic_v2_has_attr_regs(dev, attr);
				+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
				+		return 0;
				+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
				+		if (attr->attr == KVM_DEV_ARM_VGIC_CTRL_INIT)
				+			return 0;
				+		break;
				+	}
				+
				+	return -ENXIO;
				+}
			
 
				+
			
 
				+struct kvm_device_ops kvm_arm_vgic_v2_ops = {	/* registered by kvm_register_vgic_device() for KVM_DEV_TYPE_ARM_VGIC_V2 */
				+	.name = "kvm-arm-vgic-v2",
				+	.create = vgic_create,		/* shared with the v3 device */
				+	.destroy = vgic_destroy,	/* shared with the v3 device */
				+	.set_attr = vgic_v2_set_attr,	/* common groups plus DIST/CPU register writes */
				+	.get_attr = vgic_v2_get_attr,	/* common groups plus DIST/CPU register reads */
				+	.has_attr = vgic_v2_has_attr,
				+};
			
 
				+
			
 
				+/* V3 ops */
			
 
				+
			
 
				+#ifdef CONFIG_KVM_ARM_VGIC_V3
			
 
				+
			
 
				+/* GICv3 ->set_attr: only the attribute groups common to v2 and v3 exist. */
				+static int vgic_v3_set_attr(struct kvm_device *dev,
				+			    struct kvm_device_attr *attr)
				+{
				+	int ret = vgic_set_common_attr(dev, attr);
				+
				+	return ret;
				+}
			
 
				+
			
 
				+/* GICv3 ->get_attr: only the attribute groups common to v2 and v3 exist. */
				+static int vgic_v3_get_attr(struct kvm_device *dev,
				+			    struct kvm_device_attr *attr)
				+{
				+	int ret = vgic_get_common_attr(dev, attr);
				+
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * GICv3 ->has_attr: returns 0 when the group/attribute pair is supported
				+ * and -ENXIO otherwise.
				+ */
				+static int vgic_v3_has_attr(struct kvm_device *dev,
				+			    struct kvm_device_attr *attr)
				+{
				+	switch (attr->group) {
				+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
				+		if (attr->attr == KVM_VGIC_V3_ADDR_TYPE_DIST ||
				+		    attr->attr == KVM_VGIC_V3_ADDR_TYPE_REDIST)
				+			return 0;
				+		break;
				+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
				+		return 0;
				+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
				+		if (attr->attr == KVM_DEV_ARM_VGIC_CTRL_INIT)
				+			return 0;
				+		break;
				+	}
				+
				+	return -ENXIO;
				+}
			
 
				+
			
 
				+struct kvm_device_ops kvm_arm_vgic_v3_ops = {	/* registered by kvm_register_vgic_device() for KVM_DEV_TYPE_ARM_VGIC_V3 */
				+	.name = "kvm-arm-vgic-v3",
				+	.create = vgic_create,		/* shared with the v2 device */
				+	.destroy = vgic_destroy,	/* shared with the v2 device */
				+	.set_attr = vgic_v3_set_attr,	/* common attribute groups only */
				+	.get_attr = vgic_v3_get_attr,	/* common attribute groups only */
				+	.has_attr = vgic_v3_has_attr,
				+};
			
 
				+
			
 
				+#endif /* CONFIG_KVM_ARM_VGIC_V3 */
			
 
				+
			
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -0,0 +1,446 @@
 
				+/*
			
 
				+ * VGICv2 MMIO handling functions
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/irqchip/arm-gic.h>
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <kvm/iodev.h>
			
 
				+#include <kvm/arm_vgic.h>
			
 
				+
			
 
				+#include "vgic.h"
			
 
				+#include "vgic-mmio.h"
			
 
				+
			
 
				+/*
				+ * Read handler for the first three distributor registers (control, type
				+ * and identification).  Any other offset in the region reads as zero.
				+ */
				+static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
				+					    gpa_t addr, unsigned int len)
				+{
				+	struct kvm *kvm = vcpu->kvm;
				+	u32 reg = 0;
				+
				+	switch (addr & 0x0c) {
				+	case GIC_DIST_CTRL:
				+		if (kvm->arch.vgic.enabled)
				+			reg = GICD_ENABLE;
				+		break;
				+	case GIC_DIST_CTR:
				+		/* Number of interrupts in units of 32, minus one ... */
				+		reg = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
				+		reg = (reg >> 5) - 1;
				+		/* ... and the number of VCPUs, minus one, from bit 5 up. */
				+		reg |= (atomic_read(&kvm->online_vcpus) - 1) << 5;
				+		break;
				+	case GIC_DIST_IIDR:
				+		reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
				+		break;
				+	}
				+
				+	return reg;
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the first three distributor registers.  Only the
				+ * control register is writable; when the distributor goes from disabled
				+ * to enabled the VCPUs are kicked.
				+ */
				+static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
				+				    gpa_t addr, unsigned int len,
				+				    unsigned long val)
				+{
				+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
				+	bool was_enabled;
				+
				+	/* GIC_DIST_CTR, GIC_DIST_IIDR and anything else: nothing to do. */
				+	if ((addr & 0x0c) != GIC_DIST_CTRL)
				+		return;
				+
				+	was_enabled = dist->enabled;
				+	dist->enabled = val & GICD_ENABLE;
				+
				+	if (!was_enabled && dist->enabled)
				+		vgic_kick_vcpus(vcpu->kvm);
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the software generated interrupt register.
				+ *
				+ * Bits [3:0] of @val select the SGI, bits [23:16] carry an explicit
				+ * target list and bits [25:24] choose how the targets are determined.
				+ * The SGI is marked pending, with @source_vcpu recorded as its source,
				+ * on every targeted VCPU.
				+ */
				+static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
				+				 gpa_t addr, unsigned int len,
				+				 unsigned long val)
				+{
				+	struct kvm *kvm = source_vcpu->kvm;
				+	int nr_vcpus = atomic_read(&kvm->online_vcpus);
				+	int sgi = val & 0xf;
				+	int cpu_mask = (val >> 16) & 0xff;
				+	int filter = (val >> 24) & 0x03;
				+	struct kvm_vcpu *target;
				+	int idx;
				+
				+	switch (filter) {
				+	case 0x0:		/* as specified by targets */
				+		break;
				+	case 0x1:		/* all, but self */
				+		cpu_mask = (1U << nr_vcpus) - 1;
				+		cpu_mask &= ~(1U << source_vcpu->vcpu_id);
				+		break;
				+	case 0x2:		/* this very vCPU only */
				+		cpu_mask = (1U << source_vcpu->vcpu_id);
				+		break;
				+	case 0x3:		/* reserved */
				+		return;
				+	}
				+
				+	kvm_for_each_vcpu(idx, target, kvm) {
				+		struct vgic_irq *irq;
				+
				+		if (cpu_mask & (1U << idx)) {
				+			irq = vgic_get_irq(kvm, target, sgi);
				+
				+			spin_lock(&irq->irq_lock);
				+			irq->pending = true;
				+			irq->source |= 1U << source_vcpu->vcpu_id;
				+
				+			/* Called with irq_lock held, as the name says it unlocks. */
				+			vgic_queue_irq_unlock(kvm, irq);
				+		}
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for the interrupt target registers: one byte per
				+ * interrupt, @len consecutive interrupts starting at the one @addr
				+ * refers to.
				+ */
				+static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
				+					   gpa_t addr, unsigned int len)
				+{
				+	u32 first = VGIC_ADDR_TO_INTID(addr, 8);
				+	u64 targets = 0;
				+	int byte;
				+
				+	for (byte = 0; byte < len; byte++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first + byte);
				+		targets |= (u64)irq->targets << (byte * 8);
				+	}
				+
				+	return targets;
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the interrupt target registers: one byte per
				+ * interrupt, @len consecutive interrupts starting at the one @addr
				+ * refers to.
				+ *
				+ * Target bits that do not correspond to an online VCPU are dropped, so
				+ * that the VCPU chosen as target_vcpu always exists.  When no valid bit
				+ * is left the interrupt is routed to VCPU 0.
				+ */
				+static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
				+				   gpa_t addr, unsigned int len,
				+				   unsigned long val)
				+{
				+	u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
				+	int nr_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
				+	/* One bit per online VCPU; a target byte has room for eight. */
				+	u8 cpu_mask = nr_vcpus >= 8 ? 0xff : (1U << nr_vcpus) - 1;
				+	int i;
				+
				+	/* GICD_ITARGETSR[0-7] are read-only */
				+	if (intid < VGIC_NR_PRIVATE_IRQS)
				+		return;
				+
				+	for (i = 0; i < len; i++) {
				+		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i);
				+		int target;
				+
				+		spin_lock(&irq->irq_lock);
				+
				+		/* Ignore target bits naming VCPUs that do not exist. */
				+		irq->targets = (val >> (i * 8)) & cpu_mask;
				+		target = irq->targets ? __ffs(irq->targets) : 0;
				+		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
				+
				+		spin_unlock(&irq->irq_lock);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Read handler shared by the SGI pending set and clear registers:
				+ * returns one byte of source bits per SGI, for @len consecutive SGIs.
				+ */
				+static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
				+					    gpa_t addr, unsigned int len)
				+{
				+	u32 first = addr & 0x0f;
				+	u64 sources = 0;
				+	int byte;
				+
				+	for (byte = 0; byte < len; byte++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first + byte);
				+		sources |= (u64)irq->source << (byte * 8);
				+	}
				+
				+	return sources;
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the SGI pending clear registers: each byte of @val
				+ * removes source bits from one SGI.  An SGI with no source left is no
				+ * longer pending.
				+ */
				+static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
				+				     gpa_t addr, unsigned int len,
				+				     unsigned long val)
				+{
				+	u32 first = addr & 0x0f;
				+	int byte;
				+
				+	for (byte = 0; byte < len; byte++) {
				+		unsigned long clear = (val >> (byte * 8)) & 0xff;
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first + byte);
				+
				+		spin_lock(&irq->irq_lock);
				+
				+		irq->source &= ~clear;
				+		if (!irq->source)
				+			irq->pending = false;
				+
				+		spin_unlock(&irq->irq_lock);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the SGI pending set registers: each byte of @val
				+ * adds source bits to one SGI.  An SGI that has any source set is made
				+ * pending and queued.
				+ */
				+static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
				+				     gpa_t addr, unsigned int len,
				+				     unsigned long val)
				+{
				+	u32 first = addr & 0x0f;
				+	int byte;
				+
				+	for (byte = 0; byte < len; byte++) {
				+		unsigned long set = (val >> (byte * 8)) & 0xff;
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first + byte);
				+
				+		spin_lock(&irq->irq_lock);
				+
				+		irq->source |= set;
				+
				+		if (!irq->source) {
				+			spin_unlock(&irq->irq_lock);
				+			continue;
				+		}
				+
				+		irq->pending = true;
				+		/* Drops irq_lock on our behalf. */
				+		vgic_queue_irq_unlock(vcpu->kvm, irq);
				+	}
				+}
			
 
				+
			
 
				+/* Write @vmcr using the backend that matches the host GIC type. */
				+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
				+{
				+	if (kvm_vgic_global_state.type != VGIC_V2) {
				+		vgic_v3_set_vmcr(vcpu, vmcr);
				+		return;
				+	}
				+
				+	vgic_v2_set_vmcr(vcpu, vmcr);
				+}
			
 
				+
			
 
				+/* Fill @vmcr using the backend that matches the host GIC type. */
				+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
				+{
				+	if (kvm_vgic_global_state.type != VGIC_V2) {
				+		vgic_v3_get_vmcr(vcpu, vmcr);
				+		return;
				+	}
				+
				+	vgic_v2_get_vmcr(vcpu, vmcr);
				+}
			
 
				+
			
 
				+#define GICC_ARCH_VERSION_V2	0x2
			
 
				+
			
 
				+/*
				+ * The CPU interface handlers are for userland accesses only, there is no
				+ * guest-facing emulation.
				+ *
				+ * Read one CPU interface register out of the VCPU's VMCR state; the
				+ * identification register is a constant.  Unknown offsets read as zero.
				+ */
				+static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
				+					   gpa_t addr, unsigned int len)
				+{
				+	struct vgic_vmcr vmcr;
				+	u32 reg = 0;
				+
				+	vgic_get_vmcr(vcpu, &vmcr);
				+
				+	switch (addr & 0xff) {
				+	case GIC_CPU_CTRL:
				+		reg = vmcr.ctlr;
				+		break;
				+	case GIC_CPU_PRIMASK:
				+		reg = vmcr.pmr;
				+		break;
				+	case GIC_CPU_BINPOINT:
				+		reg = vmcr.bpr;
				+		break;
				+	case GIC_CPU_ALIAS_BINPOINT:
				+		reg = vmcr.abpr;
				+		break;
				+	case GIC_CPU_IDENT:
				+		reg = ((PRODUCT_ID_KVM << 20) |
				+		       (GICC_ARCH_VERSION_V2 << 16) |
				+		       IMPLEMENTER_ARM);
				+		break;
				+	}
				+
				+	return reg;
				+}
			
 
				+
			
 
				+/*
				+ * Write one CPU interface register: fetch the VCPU's VMCR state, update
				+ * the field selected by the offset and write the state back.  For an
				+ * unknown offset the state is written back unchanged.
				+ */
				+static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
				+				   gpa_t addr, unsigned int len,
				+				   unsigned long val)
				+{
				+	struct vgic_vmcr state;
				+	unsigned int offset = addr & 0xff;
				+
				+	vgic_get_vmcr(vcpu, &state);
				+
				+	switch (offset) {
				+	case GIC_CPU_CTRL:
				+		state.ctlr = val;
				+		break;
				+	case GIC_CPU_PRIMASK:
				+		state.pmr = val;
				+		break;
				+	case GIC_CPU_BINPOINT:
				+		state.bpr = val;
				+		break;
				+	case GIC_CPU_ALIAS_BINPOINT:
				+		state.abpr = val;
				+		break;
				+	}
				+
				+	vgic_set_vmcr(vcpu, &state);
				+}
			
 
				+
			
 
				+static const struct vgic_register_region vgic_v2_dist_registers[] = {	/* distributor map: used by init_dist_iodev, has_attr_regs and dist_uaccess */
				+	REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,	/* control, type and IIDR share one 12-byte region */
				+		vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,	/* presumably read-as-ones / write-ignored, going by the handler names */
				+		vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
				+		vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
				+		vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
				+		vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
				+		vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
				+		vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
				+		vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,	/* one byte per interrupt */
				+		vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
				+		VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET,	/* one byte per interrupt */
				+		vgic_mmio_read_target, vgic_mmio_write_target, 8,
				+		VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG,
				+		vgic_mmio_read_config, vgic_mmio_write_config, 2,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT,	/* SGI trigger register, written via vgic_mmio_write_sgir() */
				+		vgic_mmio_read_raz, vgic_mmio_write_sgir, 4,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR,	/* 16 bytes: one source byte per SGI */
				+		vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16,
				+		VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET,	/* 16 bytes: one source byte per SGI */
				+		vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16,
				+		VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
				+};
			
 
				+
			
 
				+static const struct vgic_register_region vgic_v2_cpu_registers[] = {	/* CPU interface map: used by has_attr_regs and cpuif_uaccess */
				+	REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL,
				+		vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK,
				+		vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT,
				+		vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT,
				+		vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO,	/* presumably read-as-zero / write-ignored, going by the handler names */
				+		vgic_mmio_read_raz, vgic_mmio_write_wi, 16,
				+		VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,	/* vgic_mmio_write_vcpuif() has no case for this offset */
				+		vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
				+		VGIC_ACCESS_32bit),
				+};
			
 
				+
			
 
				+/*
				+ * Attach the GICv2 distributor register table to @dev and initialise
				+ * its embedded KVM I/O device.  Returns SZ_4K.
				+ */
				+unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
				+{
				+	dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
				+	dev->regions = vgic_v2_dist_registers;
				+
				+	kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
				+
				+	return SZ_4K;
				+}
			
 
				+
			
 
				+/*
				+ * Tell whether the register offset encoded in @attr->attr falls inside
				+ * one of the regions of the distributor or CPU interface table.
				+ *
				+ * Returns 0 if it does, -ENXIO if the group is not a register group, the
				+ * offset is not 32-bit aligned, or no region covers it.
				+ */
				+int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
				+{
				+	int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
				+	const struct vgic_register_region *table;
				+	gpa_t offset;
				+	int count, idx, span;
				+
				+	offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
				+
				+	switch (attr->group) {
				+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
				+		table = vgic_v2_dist_registers;
				+		count = ARRAY_SIZE(vgic_v2_dist_registers);
				+		break;
				+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
				+		table = vgic_v2_cpu_registers;
				+		count = ARRAY_SIZE(vgic_v2_cpu_registers);
				+		break;
				+	default:
				+		return -ENXIO;
				+	}
				+
				+	/* We only support aligned 32-bit accesses. */
				+	if (offset & 3)
				+		return -ENXIO;
				+
				+	for (idx = 0; idx < count; idx++) {
				+		const struct vgic_register_region *region = &table[idx];
				+
				+		/* Per-IRQ regions scale with the number of interrupts. */
				+		if (!region->bits_per_irq)
				+			span = region->len;
				+		else
				+			span = (region->bits_per_irq * nr_irqs) / 8;
				+
				+		if (region->reg_offset > offset)
				+			continue;
				+		if (region->reg_offset + span > offset)
				+			return 0;
				+	}
				+
				+	return -ENXIO;
				+}
			
 
				+
			
 
				+/*
				+ * Userland accesses go through the same register handlers as guest MMIO.
				+ * To do that we are handed a usable struct vgic_io_device and set up a
				+ * 4-byte buffer the way a guest MMIO access would have, including the
				+ * endian conversions needed on BE systems.
				+ *
				+ * On a write *@val is the value to store; on a successful read *@val
				+ * receives the register value.  Returns what the I/O op returned.
				+ */
				+static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
				+			bool is_write, int offset, u32 *val)
				+{
				+	unsigned int len = 4;
				+	u8 buf[4];
				+	int ret;
				+
				+	if (is_write) {
				+		vgic_data_host_to_mmio_bus(buf, len, *val);
				+		return kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf);
				+	}
				+
				+	ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf);
				+	if (ret)
				+		return ret;
				+
				+	*val = vgic_data_mmio_bus_to_host(buf, len);
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Userland access to a GICv2 CPU interface register of @vcpu, via a
				+ * temporary I/O device that carries the CPU interface register table.
				+ */
				+int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
				+			  int offset, u32 *val)
				+{
				+	struct vgic_io_device iodev = {
				+		.regions = vgic_v2_cpu_registers,
				+		.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers),
				+	};
				+	int ret;
				+
				+	ret = vgic_uaccess(vcpu, &iodev, is_write, offset, val);
				+
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Userland access to a GICv2 distributor register, via a temporary I/O
				+ * device that carries the distributor register table.
				+ */
				+int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
				+			 int offset, u32 *val)
				+{
				+	struct vgic_io_device iodev = {
				+		.regions = vgic_v2_dist_registers,
				+		.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers),
				+	};
				+	int ret;
				+
				+	ret = vgic_uaccess(vcpu, &iodev, is_write, offset, val);
				+
				+	return ret;
				+}
			
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -0,0 +1,455 @@
 
				+/*
			
 
				+ * VGICv3 MMIO handling functions
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/irqchip/arm-gic-v3.h>
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <kvm/iodev.h>
			
 
				+#include <kvm/arm_vgic.h>
			
 
				+
			
 
				+#include <asm/kvm_emulate.h>
			
 
				+
			
 
				+#include "vgic.h"
			
 
				+#include "vgic-mmio.h"
			
 
				+
			
 
				+/*
				+ * Extract @num bytes starting at byte offset @offset from @data.
				+ *
				+ * @data is taken as a u64 so that a 64-bit register image is not
				+ * truncated (and the right shift stays well defined for @offset >= 4)
				+ * on hosts where unsigned long is only 32 bits wide.
				+ */
				+static unsigned long extract_bytes(u64 data, unsigned int offset,
				+				   unsigned int num)
				+{
				+	return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0);
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for the GICD_CTLR / GICD_TYPER / GICD_IIDR words; the
				+ * register is selected by (addr & 0x0c).  Any other word reads as zero.
				+ */
				+static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
				+					    gpa_t addr, unsigned int len)
				+{
				+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
				+	u32 reg;
				+
				+	switch (addr & 0x0c) {
				+	case GICD_CTLR:
				+		/* ARE_NS and DS are always reported, the enable bit follows state */
				+		reg = GICD_CTLR_ARE_NS | GICD_CTLR_DS;
				+		if (dist->enabled)
				+			reg |= GICD_CTLR_ENABLE_SS_G1;
				+		return reg;
				+	case GICD_TYPER:
				+		/* number of interrupts in units of 32, minus one */
				+		reg = dist->nr_spis + VGIC_NR_PRIVATE_IRQS;
				+		reg = (reg >> 5) - 1;
				+		reg |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
				+		return reg;
				+	case GICD_IIDR:
				+		reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
				+		return reg;
				+	default:
				+		return 0;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the GICD_CTLR / GICD_TYPER / GICD_IIDR words.  Only
				+ * GICD_CTLR has an effect: it updates the distributor enable state and
				+ * kicks the VCPUs when the distributor goes from disabled to enabled.
				+ */
				+static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
				+				    gpa_t addr, unsigned int len,
				+				    unsigned long val)
				+{
				+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
				+	bool enabled_before;
				+
				+	/* Writes to anything but GICD_CTLR are ignored. */
				+	if ((addr & 0x0c) != GICD_CTLR)
				+		return;
				+
				+	enabled_before = dist->enabled;
				+	dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
				+
				+	if (dist->enabled && !enabled_before)
				+		vgic_kick_vcpus(vcpu->kvm);
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for GICD_IROUTER<n>: returns the requested bytes of the
				+ * affinity value stored for the interrupt.  Reads as zero when the
				+ * interrupt cannot be looked up or when the upper word is addressed.
				+ */
				+static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
				+					    gpa_t addr, unsigned int len)
				+{
				+	int intid = VGIC_ADDR_TO_INTID(addr, 64);
				+	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
				+
				+	/* Unknown interrupt, or the upper word which is RAZ for us. */
				+	if (!irq || (addr & 4))
				+		return 0;
				+
				+	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for GICD_IROUTER<n>: stores Aff0-Aff2 of the written
				+ * value for the interrupt and re-targets it at the VCPU that
				+ * kvm_mpidr_to_vcpu() returns for that affinity.  Writes are ignored when
				+ * the interrupt cannot be looked up or when the upper word is addressed.
				+ */
				+static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
				+				    gpa_t addr, unsigned int len,
				+				    unsigned long val)
				+{
				+	int intid = VGIC_ADDR_TO_INTID(addr, 64);
				+	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
				+
				+	/*
				+	 * Nothing to do for an unknown interrupt; the upper word is WI
				+	 * since we don't implement Aff3.
				+	 */
				+	if (!irq || (addr & 4))
				+		return;
				+
				+	spin_lock(&irq->irq_lock);
				+
				+	/* Only Aff0, Aff1 and Aff2 are kept. */
				+	irq->mpidr = val & GENMASK(23, 0);
				+	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
				+
				+	spin_unlock(&irq->irq_lock);
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for GICR_TYPER.  The 64-bit register image is built from
				+ * the VCPU's affinity (low 24 bits of the MPIDR affinity, placed at bit
				+ * 32), the low 16 bits of the VCPU id (placed at bit 8) and
				+ * GICR_TYPER_LAST when this VCPU's id equals online_vcpus - 1.  The
				+ * bytes selected by @addr and @len are returned.
				+ */
				+static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
				+					      gpa_t addr, unsigned int len)
				+{
				+	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
				+	int target_vcpu_id = vcpu->vcpu_id;
				+	u64 value;
				+
				+	/*
				+	 * Widen to 64 bits before shifting: unsigned long may be only
				+	 * 32 bits wide, in which case a shift by 32 is undefined.
				+	 */
				+	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
				+	value |= ((target_vcpu_id & 0xffff) << 8);
				+	if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
				+		value |= GICR_TYPER_LAST;
				+
				+	return extract_bytes(value, addr & 7, len);
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for GICR_IIDR: a constant built from PRODUCT_ID_KVM
				+ * (shifted to bit 24) and IMPLEMENTER_ARM.
				+ */
				+static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
				+					     gpa_t addr, unsigned int len)
				+{
				+	const unsigned long iidr = (PRODUCT_ID_KVM << 24) |
				+				   (IMPLEMENTER_ARM << 0);
				+
				+	return iidr;
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for the ID register block.  Only PIDR2 (matched on the
				+ * low 16 address bits) returns a non-zero value; everything else reads
				+ * as zero.
				+ */
				+static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
				+					      gpa_t addr, unsigned int len)
				+{
				+	/* report a GICv3 compliant implementation */
				+	if ((addr & 0xffff) == GICD_PIDR2)
				+		return 0x3b;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
				+ * redistributors, while SPIs are covered by registers in the distributor
				+ * block. Trying to set private IRQs in this block gets ignored.
				+ *
				+ * This macro therefore expands to TWO consecutive table entries:
				+ *  - the part covering the first VGIC_NR_PRIVATE_IRQS interrupts, which
				+ *    is always RAZ/WI, and
				+ *  - the remainder (up to interrupt 1024), starting right behind the
				+ *    private part and served by the @rd / @wr handlers.
				+ *
				+ * @off: offset of the register block
				+ * @rd:  read handler for the non-private part
				+ * @wr:  write handler for the non-private part
				+ * @bpi: number of bits per interrupt in this register
				+ * @acc: permitted access widths (VGIC_ACCESS_* flags)
				+ *
				+ * All arguments are parenthesized so that expressions can be passed in
				+ * safely.
				+ */
				+#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc)	\
				+	{								\
				+		.reg_offset = (off),					\
				+		.bits_per_irq = (bpi),					\
				+		.len = ((bpi) * VGIC_NR_PRIVATE_IRQS) / 8,		\
				+		.access_flags = (acc),					\
				+		.read = vgic_mmio_read_raz,				\
				+		.write = vgic_mmio_write_wi,				\
				+	}, {								\
				+		.reg_offset = (off) + ((bpi) * VGIC_NR_PRIVATE_IRQS) / 8, \
				+		.bits_per_irq = (bpi),					\
				+		.len = ((bpi) * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8,	\
				+		.access_flags = (acc),					\
				+		.read = (rd),						\
				+		.write = (wr),						\
				+	}
			
 
				+
			
 
				+/*
				+ * Register map of the emulated GICv3 distributor.
				+ *
				+ * NOTE(review): keep the entries in their current order.
				+ * vgic_find_mmio_region() looks regions up with bsearch(); presumably
				+ * this table is searched that way and must stay sorted by offset --
				+ * confirm against the dispatch code.
				+ */
				+static const struct vgic_register_region vgic_v3_dist_registers[] = {
				+	/* GICD_CTLR, GICD_TYPER and GICD_IIDR share one 16 byte region */
				+	REGISTER_DESC_WITH_LENGTH(GICD_CTLR, vgic_mmio_read_v3_misc,
				+				  vgic_mmio_write_v3_misc, 16,
				+				  VGIC_ACCESS_32bit),
				+	/* per-interrupt registers, private part RAZ/WI in this block */
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
				+				  vgic_mmio_read_rao, vgic_mmio_write_wi,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
				+				  vgic_mmio_read_enable,
				+				  vgic_mmio_write_senable,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
				+				  vgic_mmio_read_enable,
				+				  vgic_mmio_write_cenable,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
				+				  vgic_mmio_read_pending,
				+				  vgic_mmio_write_spending,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
				+				  vgic_mmio_read_pending,
				+				  vgic_mmio_write_cpending,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
				+				  vgic_mmio_read_active,
				+				  vgic_mmio_write_sactive,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
				+				  vgic_mmio_read_active,
				+				  vgic_mmio_write_cactive,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
				+				  vgic_mmio_read_priority,
				+				  vgic_mmio_write_priority,
				+				  8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR,
				+				  vgic_mmio_read_raz, vgic_mmio_write_wi,
				+				  8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR,
				+				  vgic_mmio_read_config,
				+				  vgic_mmio_write_config,
				+				  2, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
				+				  vgic_mmio_read_raz, vgic_mmio_write_wi,
				+				  1, VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
				+				  vgic_mmio_read_irouter,
				+				  vgic_mmio_write_irouter,
				+				  64, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
				+	/* ID registers, read-only */
				+	REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, vgic_mmio_read_v3_idregs,
				+				  vgic_mmio_write_wi, 48,
				+				  VGIC_ACCESS_32bit),
				+};
			
 
				+
			
 
				+/*
				+ * Register map of the first redistributor frame (RD_base).  Only
				+ * GICR_IIDR, GICR_TYPER and the ID registers return data; the other
				+ * entries are RAZ/WI.
				+ *
				+ * NOTE(review): keep the entry order; lookup presumably relies on the
				+ * table being sorted by offset (see vgic_find_mmio_region()) -- confirm.
				+ */
				+static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
				+	REGISTER_DESC_WITH_LENGTH(GICR_CTLR, vgic_mmio_read_raz,
				+				  vgic_mmio_write_wi, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr,
				+				  vgic_mmio_write_wi, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_TYPER, vgic_mmio_read_v3r_typer,
				+				  vgic_mmio_write_wi, 8,
				+				  VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, vgic_mmio_read_raz,
				+				  vgic_mmio_write_wi, 8,
				+				  VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, vgic_mmio_read_raz,
				+				  vgic_mmio_write_wi, 8,
				+				  VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, vgic_mmio_read_v3_idregs,
				+				  vgic_mmio_write_wi, 48,
				+				  VGIC_ACCESS_32bit),
				+};
			
 
				+
			
 
				+/*
				+ * Register map of the second redistributor frame (SGI_base), holding the
				+ * per-interrupt registers handled in the redistributor.
				+ *
				+ * NOTE(review): keep the entry order; lookup presumably relies on the
				+ * table being sorted by offset (see vgic_find_mmio_region()) -- confirm.
				+ */
				+static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
				+	REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0, vgic_mmio_read_rao,
				+				  vgic_mmio_write_wi, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0, vgic_mmio_read_enable,
				+				  vgic_mmio_write_senable, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, vgic_mmio_read_enable,
				+				  vgic_mmio_write_cenable, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0, vgic_mmio_read_pending,
				+				  vgic_mmio_write_spending, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0, vgic_mmio_read_pending,
				+				  vgic_mmio_write_cpending, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, vgic_mmio_read_active,
				+				  vgic_mmio_write_sactive, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, vgic_mmio_read_active,
				+				  vgic_mmio_write_cactive, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, vgic_mmio_read_priority,
				+				  vgic_mmio_write_priority, 32,
				+				  VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0, vgic_mmio_read_config,
				+				  vgic_mmio_write_config, 8,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0, vgic_mmio_read_raz,
				+				  vgic_mmio_write_wi, 4,
				+				  VGIC_ACCESS_32bit),
				+	REGISTER_DESC_WITH_LENGTH(GICR_NSACR, vgic_mmio_read_raz,
				+				  vgic_mmio_write_wi, 4,
				+				  VGIC_ACCESS_32bit),
				+};
			
 
				+
			
 
				+/*
				+ * Set @dev up as the GICv3 distributor I/O device: attach the GIC I/O
				+ * ops and the distributor register table.
				+ *
				+ * Returns SZ_64K.
				+ */
				+unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
				+{
				+	kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
				+
				+	dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
				+	dev->regions = vgic_v3_dist_registers;
				+
				+	return SZ_64K;
				+}
			
 
				+
			
 
				+/*
				+ * Register the redistributor MMIO devices for every VCPU of @kvm.
				+ *
				+ * Each VCPU gets two consecutive 64K frames starting at
				+ * @redist_base_address + index * 128K: the RD_base frame followed by the
				+ * SGI_base frame.  Both are backed by entries of one array holding two
				+ * struct vgic_io_device per VCPU, which is stored in
				+ * kvm->arch.vgic.redist_iodevs on success.
				+ *
				+ * Returns 0 on success, -ENOMEM if the array cannot be allocated, or the
				+ * error returned by kvm_io_bus_register_dev().  On failure every device
				+ * registered so far is unregistered again and the array is freed.
				+ *
				+ * Bus registration and unregistration are both done under
				+ * kvm->slots_lock.
				+ */
				+int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
				+{
				+	int nr_vcpus = atomic_read(&kvm->online_vcpus);
				+	struct kvm_vcpu *vcpu;
				+	struct vgic_io_device *devices;
				+	int c, ret = 0;
				+
				+	devices = kmalloc(sizeof(struct vgic_io_device) * nr_vcpus * 2,
				+			  GFP_KERNEL);
				+	if (!devices)
				+		return -ENOMEM;
				+
				+	kvm_for_each_vcpu(c, vcpu, kvm) {
				+		gpa_t rd_base = redist_base_address + c * SZ_64K * 2;
				+		gpa_t sgi_base = rd_base + SZ_64K;
				+		struct vgic_io_device *rd_dev = &devices[c * 2];
				+		struct vgic_io_device *sgi_dev = &devices[c * 2 + 1];
				+
				+		kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
				+		rd_dev->base_addr = rd_base;
				+		rd_dev->regions = vgic_v3_rdbase_registers;
				+		rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
				+		rd_dev->redist_vcpu = vcpu;
				+
				+		mutex_lock(&kvm->slots_lock);
				+		ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
				+					      SZ_64K, &rd_dev->dev);
				+		mutex_unlock(&kvm->slots_lock);
				+
				+		if (ret)
				+			break;
				+
				+		kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
				+		sgi_dev->base_addr = sgi_base;
				+		sgi_dev->regions = vgic_v3_sgibase_registers;
				+		sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
				+		sgi_dev->redist_vcpu = vcpu;
				+
				+		mutex_lock(&kvm->slots_lock);
				+		ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
				+					      SZ_64K, &sgi_dev->dev);
				+		if (ret) {
				+			/*
				+			 * Undo the RD_base registration of this VCPU
				+			 * while still holding the slots_lock.
				+			 */
				+			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
				+						  &rd_dev->dev);
				+			mutex_unlock(&kvm->slots_lock);
				+			break;
				+		}
				+		mutex_unlock(&kvm->slots_lock);
				+	}
				+
				+	if (ret) {
				+		/* The current c failed, so we start with the previous one. */
				+		mutex_lock(&kvm->slots_lock);
				+		for (c--; c >= 0; c--) {
				+			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
				+						  &devices[c * 2].dev);
				+			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
				+						  &devices[c * 2 + 1].dev);
				+		}
				+		mutex_unlock(&kvm->slots_lock);
				+		kfree(devices);
				+	} else {
				+		kvm->arch.vgic.redist_iodevs = devices;
				+	}
				+
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Check whether @vcpu is addressed by an SGI request.
				+ *
				+ * @sgi_aff:      affinity levels 1-3 taken from ICC_SGI1R_EL1
				+ * @sgi_cpu_mask: level 0 target bitmask taken from ICC_SGI1R_EL1
				+ * @vcpu:         the VCPU to compare against
				+ *
				+ * Returns the VCPU's level 0 affinity if its upper affinity levels equal
				+ * @sgi_aff and its level 0 bit is set in @sgi_cpu_mask, -1 otherwise.
				+ */
				+static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
				+{
				+	/* Split the VCPU's MPIDR into level 0 and the remaining levels. */
				+	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
				+	int aff0 = MPIDR_AFFINITY_LEVEL(mpidr, 0);
				+	unsigned long upper = mpidr & ~MPIDR_LEVEL_MASK;
				+
				+	/* The upper three levels have to match exactly. */
				+	if (sgi_aff != upper)
				+		return -1;
				+
				+	/* Report level 0 only if this VCPU's bit is set in the mask. */
				+	return (sgi_cpu_mask & BIT(aff0)) ? aff0 : -1;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * The ICC_SGI* registers encode the affinity differently from the MPIDR,
			
 
				+ * so provide a wrapper to use the existing defines to isolate a certain
			
 
				+ * affinity level.
			
 
				+ *
			
 
				+ * @reg:   raw SGI register value
			
 
				+ * @level: affinity level number; it is token-pasted into the
			
 
				+ *         ICC_SGI1R_AFFINITY_<level>_MASK and _SHIFT names
			
 
				+ *
			
 
				+ * Evaluates to that field of @reg, moved to bit position
			
 
				+ * MPIDR_LEVEL_SHIFT(level).
			
 
				+ */
			
 
				+#define SGI_AFFINITY_LEVEL(reg, level) \
			
 
				+	((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
			
 
				+	>> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
			
 
				+
			
 
				+/**
				+ * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
				+ * @vcpu: The VCPU requesting a SGI
				+ * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
				+ *
				+ * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
				+ * This will trap in sys_regs.c and call this function.
				+ * The ICC_SGI1R_EL1 value carries the upper three affinity levels of the
				+ * target processors plus a bitmask of 16 Aff0 CPUs.
				+ * With the interrupt routing mode bit clear, every VCPU is compared
				+ * against that affinity and mask. With the bit set, all VCPUs except the
				+ * calling one are signalled.
				+ */
				+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
				+{
				+	struct kvm *kvm = vcpu->kvm;
				+	struct kvm_vcpu *c_vcpu;
				+	u16 target_cpus;
				+	u64 mpidr;
				+	int sgi, c;
				+	int vcpu_id = vcpu->vcpu_id;
				+	bool broadcast;
				+
				+	sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
				+	broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
				+	target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
				+	mpidr = SGI_AFFINITY_LEVEL(reg, 3);
				+	mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
				+	mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
				+
				+	/*
				+	 * Walk all VCPUs looking for MPIDRs that match the request.  In
				+	 * the targeted case each handled CPU is removed from the mask, so
				+	 * the walk can stop as soon as the mask is empty; most of the
				+	 * time only a single VCPU is signalled.
				+	 */
				+	kvm_for_each_vcpu(c, c_vcpu, kvm) {
				+		struct vgic_irq *irq;
				+
				+		if (broadcast) {
				+			/* Don't signal the calling VCPU */
				+			if (c == vcpu_id)
				+				continue;
				+		} else {
				+			int level0;
				+
				+			/* All requested CPUs have been dealt with */
				+			if (target_cpus == 0)
				+				break;
				+
				+			level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
				+			if (level0 == -1)
				+				continue;
				+
				+			/* remove this matching VCPU from the mask */
				+			target_cpus &= ~BIT(level0);
				+		}
				+
				+		irq = vgic_get_irq(kvm, c_vcpu, sgi);
				+
				+		spin_lock(&irq->irq_lock);
				+		irq->pending = true;
				+
				+		/* also releases the irq_lock taken above */
				+		vgic_queue_irq_unlock(kvm, irq);
				+	}
				+}
			
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -0,0 +1,526 @@
 
				+/*
			
 
				+ * VGIC MMIO handling functions
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/bitops.h>
			
 
				+#include <linux/bsearch.h>
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <kvm/iodev.h>
			
 
				+#include <kvm/arm_vgic.h>
			
 
				+
			
 
				+#include "vgic.h"
			
 
				+#include "vgic-mmio.h"
			
 
				+
			
 
				+/* Read handler for read-as-zero registers: always returns 0. */
				+unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
				+				 gpa_t addr, unsigned int len)
				+{
				+	return 0UL;
				+}
			
 
				+
			
 
				+/* Read handler for read-as-ones registers: returns all bits set. */
				+unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
				+				 gpa_t addr, unsigned int len)
				+{
				+	return ~0UL;
				+}
			
 
				+
			
 
				+/* Write handler for write-ignored registers. */
				+void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
				+			unsigned int len, unsigned long val)
				+{
				+	/* Intentionally empty: the written value is dropped. */
				+}
			
 
				+
			
 
				+/*
				+ * Both GICD_ISENABLER and GICD_ICENABLER read back the current enable
				+ * bits, so a single read handler serves the two registers.  Bit n of the
				+ * result reflects the enabled state of the n-th interrupt covered by the
				+ * access.
				+ */
				+unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
				+				    gpa_t addr, unsigned int len)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	const unsigned int nr_irqs = len * 8;
				+	u32 enabled_mask = 0;
				+	unsigned int bit;
				+
				+	/* One bit per interrupt touched by this read */
				+	for (bit = 0; bit < nr_irqs; bit++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);
				+		if (irq->enabled)
				+			enabled_mask |= 1U << bit;
				+	}
				+
				+	return enabled_mask;
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the set-enable registers: every bit set in @val
				+ * enables the corresponding interrupt and hands it to
				+ * vgic_queue_irq_unlock(), which also drops the irq_lock.
				+ */
				+void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
				+			     gpa_t addr, unsigned int len,
				+			     unsigned long val)
				+{
				+	struct kvm *kvm = vcpu->kvm;
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	int bit;
				+
				+	for_each_set_bit(bit, &val, len * 8) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(kvm, vcpu, first_intid + bit);
				+
				+		spin_lock(&irq->irq_lock);
				+		irq->enabled = true;
				+		vgic_queue_irq_unlock(kvm, irq);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the clear-enable registers: every bit set in @val
				+ * disables the corresponding interrupt, under its irq_lock.
				+ */
				+void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
				+			     gpa_t addr, unsigned int len,
				+			     unsigned long val)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	int bit;
				+
				+	for_each_set_bit(bit, &val, len * 8) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);
				+
				+		spin_lock(&irq->irq_lock);
				+		irq->enabled = false;
				+		spin_unlock(&irq->irq_lock);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for the pending registers: bit n of the result reflects
				+ * the pending state of the n-th interrupt covered by the access.
				+ */
				+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
				+				     gpa_t addr, unsigned int len)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	const unsigned int nr_irqs = len * 8;
				+	u32 pending_mask = 0;
				+	unsigned int bit;
				+
				+	/* One bit per interrupt touched by this read */
				+	for (bit = 0; bit < nr_irqs; bit++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);
				+		if (irq->pending)
				+			pending_mask |= 1U << bit;
				+	}
				+
				+	return pending_mask;
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the set-pending registers: every bit set in @val
				+ * marks the corresponding interrupt pending.  For level configured
				+ * interrupts the software pending latch is set as well.  The interrupt is
				+ * then handed to vgic_queue_irq_unlock(), which also drops the irq_lock.
				+ */
				+void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
				+			      gpa_t addr, unsigned int len,
				+			      unsigned long val)
				+{
				+	struct kvm *kvm = vcpu->kvm;
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	int bit;
				+
				+	for_each_set_bit(bit, &val, len * 8) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(kvm, vcpu, first_intid + bit);
				+
				+		spin_lock(&irq->irq_lock);
				+
				+		irq->pending = true;
				+		if (irq->config == VGIC_CONFIG_LEVEL)
				+			irq->soft_pending = true;
				+
				+		vgic_queue_irq_unlock(kvm, irq);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the clear-pending registers: every bit set in @val
				+ * clears the pending state of the corresponding interrupt.  For level
				+ * configured interrupts only the software pending latch is dropped and
				+ * the pending state falls back to the current line level.
				+ */
				+void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
				+			      gpa_t addr, unsigned int len,
				+			      unsigned long val)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	int bit;
				+
				+	for_each_set_bit(bit, &val, len * 8) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);
				+
				+		spin_lock(&irq->irq_lock);
				+
				+		if (irq->config != VGIC_CONFIG_LEVEL) {
				+			irq->pending = false;
				+		} else {
				+			irq->soft_pending = false;
				+			irq->pending = irq->line_level;
				+		}
				+
				+		spin_unlock(&irq->irq_lock);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for the active registers: bit n of the result reflects
				+ * the active state of the n-th interrupt covered by the access.
				+ */
				+unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
				+				    gpa_t addr, unsigned int len)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	const unsigned int nr_irqs = len * 8;
				+	u32 active_mask = 0;
				+	unsigned int bit;
				+
				+	/* One bit per interrupt touched by this read */
				+	for (bit = 0; bit < nr_irqs; bit++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);
				+		if (irq->active)
				+			active_mask |= 1U << bit;
				+	}
				+
				+	return active_mask;
				+}
			
 
				+
			
 
				+/*
				+ * Set the active state of @irq to @new_active_state.  When the interrupt
				+ * becomes active it is handed to vgic_queue_irq_unlock(), which also
				+ * drops the irq_lock; otherwise the lock is simply released.
				+ */
				+static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
				+				    bool new_active_state)
				+{
				+	spin_lock(&irq->irq_lock);
				+
				+	/*
				+	 * A virtual IRQ that was written into a list register may still
				+	 * have state there which the CPU running the VCPU thread has not
				+	 * yet synced back into the struct vgic_irq.  We can only be sure
				+	 * that this has happened once the IRQ is no longer on anyone's AP
				+	 * list, or once the VCPU thread is not running on any CPU.
				+	 *
				+	 * Until then the VCPU thread may be on its way back from the
				+	 * guest, so drop and re-take the spin_lock to give it the chance
				+	 * to sync the IRQ back.
				+	 */
				+	while (irq->vcpu &&		/* may have state in an LR somewhere */
				+	       irq->vcpu->cpu != -1) {	/* VCPU thread is running */
				+		BUG_ON(irq->intid < VGIC_NR_PRIVATE_IRQS);
				+		cond_resched_lock(&irq->irq_lock);
				+	}
				+
				+	irq->active = new_active_state;
				+
				+	if (!new_active_state) {
				+		spin_unlock(&irq->irq_lock);
				+		return;
				+	}
				+
				+	vgic_queue_irq_unlock(vcpu->kvm, irq);
				+}
			
 
				+
			
 
				+/*
				+ * Before touching an IRQ's active state we must be sure that the IRQ is
				+ * not sitting in the LRs of a running VCPU, as the state synced back on
				+ * guest exit could otherwise overwrite our change.
				+ *
				+ * Shared interrupts can migrate between VCPUs while we do not hold the
				+ * IRQ locks, so rather than chasing a moving target the whole guest is
				+ * halted.
				+ *
				+ * A private interrupt can only ever be queued on its own VCPU, so
				+ * halting that single VCPU is sufficient.
				+ */
				+static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
				+{
				+	if (intid >= VGIC_NR_PRIVATE_IRQS) {
				+		kvm_arm_halt_guest(vcpu->kvm);
				+		return;
				+	}
				+
				+	kvm_arm_halt_vcpu(vcpu);
				+}
			
 
				+
			
 
				+/*
				+ * Counterpart of vgic_change_active_prepare(): resume the single VCPU
				+ * for a private interrupt, or the whole guest for a shared one.
				+ */
				+static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
				+{
				+	if (intid >= VGIC_NR_PRIVATE_IRQS) {
				+		kvm_arm_resume_guest(vcpu->kvm);
				+		return;
				+	}
				+
				+	kvm_arm_resume_vcpu(vcpu);
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the clear-active registers: every bit set in @val
				+ * deactivates the corresponding interrupt.  The affected VCPU (or the
				+ * whole guest) is halted around the update, see
				+ * vgic_change_active_prepare().
				+ */
				+void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
				+			     gpa_t addr, unsigned int len,
				+			     unsigned long val)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	int bit;
				+
				+	vgic_change_active_prepare(vcpu, first_intid);
				+
				+	for_each_set_bit(bit, &val, len * 8) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);
				+		vgic_mmio_change_active(vcpu, irq, false);
				+	}
				+
				+	vgic_change_active_finish(vcpu, first_intid);
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the set-active registers: every bit set in @val
				+ * activates the corresponding interrupt.  The affected VCPU (or the
				+ * whole guest) is halted around the update, see
				+ * vgic_change_active_prepare().
				+ */
				+void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
				+			     gpa_t addr, unsigned int len,
				+			     unsigned long val)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
				+	int bit;
				+
				+	vgic_change_active_prepare(vcpu, first_intid);
				+
				+	for_each_set_bit(bit, &val, len * 8) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);
				+		vgic_mmio_change_active(vcpu, irq, true);
				+	}
				+
				+	vgic_change_active_finish(vcpu, first_intid);
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for the priority registers, which hold one byte per
				+ * interrupt: byte n of the result is the priority of the n-th interrupt
				+ * covered by the access.
				+ */
				+unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
				+				      gpa_t addr, unsigned int len)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 8);
				+	u64 prios = 0;
				+	unsigned int byte;
				+
				+	for (byte = 0; byte < len; byte++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + byte);
				+		prios |= (u64)irq->priority << (byte * 8);
				+	}
				+
				+	return prios;
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the priority registers, one byte per interrupt.
				+ *
				+ * Changing the priority of an interrupt that is already pending on a
				+ * VCPU is currently not handled. Supporting it would mean making that
				+ * VCPU exit and re-evaluate the priorities, which could lead to the
				+ * interrupt being presented to the guest now (if the priority mask had
				+ * been hiding it before).
				+ */
				+void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
				+			      gpa_t addr, unsigned int len,
				+			      unsigned long val)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 8);
				+	unsigned int byte;
				+
				+	for (byte = 0; byte < len; byte++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + byte);
				+
				+		spin_lock(&irq->irq_lock);
				+		/* Keep only the priority bits we actually implement */
				+		irq->priority = (val >> (byte * 8)) &
				+				GENMASK(7, 8 - VGIC_PRI_BITS);
				+		spin_unlock(&irq->irq_lock);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Read handler for the configuration registers, which hold two bits per
				+ * interrupt: the upper bit of each pair is set for edge configured
				+ * interrupts and clear otherwise.
				+ */
				+unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
				+				    gpa_t addr, unsigned int len)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 2);
				+	const unsigned int nr_irqs = len * 4;
				+	u32 cfg = 0;
				+	unsigned int n;
				+
				+	for (n = 0; n < nr_irqs; n++) {
				+		struct vgic_irq *irq;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + n);
				+		if (irq->config == VGIC_CONFIG_EDGE)
				+			cfg |= 2U << (n * 2);
				+	}
				+
				+	return cfg;
				+}
			
 
				+
			
 
				+/*
				+ * Write handler for the configuration registers, two bits per interrupt.
				+ * The upper bit of each pair selects edge (set) or level (clear).  When
				+ * an interrupt is switched to level, its pending state is recomputed
				+ * from the line level and the software pending latch.
				+ */
				+void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
				+			    gpa_t addr, unsigned int len,
				+			    unsigned long val)
				+{
				+	const u32 first_intid = VGIC_ADDR_TO_INTID(addr, 2);
				+	unsigned int n;
				+
				+	for (n = 0; n < len * 4; n++) {
				+		const u32 intid = first_intid + n;
				+		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
				+		bool edge;
				+
				+		/*
				+		 * SGIs can never be reconfigured, and for PPIs this is
				+		 * IMPLEMENTATION DEFINED. Since the arch timer code
				+		 * depends on PPIs being level triggered, all private
				+		 * interrupts are treated as read-only here.
				+		 */
				+		if (intid < VGIC_NR_PRIVATE_IRQS)
				+			continue;
				+
				+		edge = test_bit(n * 2 + 1, &val);
				+
				+		spin_lock(&irq->irq_lock);
				+		if (edge) {
				+			irq->config = VGIC_CONFIG_EDGE;
				+		} else {
				+			irq->config = VGIC_CONFIG_LEVEL;
				+			irq->pending = irq->line_level | irq->soft_pending;
				+		}
				+		spin_unlock(&irq->irq_lock);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * bsearch() comparison callback.  @key is not a pointer but the offset
				+ * itself, cast to a pointer; @elt points to a struct
				+ * vgic_register_region.
				+ *
				+ * Returns -1 if the offset lies below the region, 1 if it lies at or
				+ * beyond its end, and 0 if the region contains the offset.
				+ */
				+static int match_region(const void *key, const void *elt)
				+{
				+	const struct vgic_register_region *region = elt;
				+	const unsigned int offset = (unsigned long)key;
				+
				+	if (offset < region->reg_offset)
				+		return -1;
				+
				+	return (offset >= region->reg_offset + region->len) ? 1 : 0;
				+}
			
 
				+
			
 
				+/* Find the proper register handler entry given a certain address offset. */
			
 
				+static const struct vgic_register_region *
			
 
				+vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
			
 
				+		      unsigned int offset)
			
 
				+{
			
 
				+	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
			
 
				+		       sizeof(region[0]), match_region);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * kvm_mmio_read_buf() returns a value in a format where it can be converted
			
 
				+ * to a byte array and be directly observed as the guest wanted it to appear
			
 
				+ * in memory if it had done the store itself, which is LE for the GIC, as the
			
 
				+ * guest knows the GIC is always LE.
			
 
				+ *
			
 
				+ * We convert this value to the CPU's native format to deal with it as a data
			
 
				+ * value.
			
 
				+ */
			
 
				+unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
			
 
				+{
			
 
				+	unsigned long data = kvm_mmio_read_buf(val, len);
			
 
				+
			
 
				+	switch (len) {
			
 
				+	case 1:
			
 
				+		return data;
			
 
				+	case 2:
			
 
				+		return le16_to_cpu(data);
			
 
				+	case 4:
			
 
				+		return le32_to_cpu(data);
			
 
				+	default:
			
 
				+		return le64_to_cpu(data);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * kvm_mmio_write_buf() expects a value in a format such that if converted to
			
 
				+ * a byte array it is observed as the guest would see it if it could perform
			
 
				+ * the load directly.  Since the GIC is LE, and the guest knows this, the
			
 
				+ * guest expects a value in little endian format.
			
 
				+ *
			
 
				+ * We convert the data value from the CPU's native format to LE so that the
			
 
				+ * value is returned in the proper format.
			
 
				+ */
			
 
				+void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
			
 
				+				unsigned long data)
			
 
				+{
			
 
				+	switch (len) {
			
 
				+	case 1:
			
 
				+		break;
			
 
				+	case 2:
			
 
				+		data = cpu_to_le16(data);
			
 
				+		break;
			
 
				+	case 4:
			
 
				+		data = cpu_to_le32(data);
			
 
				+		break;
			
 
				+	default:
			
 
				+		data = cpu_to_le64(data);
			
 
				+	}
			
 
				+
			
 
				+	kvm_mmio_write_buf(buf, len, data);
			
 
				+}
			
 
				+
			
 
				+static
			
 
				+struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
			
 
				+{
			
 
				+	return container_of(dev, struct vgic_io_device, dev);
			
 
				+}
			
 
				+
			
 
				+static bool check_region(const struct vgic_register_region *region,
			
 
				+			 gpa_t addr, int len)
			
 
				+{
			
 
				+	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
			
 
				+		return true;
			
 
				+	if ((region->access_flags & VGIC_ACCESS_32bit) &&
			
 
				+	    len == sizeof(u32) && !(addr & 3))
			
 
				+		return true;
			
 
				+	if ((region->access_flags & VGIC_ACCESS_64bit) &&
			
 
				+	    len == sizeof(u64) && !(addr & 7))
			
 
				+		return true;
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
			
 
				+			      gpa_t addr, int len, void *val)
			
 
				+{
			
 
				+	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
			
 
				+	const struct vgic_register_region *region;
			
 
				+	struct kvm_vcpu *r_vcpu;
			
 
				+	unsigned long data;
			
 
				+
			
 
				+	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
			
 
				+				       addr - iodev->base_addr);
			
 
				+	if (!region || !check_region(region, addr, len)) {
			
 
				+		memset(val, 0, len);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
			
 
				+	data = region->read(r_vcpu, addr, len);
			
 
				+	vgic_data_host_to_mmio_bus(val, len, data);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
			
 
				+			       gpa_t addr, int len, const void *val)
			
 
				+{
			
 
				+	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
			
 
				+	const struct vgic_register_region *region;
			
 
				+	struct kvm_vcpu *r_vcpu;
			
 
				+	unsigned long data = vgic_data_mmio_bus_to_host(val, len);
			
 
				+
			
 
				+	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
			
 
				+				       addr - iodev->base_addr);
			
 
				+	if (!region)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (!check_region(region, addr, len))
			
 
				+		return 0;
			
 
				+
			
 
				+	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
			
 
				+	region->write(r_vcpu, addr, len, data);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+struct kvm_io_device_ops kvm_io_gic_ops = {
			
 
				+	.read = dispatch_mmio_read,
			
 
				+	.write = dispatch_mmio_write,
			
 
				+};
			
 
				+
			
 
				+int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
			
 
				+			     enum vgic_type type)
			
 
				+{
			
 
				+	struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
			
 
				+	int ret = 0;
			
 
				+	unsigned int len;
			
 
				+
			
 
				+	switch (type) {
			
 
				+	case VGIC_V2:
			
 
				+		len = vgic_v2_init_dist_iodev(io_device);
			
 
				+		break;
			
 
				+#ifdef CONFIG_KVM_ARM_VGIC_V3
			
 
				+	case VGIC_V3:
			
 
				+		len = vgic_v3_init_dist_iodev(io_device);
			
 
				+		break;
			
 
				+#endif
			
 
				+	default:
			
 
				+		BUG_ON(1);
			
 
				+	}
			
 
				+
			
 
				+	io_device->base_addr = dist_base_address;
			
 
				+	io_device->redist_vcpu = NULL;
			
 
				+
			
 
				+	mutex_lock(&kvm->slots_lock);
			
 
				+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
			
 
				+				      len, &io_device->dev);
			
 
				+	mutex_unlock(&kvm->slots_lock);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -0,0 +1,150 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, 2016 ARM Ltd.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+#ifndef __KVM_ARM_VGIC_MMIO_H__
			
 
				+#define __KVM_ARM_VGIC_MMIO_H__
			
 
				+
			
 
				+struct vgic_register_region {
			
 
				+	unsigned int reg_offset;
			
 
				+	unsigned int len;
			
 
				+	unsigned int bits_per_irq;
			
 
				+	unsigned int access_flags;
			
 
				+	unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
			
 
				+			      unsigned int len);
			
 
				+	void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
			
 
				+		      unsigned long val);
			
 
				+};
			
 
				+
			
 
				+extern struct kvm_io_device_ops kvm_io_gic_ops;
			
 
				+
			
 
				+#define VGIC_ACCESS_8bit	1
			
 
				+#define VGIC_ACCESS_32bit	2
			
 
				+#define VGIC_ACCESS_64bit	4
			
 
				+
			
 
				+/*
			
 
				+ * Generate a mask that covers the number of bytes required to address
			
 
				+ * up to 1024 interrupts, each represented by <bits> bits. This assumes
			
 
				+ * that <bits> is a power of two.
			
 
				+ */
			
 
				+#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
			
 
				+
			
 
				+/*
			
 
				+ * (addr & mask) gives us the byte offset for the INT ID, so we want to
			
 
				+ * divide this by 'bytes per irq' to get the INT ID, which is given
			
 
				+ * by '(bits) / 8'.  But we do this with fixed-point-arithmetic and
			
 
				+ * take advantage of the fact that division by a fraction equals
			
 
				+ * multiplication with the inverted fraction, and scale up both the
			
 
				+ * numerator and denominator with 8 to support at most 64 bits per IRQ:
			
 
				+ */
			
 
				+#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
			
 
				+					64 / (bits) / 8)
			
 
				+
			
 
				+/*
			
 
				+ * Some VGIC registers store per-IRQ information, with a different number
			
 
				+ * of bits per IRQ. For those registers this macro is used.
			
 
				+ * The _WITH_LENGTH version instantiates registers with a fixed length
			
 
				+ * and is mutually exclusive with the _WITH_BITS_PER_IRQ version.
			
 
				+ */
			
 
				+#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc)		\
			
 
				+	{								\
			
 
				+		.reg_offset = off,					\
			
 
				+		.bits_per_irq = bpi,					\
			
 
				+		.len = bpi * 1024 / 8,					\
			
 
				+		.access_flags = acc,					\
			
 
				+		.read = rd,						\
			
 
				+		.write = wr,						\
			
 
				+	}
			
 
				+
			
 
				+#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc)		\
			
 
				+	{								\
			
 
				+		.reg_offset = off,					\
			
 
				+		.bits_per_irq = 0,					\
			
 
				+		.len = length,						\
			
 
				+		.access_flags = acc,					\
			
 
				+		.read = rd,						\
			
 
				+		.write = wr,						\
			
 
				+	}
			
 
				+
			
 
				+int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
			
 
				+				  struct vgic_register_region *reg_desc,
			
 
				+				  struct vgic_io_device *region,
			
 
				+				  int nr_irqs, bool offset_private);
			
 
				+
			
 
				+unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
			
 
				+
			
 
				+void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
			
 
				+				unsigned long data);
			
 
				+
			
 
				+unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
			
 
				+				 gpa_t addr, unsigned int len);
			
 
				+
			
 
				+unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
			
 
				+				 gpa_t addr, unsigned int len);
			
 
				+
			
 
				+void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
			
 
				+			unsigned int len, unsigned long val);
			
 
				+
			
 
				+unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
			
 
				+				    gpa_t addr, unsigned int len);
			
 
				+
			
 
				+void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
			
 
				+			     gpa_t addr, unsigned int len,
			
 
				+			     unsigned long val);
			
 
				+
			
 
				+void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
			
 
				+			     gpa_t addr, unsigned int len,
			
 
				+			     unsigned long val);
			
 
				+
			
 
				+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
			
 
				+				     gpa_t addr, unsigned int len);
			
 
				+
			
 
				+void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
			
 
				+			      gpa_t addr, unsigned int len,
			
 
				+			      unsigned long val);
			
 
				+
			
 
				+void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
			
 
				+			      gpa_t addr, unsigned int len,
			
 
				+			      unsigned long val);
			
 
				+
			
 
				+unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
			
 
				+				    gpa_t addr, unsigned int len);
			
 
				+
			
 
				+void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
			
 
				+			     gpa_t addr, unsigned int len,
			
 
				+			     unsigned long val);
			
 
				+
			
 
				+void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
			
 
				+			     gpa_t addr, unsigned int len,
			
 
				+			     unsigned long val);
			
 
				+
			
 
				+unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
			
 
				+				      gpa_t addr, unsigned int len);
			
 
				+
			
 
				+void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
			
 
				+			      gpa_t addr, unsigned int len,
			
 
				+			      unsigned long val);
			
 
				+
			
 
				+unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
			
 
				+				    gpa_t addr, unsigned int len);
			
 
				+
			
 
				+void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
			
 
				+			    gpa_t addr, unsigned int len,
			
 
				+			    unsigned long val);
			
 
				+
			
 
				+unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
			
 
				+
			
 
				+unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);
			
 
				+
			
 
				+#endif
			
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -0,0 +1,352 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, 2016 ARM Ltd.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/irqchip/arm-gic.h>
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <kvm/arm_vgic.h>
			
 
				+#include <asm/kvm_mmu.h>
			
 
				+
			
 
				+#include "vgic.h"
			
 
				+
			
 
				+/*
			
 
				+ * Call this function to convert a u64 value to an unsigned long * bitmask
			
 
				+ * in a way that works on both 32-bit and 64-bit LE and BE platforms.
			
 
				+ *
			
 
				+ * Warning: Calling this function may modify *val.
			
 
				+ */
			
 
				+static unsigned long *u64_to_bitmask(u64 *val)
			
 
				+{
			
 
				+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
			
 
				+	*val = (*val >> 32) | (*val << 32);
			
 
				+#endif
			
 
				+	return (unsigned long *)val;
			
 
				+}
			
 
				+
			
 
				+void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
			
 
				+
			
 
				+	if (cpuif->vgic_misr & GICH_MISR_EOI) {
			
 
				+		u64 eisr = cpuif->vgic_eisr;
			
 
				+		unsigned long *eisr_bmap = u64_to_bitmask(&eisr);
			
 
				+		int lr;
			
 
				+
			
 
				+		for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) {
			
 
				+			u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID;
			
 
				+
			
 
				+			WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE);
			
 
				+
			
 
				+			kvm_notify_acked_irq(vcpu->kvm, 0,
			
 
				+					     intid - VGIC_NR_PRIVATE_IRQS);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* check and disable underflow maintenance IRQ */
			
 
				+	cpuif->vgic_hcr &= ~GICH_HCR_UIE;
			
 
				+
			
 
				+	/*
			
 
				+	 * In the next iterations of the vcpu loop, if we sync the
			
 
				+	 * vgic state after flushing it, but before entering the guest
			
 
				+	 * (this happens for pending signals and vmid rollovers), then
			
 
				+	 * make sure we don't pick up any old maintenance interrupts
			
 
				+	 * here.
			
 
				+	 */
			
 
				+	cpuif->vgic_eisr = 0;
			
 
				+}
			
 
				+
			
 
				+void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
			
 
				+
			
 
				+	cpuif->vgic_hcr |= GICH_HCR_UIE;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * transfer the content of the LRs back into the corresponding ap_list:
			
 
				+ * - active bit is transferred as is
			
 
				+ * - pending bit is
			
 
				+ *   - transferred as is in case of edge sensitive IRQs
			
 
				+ *   - set to the line-level (resample time) for level sensitive IRQs
			
 
				+ */
			
 
				+void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
			
 
				+	int lr;
			
 
				+
			
 
				+	for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
			
 
				+		u32 val = cpuif->vgic_lr[lr];
			
 
				+		u32 intid = val & GICH_LR_VIRTUALID;
			
 
				+		struct vgic_irq *irq;
			
 
				+
			
 
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
			
 
				+
			
 
				+		spin_lock(&irq->irq_lock);
			
 
				+
			
 
				+		/* Always preserve the active bit */
			
 
				+		irq->active = !!(val & GICH_LR_ACTIVE_BIT);
			
 
				+
			
 
				+		/* Edge is the only case where we preserve the pending bit */
			
 
				+		if (irq->config == VGIC_CONFIG_EDGE &&
			
 
				+		    (val & GICH_LR_PENDING_BIT)) {
			
 
				+			irq->pending = true;
			
 
				+
			
 
				+			if (vgic_irq_is_sgi(intid)) {
			
 
				+				u32 cpuid = val & GICH_LR_PHYSID_CPUID;
			
 
				+
			
 
				+				cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
			
 
				+				irq->source |= (1 << cpuid);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		/* Clear soft pending state when level IRQs have been acked */
			
 
				+		if (irq->config == VGIC_CONFIG_LEVEL &&
			
 
				+		    !(val & GICH_LR_PENDING_BIT)) {
			
 
				+			irq->soft_pending = false;
			
 
				+			irq->pending = irq->line_level;
			
 
				+		}
			
 
				+
			
 
				+		spin_unlock(&irq->irq_lock);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Populates the particular LR with the state of a given IRQ:
			
 
				+ * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
			
 
				+ * - for a level sensitive IRQ the pending state value is unchanged;
			
 
				+ *   it is dictated directly by the input level
			
 
				+ *
			
 
				+ * If @irq describes an SGI with multiple sources, we choose the
			
 
				+ * lowest-numbered source VCPU and clear that bit in the source bitmap.
			
 
				+ *
			
 
				+ * The irq_lock must be held by the caller.
			
 
				+ */
			
 
				+void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
			
 
				+{
			
 
				+	u32 val = irq->intid;
			
 
				+
			
 
				+	if (irq->pending) {
			
 
				+		val |= GICH_LR_PENDING_BIT;
			
 
				+
			
 
				+		if (irq->config == VGIC_CONFIG_EDGE)
			
 
				+			irq->pending = false;
			
 
				+
			
 
				+		if (vgic_irq_is_sgi(irq->intid)) {
			
 
				+			u32 src = ffs(irq->source);
			
 
				+
			
 
				+			BUG_ON(!src);
			
 
				+			val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
			
 
				+			irq->source &= ~(1 << (src - 1));
			
 
				+			if (irq->source)
			
 
				+				irq->pending = true;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (irq->active)
			
 
				+		val |= GICH_LR_ACTIVE_BIT;
			
 
				+
			
 
				+	if (irq->hw) {
			
 
				+		val |= GICH_LR_HW;
			
 
				+		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
			
 
				+	} else {
			
 
				+		if (irq->config == VGIC_CONFIG_LEVEL)
			
 
				+			val |= GICH_LR_EOI;
			
 
				+	}
			
 
				+
			
 
				+	/* The GICv2 LR only holds five bits of priority. */
			
 
				+	val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
			
 
				+
			
 
				+	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
			
 
				+}
			
 
				+
			
 
				+void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
			
 
				+{
			
 
				+	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0;
			
 
				+}
			
 
				+
			
 
				+void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
			
 
				+{
			
 
				+	u32 vmcr;
			
 
				+
			
 
				+	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
			
 
				+	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) &
			
 
				+		GICH_VMCR_ALIAS_BINPOINT_MASK;
			
 
				+	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
			
 
				+		GICH_VMCR_BINPOINT_MASK;
			
 
				+	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
			
 
				+		GICH_VMCR_PRIMASK_MASK;
			
 
				+
			
 
				+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
			
 
				+}
			
 
				+
			
 
				+void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
			
 
				+{
			
 
				+	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
			
 
				+
			
 
				+	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
			
 
				+			GICH_VMCR_CTRL_SHIFT;
			
 
				+	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >>
			
 
				+			GICH_VMCR_ALIAS_BINPOINT_SHIFT;
			
 
				+	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
			
 
				+			GICH_VMCR_BINPOINT_SHIFT;
			
 
				+	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
			
 
				+			GICH_VMCR_PRIMASK_SHIFT;
			
 
				+}
			
 
				+
			
 
				+void vgic_v2_enable(struct kvm_vcpu *vcpu)
			
 
				+{
			
 
				+	/*
			
 
				+	 * By forcing VMCR to zero, the GIC will restore the binary
			
 
				+	 * points to their reset values. Anything else resets to zero
			
 
				+	 * anyway.
			
 
				+	 */
			
 
				+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
			
 
				+	vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
			
 
				+
			
 
				+	/* Get the show on the road... */
			
 
				+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
			
 
				+}
			
 
				+
			
 
				+/* check for overlapping regions and for regions crossing the end of memory */
			
 
				+static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base)
			
 
				+{
			
 
				+	if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base)
			
 
				+		return false;
			
 
				+	if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base)
			
 
				+		return false;
			
 
				+
			
 
				+	if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base)
			
 
				+		return true;
			
 
				+	if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base)
			
 
				+		return true;
			
 
				+
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+int vgic_v2_map_resources(struct kvm *kvm)
			
 
				+{
			
 
				+	struct vgic_dist *dist = &kvm->arch.vgic;
			
 
				+	int ret = 0;
			
 
				+
			
 
				+	if (vgic_ready(kvm))
			
 
				+		goto out;
			
 
				+
			
 
				+	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
			
 
				+	    IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
			
 
				+		kvm_err("Need to set vgic cpu and dist addresses first\n");
			
 
				+		ret = -ENXIO;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
			
 
				+		kvm_err("VGIC CPU and dist frames overlap\n");
			
 
				+		ret = -EINVAL;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Initialize the vgic if this hasn't already been done on demand by
			
 
				+	 * accessing the vgic state from userspace.
			
 
				+	 */
			
 
				+	ret = vgic_init(kvm);
			
 
				+	if (ret) {
			
 
				+		kvm_err("Unable to initialize VGIC dynamic data structures\n");
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
			
 
				+	if (ret) {
			
 
				+		kvm_err("Unable to register VGIC MMIO regions\n");
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
			
 
				+				    kvm_vgic_global_state.vcpu_base,
			
 
				+				    KVM_VGIC_V2_CPU_SIZE, true);
			
 
				+	if (ret) {
			
 
				+		kvm_err("Unable to remap VGIC CPU to VCPU\n");
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	dist->ready = true;
			
 
				+
			
 
				+out:
			
 
				+	if (ret)
			
 
				+		kvm_vgic_destroy(kvm);
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
			
 
				+ * @info:	pointer to the GIC description
			
 
				+ *
			
 
				+ * Returns 0 if a GICv2 has been found, returns an error code otherwise
			
 
				+ */
			
 
				+int vgic_v2_probe(const struct gic_kvm_info *info)
			
 
				+{
			
 
				+	int ret;
			
 
				+	u32 vtr;
			
 
				+
			
 
				+	if (!info->vctrl.start) {
			
 
				+		kvm_err("GICH not present in the firmware table\n");
			
 
				+		return -ENXIO;
			
 
				+	}
			
 
				+
			
 
				+	if (!PAGE_ALIGNED(info->vcpu.start)) {
			
 
				+		kvm_err("GICV physical address 0x%llx not page aligned\n",
			
 
				+			(unsigned long long)info->vcpu.start);
			
 
				+		return -ENXIO;
			
 
				+	}
			
 
				+
			
 
				+	if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
			
 
				+		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
			
 
				+			(unsigned long long)resource_size(&info->vcpu),
			
 
				+			PAGE_SIZE);
			
 
				+		return -ENXIO;
			
 
				+	}
			
 
				+
			
 
				+	kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start,
			
 
				+						   resource_size(&info->vctrl));
			
 
				+	if (!kvm_vgic_global_state.vctrl_base) {
			
 
				+		kvm_err("Cannot ioremap GICH\n");
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR);
			
 
				+	kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1;
			
 
				+
			
 
				+	ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base,
			
 
				+				     kvm_vgic_global_state.vctrl_base +
			
 
				+					 resource_size(&info->vctrl),
			
 
				+				     info->vctrl.start);
			
 
				+
			
 
				+	if (ret) {
			
 
				+		kvm_err("Cannot map VCTRL into hyp\n");
			
 
				+		iounmap(kvm_vgic_global_state.vctrl_base);
			
 
				+		return ret;
			
 
				+	}
			
 
				+
			
 
				+	kvm_vgic_global_state.can_emulate_gicv2 = true;
			
 
				+	kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
			
 
				+
			
 
				+	kvm_vgic_global_state.vcpu_base = info->vcpu.start;
			
 
				+	kvm_vgic_global_state.type = VGIC_V2;
			
 
				+	kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
			
 
				+
			
 
				+	kvm_info("vgic-v2@%llx\n", info->vctrl.start);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -0,0 +1,330 @@
 
				+/*
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/irqchip/arm-gic-v3.h>
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <kvm/arm_vgic.h>
			
 
				+#include <asm/kvm_mmu.h>
			
 
				+#include <asm/kvm_asm.h>
			
 
				+
			
 
				+#include "vgic.h"
			
 
				+
			
 
				+/*
				+ * Process the maintenance state saved for @vcpu: for every list register
				+ * flagged in the saved EISR, notify the ack listeners of the interrupt it
				+ * held, then clear the underflow interrupt enable in the saved HCR.
				+ */
				+void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
				+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
				+	unsigned long eoi_map;
				+	int idx;
				+
				+	if (!(cpu_if->vgic_misr & ICH_MISR_EOI))
				+		goto clear_uie;
				+
				+	eoi_map = cpu_if->vgic_eisr;
				+
				+	for_each_set_bit(idx, &eoi_map, kvm_vgic_global_state.nr_lr) {
				+		u64 lr_val = cpu_if->vgic_lr[idx];
				+		u32 intid;
				+
				+		/* Pick the INTID mask matching the emulated GIC model */
				+		if (model != KVM_DEV_TYPE_ARM_VGIC_V3)
				+			intid = lr_val & GICH_LR_VIRTUALID;
				+		else
				+			intid = lr_val & ICH_LR_VIRTUAL_ID_MASK;
				+
				+		/* An LR reported through EISR should carry no state bits */
				+		WARN_ON(lr_val & ICH_LR_STATE);
				+
				+		kvm_notify_acked_irq(vcpu->kvm, 0,
				+				     intid - VGIC_NR_PRIVATE_IRQS);
				+	}
				+
				+	/*
				+	 * The vgic state may be synced again after a flush but before
				+	 * the guest is actually entered (pending signals, vmid
				+	 * rollovers). Zero the saved EISR so that such a sync does not
				+	 * process stale maintenance interrupts a second time.
				+	 */
				+	cpu_if->vgic_eisr = 0;
				+
				+clear_uie:
				+	cpu_if->vgic_hcr &= ~ICH_HCR_UIE;
				+}
			
 
				+
			
 
				+/* Set the underflow interrupt enable bit in @vcpu's saved GICv3 HCR. */
				+void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
				+{
				+	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
				+}
			
 
				+
			
 
				+/*
				+ * Fold the contents of the list registers that were in use (used_lrs)
				+ * back into the struct vgic_irq of the interrupt each of them held.
				+ */
				+void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
				+	struct vgic_v3_cpu_if *cpu_if = &vgic_cpu->vgic_v3;
				+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
				+	int idx;
				+
				+	for (idx = 0; idx < vgic_cpu->used_lrs; idx++) {
				+		u64 lr_val = cpu_if->vgic_lr[idx];
				+		bool lr_pending = !!(lr_val & ICH_LR_PENDING_BIT);
				+		struct vgic_irq *irq;
				+		u32 intid;
				+
				+		if (model != KVM_DEV_TYPE_ARM_VGIC_V3)
				+			intid = lr_val & GICH_LR_VIRTUALID;
				+		else
				+			intid = lr_val & ICH_LR_VIRTUAL_ID_MASK;
				+
				+		irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
				+
				+		spin_lock(&irq->irq_lock);
				+
				+		/* The active bit is always taken over from the LR */
				+		irq->active = !!(lr_val & ICH_LR_ACTIVE_BIT);
				+
				+		if (irq->config == VGIC_CONFIG_EDGE) {
				+			/* Only edge interrupts keep the LR's pending bit */
				+			if (lr_pending) {
				+				irq->pending = true;
				+
				+				/* GICv2 SGI: put the source CPU back */
				+				if (vgic_irq_is_sgi(intid) &&
				+				    model == KVM_DEV_TYPE_ARM_VGIC_V2) {
				+					u32 src_cpu;
				+
				+					src_cpu = lr_val & GICH_LR_PHYSID_CPUID;
				+					src_cpu >>= GICH_LR_PHYSID_CPUID_SHIFT;
				+					irq->source |= (1 << src_cpu);
				+				}
				+			}
				+		} else if (irq->config == VGIC_CONFIG_LEVEL) {
				+			/*
				+			 * An acked level interrupt loses its soft pending
				+			 * state; what remains is the line level.
				+			 */
				+			if (!lr_pending) {
				+				irq->soft_pending = false;
				+				irq->pending = irq->line_level;
				+			}
				+		}
				+
				+		spin_unlock(&irq->irq_lock);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Encode @irq into list register @lr of @vcpu's saved GICv3 CPU interface.
				+ * The caller must already hold the irq's lock.
				+ */
				+void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
				+{
				+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
				+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
				+	u64 lr_val = irq->intid;
				+
				+	lr_val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
				+
				+	/*
				+	 * Only Group1 interrupts are supported for now. This is a known
				+	 * defect that needs to be addressed at some point.
				+	 */
				+	if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
				+		lr_val |= ICH_LR_GROUP;
				+
				+	if (irq->hw) {
				+		lr_val |= ICH_LR_HW;
				+		lr_val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
				+	} else if (irq->config == VGIC_CONFIG_LEVEL) {
				+		lr_val |= ICH_LR_EOI;
				+	}
				+
				+	if (irq->active)
				+		lr_val |= ICH_LR_ACTIVE_BIT;
				+
				+	if (irq->pending) {
				+		lr_val |= ICH_LR_PENDING_BIT;
				+
				+		/* For an edge irq the pending state now lives in the LR */
				+		if (irq->config == VGIC_CONFIG_EDGE)
				+			irq->pending = false;
				+
				+		if (vgic_irq_is_sgi(irq->intid) &&
				+		    model == KVM_DEV_TYPE_ARM_VGIC_V2) {
				+			u32 src = ffs(irq->source);
				+
				+			BUG_ON(!src);
				+			lr_val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
				+			irq->source &= ~(1 << (src - 1));
				+
				+			/* More source CPUs left: the SGI stays pending */
				+			if (irq->source)
				+				irq->pending = true;
				+		}
				+	}
				+
				+	cpu_if->vgic_lr[lr] = lr_val;
				+}
			
 
				+
			
 
				+/* Zero the saved copy of list register @lr for @vcpu. */
				+void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
				+{
				+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
				+
				+	cpu_if->vgic_lr[lr] = 0;
				+}
			
 
				+
			
 
				+/* Pack the fields of @vmcrp into @vcpu's saved GICv3 VMCR value. */
				+void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
				+{
				+	u32 ctlr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
				+	u32 bpr1 = (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
				+	u32 bpr0 = (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
				+	u32 pmr = (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
				+
				+	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = ctlr | bpr1 | bpr0 | pmr;
				+}
			
 
				+
			
 
				+/* Unpack @vcpu's saved GICv3 VMCR value into the fields of @vmcrp. */
				+void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
				+{
				+	const u32 raw = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
				+
				+	vmcrp->ctlr = (raw & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
				+	vmcrp->abpr = (raw & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
				+	vmcrp->bpr = (raw & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
				+	vmcrp->pmr = (raw & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
				+}
			
 
				+
			
 
				+/* Set up the initial saved GICv3 CPU interface state of @vcpu. */
				+void vgic_v3_enable(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
				+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
				+
				+	/*
				+	 * A zero VMCR makes the GIC restore the binary points to their
				+	 * reset values; everything else resets to zero anyway.
				+	 */
				+	cpu_if->vgic_vmcr = 0;
				+	cpu_if->vgic_elrsr = ~0;
				+
				+	/*
				+	 * When emulating a GICv3 we do so in a non-GICv2-compatible way,
				+	 * so SRE is forced to 1 to show this to the guest. The spec
				+	 * allows the value to be RAO/WI.
				+	 */
				+	cpu_if->vgic_sre = (model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
				+			   ICC_SRE_EL1_SRE : 0;
				+
				+	/* Finally switch the virtual interface on */
				+	cpu_if->vgic_hcr = ICH_HCR_EN;
				+}
			
 
				+
			
 
				+/*
				+ * Validate the guest addresses of the distributor and redistributor frames:
				+ * returns false if either region wraps around the end of the address space
				+ * or if the two regions overlap, true otherwise. The redistributor region
				+ * is sized for the current number of online VCPUs.
				+ */
				+static bool vgic_v3_check_base(struct kvm *kvm)
				+{
				+	struct vgic_dist *d = &kvm->arch.vgic;
				+	gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
				+
				+	redist_size *= atomic_read(&kvm->online_vcpus);
				+
				+	/* Neither region may wrap */
				+	if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base ||
				+	    d->vgic_redist_base + redist_size < d->vgic_redist_base)
				+		return false;
				+
				+	/* One region has to end before the other one starts */
				+	return d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base ||
				+	       d->vgic_redist_base + redist_size <= d->vgic_dist_base;
				+}
			
 
				+
			
 
				+/*
				+ * Register the distributor and redistributor MMIO regions of a GICv3 guest
				+ * and mark the vgic as ready. Returns 0 on success or when the vgic is
				+ * already ready; on any failure the vgic is torn down with
				+ * kvm_vgic_destroy() and a negative error code is returned.
				+ */
				+int vgic_v3_map_resources(struct kvm *kvm)
				+{
				+	struct vgic_dist *dist = &kvm->arch.vgic;
				+	int ret;
				+
				+	if (vgic_ready(kvm))
				+		return 0;
				+
				+	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
				+	    IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
				+		kvm_err("Need to set vgic distributor addresses first\n");
				+		ret = -ENXIO;
				+		goto fail;
				+	}
				+
				+	if (!vgic_v3_check_base(kvm)) {
				+		kvm_err("VGIC redist and dist frames overlap\n");
				+		ret = -EINVAL;
				+		goto fail;
				+	}
				+
				+	/*
				+	 * A VGICv3 has to be explicitly initialized by userland before
				+	 * it can be used.
				+	 */
				+	if (!vgic_initialized(kvm)) {
				+		ret = -EBUSY;
				+		goto fail;
				+	}
				+
				+	ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
				+	if (ret) {
				+		kvm_err("Unable to register VGICv3 dist MMIO regions\n");
				+		goto fail;
				+	}
				+
				+	ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
				+	if (ret) {
				+		kvm_err("Unable to register VGICv3 redist MMIO regions\n");
				+		goto fail;
				+	}
				+
				+	dist->ready = true;
				+	return 0;
				+
				+fail:
				+	kvm_vgic_destroy(kvm);
				+	return ret;
				+}
			
 
				+
			
 
				+/**
				+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
				+ * @info:	pointer to the GIC description; only its GICV resource
				+ *		(info->vcpu) is looked at here
				+ *
				+ * Records the number of list registers and the GIC type in
				+ * kvm_vgic_global_state and registers the GICv3 KVM device. GICv2
				+ * emulation is offered in addition only when @info describes a GICV
				+ * region whose start and size are both page aligned; otherwise it is
				+ * disabled and vcpu_base is left at 0.
				+ *
				+ * Returns 0. None of the GICV checks is treated as a fatal error.
				+ */
				+int vgic_v3_probe(const struct gic_kvm_info *info)
				+{
				+	u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
				+
				+	/*
				+	 * The ListRegs field is 5 bits, but there is a architectural
				+	 * maximum of 16 list registers. Just ignore bit 4...
				+	 */
				+	kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
				+	kvm_vgic_global_state.can_emulate_gicv2 = false;
				+
				+	if (!info->vcpu.start) {
				+		kvm_info("GICv3: no GICV resource entry\n");
				+		kvm_vgic_global_state.vcpu_base = 0;
				+	} else if (!PAGE_ALIGNED(info->vcpu.start)) {
				+		pr_warn("GICV physical address 0x%llx not page aligned\n",
				+			(unsigned long long)info->vcpu.start);
				+		kvm_vgic_global_state.vcpu_base = 0;
				+	} else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
				+		pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
				+			(unsigned long long)resource_size(&info->vcpu),
				+			PAGE_SIZE);
				+		kvm_vgic_global_state.vcpu_base = 0;
				+	} else {
				+		kvm_vgic_global_state.vcpu_base = info->vcpu.start;
				+		kvm_vgic_global_state.can_emulate_gicv2 = true;
				+		kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
				+		/*
				+		 * Cast like the other prints of this resource above: the
				+		 * width of the resource address type is not guaranteed to
				+		 * match %llx.
				+		 */
				+		kvm_info("vgic-v2@%llx\n",
				+			 (unsigned long long)info->vcpu.start);
				+	}
				+	if (kvm_vgic_global_state.vcpu_base == 0)
				+		kvm_info("disabling GICv2 emulation\n");
				+	kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
				+
				+	/* No memory-mapped GICH mapping is set up on this path */
				+	kvm_vgic_global_state.vctrl_base = NULL;
				+	kvm_vgic_global_state.type = VGIC_V3;
				+	kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
				+
				+	return 0;
				+}
			
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -0,0 +1,619 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, 2016 ARM Ltd.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/kvm.h>
			
 
				+#include <linux/kvm_host.h>
			
 
				+#include <linux/list_sort.h>
			
 
				+
			
 
				+#include "vgic.h"
			
 
				+
			
 
				+#define CREATE_TRACE_POINTS
			
 
				+#include "../trace.h"
			
 
				+
			
 
				+/*
				+ * BUG_ON() variant for lock-state assertions: it only expands to a check
				+ * when CONFIG_DEBUG_SPINLOCK is set and to nothing otherwise.
				+ */
				+#ifndef CONFIG_DEBUG_SPINLOCK
				+#define DEBUG_SPINLOCK_BUG_ON(p)
				+#else
				+#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
				+#endif
				+
				+/* Global vgic properties (type, nr_lr, base addresses) set at probe time */
				+struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
			
 
				+
			
 
				+/*
			
 
				+ * Locking order is always:
			
 
				+ *   vgic_cpu->ap_list_lock
			
 
				+ *     vgic_irq->irq_lock
			
 
				+ *
			
 
				+ * (that is, always take the ap_list_lock before the struct vgic_irq lock).
			
 
				+ *
			
 
				+ * When taking more than one ap_list_lock at the same time, always take the
			
 
				+ * lowest numbered VCPU's ap_list_lock first, so:
			
 
				+ *   vcpuX->vcpu_id < vcpuY->vcpu_id:
			
 
				+ *     spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
			
 
				+ *     spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
			
 
				+ */
			
 
				+
			
 
				+/*
				+ * Look up the struct vgic_irq for @intid. Private interrupts (SGIs and
				+ * PPIs) come from @vcpu, SPIs from @kvm. Returns NULL for LPIs, which are
				+ * not covered yet, and (with a warning) for reserved INTIDs.
				+ */
				+struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
				+			      u32 intid)
				+{
				+	struct vgic_irq *irq = NULL;
				+
				+	if (intid <= VGIC_MAX_PRIVATE)
				+		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
				+	else if (intid <= VGIC_MAX_SPI)
				+		irq = &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
				+	else if (intid < VGIC_MIN_LPI)
				+		WARN(1, "Looking up struct vgic_irq for reserved INTID");
				+
				+	return irq;
				+}
			
 
				+
			
 
				+/**
				+ * vgic_target_oracle - compute the target vcpu for an irq
				+ *
				+ * @irq:	The irq to route. Must be already locked.
				+ *
				+ * Looks at the current state of the interrupt (enabled, pending, active,
				+ * vcpu and target_vcpu) and returns the vcpu it should be given to next,
				+ * or NULL if it should not be injected at all.
				+ *
				+ * Requires the IRQ lock to be held.
				+ */
				+static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
				+{
				+	struct kvm_vcpu *target;
				+
				+	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
				+
				+	/* An active interrupt has to stay on the vcpu it is on */
				+	if (irq->active) {
				+		if (irq->vcpu)
				+			return irq->vcpu;
				+		return irq->target_vcpu;
				+	}
				+
				+	/*
				+	 * Neither active nor both enabled and pending: this IRQ should
				+	 * not be queued to any VCPU.
				+	 */
				+	if (!irq->enabled || !irq->pending)
				+		return NULL;
				+
				+	/*
				+	 * Enabled and pending: route to the configured target VCPU,
				+	 * unless the distributor is disabled, in which case pending
				+	 * interrupts are not forwarded.
				+	 */
				+	target = irq->target_vcpu;
				+	if (unlikely(target && !target->kvm->arch.vgic.enabled))
				+		return NULL;
				+
				+	return target;
				+}
			
 
				+
			
 
				+/*
				+ * list_sort() comparison callback for a VCPU's ap_list. The list order is
				+ * also the order in which list registers get populated.
				+ *
				+ * Active interrupts sort first: they can never be pushed out of the LRs
				+ * because deactivation cannot be trapped reliably. Next come interrupts
				+ * that are both enabled and pending, ordered by the priority field; the
				+ * GIC hardware deals with preemption of priority groups etc.
				+ *
				+ * Returns a negative value if "a" sorts before "b", 0 to preserve the
				+ * order, and a positive value to sort "b" before "a". Both irq_locks are
				+ * taken for the duration of the comparison.
				+ */
				+static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
				+{
				+	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
				+	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
				+	int ret;
				+
				+	spin_lock(&irqa->irq_lock);
				+	spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
				+
				+	if (irqa->active || irqb->active) {
				+		ret = (int)irqb->active - (int)irqa->active;
				+	} else {
				+		bool penda = irqa->enabled && irqa->pending;
				+		bool pendb = irqb->enabled && irqb->pending;
				+
				+		if (penda && pendb)
				+			/* Both pending and enabled, sort by priority */
				+			ret = irqa->priority - irqb->priority;
				+		else
				+			ret = (int)pendb - (int)penda;
				+	}
				+
				+	spin_unlock(&irqb->irq_lock);
				+	spin_unlock(&irqa->irq_lock);
				+	return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Sort @vcpu's ap_list with vgic_irq_cmp().
				+ * Must be called with the ap_list_lock held.
				+ */
				+static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
				+{
				+	struct list_head *ap_list = &vcpu->arch.vgic_cpu.ap_list_head;
				+
				+	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vcpu->arch.vgic_cpu.ap_list_lock));
				+
				+	list_sort(NULL, ap_list, vgic_irq_cmp);
				+}
			
 
				+
			
 
				+/*
				+ * An injection is only meaningful when it changes the line level of a
				+ * level-triggered IRQ, or when it is a rising edge for an edge-triggered
				+ * one. Returns true if @level is such an injection for @irq.
				+ */
				+static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
				+{
				+	if (irq->config == VGIC_CONFIG_LEVEL)
				+		return irq->line_level != level;
				+
				+	if (irq->config == VGIC_CONFIG_EDGE)
				+		return level;
				+
				+	return false;
				+}
			
 
				+
			
 
				+/*
				+ * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
				+ * Do the queuing if necessary, taking the right locks in the right order.
				+ * Returns true when the IRQ was queued, false otherwise.
				+ *
				+ * Needs to be entered with the IRQ lock already held, but will return
				+ * with all locks dropped.
				+ *
				+ * The target VCPU is computed by vgic_target_oracle(). Because the IRQ
				+ * lock has to be dropped in order to take the target's ap_list_lock
				+ * first, the target is computed again once both locks are held, and the
				+ * whole sequence is retried if it changed or if the IRQ got queued in
				+ * the meantime. A VCPU that had the IRQ added to the tail of its ap_list
				+ * is kicked after all locks have been released.
				+ */
				+bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq)
				+{
				+	struct kvm_vcpu *vcpu;
				+
				+	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
				+
				+retry:
				+	vcpu = vgic_target_oracle(irq);
				+	if (irq->vcpu || !vcpu) {
				+		/*
				+		 * If this IRQ is already on a VCPU's ap_list, then it
				+		 * cannot be moved or modified and there is no more work for
				+		 * us to do.
				+		 *
				+		 * Otherwise, if the irq is not pending and enabled, it does
				+		 * not need to be inserted into an ap_list and there is also
				+		 * no more work for us to do.
				+		 */
				+		spin_unlock(&irq->irq_lock);
				+		return false;
				+	}
				+
				+	/*
				+	 * We must unlock the irq lock to take the ap_list_lock where
				+	 * we are going to insert this new pending interrupt.
				+	 */
				+	spin_unlock(&irq->irq_lock);
				+
				+	/* someone can do stuff here, which we re-check below */
				+
				+	spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
				+	spin_lock(&irq->irq_lock);
				+
				+	/*
				+	 * Did something change behind our backs?
				+	 *
				+	 * There are two cases:
				+	 * 1) The irq lost its pending state or was disabled behind our
				+	 *    backs and/or it was queued to another VCPU's ap_list.
				+	 * 2) Someone changed the affinity on this irq behind our
				+	 *    backs and we are now holding the wrong ap_list_lock.
				+	 *
				+	 * In both cases, drop the locks and retry. The retry label is
				+	 * entered with only the irq lock held, hence the irq lock is
				+	 * taken again on its own before jumping back.
				+	 */
				+
				+	if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
				+		spin_unlock(&irq->irq_lock);
				+		spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
				+
				+		spin_lock(&irq->irq_lock);
				+		goto retry;
				+	}
				+
				+	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
				+	irq->vcpu = vcpu;
				+
				+	spin_unlock(&irq->irq_lock);
				+	spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
				+
				+	kvm_vcpu_kick(vcpu);
				+
				+	return true;
				+}
			
 
				+
			
 
				+/*
				+ * Common backend of kvm_vgic_inject_irq() and kvm_vgic_inject_mapped_irq():
				+ * update the pending state of interrupt @intid (on VCPU @cpuid for private
				+ * interrupts) according to @level and queue it if needed.
				+ *
				+ * Returns 0 on success, including when the injection is a no-op, the error
				+ * from vgic_lazy_init() if that fails, or -EINVAL if the interrupt cannot
				+ * be looked up or if its hw mapping state does not match @mapped_irq.
				+ */
				+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
				+				   unsigned int intid, bool level,
				+				   bool mapped_irq)
				+{
				+	struct kvm_vcpu *vcpu;
				+	struct vgic_irq *irq;
				+	int err;
				+
				+	trace_vgic_update_irq_pending(cpuid, intid, level);
				+
				+	err = vgic_lazy_init(kvm);
				+	if (err)
				+		return err;
				+
				+	/* A private interrupt needs a valid VCPU to be looked up on */
				+	vcpu = kvm_get_vcpu(kvm, cpuid);
				+	if (intid < VGIC_NR_PRIVATE_IRQS && !vcpu)
				+		return -EINVAL;
				+
				+	irq = vgic_get_irq(kvm, vcpu, intid);
				+	if (!irq || irq->hw != mapped_irq)
				+		return -EINVAL;
				+
				+	spin_lock(&irq->irq_lock);
				+
				+	if (vgic_validate_injection(irq, level)) {
				+		if (irq->config != VGIC_CONFIG_LEVEL) {
				+			irq->pending = true;
				+		} else {
				+			irq->line_level = level;
				+			irq->pending = level || irq->soft_pending;
				+		}
				+
				+		/* Returns with the irq lock dropped */
				+		vgic_queue_irq_unlock(kvm, irq);
				+	} else {
				+		/* Nothing changes for this interrupt */
				+		spin_unlock(&irq->irq_lock);
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/**
				+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
				+ * @kvm:     The VM structure pointer
				+ * @cpuid:   The CPU for PPIs
				+ * @intid:   The INTID to inject a new state to.
				+ * @level:   Edge-triggered:  true:  to trigger the interrupt
				+ *			      false: to ignore the call
				+ *	     Level-sensitive  true:  raise the input signal
				+ *			      false: lower the input signal
				+ *
				+ * The VGIC does not care whether a device is active-LOW or active-HIGH for
				+ * level-sensitive interrupts. Think of the level parameter as 1 being HIGH
				+ * and 0 being LOW, with all devices being active-HIGH.
				+ *
				+ * This variant is for interrupts that are not mapped to a hardware
				+ * interrupt; see vgic_update_irq_pending() for the return values.
				+ */
				+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
				+			bool level)
				+{
				+	const bool mapped_irq = false;
				+
				+	return vgic_update_irq_pending(kvm, cpuid, intid, level, mapped_irq);
				+}
			
 
				+
			
 
				+/*
				+ * Same as kvm_vgic_inject_irq(), but for an interrupt that has been mapped
				+ * to a hardware interrupt (irq->hw set).
				+ */
				+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
				+			       bool level)
				+{
				+	const bool mapped_irq = true;
				+
				+	return vgic_update_irq_pending(kvm, cpuid, intid, level, mapped_irq);
				+}
			
 
				+
			
 
				+/*
				+ * Mark virtual interrupt @virt_irq of @vcpu as backed by the hardware
				+ * interrupt @phys_irq. Always returns 0; a failed lookup is a BUG.
				+ */
				+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
				+{
				+	struct vgic_irq *irq;
				+
				+	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
				+	BUG_ON(!irq);
				+
				+	spin_lock(&irq->irq_lock);
				+	irq->hw = true;
				+	irq->hwintid = phys_irq;
				+	spin_unlock(&irq->irq_lock);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Remove the hardware interrupt mapping of virtual interrupt @virt_irq of
				+ * @vcpu. Returns 0 on success or -EAGAIN if the vgic is not initialized;
				+ * a failed lookup is a BUG.
				+ */
				+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
				+{
				+	struct vgic_irq *irq;
				+
				+	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
				+	BUG_ON(!irq);
				+
				+	if (!vgic_initialized(vcpu->kvm))
				+		return -EAGAIN;
				+
				+	spin_lock(&irq->irq_lock);
				+	irq->hw = false;
				+	irq->hwintid = 0;
				+	spin_unlock(&irq->irq_lock);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/**
				+ * vgic_prune_ap_list - Remove non-relevant interrupts from the list
				+ *
				+ * @vcpu: The VCPU pointer
				+ *
				+ * Go over the list of "interesting" interrupts, and prune those that we
				+ * won't have to consider in the near future.
				+ *
				+ * For every interrupt on the list, vgic_target_oracle() decides what
				+ * happens to it: with no target it is taken off the list, with @vcpu as
				+ * target it stays where it is, and with any other target it is moved to
				+ * the tail of that VCPU's ap_list. Moving requires both ap_list_locks,
				+ * so all locks are dropped and taken again, lowest vcpu_id first; after
				+ * such an attempt the walk restarts from the head of the list.
				+ */
				+static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
				+	struct vgic_irq *irq, *tmp;
				+
				+retry:
				+	spin_lock(&vgic_cpu->ap_list_lock);
				+
				+	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
				+		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
				+
				+		spin_lock(&irq->irq_lock);
				+
				+		BUG_ON(vcpu != irq->vcpu);
				+
				+		target_vcpu = vgic_target_oracle(irq);
				+
				+		if (!target_vcpu) {
				+			/*
				+			 * We don't need to process this interrupt any
				+			 * further, move it off the list.
				+			 */
				+			list_del(&irq->ap_list);
				+			irq->vcpu = NULL;
				+			spin_unlock(&irq->irq_lock);
				+			continue;
				+		}
				+
				+		if (target_vcpu == vcpu) {
				+			/* We're on the right CPU */
				+			spin_unlock(&irq->irq_lock);
				+			continue;
				+		}
				+
				+		/* This interrupt looks like it has to be migrated. */
				+
				+		spin_unlock(&irq->irq_lock);
				+		spin_unlock(&vgic_cpu->ap_list_lock);
				+
				+		/*
				+		 * Ensure locking order by always locking the smallest
				+		 * ID first.
				+		 */
				+		if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
				+			vcpuA = vcpu;
				+			vcpuB = target_vcpu;
				+		} else {
				+			vcpuA = target_vcpu;
				+			vcpuB = vcpu;
				+		}
				+
				+		spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
				+		spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
				+				 SINGLE_DEPTH_NESTING);
				+		spin_lock(&irq->irq_lock);
				+
				+		/*
				+		 * If the affinity has been preserved, move the
				+		 * interrupt around. Otherwise, it means things have
				+		 * changed while the interrupt was unlocked, and we
				+		 * need to replay this.
				+		 *
				+		 * In all cases, we cannot trust the list not to have
				+		 * changed, so we restart from the beginning.
				+		 */
				+		if (target_vcpu == vgic_target_oracle(irq)) {
				+			struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
				+
				+			list_del(&irq->ap_list);
				+			irq->vcpu = target_vcpu;
				+			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
				+		}
				+
				+		spin_unlock(&irq->irq_lock);
				+		spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
				+		spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
				+		goto retry;
				+	}
				+
				+	spin_unlock(&vgic_cpu->ap_list_lock);
				+}
			
 
				+
			
 
				+/* Call the v2 or v3 maintenance handler, per kvm_vgic_global_state.type. */
				+static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu)
				+{
				+	if (kvm_vgic_global_state.type != VGIC_V2) {
				+		vgic_v3_process_maintenance(vcpu);
				+		return;
				+	}
				+
				+	vgic_v2_process_maintenance(vcpu);
				+}
			
 
				+
			
 
				+/* Call the v2 or v3 LR fold routine, per kvm_vgic_global_state.type. */
				+static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
				+{
				+	if (kvm_vgic_global_state.type != VGIC_V2) {
				+		vgic_v3_fold_lr_state(vcpu);
				+		return;
				+	}
				+
				+	vgic_v2_fold_lr_state(vcpu);
				+}
			
 
				+
			
 
				+/*
				+ * Call the v2 or v3 routine that encodes @irq into list register @lr,
				+ * per kvm_vgic_global_state.type. Requires the irq_lock to be held.
				+ */
				+static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
				+				    struct vgic_irq *irq, int lr)
				+{
				+	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
				+
				+	if (kvm_vgic_global_state.type != VGIC_V2) {
				+		vgic_v3_populate_lr(vcpu, irq, lr);
				+		return;
				+	}
				+
				+	vgic_v2_populate_lr(vcpu, irq, lr);
				+}
			
 
				+
			
 
				+/* Call the v2 or v3 LR clear routine, per kvm_vgic_global_state.type. */
				+static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
				+{
				+	if (kvm_vgic_global_state.type != VGIC_V2) {
				+		vgic_v3_clear_lr(vcpu, lr);
				+		return;
				+	}
				+
				+	vgic_v2_clear_lr(vcpu, lr);
				+}
			
 
				+
			
 
				+/* Call the v2 or v3 underflow setter, per kvm_vgic_global_state.type. */
				+static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
				+{
				+	if (kvm_vgic_global_state.type != VGIC_V2) {
				+		vgic_v3_set_underflow(vcpu);
				+		return;
				+	}
				+
				+	vgic_v2_set_underflow(vcpu);
				+}
			
 
				+
			
 
				+/*
				+ * Count the entries that @vcpu's ap_list stands for. An SGI with source
				+ * bits set counts once per set bit, everything else counts once.
				+ * Requires the ap_list_lock to be held.
				+ */
				+static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_cpu *cpu = &vcpu->arch.vgic_cpu;
				+	struct vgic_irq *cur;
				+	int depth = 0;
				+
				+	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&cpu->ap_list_lock));
				+
				+	list_for_each_entry(cur, &cpu->ap_list_head, ap_list) {
				+		int weight = 1;
				+
				+		spin_lock(&cur->irq_lock);
				+		/* GICv2 SGIs can count for more than one... */
				+		if (vgic_irq_is_sgi(cur->intid) && cur->source)
				+			weight = hweight8(cur->source);
				+		spin_unlock(&cur->irq_lock);
				+
				+		depth += weight;
				+	}
				+
				+	return depth;
				+}
			
 
				+
			
 
				+/*
				+ * Requires the VCPU's ap_list_lock to be held.
				+ *
				+ * Populate list registers, in ap_list order, with the interrupts whose
				+ * target according to vgic_target_oracle() is this VCPU, and stop once
				+ * nr_lr of them have been filled. When the list stands for more entries
				+ * than there are list registers, the underflow interrupt is requested
				+ * and the list is sorted first. The number of list registers filled is
				+ * recorded in used_lrs and all remaining ones are cleared.
				+ */
				+static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
				+	struct vgic_irq *irq;
				+	int count = 0;
				+
				+	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
				+
				+	if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) {
				+		vgic_set_underflow(vcpu);
				+		vgic_sort_ap_list(vcpu);
				+	}
				+
				+	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
				+		spin_lock(&irq->irq_lock);
				+
				+		if (unlikely(vgic_target_oracle(irq) != vcpu))
				+			goto next;
				+
				+		/*
				+		 * If we get an SGI with multiple sources, try to get
				+		 * them in all at once. Each pass uses one more list
				+		 * register, and the loop ends when no source is left
				+		 * or when the list registers run out.
				+		 */
				+		do {
				+			vgic_populate_lr(vcpu, irq, count++);
				+		} while (irq->source && count < kvm_vgic_global_state.nr_lr);
				+
				+next:
				+		spin_unlock(&irq->irq_lock);
				+
				+		if (count == kvm_vgic_global_state.nr_lr)
				+			break;
				+	}
				+
				+	vcpu->arch.vgic_cpu.used_lrs = count;
				+
				+	/* Nuke remaining LRs */
				+	for ( ; count < kvm_vgic_global_state.nr_lr; count++)
				+		vgic_clear_lr(vcpu, count);
				+}
			
 
				+
			
 
				+/*
				+ * Sync back the hardware VGIC state into our emulation after a guest's run.
				+ *
				+ * The maintenance state is processed first, then the list registers are
				+ * folded back into the struct vgic_irq state, and finally the ap_list is
				+ * pruned based on that updated state.
				+ */
				+void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
				+{
				+	vgic_process_maintenance_interrupt(vcpu);
				+	vgic_fold_lr_state(vcpu);
				+	vgic_prune_ap_list(vcpu);
				+}
			
 
				+
			
 
				+/*
				+ * Flush our emulation state into the GIC hardware before entering the
				+ * guest: fill the list registers from the ap_list under the ap_list_lock.
				+ */
				+void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_cpu *cpu = &vcpu->arch.vgic_cpu;
				+
				+	spin_lock(&cpu->ap_list_lock);
				+	vgic_flush_lr_state(vcpu);
				+	spin_unlock(&cpu->ap_list_lock);
				+}
			
 
				+
			
 
				+/*
				+ * Tell whether @vcpu has an interrupt on its ap_list that is both pending
				+ * and enabled. Always reports false while the distributor is disabled.
				+ */
				+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
				+{
				+	struct vgic_cpu *cpu = &vcpu->arch.vgic_cpu;
				+	struct vgic_irq *cur;
				+	bool found = false;
				+
				+	if (!vcpu->kvm->arch.vgic.enabled)
				+		return false;
				+
				+	spin_lock(&cpu->ap_list_lock);
				+
				+	list_for_each_entry(cur, &cpu->ap_list_head, ap_list) {
				+		spin_lock(&cur->irq_lock);
				+		found = cur->enabled && cur->pending;
				+		spin_unlock(&cur->irq_lock);
				+
				+		/* One hit is enough */
				+		if (found)
				+			break;
				+	}
				+
				+	spin_unlock(&cpu->ap_list_lock);
				+
				+	return found;
				+}
			
 
				+
			
 
				+/* Kick every VCPU of @kvm that has a pending and enabled interrupt. */
				+void vgic_kick_vcpus(struct kvm *kvm)
				+{
				+	struct kvm_vcpu *vcpu;
				+	int i;
				+
				+	/*
				+	 * An interrupt has been injected; find out which VCPUs have
				+	 * something to deliver and kick those.
				+	 */
				+	kvm_for_each_vcpu(i, vcpu, kvm) {
				+		if (!kvm_vgic_vcpu_pending_irq(vcpu))
				+			continue;
				+
				+		kvm_vcpu_kick(vcpu);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Tell whether virtual interrupt @virt_irq of @vcpu is mapped to a
				+ * hardware interrupt and currently active.
				+ */
				+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
				+{
				+	struct vgic_irq *irq;
				+	bool active_hw;
				+
				+	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
				+
				+	spin_lock(&irq->irq_lock);
				+	active_hw = irq->hw && irq->active;
				+	spin_unlock(&irq->irq_lock);
				+
				+	return active_hw;
				+}
			
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -0,0 +1,131 @@
 
				+/*
			
 
				+ * Copyright (C) 2015, 2016 ARM Ltd.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+ */
			
 
				+#ifndef __KVM_ARM_VGIC_NEW_H__
			
 
				+#define __KVM_ARM_VGIC_NEW_H__
			
 
				+
			
 
				+#include <linux/irqchip/arm-gic-common.h>
			
 
				+
			
 
				+/*
				+ * Identification values for the emulated GIC.
				+ * NOTE(review): presumably reported through the GIC ID registers -- confirm
				+ * against the MMIO emulation code.
				+ */
				+#define PRODUCT_ID_KVM		0x4b	/* ASCII code K */
				+#define IMPLEMENTER_ARM		0x43b
				+
				+/* Sentinel for an undefined VGIC address, plus the matching test. */
				+#define VGIC_ADDR_UNDEF		(-1)
				+#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
				+
				+/*
				+ * NOTE(review): looks like the number of interrupt ID bits and priority
				+ * bits the emulation exposes -- confirm against the users of these macros.
				+ */
				+#define INTERRUPT_ID_BITS_SPIS	10
				+#define VGIC_PRI_BITS		5
				+
				+/* True when @intid lies below VGIC_NR_SGIS, i.e. in the SGI range. */
				+#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
			
 
				+
			
 
				+/*
				+ * VMCR fields as exchanged with vgic_v2/v3_set_vmcr() and
				+ * vgic_v2/v3_get_vmcr().
				+ * NOTE(review): field names presumably stand for control, aliased binary
				+ * point, binary point and priority mask -- confirm against the GIC
				+ * architecture specification.
				+ */
				+struct vgic_vmcr {
				+	u32	ctlr;
				+	u32	abpr;
				+	u32	bpr;
				+	u32	pmr;
				+};
			
 
				+
			
 
				+/* Core helpers: IRQ lookup, queueing and kicking of VCPUs. */
				+struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
				+			      u32 intid);
				+bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
				+void vgic_kick_vcpus(struct kvm *kvm);
				+
				+/* GICv2 backend entry points (declared unconditionally). */
				+void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
				+void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
				+void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
				+void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
				+void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
				+int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
				+int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
				+			 int offset, u32 *val);
				+int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
				+			  int offset, u32 *val);
				+void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
				+void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
				+void vgic_v2_enable(struct kvm_vcpu *vcpu);
				+int vgic_v2_probe(const struct gic_kvm_info *info);
				+int vgic_v2_map_resources(struct kvm *kvm);
				+/* Registers the distributor I/O device at @dist_base_address. */
				+int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
				+			     enum vgic_type);
			
 
				+
			
 
				+/*
				+ * GICv3 backend.  With CONFIG_KVM_ARM_VGIC_V3 set these are real functions
				+ * implemented elsewhere; otherwise they collapse to the inline stubs in the
				+ * #else branch, where void functions do nothing and int functions return
				+ * -ENODEV.
				+ */
				+#ifdef CONFIG_KVM_ARM_VGIC_V3
				+void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu);
				+void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
				+void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
				+void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
				+void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
				+void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
				+void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
				+void vgic_v3_enable(struct kvm_vcpu *vcpu);
				+int vgic_v3_probe(const struct gic_kvm_info *info);
				+int vgic_v3_map_resources(struct kvm *kvm);
				+int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
				+#else
				+/* No GICv3 support built in: no-op stubs. */
				+static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
				+{
				+}
				+
				+static inline void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
				+{
				+}
				+
				+static inline void vgic_v3_populate_lr(struct kvm_vcpu *vcpu,
				+				       struct vgic_irq *irq, int lr)
				+{
				+}
				+
				+static inline void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
				+{
				+}
				+
				+static inline void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
				+{
				+}
				+
				+static inline
				+void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
				+{
				+}
				+
				+/* Note: the stub leaves *vmcr untouched. */
				+static inline
				+void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
				+{
				+}
				+
				+static inline void vgic_v3_enable(struct kvm_vcpu *vcpu)
				+{
				+}
				+
				+/* The int-returning stubs fail with -ENODEV. */
				+static inline int vgic_v3_probe(const struct gic_kvm_info *info)
				+{
				+	return -ENODEV;
				+}
				+
				+static inline int vgic_v3_map_resources(struct kvm *kvm)
				+{
				+	return -ENODEV;
				+}
				+
				+static inline int vgic_register_redist_iodevs(struct kvm *kvm,
				+					      gpa_t dist_base_address)
				+{
				+	return -ENODEV;
				+}
				+#endif /* CONFIG_KVM_ARM_VGIC_V3 */
			
 
				+
			
 
				+/* Device registration and initialisation entry points, defined elsewhere. */
				+void kvm_register_vgic_device(unsigned long type);
				+int vgic_lazy_init(struct kvm *kvm);
				+int vgic_init(struct kvm *kvm);
			
 
				+
			
 
				+#endif
			
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,9 @@
 
				 #define CREATE_TRACE_POINTS
			
 
				 #include <trace/events/kvm.h>
			
 
				 
			
 
				+/*
				+ * Worst case buffer size needed for holding an integer: for a 32-bit int
				+ * that is a sign, ten decimal digits and the terminating NUL.
				+ */
				+#define ITOA_MAX_LEN 12
			
 
				+
			
 
				 MODULE_AUTHOR("Qumranet");
			
 
				 MODULE_LICENSE("GPL");
			
 
				 
			
@@ -100,6 +103,9 @@ static __read_mostly struct preempt_ops kvm_preempt_ops;
 
				 struct dentry *kvm_debugfs_dir;
			
 
				 EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
			
 
				 
			
 
				+/* Number of items in debugfs_entries[]; counted in kvm_init_debug(). */
				+static int kvm_debugfs_num_entries;
				+/* Forward declaration; the table itself is defined after the per-VM fops. */
				+static const struct file_operations *stat_fops_per_vm[];
			
 
				+
			
 
				 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			
 
				 			   unsigned long arg);
			
 
				 #ifdef CONFIG_KVM_COMPAT
			
@@ -542,6 +548,58 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
 
				 	kvfree(slots);
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Remove the per-VM debugfs directory and free the stat data backing its
				+ * files.  Does nothing if the directory was never created.
				+ *
				+ * This also runs (via kvm_put_kvm()) when kvm_create_vm_debugfs() failed
				+ * part-way, so the stat array may be absent or only partly filled.
				+ */
				+static void kvm_destroy_vm_debugfs(struct kvm *kvm)
				+{
				+	int i;
				+
				+	if (!kvm->debugfs_dentry)
				+		return;
				+
				+	debugfs_remove_recursive(kvm->debugfs_dentry);
				+
				+	/*
				+	 * The array allocation can fail after the directory exists; do not
				+	 * index a NULL array in that case.  Unfilled slots are NULL (kcalloc)
				+	 * and kfree(NULL) is a no-op.
				+	 */
				+	if (kvm->debugfs_stat_data) {
				+		for (i = 0; i < kvm_debugfs_num_entries; i++)
				+			kfree(kvm->debugfs_stat_data[i]);
				+		kfree(kvm->debugfs_stat_data);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Create the "<pid>-<fd>" debugfs directory for @kvm and one 0444 file per
				+ * debugfs_entries[] item, each backed by its own kvm_stat_data.
				+ *
				+ * Returns 0 on success or when debugfs is not initialized, and -ENOMEM on
				+ * any failure.  Nothing is unwound here on failure: whatever has already
				+ * been stored in @kvm is left for kvm_destroy_vm_debugfs() to release.
				+ */
				+static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
				+{
				+	char dir_name[ITOA_MAX_LEN * 2];
				+	struct kvm_stats_debugfs_item *item;
				+	struct kvm_stat_data *data;
				+	struct dentry *file;
				+
				+	if (!debugfs_initialized())
				+		return 0;
				+
				+	snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
				+	kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
				+	if (!kvm->debugfs_dentry)
				+		return -ENOMEM;
				+
				+	kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
				+					 sizeof(*kvm->debugfs_stat_data),
				+					 GFP_KERNEL);
				+	if (!kvm->debugfs_stat_data)
				+		return -ENOMEM;
				+
				+	for (item = debugfs_entries; item->name; item++) {
				+		data = kzalloc(sizeof(*data), GFP_KERNEL);
				+		if (!data)
				+			return -ENOMEM;
				+
				+		data->kvm = kvm;
				+		data->offset = item->offset;
				+		/* Store it before creating the file so teardown can free it. */
				+		kvm->debugfs_stat_data[item - debugfs_entries] = data;
				+
				+		file = debugfs_create_file(item->name, 0444,
				+					   kvm->debugfs_dentry, data,
				+					   stat_fops_per_vm[item->kind]);
				+		if (!file)
				+			return -ENOMEM;
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				 static struct kvm *kvm_create_vm(unsigned long type)
			
 
				 {
			
 
				 	int r, i;
			
@@ -647,6 +705,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 
				 	int i;
			
 
				 	struct mm_struct *mm = kvm->mm;
			
 
				 
			
 
				+	kvm_destroy_vm_debugfs(kvm);
			
 
				 	kvm_arch_sync_events(kvm);
			
 
				 	spin_lock(&kvm_lock);
			
 
				 	list_del(&kvm->vm_list);
			
@@ -2999,8 +3058,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 
				 	}
			
 
				 #endif
			
 
				 	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
			
 
				-	if (r < 0)
			
 
				+	if (r < 0) {
			
 
				 		kvm_put_kvm(kvm);
			
 
				+		return r;
			
 
				+	}
			
 
				+
			
 
				+	if (kvm_create_vm_debugfs(kvm, r) < 0) {
			
 
				+		kvm_put_kvm(kvm);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				 
			
 
				 	return r;
			
 
				 }
			
@@ -3425,15 +3491,114 @@ static struct notifier_block kvm_cpu_notifier = {
 
				 	.notifier_call = kvm_cpu_hotplug,
			
 
				 };
			
 
				 
			
 
				+/*
				+ * Common open handler for the per-VM stat files.
				+ *
				+ * Each debugfs file refers to a kvm struct that is still valid when
				+ * kvm_destroy_vm is called.  To avoid racing with the removal of the
				+ * debugfs directory, a reference is taken only while users_count is still
				+ * non-zero; otherwise the open fails with -ENOENT.  The reference is
				+ * dropped again if simple_attr_open() fails, in which case -ENOMEM is
				+ * returned.
				+ */
				+static int kvm_debugfs_open(struct inode *inode, struct file *file,
				+			   int (*get)(void *, u64 *), int (*set)(void *, u64),
				+			   const char *fmt)
				+{
				+	struct kvm_stat_data *stat_data = inode->i_private;
				+	struct kvm *kvm = stat_data->kvm;
				+
				+	if (!atomic_add_unless(&kvm->users_count, 1, 0))
				+		return -ENOENT;
				+
				+	if (!simple_attr_open(inode, file, get, set, fmt))
				+		return 0;
				+
				+	kvm_put_kvm(kvm);
				+	return -ENOMEM;
				+}
			
 
				+
			
 
				+/*
				+ * Release handler for the per-VM stat files: release the simple_attr
				+ * state, then drop the VM reference taken in kvm_debugfs_open().
				+ */
				+static int kvm_debugfs_release(struct inode *inode, struct file *file)
				+{
				+	struct kvm_stat_data *stat_data = inode->i_private;
				+	struct kvm *kvm = stat_data->kvm;
				+
				+	simple_attr_release(inode, file);
				+	kvm_put_kvm(kvm);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Getter for a single VM: @data is a kvm_stat_data; the u32 found at
				+ * ->offset bytes into the kvm struct is stored in *@val.  Always returns 0.
				+ */
				+static int vm_stat_get_per_vm(void *data, u64 *val)
				+{
				+	struct kvm_stat_data *stat_data = data;
				+	void *base = stat_data->kvm;
				+
				+	*val = *(u32 *)(base + stat_data->offset);
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/* Open handler for KVM_STAT_VM files: read-only attribute, no setter. */
				+static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
				+{
				+	/* Lets the compiler check the format string against a u64 argument. */
				+	__simple_attr_check_format("%llu\n", 0ull);
				+
				+	return kvm_debugfs_open(inode, file, vm_stat_get_per_vm, NULL,
				+				"%llu\n");
				+}
			
 
				+
			
 
				+/*
				+ * File operations for the per-VM debugfs files of kind KVM_STAT_VM.
				+ * NOTE(review): the open handler passes a NULL setter, so writes through
				+ * simple_attr_write are presumably rejected -- confirm.
				+ */
				+static const struct file_operations vm_stat_get_per_vm_fops = {
				+	.owner   = THIS_MODULE,
				+	.open    = vm_stat_get_per_vm_open,
				+	.release = kvm_debugfs_release,
				+	.read    = simple_attr_read,
				+	.write   = simple_attr_write,
				+	.llseek  = generic_file_llseek,
				+};
			
 
				+
			
 
				+/*
				+ * Getter for a single VM: @data is a kvm_stat_data; the u32 found at
				+ * ->offset bytes into each VCPU of the VM is summed into *@val.
				+ * Always returns 0.
				+ */
				+static int vcpu_stat_get_per_vm(void *data, u64 *val)
				+{
				+	struct kvm_stat_data *stat_data = data;
				+	struct kvm_vcpu *vcpu;
				+	u64 sum = 0;
				+	int i;
				+
				+	kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
				+		sum += *(u32 *)((void *)vcpu + stat_data->offset);
				+
				+	*val = sum;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/* Open handler for KVM_STAT_VCPU files: read-only attribute, no setter. */
				+static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
				+{
				+	/* Lets the compiler check the format string against a u64 argument. */
				+	__simple_attr_check_format("%llu\n", 0ull);
				+
				+	return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm, NULL,
				+				"%llu\n");
				+}
			
 
				+
			
 
				+/*
				+ * File operations for the per-VM debugfs files of kind KVM_STAT_VCPU.
				+ * NOTE(review): the open handler passes a NULL setter, so writes through
				+ * simple_attr_write are presumably rejected -- confirm.
				+ */
				+static const struct file_operations vcpu_stat_get_per_vm_fops = {
				+	.owner   = THIS_MODULE,
				+	.open    = vcpu_stat_get_per_vm_open,
				+	.release = kvm_debugfs_release,
				+	.read    = simple_attr_read,
				+	.write   = simple_attr_write,
				+	.llseek  = generic_file_llseek,
				+};
			
 
				+
			
 
				+/*
				+ * Per-VM file operations indexed by stat kind; kvm_create_vm_debugfs()
				+ * looks entries up with the ->kind of each debugfs_entries[] item.
				+ */
				+static const struct file_operations *stat_fops_per_vm[] = {
				+	[KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
				+	[KVM_STAT_VM]   = &vm_stat_get_per_vm_fops,
				+};
			
 
				+
			
 
				 static int vm_stat_get(void *_offset, u64 *val)
			
 
				 {
			
 
				 	unsigned offset = (long)_offset;
			
 
				 	struct kvm *kvm;
			
 
				+	struct kvm_stat_data stat_tmp = {.offset = offset};
			
 
				+	u64 tmp_val;
			
 
				 
			
 
				 	*val = 0;
			
 
				 	spin_lock(&kvm_lock);
			
 
				-	list_for_each_entry(kvm, &vm_list, vm_list)
			
 
				-		*val += *(u32 *)((void *)kvm + offset);
			
 
				+	list_for_each_entry(kvm, &vm_list, vm_list) {
			
 
				+		stat_tmp.kvm = kvm;
			
 
				+		vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
			
 
				+		*val += tmp_val;
			
 
				+	}
			
 
				 	spin_unlock(&kvm_lock);
			
 
				 	return 0;
			
 
				 }
			
@@ -3444,15 +3609,16 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 
				 {
			
 
				 	unsigned offset = (long)_offset;
			
 
				 	struct kvm *kvm;
			
 
				-	struct kvm_vcpu *vcpu;
			
 
				-	int i;
			
 
				+	struct kvm_stat_data stat_tmp = {.offset = offset};
			
 
				+	u64 tmp_val;
			
 
				 
			
 
				 	*val = 0;
			
 
				 	spin_lock(&kvm_lock);
			
 
				-	list_for_each_entry(kvm, &vm_list, vm_list)
			
 
				-		kvm_for_each_vcpu(i, vcpu, kvm)
			
 
				-			*val += *(u32 *)((void *)vcpu + offset);
			
 
				-
			
 
				+	list_for_each_entry(kvm, &vm_list, vm_list) {
			
 
				+		stat_tmp.kvm = kvm;
			
 
				+		vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
			
 
				+		*val += tmp_val;
			
 
				+	}
			
 
				 	spin_unlock(&kvm_lock);
			
 
				 	return 0;
			
 
				 }
			
@@ -3473,7 +3639,8 @@ static int kvm_init_debug(void)
 
				 	if (kvm_debugfs_dir == NULL)
			
 
				 		goto out;
			
 
				 
			
 
				-	for (p = debugfs_entries; p->name; ++p) {
			
 
				+	kvm_debugfs_num_entries = 0;
			
 
				+	for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
			
 
				 		if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
			
 
				 					 (void *)(long)p->offset,
			
 
				 					 stat_fops[p->kind]))