Merge git://github.com/agraf/linux-2.6.git kvm-ppc-next into queue

Gleb Natapov · 12 years ago · parent commit 064d1afaa5
53 changed files with 5549 additions and 449 deletions
  1. + 114 - 0	Documentation/virtual/kvm/api.txt
  2. + 1 - 0	Documentation/virtual/kvm/devices/README
  3. + 56 - 0	Documentation/virtual/kvm/devices/mpic.txt
  4. + 1 - 0	arch/ia64/include/asm/kvm_host.h
  5. + 1 - 0	arch/ia64/kvm/Kconfig
  6. + 1 - 1	arch/ia64/kvm/Makefile
  7. + 3 - 0	arch/powerpc/include/asm/hvcall.h
  8. + 4 - 1	arch/powerpc/include/asm/kvm_book3s.h
  9. + 13 - 0	arch/powerpc/include/asm/kvm_book3s_64.h
  10. + 7 - 1	arch/powerpc/include/asm/kvm_book3s_asm.h
  11. + 39 - 1	arch/powerpc/include/asm/kvm_host.h
  12. + 105 - 2	arch/powerpc/include/asm/kvm_ppc.h
  13. + 1 - 0	arch/powerpc/include/asm/reg.h
  14. + 73 - 0	arch/powerpc/include/uapi/asm/kvm.h
  15. + 3 - 0	arch/powerpc/kernel/asm-offsets.c
  16. + 12 - 0	arch/powerpc/kvm/44x.c
  17. + 23 - 3	arch/powerpc/kvm/Kconfig
  18. + 11 - 1	arch/powerpc/kvm/Makefile
  19. + 26 - 1	arch/powerpc/kvm/book3s.c
  20. + 102 - 18	arch/powerpc/kvm/book3s_64_mmu_hv.c
  21. + 3 - 1	arch/powerpc/kvm/book3s_emulate.c
  22. + 74 - 14	arch/powerpc/kvm/book3s_hv.c
  23. + 0 - 11	arch/powerpc/kvm/book3s_hv_rm_mmu.c
  24. + 406 - 0	arch/powerpc/kvm/book3s_hv_rm_xics.c
  25. + 161 - 67	arch/powerpc/kvm/book3s_hv_rmhandlers.S
  26. + 2 - 3	arch/powerpc/kvm/book3s_pr.c
  27. + 21 - 0	arch/powerpc/kvm/book3s_pr_papr.c
  28. + 274 - 0	arch/powerpc/kvm/book3s_rtas.c
  29. + 1130 - 0	arch/powerpc/kvm/book3s_xics.c
  30. + 129 - 0	arch/powerpc/kvm/book3s_xics.h
  31. + 73 - 50	arch/powerpc/kvm/booke.c
  32. + 14 - 0	arch/powerpc/kvm/e500.c
  33. + 22 - 0	arch/powerpc/kvm/e500.h
  34. + 19 - 0	arch/powerpc/kvm/e500_emulate.c
  35. + 170 - 22	arch/powerpc/kvm/e500_mmu.c
  36. + 16 - 0	arch/powerpc/kvm/e500mc.c
  37. + 17 - 0	arch/powerpc/kvm/irq.h
  38. + 1843 - 0	arch/powerpc/kvm/mpic.c
  39. + 60 - 12	arch/powerpc/kvm/powerpc.c
  40. + 8 - 0	arch/powerpc/sysdev/xics/icp-native.c
  41. + 2 - 0	arch/x86/include/asm/kvm_host.h
  42. + 1 - 0	arch/x86/kvm/Kconfig
  43. + 1 - 1	arch/x86/kvm/Makefile
  44. + 0 - 1	arch/x86/kvm/x86.c
  45. + 45 - 9	include/linux/kvm_host.h
  46. + 10 - 2	include/trace/events/kvm.h
  47. + 34 - 2	include/uapi/linux/kvm.h
  48. + 3 - 0	virt/kvm/Kconfig
  49. + 0 - 30	virt/kvm/assigned-dev.c
  50. + 3 - 3	virt/kvm/eventfd.c
  51. + 3 - 191	virt/kvm/irq_comm.c
  52. + 237 - 0	virt/kvm/irqchip.c
  53. + 172 - 1	virt/kvm/kvm_main.c

+ 114 - 0
Documentation/virtual/kvm/api.txt

@@ -1792,6 +1792,23 @@ registers, find a list below:
  PPC   | KVM_REG_PPC_TSR	| 32
  PPC   | KVM_REG_PPC_OR_TSR	| 32
  PPC   | KVM_REG_PPC_CLEAR_TSR	| 32
+  PPC   | KVM_REG_PPC_MAS0	| 32
+  PPC   | KVM_REG_PPC_MAS1	| 32
+  PPC   | KVM_REG_PPC_MAS2	| 64
+  PPC   | KVM_REG_PPC_MAS7_3	| 64
+  PPC   | KVM_REG_PPC_MAS4	| 32
+  PPC   | KVM_REG_PPC_MAS6	| 32
+  PPC   | KVM_REG_PPC_MMUCFG	| 32
+  PPC   | KVM_REG_PPC_TLB0CFG	| 32
+  PPC   | KVM_REG_PPC_TLB1CFG	| 32
+  PPC   | KVM_REG_PPC_TLB2CFG	| 32
+  PPC   | KVM_REG_PPC_TLB3CFG	| 32
+  PPC   | KVM_REG_PPC_TLB0PS	| 32
+  PPC   | KVM_REG_PPC_TLB1PS	| 32
+  PPC   | KVM_REG_PPC_TLB2PS	| 32
+  PPC   | KVM_REG_PPC_TLB3PS	| 32
+  PPC   | KVM_REG_PPC_EPTCFG	| 32
+  PPC   | KVM_REG_PPC_ICP_STATE | 64

 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
@@ -2173,6 +2190,76 @@ header; first `n_valid' valid entries with contents from the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.

+4.79 KVM_CREATE_DEVICE
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: vm ioctl
+Parameters: struct kvm_create_device (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device type is unknown or unsupported
+  EEXIST: Device already created, and this type of device may not
+          be instantiated multiple times
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
+Creates an emulated device in the kernel.  The file descriptor returned
+in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
+
+If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
+
+Individual devices should not define flags.  Attributes should be used
+for specifying any behavior that is not implied by the device type
+number.
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+  EPERM:  The attribute cannot (currently) be accessed this way
+          (e.g. read-only attribute, or attribute that only makes
+          sense when the device is in a different state)
+
+  Other error conditions may be defined by individual device types.
+
+Gets/sets a specified piece of device configuration and/or state.  The
+semantics are device-specific.  See individual device documentation in
+the "devices" directory.  As with ONE_REG, the size of the data
+transferred is defined by the particular attribute.
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+4.81 KVM_HAS_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+
+Tests whether a device supports a particular attribute.  A successful
+return indicates the attribute is implemented.  It does not necessarily
+indicate that the attribute can be read or written in the device's
+current state.  "addr" is ignored.

 4.77 KVM_ARM_VCPU_INIT

@@ -2255,6 +2342,25 @@ and distributor interface, the ioctl must be called after calling
 KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
 this ioctl twice for any of the base addresses will return -EEXIST.

+4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+

 5. The kvm_run structure
 ------------------------
@@ -2658,3 +2764,11 @@ to receive the topmost interrupt vector.
 When disabled (args[0] == 0), behavior is as if this facility is unsupported.

 When this capability is enabled, KVM_EXIT_EPR can occur.
+
+6.6 KVM_CAP_IRQ_MPIC
+
+Architectures: ppc
+Parameters: args[0] is the MPIC device fd
+            args[1] is the MPIC CPU number for this vcpu
+
+This capability connects the vcpu to an in-kernel MPIC device.
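
For illustration, a minimal userspace sketch of the device-control and RTAS token APIs documented above. This is a hedged example, not part of the commit: vm_fd is assumed to come from KVM_CREATE_VM, the helper names are illustrative, and error handling is reduced to bare checks.

/* Assumption: vm_fd is a KVM VM file descriptor (from KVM_CREATE_VM). */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_device(int vm_fd, __u32 type)
{
	struct kvm_create_device cd = { .type = type };

	/* Probe first: KVM_CREATE_DEVICE_TEST only tests the device type. */
	cd.flags = KVM_CREATE_DEVICE_TEST;
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;		/* ENODEV: type unknown/unsupported */

	cd.flags = 0;
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;		/* e.g. EEXIST for single-instance devices */
	return cd.fd;			/* out: handle for KVM_SET/GET/HAS_DEVICE_ATTR */
}

static int define_rtas_token(int vm_fd, const char *service, __u64 token)
{
	struct kvm_rtas_token_args args = { .token = token };

	/* A token of 0 undefines any existing mapping for the service. */
	strncpy(args.name, service, sizeof(args.name) - 1);
	return ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}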

+ 1 - 0
Documentation/virtual/kvm/devices/README

@@ -0,0 +1 @@
+This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.

+ 56 - 0
Documentation/virtual/kvm/devices/mpic.txt

@@ -0,0 +1,56 @@
+MPIC interrupt controller
+=========================
+
+Device types supported:
+  KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0
+  KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2
+
+Only one MPIC instance, of any type, may be instantiated.  The created
+MPIC will act as the system interrupt controller, connecting to each
+vcpu's interrupt inputs.
+
+Groups:
+  KVM_DEV_MPIC_GRP_MISC
+  Attributes:
+    KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
+      Base address of the 256 KiB MPIC register space.  Must be
+      naturally aligned.  A value of zero disables the mapping.
+      Reset value is zero.
+
+  KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
+    Access an MPIC register, as if the access were made from the guest.
+    "attr" is the byte offset into the MPIC register space.  Accesses
+    must be 4-byte aligned.
+
+    MSIs may be signaled by using this attribute group to write
+    to the relevant MSIIR.
+
+  KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
+    IRQ input line for each standard openpic source.  0 is inactive and 1
+    is active, regardless of interrupt sense.
+
+    For edge-triggered interrupts:  Writing 1 is considered an activating
+    edge, and writing 0 is ignored.  Reading returns 1 if a previously
+    signaled edge has not been acknowledged, and 0 otherwise.
+
+    "attr" is the IRQ number.  IRQ numbers for standard sources are the
+    byte offset of the relevant IVPR from EIVPR0, divided by 32.
+
+IRQ Routing:
+
+  The MPIC emulation supports IRQ routing. Only a single MPIC device can
+  be instantiated. Once that device has been created, it's available as
+  irqchip id 0.
+
+  This irqchip 0 has 256 interrupt pins, which expose the interrupts in
+  the main array of interrupt sources (a.k.a. "SRC" interrupts).
+
+  The numbering is the same as the MPIC device tree binding -- based on
+  the register offset from the beginning of the sources array, without
+  regard to any subdivisions in chip documentation such as "internal"
+  or "external" interrupts.
+
+  Default routes are established for these pins, with the GSI being equal
+  to the pin number.
+
+  Access to non-SRC interrupts is not implemented through IRQ routing
+  mechanisms.
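
The attribute groups above are driven with the KVM_SET/GET_DEVICE_ATTR device ioctls, and the default GSI routing with the generic KVM_IRQ_LINE vm ioctl. A hedged sketch follows; dev_fd is assumed to be the fd returned by KVM_CREATE_DEVICE for KVM_DEV_TYPE_FSL_MPIC_20, and the helper names are illustrative, not part of the kernel API.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Map the 256 KiB register space at "base" (0 disables the mapping). */
static int mpic_set_base(int dev_fd, __u64 base)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_MPIC_GRP_MISC,
		.attr  = KVM_DEV_MPIC_BASE_ADDR,
		.addr  = (__u64)(unsigned long)&base,
	};

	return ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &attr);
}

/* "attr" is the byte offset into the register space, 4-byte aligned. */
static int mpic_read_reg(int dev_fd, __u64 offset, __u32 *val)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_MPIC_GRP_REGISTER,
		.attr  = offset,
		.addr  = (__u64)(unsigned long)val,
	};

	return ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &attr);
}

/* With the default routes (GSI == SRC pin number), raise/lower a source. */
static int mpic_set_irq(int vm_fd, __u32 src, int active)
{
	struct kvm_irq_level irq = { .irq = src, .level = active };

	return ioctl(vm_fd, KVM_IRQ_LINE, &irq);
}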

+ 1 - 0
arch/ia64/include/asm/kvm_host.h

@@ -26,6 +26,7 @@
 #define KVM_USER_MEM_SLOTS 32

 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS

 /* define exit reasons from vmm to kvm*/
 #define EXIT_REASON_VM_PANIC		0

+ 1 - 0
arch/ia64/kvm/Kconfig

@@ -27,6 +27,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	select KVM_APIC_ARCHITECTURE
 	select KVM_MMIO
 	---help---

+ 1 - 1
arch/ia64/kvm/Makefile

@@ -49,7 +49,7 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/

 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o assigned-dev.o)
+		coalesced_mmio.o irq_comm.o assigned-dev.o irqchip.o)

 ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)

+ 3 - 0
arch/powerpc/include/asm/hvcall.h

@@ -270,6 +270,9 @@
 #define H_SET_MODE		0x31C
 #define MAX_HCALL_OPCODE	H_SET_MODE

+/* Platform specific hcalls, used by KVM */
+#define H_RTAS			0xf000
+
 #ifndef __ASSEMBLY__

 /**

+ 4 - 1
arch/powerpc/include/asm/kvm_book3s.h

@@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
@@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
 			unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
-extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
+			unsigned long gpa, bool dirty);
 extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,

+ 13 - 0
arch/powerpc/include/asm/kvm_book3s_64.h

@@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
 		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
 }

+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+					  struct revmap_entry *rev)
+{
+	if (atomic_read(&kvm->arch.hpte_mod_interest))
+		rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */

+ 7 - 1
arch/powerpc/include/asm/kvm_book3s_asm.h

@@ -20,6 +20,11 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__

+/* XICS ICP register offsets */
+#define XICS_XIRR		4
+#define XICS_MFRR		0xc
+#define XICS_IPI		2	/* interrupt source # for IPIs */
+
 #ifdef __ASSEMBLY__

 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@ struct kvmppc_host_state {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	u8 hwthread_req;
 	u8 hwthread_state;
-
+	u8 host_ipi;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	unsigned long xics_phys;
+	u32 saved_xirr;
 	u64 dabr;
 	u64 host_mmcr[3];
 	u32 host_pmc[8];

+ 39 - 1
arch/powerpc/include/asm/kvm_host.h

@@ -44,6 +44,10 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif

+/* These values are internal and can be increased later */
+#define KVM_NR_IRQCHIPS          1
+#define KVM_IRQCHIP_NUM_PINS     256
+
 #if !defined(CONFIG_KVM_440)
 #include <linux/mmu_notifier.h>

@@ -188,6 +192,10 @@ struct kvmppc_linear_info {
 	int		 type;
 };

+/* XICS components, defined in book3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -255,6 +263,13 @@ struct kvm_arch {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
+	struct list_head rtas_tokens;
+#endif
+#ifdef CONFIG_KVM_MPIC
+	struct openpic *mpic;
+#endif
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_xics *xics;
 #endif
 };

@@ -301,11 +316,13 @@ struct kvmppc_vcore {
  * that a guest can register.
  */
 struct kvmppc_vpa {
+	unsigned long gpa;	/* Current guest phys addr */
 	void *pinned_addr;	/* Address in kernel linear mapping */
 	void *pinned_end;	/* End of region */
 	unsigned long next_gpa;	/* Guest phys addr for update */
 	unsigned long len;	/* Number of bytes required */
 	u8 update_pending;	/* 1 => update pinned_addr from next_gpa */
+	bool dirty;		/* true => area has been modified by kernel */
 };

 struct kvmppc_pte {
@@ -359,6 +376,11 @@ struct kvmppc_slb {
 #define KVMPPC_BOOKE_MAX_IAC	4
 #define KVMPPC_BOOKE_MAX_DAC	2

+/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
+#define KVMPPC_EPR_NONE		0 /* EPR not supported */
+#define KVMPPC_EPR_USER		1 /* exit to userspace to fill EPR */
+#define KVMPPC_EPR_KERNEL	2 /* in-kernel irqchip */
+
 struct kvmppc_booke_debug_reg {
 	u32 dbcr0;
 	u32 dbcr1;
@@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg {
 	u64 dac[KVMPPC_BOOKE_MAX_DAC];
 };

+#define KVMPPC_IRQ_DEFAULT	0
+#define KVMPPC_IRQ_MPIC		1
+#define KVMPPC_IRQ_XICS		2
+
+struct openpic;
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -502,7 +530,9 @@ struct kvm_vcpu_arch {
 	spinlock_t wdt_lock;
 	struct timer_list wdt_timer;
 	u32 tlbcfg[4];
+	u32 tlbps[4];
 	u32 mmucfg;
+	u32 eptcfg;
 	u32 epr;
 	u32 crit_save;
 	struct kvmppc_booke_debug_reg dbg_reg;
@@ -522,7 +552,7 @@ struct kvm_vcpu_arch {
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
-	u8 epr_enabled;
+	u8 epr_flags; /* KVMPPC_EPR_xxx */
 	u8 epr_needed;

 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -549,6 +579,13 @@ struct kvm_vcpu_arch {
 	unsigned long magic_page_pa; /* phys addr to map the magic page to */
 	unsigned long magic_page_ea; /* effect. addr to map the magic page to */

+	int irq_type;		/* one of KVM_IRQ_* */
+	int irq_cpu_id;
+	struct openpic *mpic;	/* KVM_IRQ_MPIC */
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu_arch_shared shregs;

@@ -589,5 +626,6 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_FQPR	0x0060

 #define __KVM_HAVE_ARCH_WQP
+#define __KVM_HAVE_CREATE_DEVICE

 #endif /* __POWERPC_KVM_HOST_H__ */

+ 105 - 2
arch/powerpc/include/asm/kvm_ppc.h

@@ -44,7 +44,7 @@ enum emulation_result {
 	EMULATE_DO_DCR,       /* kvm_run filled with DCR request */
 	EMULATE_FAIL,         /* can't emulate this instruction */
 	EMULATE_AGAIN,        /* something went wrong. go again */
-	EMULATE_DO_PAPR,      /* kvm_run filled with PAPR request */
+	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
 };

 extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -130,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
 			struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -164,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);

 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);

+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				u32 *priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
@@ -245,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);

 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);

+struct openpic;
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
 	paca[cpu].kvm_hstate.xics_phys = addr;
 }

+static inline u32 kvmppc_get_xics_latch(void)
+{
+	u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+
+	get_paca()->kvm_hstate.saved_xirr = 0;
+
+	return xirr;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{
+	paca[cpu].kvm_hstate.host_ipi = host_ipi;
+}
+
+extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);

 #else
@@ -259,6 +289,44 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)

 static inline void kvm_linear_init(void)
 {}
+
+static inline u32 kvmppc_get_xics_latch(void)
+{
+	return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_kick(vcpu);
+}
+#endif
+
+#ifdef CONFIG_KVM_XICS
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
+}
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
+extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+#else
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+	{ return 0; }
+static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
+					 unsigned long server)
+	{ return -EINVAL; }
+static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
+					struct kvm_irq_level *args)
+	{ return -ENOTTY; }
+static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+	{ return 0; }
 #endif

 static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@@ -270,6 +338,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
 #endif
 }

+#ifdef CONFIG_KVM_MPIC
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu);
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
+
+#else
+
+static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
+		struct kvm_vcpu *vcpu, u32 cpu)
+{
+	return -EINVAL;
+}
+
+static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
+		struct kvm_vcpu *vcpu)
+{
+}
+
+#endif /* CONFIG_KVM_MPIC */
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg);
 int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@@ -282,8 +376,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids);

 static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
 {
-	/* Clear i-cache for new pages */
 	struct page *page;
+	/*
+	 * We can only access pages that the kernel maps
+	 * as memory. Bail out for unmapped ones.
+	 */
+	if (!pfn_valid(pfn))
+		return;
+
+	/* Clear i-cache for new pages */
 	page = pfn_to_page(pfn);
 	if (!test_bit(PG_arch_1, &page->flags)) {
 		flush_dcache_icache_page(page);
@@ -323,4 +424,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
 	return ea;
 }

+extern void xics_wake_cpu(int cpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */

+ 1 - 0
arch/powerpc/include/asm/reg.h

@@ -290,6 +290,7 @@
 #define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
 #define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
 #define   LPCR_MER	0x00000800	/* Mediated External Exception */
+#define   LPCR_MER_SH	11
 #define   LPCR_LPES    0x0000000c
 #define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */

+ 73 - 0
arch/powerpc/include/uapi/asm/kvm.h

@@ -25,6 +25,8 @@
 /* Select powerpc specific features in <linux/kvm.h> */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_IRQCHIP
+#define __KVM_HAVE_IRQ_LINE

 struct kvm_regs {
 	__u64 pc;
@@ -272,8 +274,31 @@ struct kvm_debug_exit_arch {

 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+	struct {
+		/* H/W breakpoint/watchpoint address */
+		__u64 addr;
+		/*
+		 * Type denotes h/w breakpoint, read watchpoint, write
+		 * watchpoint or watchpoint (both read and write).
+		 */
+#define KVMPPC_DEBUG_NONE		0x0
+#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
+		__u32 type;
+		__u32 reserved;
+	} bp[16];
 };
 };
 
 
+/* Debug related defines */
+/*
+ * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic
+ * and upper 16 bits are architecture specific. Architecture specific defines
+ * that ioctl is for setting hardware breakpoint or software breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP		0x00010000
+#define KVM_GUESTDBG_USE_HW_BP		0x00020000
+
 /* definition of registers in kvm_run */
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 struct kvm_sync_regs {
 };
 };
@@ -299,6 +324,12 @@ struct kvm_allocate_rma {
 	__u64 rma_size;
 };

+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+	char name[120];
+	__u64 token;	/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
 	__u32 mas8;
 	__u32 mas1;
@@ -359,6 +390,26 @@ struct kvm_get_htab_header {
 	__u16	n_invalid;
 };

+/* Per-vcpu XICS interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+
+#define  KVM_REG_PPC_ICP_CPPR_SHIFT	56	/* current proc priority */
+#define  KVM_REG_PPC_ICP_CPPR_MASK	0xff
+#define  KVM_REG_PPC_ICP_XISR_SHIFT	32	/* interrupt status field */
+#define  KVM_REG_PPC_ICP_XISR_MASK	0xffffff
+#define  KVM_REG_PPC_ICP_MFRR_SHIFT	24	/* pending IPI priority */
+#define  KVM_REG_PPC_ICP_MFRR_MASK	0xff
+#define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
+#define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
+
+/* Device control API: PPC-specific devices */
+#define KVM_DEV_MPIC_GRP_MISC		1
+#define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
+
+#define KVM_DEV_MPIC_GRP_REGISTER	2	/* 32-bit */
+#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE	3	/* 32-bit */
+
+/* One-Reg API: PPC-specific registers */
 #define KVM_REG_PPC_HIOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
 #define KVM_REG_PPC_IAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
 #define KVM_REG_PPC_IAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@@ -426,4 +477,26 @@ struct kvm_get_htab_header {
 /* Debugging: Special instruction for software breakpoint */
 #define KVM_REG_PPC_DEBUG_INST	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)

+/* MMU registers */
+#define KVM_REG_PPC_MAS0	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
+ * KVM_CAP_SW_TLB ioctl
+ */
+#define KVM_REG_PPC_TLB0CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
+
 #endif /* __LINUX_KVM_POWERPC_H */
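
For reference, the packed 64-bit ICP value can be fetched with the standard KVM_GET_ONE_REG vcpu ioctl and unpacked with the SHIFT/MASK macros defined above. This is a hedged sketch: vcpu_fd is assumed to come from KVM_CREATE_VCPU, and the accessor names are illustrative.

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_icp_state(int vcpu_fd, __u64 *state)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_ICP_STATE,
		.addr = (__u64)(unsigned long)state,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}

/* Field accessors built from the macros above. */
static inline __u8 icp_cppr(__u64 s)	/* current processor priority */
{
	return (s >> KVM_REG_PPC_ICP_CPPR_SHIFT) & KVM_REG_PPC_ICP_CPPR_MASK;
}

static inline __u32 icp_xisr(__u64 s)	/* pending interrupt source */
{
	return (s >> KVM_REG_PPC_ICP_XISR_SHIFT) & KVM_REG_PPC_ICP_XISR_MASK;
}

static inline __u8 icp_mfrr(__u64 s)	/* pending IPI priority */
{
	return (s >> KVM_REG_PPC_ICP_MFRR_SHIFT) & KVM_REG_PPC_ICP_MFRR_MASK;
}

static inline __u8 icp_ppri(__u64 s)	/* pending irq priority */
{
	return (s >> KVM_REG_PPC_ICP_PPRI_SHIFT) & KVM_REG_PPC_ICP_PPRI_MASK;
}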

+ 3 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -477,6 +477,7 @@ int main(void)
 	DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
+	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -573,6 +574,8 @@ int main(void)
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
 	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
 	HSTATE_FIELD(HSTATE_PMC, host_pmc);
 	HSTATE_FIELD(HSTATE_PURR, host_purr);

+ 12 - 0
arch/powerpc/kvm/44x.c

@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }

+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	return -EINVAL;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x;

+ 23 - 3
arch/powerpc/kvm/Kconfig

@@ -136,21 +136,41 @@ config KVM_E500V2
 	  If unsure, say N.

 config KVM_E500MC
-	bool "KVM support for PowerPC E500MC/E5500 processors"
+	bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
 	depends on PPC_E500MC
 	depends on PPC_E500MC
 	select KVM
 	select KVM
 	select KVM_MMIO
 	select KVM_MMIO
 	select KVM_BOOKE_HV
 	select KVM_BOOKE_HV
 	select MMU_NOTIFIER
 	select MMU_NOTIFIER
 	---help---
 	---help---
-	  Support running unmodified E500MC/E5500 (32-bit) guest kernels in
-	  virtual machines on E500MC/E5500 host processors.
+	  Support running unmodified E500MC/E5500/E6500 guest kernels in
+	  virtual machines on E500MC/E5500/E6500 host processors.

 	  This module provides access to the hardware capabilities through
 	  a character device node named /dev/kvm.

 	  If unsure, say N.

+config KVM_MPIC
+	bool "KVM in-kernel MPIC emulation"
+	depends on KVM && E500
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_MSI
+	help
+	  Enable support for emulating MPIC devices inside the
+	  host kernel, rather than relying on userspace to emulate.
+	  Currently, support is limited to certain versions of
+	  Freescale's MPIC implementation.
+
+config KVM_XICS
+	bool "KVM in-kernel XICS emulation"
+	depends on KVM_BOOK3S_64 && !KVM_MPIC
+	---help---
+	  Include support for the XICS (eXternal Interrupt Controller
+	  Specification) interrupt controller architecture used on
+	  IBM POWER (pSeries) servers.
+
 source drivers/vhost/Kconfig

 endif # VIRTUALIZATION

+ 11 - 1
arch/powerpc/kvm/Makefile

@@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o
+kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
+	book3s_hv_rm_xics.o
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_64_vio_hv.o \
 	book3s_hv_ras.o \
-	book3s_hv_builtin.o
+	book3s_hv_builtin.o \
+	$(kvm-book3s_64-builtin-xics-objs-y)
+
+kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
+	book3s_xics.o

 kvm-book3s_64-module-objs := \
 	../../../virt/kvm/kvm_main.o \
@@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \
 	emulate.o \
 	book3s.o \
 	book3s_64_vio.o \
+	book3s_rtas.o \
 	$(kvm-book3s_64-objs-y)

 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -103,6 +110,9 @@ kvm-book3s_32-objs := \
 	book3s_32_mmu.o
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)

+kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)

 obj-$(CONFIG_KVM_440) += kvm.o

+ 26 - 1
arch/powerpc/kvm/book3s.c

@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
 	return prio;
 }

-static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 					  unsigned int vec)
 {
 	unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -535,6 +535,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 					 &opcode, sizeof(u32));
 			break;
 		}
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp) {
+				r = -ENXIO;
+				break;
+			}
+			val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+			break;
+#endif /* CONFIG_KVM_XICS */
 		default:
 			r = -EINVAL;
 			break;
@@ -597,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp) {
+				r = -ENXIO;
+				break;
+			}
+			r = kvmppc_xics_set_icp(vcpu,
+						set_reg_val(reg->id, val));
+			break;
+#endif /* CONFIG_KVM_XICS */
 		default:
 			r = -EINVAL;
 			break;
@@ -612,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return 0;
 }

+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
 void kvmppc_decrementer_func(unsigned long data)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;

+ 102 - 18
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			/* Harvest R and C */
 			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
 			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
-			rev[i].guest_rpte = ptel | rcbits;
+			if (rcbits & ~rev[i].guest_rpte) {
+				rev[i].guest_rpte = ptel | rcbits;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 		}
 		unlock_rmap(rmapp);
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		/* Now check and modify the HPTE */
 		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
 			kvmppc_clear_ref_hpte(kvm, hptep, i);
-			rev[i].guest_rpte |= HPTE_R_R;
+			if (!(rev[i].guest_rpte & HPTE_R_R)) {
+				rev[i].guest_rpte |= HPTE_R_R;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 			ret = 1;
 		}
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
 			hptep[1] &= ~HPTE_R_C;
 			eieio();
 			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
-			rev[i].guest_rpte |= HPTE_R_C;
+			if (!(rev[i].guest_rpte & HPTE_R_C)) {
+				rev[i].guest_rpte |= HPTE_R_C;
+				note_hpte_modification(kvm, &rev[i]);
+			}
 			ret = 1;
 		}
 		hptep[0] &= ~HPTE_V_HVLOCK;
@@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
 	return ret;
 }

+static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+			      struct kvm_memory_slot *memslot,
+			      unsigned long *map)
+{
+	unsigned long gfn;
+
+	if (!vpa->dirty || !vpa->pinned_addr)
+		return;
+	gfn = vpa->gpa >> PAGE_SHIFT;
+	if (gfn < memslot->base_gfn ||
+	    gfn >= memslot->base_gfn + memslot->npages)
+		return;
+
+	vpa->dirty = false;
+	if (map)
+		__set_bit_le(gfn - memslot->base_gfn, map);
+}
+
 long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			     unsigned long *map)
 {
 	unsigned long i;
 	unsigned long *rmapp;
+	struct kvm_vcpu *vcpu;

 	preempt_disable();
 	rmapp = memslot->arch.rmap;
@@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			__set_bit_le(i, map);
 		++rmapp;
 	}
+
+	/* Harvest dirty bits from VPA and DTL updates */
+	/* Note: we never modify the SLB shadow buffer areas */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
+		harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+	}
 	preempt_enable();
 	return 0;
 }
@@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	unsigned long gfn = gpa >> PAGE_SHIFT;
 	struct page *page, *pages[1];
 	int npages;
-	unsigned long hva, psize, offset;
+	unsigned long hva, offset;
 	unsigned long pa;
 	unsigned long *physp;
 	int srcu_idx;
@@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);

-	psize = PAGE_SIZE;
-	if (PageHuge(page)) {
-		page = compound_head(page);
-		psize <<= compound_order(page);
-	}
-	offset = gpa & (psize - 1);
+	offset = gpa & (PAGE_SIZE - 1);
 	if (nb_ret)
-		*nb_ret = psize - offset;
+		*nb_ret = PAGE_SIZE - offset;
 	return page_address(page) + offset;

  err:
@@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	return NULL;
 }

-void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
+			     bool dirty)
 {
 	struct page *page = virt_to_page(va);
+	struct kvm_memory_slot *memslot;
+	unsigned long gfn;
+	unsigned long *rmap;
+	int srcu_idx;

 	put_page(page);
+
+	if (!dirty || !kvm->arch.using_mmu_notifiers)
+		return;
+
+	/* We need to mark this page dirty in the rmap chain */
+	gfn = gpa >> PAGE_SHIFT;
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (memslot) {
+		rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+		lock_rmap(rmap);
+		*rmap |= KVMPPC_RMAP_CHANGED;
+		unlock_rmap(rmap);
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }

 /*
@@ -1193,16 +1245,36 @@ struct kvm_htab_ctx {

 #define HPTE_SIZE	(2 * sizeof(unsigned long))

+/*
+ * Returns 1 if this HPT entry has been modified or has pending
+ * R/C bit changes.
+ */
+static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
+{
+	unsigned long rcbits_unset;
+
+	if (revp->guest_rpte & HPTE_GR_MODIFIED)
+		return 1;
+
+	/* Also need to consider changes in reference and changed bits */
+	rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+	if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
+		return 1;
+
+	return 0;
+}
+
 static long record_hpte(unsigned long flags, unsigned long *hptp,
 			unsigned long *hpte, struct revmap_entry *revp,
 			int want_valid, int first_pass)
 {
 	unsigned long v, r;
+	unsigned long rcbits_unset;
 	int ok = 1;
 	int valid, dirty;

 	/* Unmodified entries are uninteresting except on the first pass */
-	dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+	dirty = hpte_dirty(revp, hptp);
 	if (!first_pass && !dirty)
 		return 0;

@@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
 		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
 			cpu_relax();
 		v = hptp[0];
+
+		/* re-evaluate valid and dirty from synchronized HPTE value */
+		valid = !!(v & HPTE_V_VALID);
+		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+		/* Harvest R and C into guest view if necessary */
+		rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+		if (valid && (rcbits_unset & hptp[1])) {
+			revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
+				HPTE_GR_MODIFIED;
+			dirty = 1;
+		}
+
 		if (v & HPTE_V_ABSENT) {
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
+			valid = 1;
 		}
-		/* re-evaluate valid and dirty from synchronized HPTE value */
-		valid = !!(v & HPTE_V_VALID);
 		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
 			valid = 0;
-		r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
-		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+		r = revp->guest_rpte;
 		/* only clear modified if this is the right sort of entry */
 		if (valid == want_valid && dirty) {
 			r &= ~HPTE_GR_MODIFIED;
@@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
 		/* Skip uninteresting entries, i.e. clean on not-first pass */
 		if (!first_pass) {
 			while (i < kvm->arch.hpt_npte &&
-			       !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+			       !hpte_dirty(revp, hptp)) {
 				++i;
 				hptp += 2;
 				++revp;

+ 3 - 1
arch/powerpc/kvm/book3s_emulate.c

@@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				run->papr_hcall.args[i] = gpr;
 			}

-			emulated = EMULATE_DO_PAPR;
+			run->exit_reason = KVM_EXIT_PAPR_HCALL;
+			vcpu->arch.hcall_needed = 1;
+			emulated = EMULATE_EXIT_USER;
 			break;
 		}
 #endif

+ 74 - 14
arch/powerpc/kvm/book3s_hv.c

@@ -66,6 +66,31 @@
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);

+void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+	wait_queue_head_t *wqp;
+
+	wqp = kvm_arch_vcpu_wq(vcpu);
+	if (waitqueue_active(wqp)) {
+		wake_up_interruptible(wqp);
+		++vcpu->stat.halt_wakeup;
+	}
+
+	me = get_cpu();
+
+	/* CPU points to the first thread of the core */
+	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+		int real_cpu = cpu + vcpu->arch.ptid;
+		if (paca[real_cpu].kvm_hstate.xics_phys)
+			xics_wake_cpu(real_cpu);
+		else if (cpu_online(cpu))
+			smp_send_reschedule(cpu);
+	}
+	put_cpu();
+}
+
 /*
  * We use the vcpu_load/put functions to measure stolen time.
  * Stolen time is counted as time when either the vcpu is able to
@@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 			len = ((struct reg_vpa *)va)->length.hword;
 		else
 			len = ((struct reg_vpa *)va)->length.word;
-		kvmppc_unpin_guest_page(kvm, va);
+		kvmppc_unpin_guest_page(kvm, va, vpa, false);

 		/* Check length */
 		if (len > nb || len < sizeof(struct reg_vpa))
@@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 		va = NULL;
 		nb = 0;
 		if (gpa)
-			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
 		spin_lock(&vcpu->arch.vpa_update_lock);
 		if (gpa == vpap->next_gpa)
 			break;
 		/* sigh... unpin that one and try again */
 		if (va)
-			kvmppc_unpin_guest_page(kvm, va);
+			kvmppc_unpin_guest_page(kvm, va, gpa, false);
 	}

 	vpap->update_pending = 0;
@@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 		 * has changed the mappings underlying guest memory,
 		 * so unregister the region.
 		 */
-		kvmppc_unpin_guest_page(kvm, va);
+		kvmppc_unpin_guest_page(kvm, va, gpa, false);
 		va = NULL;
 	}
 	if (vpap->pinned_addr)
-		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
+					vpap->dirty);
+	vpap->gpa = gpa;
 	vpap->pinned_addr = va;
+	vpap->dirty = false;
 	if (va)
 		vpap->pinned_end = va + vpap->len;
 }
@@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	/* order writing *dt vs. writing vpa->dtl_idx */
 	smp_wmb();
 	vpa->dtl_idx = ++vcpu->arch.dtl_index;
+	vcpu->arch.dtl.dirty = true;
 }

 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
 	struct kvm_vcpu *tvcpu;
-	int idx;
+	int idx, rc;

 	switch (req) {
 	case H_ENTER:
@@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 					kvmppc_get_gpr(vcpu, 5),
 					kvmppc_get_gpr(vcpu, 6));
 		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+
+		rc = kvmppc_rtas_hcall(vcpu);
+
+		if (rc == -ENOENT)
+			return RESUME_HOST;
+		else if (rc == 0)
+			break;
+
+		/* Send the error out to userspace via KVM_RUN */
+		return rc;
+
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu)) {
+			ret = kvmppc_xics_hcall(vcpu, req);
+			break;
+		} /* fallthrough */
 	default:
 	default:
 		return RESUME_HOST;
 		return RESUME_HOST;
 	}
 	}
@@ -913,15 +964,19 @@ out:
 	return ERR_PTR(err);
 	return ERR_PTR(err);
 }
 }
 
 
+static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+	if (vpa->pinned_addr)
+		kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
+					vpa->dirty);
+}
+
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
 {
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	spin_lock(&vcpu->arch.vpa_update_lock);
-	if (vcpu->arch.dtl.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
-	if (vcpu->arch.slb_shadow.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
-	if (vcpu->arch.vpa.pinned_addr)
-		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 	kvm_vcpu_uninit(vcpu);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 }
 }
 
 
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern void xics_wake_cpu(int cpu);
 
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
 				   struct kvm_vcpu *vcpu)
@@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			break;
 		vc->runner = vcpu;
 		n_ceded = 0;
-		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
 			if (!v->arch.pending_exceptions)
 				n_ceded += v->arch.ceded;
+			else
+				v->arch.ceded = 0;
+		}
 		if (n_ceded == vc->n_runnable)
 			kvmppc_vcore_blocked(vc);
 		else
@@ -1821,6 +1878,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 
 	kvm->arch.rma = NULL;
 
@@ -1866,6 +1924,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 		kvm->arch.rma = NULL;
 	}
 
+	kvmppc_rtas_tokens_free(kvm);
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }

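For reference, the runnable-threads change above amounts to a simple vote: a vCPU only counts as ceded if it has no exception pending, and a pending exception cancels the cede outright. A minimal user-space sketch of that rule (types and names are illustrative, not kernel API):

#include <stdbool.h>
#include <stddef.h>

struct vthread {
	bool ceded;              /* guest executed H_CEDE */
	bool pending_exceptions; /* simplified: any exception queued */
};

/* The virtual core may block only when every runnable thread has
 * ceded and none of them has an exception pending; a thread with a
 * pending exception has its cede cancelled, mirroring the new
 * "else v->arch.ceded = 0" above. */
static bool vcore_may_block(struct vthread *v, size_t n_runnable)
{
	size_t n_ceded = 0;

	for (size_t i = 0; i < n_runnable; i++) {
		if (!v[i].pending_exceptions)
			n_ceded += v[i].ceded;
		else
			v[i].ceded = false;
	}
	return n_ceded == n_runnable;
}
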
+ 0 - 11
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
-/*
- * Note modification of an HPTE; set the HPTE modified bit
- * if anyone is interested.
- */
-static inline void note_hpte_modification(struct kvm *kvm,
-					  struct revmap_entry *rev)
-{
-	if (atomic_read(&kvm->arch.hpte_mod_interest))
-		rev->guest_rpte |= HPTE_GR_MODIFIED;
-}
-
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 				struct revmap_entry *rev,

+ 406 - 0
arch/powerpc/kvm/book3s_hv_rm_xics.c

@@ -0,0 +1,406 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+	__asm__ __volatile__("sync; stbcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *this_vcpu)
+{
+	struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+	unsigned long xics_phys;
+	int cpu;
+
+	/* Mark the target VCPU as having an interrupt pending */
+	vcpu->stat.queue_intr++;
+	set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+	/* Kick self ? Just set MER and return */
+	if (vcpu == this_vcpu) {
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+		return;
+	}
+
+	/* Check if the core is loaded, if not, too hard */
+	cpu = vcpu->cpu;
+	if (cpu < 0 || cpu >= nr_cpu_ids) {
+		this_icp->rm_action |= XICS_RM_KICK_VCPU;
+		this_icp->rm_kick_target = vcpu;
+		return;
+	}
+	/* In SMT cpu will always point to thread 0, we adjust it */
+	cpu += vcpu->arch.ptid;
+
+	/* Not too hard, then poke the target */
+	xics_phys = paca[cpu].kvm_hstate.xics_phys;
+	rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+	/* Note: Only called on self ! */
+	clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+		  &vcpu->arch.pending_exceptions);
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+				     union kvmppc_icp_state old,
+				     union kvmppc_icp_state new)
+{
+	struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee)
+		icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+	/* Expose the state change for debug purposes */
+	this_vcpu->arch.icp->rm_dbgstate = new;
+	this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+ bail:
+	return success;
+}
+
+static inline int check_too_hard(struct kvmppc_xics *xics,
+				 struct kvmppc_icp *icp)
+{
+	return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
+}
+
+static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			     u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here as well.
+	 */
+	if (resend)
+		icp->rm_action |= XICS_RM_CHECK_RESEND;
+}
+
+
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/* First clear the interrupt */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Return the result in GPR4 */
+	vcpu->arch.gpr[4] = xirr;
+
+	return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		    unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	local = this_icp->server_num == server;
+	if (local)
+		icp = this_icp;
+	else
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+	if (!icp)
+		return H_PARAMETER;
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be done as there can be no XISR to
+	 * reject.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Pass rejects to virtual mode */
+	if (reject && reject != XICS_IPI) {
+		this_icp->rm_action |= XICS_RM_REJECT;
+		this_icp->rm_reject = reject;
+	}
+
+	/* Pass resends to virtual mode */
+	if (resend)
+		this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+
+	return check_too_hard(xics, this_icp);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr) {
+		icp_rm_down_cppr(xics, icp, cppr);
+		goto bail;
+	} else if (cppr == icp->state.cppr)
+		return H_SUCCESS;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Pass rejects to virtual mode */
+	if (reject && reject != XICS_IPI) {
+		icp->rm_action |= XICS_RM_REJECT;
+		icp->rm_reject = reject;
+	}
+ bail:
+	return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (ie more favored), we do not check for rejection of
+	 * a pending interrupt, this is a SW error and PAPR specifies
+	 * that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		goto bail;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		goto bail;
+	state = &ics->irq_state[src];
+
+	/* Still asserted, resend it, we make it look like a reject */
+	if (state->asserted) {
+		icp->rm_action |= XICS_RM_REJECT;
+		icp->rm_reject = irq;
+	}
+ bail:
+	return check_too_hard(xics, icp);
+}

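The heart of both the real-mode code above and the virtual-mode code in book3s_xics.c is the lock-free update loop: the whole ICP state lives in one 64-bit word, out_ee is recomputed on every attempt, and the word is published with a single compare-and-swap. A user-space model of that loop using C11 atomics; the field layout is illustrative, only the single-word CAS discipline is the point:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Modelled on union kvmppc_icp_state: several fields share one
 * 64-bit "raw" word so they can be swapped atomically. */
union icp_state {
	uint64_t raw;
	struct {
		uint32_t xisr;        /* pending source, 0 = none */
		uint8_t  pending_pri;
		uint8_t  cppr;
		uint8_t  mfrr;
		uint8_t  out_ee;      /* interrupt output line */
	} f;
};

/* One attempt, as in icp_rm_try_update(): derive the output state
 * from the new value, then try to publish it atomically.  On failure
 * the caller re-reads the state and retries the whole transaction. */
static bool icp_try_update(_Atomic uint64_t *state,
			   union icp_state old, union icp_state new)
{
	new.f.out_ee = (new.f.xisr && new.f.pending_pri < new.f.cppr);
	return atomic_compare_exchange_strong(state, &old.raw, new.raw);
}

Callers loop exactly as the kernel does: do { old = new = load; ...mutate new... } while (!icp_try_update(state, old, new));
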
+ 161 - 67
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
  *                                                                            *
  *****************************************************************************/
 
-#define XICS_XIRR		4
-#define XICS_QIRR		0xc
-#define XICS_IPI		2	/* interrupt source # for IPIs */
-
 /*
  * We come in here when wakened from nap mode on a secondary hw thread.
  * Relocation is off and most register values are lost.
@@ -101,50 +97,51 @@ kvm_start_guest:
 	li	r0,1
 	stb	r0,PACA_NAPSTATELOST(r13)
 
-	/* get vcpu pointer, NULL if we have no vcpu to run */
-	ld	r4,HSTATE_KVM_VCPU(r13)
-	cmpdi	cr1,r4,0
+	/* were we napping due to cede? */
+	lbz	r0,HSTATE_NAPPING(r13)
+	cmpwi	r0,0
+	bne	kvm_end_cede
+
+	/*
+	 * We weren't napping due to cede, so this must be a secondary
+	 * thread being woken up to run a guest, or being woken up due
+	 * to a stray IPI.  (Or due to some machine check or hypervisor
+	 * maintenance interrupt while the core is in KVM.)
+	 */
 
 	/* Check the wake reason in SRR1 to see why we got here */
 	mfspr	r3,SPRN_SRR1
 	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */
 	cmpwi	r3,4			/* was it an external interrupt? */
-	bne	27f
-
-	/*
-	 * External interrupt - for now assume it is an IPI, since we
-	 * should never get any other interrupts sent to offline threads.
-	 * Only do this for secondary threads.
-	 */
-	beq	cr1,25f
-	lwz	r3,VCPU_PTID(r4)
-	cmpwi	r3,0
-	beq	27f
-25:	ld	r5,HSTATE_XICS_PHYS(r13)
-	li	r0,0xff
-	li	r6,XICS_QIRR
-	li	r7,XICS_XIRR
+	bne	27f			/* if not */
+	ld	r5,HSTATE_XICS_PHYS(r13)
+	li	r7,XICS_XIRR		/* if it was an external interrupt, */
 	lwzcix	r8,r5,r7		/* get and ack the interrupt */
 	sync
 	clrldi.	r9,r8,40		/* get interrupt source ID. */
-	beq	27f			/* none there? */
-	cmpwi	r9,XICS_IPI
-	bne	26f
+	beq	28f			/* none there? */
+	cmpwi	r9,XICS_IPI		/* was it an IPI? */
+	bne	29f
+	li	r0,0xff
+	li	r6,XICS_MFRR
 	stbcix	r0,r5,r6		/* clear IPI */
-26:	stwcix	r8,r5,r7		/* EOI the interrupt */
-
-27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
+	stwcix	r8,r5,r7		/* EOI the interrupt */
+	sync				/* order loading of vcpu after that */
 
-	/* reload vcpu pointer after clearing the IPI */
+	/* get vcpu pointer, NULL if we have no vcpu to run */
 	ld	r4,HSTATE_KVM_VCPU(r13)
 	cmpdi	r4,0
 	/* if we have no vcpu to run, go back to sleep */
 	beq	kvm_no_guest
+	b	kvmppc_hv_entry
 
-	/* were we napping due to cede? */
-	lbz	r0,HSTATE_NAPPING(r13)
-	cmpwi	r0,0
-	bne	kvm_end_cede
+27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
+	b	kvm_no_guest
+28:	/* SRR1 said external but ICP said nope?? */
+	b	kvm_no_guest
+29:	/* External non-IPI interrupt to offline secondary thread? help?? */
+	stw	r8,HSTATE_SAVED_XIRR(r13)
+	b	kvm_no_guest
 
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
@@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	lwz	r5, LPPACA_YIELDCOUNT(r3)
 	addi	r5, r5, 1
 	stw	r5, LPPACA_YIELDCOUNT(r3)
+	li	r6, 1
+	stb	r6, VCPU_VPA_DIRTY(r4)
 25:
 	/* Load up DAR and DSISR */
 	ld	r5, VCPU_DAR(r4)
@@ -485,20 +484,20 @@ toc_tlbie_lock:
 	mtctr	r6
 	mtxer	r7
 
+	ld	r10, VCPU_PC(r4)
+	ld	r11, VCPU_MSR(r4)
 kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	ld	r6, VCPU_SRR0(r4)
 	ld	r7, VCPU_SRR1(r4)
-	ld	r10, VCPU_PC(r4)
-	ld	r11, VCPU_MSR(r4)	/* r11 = vcpu->arch.msr & ~MSR_HV */
 
+	/* r11 = vcpu->arch.msr & ~MSR_HV */
 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
 	rotldi	r11, r11, 1 + MSR_HV_LG
 	ori	r11, r11, MSR_ME
 
 	/* Check if we can deliver an external or decrementer interrupt now */
 	ld	r0,VCPU_PENDING_EXC(r4)
-	li	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+	lis	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
 	and	r0,r0,r8
 	cmpdi	cr1,r0,0
 	andi.	r0,r11,MSR_EE
@@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	/* Move SRR0 and SRR1 into the respective regs */
 5:	mtspr	SPRN_SRR0, r6
 	mtspr	SPRN_SRR1, r7
-	li	r0,0
-	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
 
 fast_guest_return:
+	li	r0,0
+	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
 	mtspr	SPRN_HSRR0,r10
 	mtspr	SPRN_HSRR1,r11
 
@@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
 
-	/* Check for mediated interrupts (could be done earlier really ...) */
+	/* Only handle external interrupts here on arch 206 and later */
 BEGIN_FTR_SECTION
-	cmpwi	r12,BOOK3S_INTERRUPT_EXTERNAL
-	bne+	1f
-	andi.	r0,r11,MSR_EE
-	beq	1f
-	mfspr	r5,SPRN_LPCR
-	andi.	r0,r5,LPCR_MER
-	bne	bounce_ext_interrupt
-1:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+	b	ext_interrupt_to_host
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+	/* External interrupt ? */
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+	bne+	ext_interrupt_to_host
+
+	/* External interrupt, first check for host_ipi. If this is
+	 * set, we know the host wants us out so let's do it now
+	 */
+do_ext_interrupt:
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bne	ext_interrupt_to_host
+
+	/* Now read the interrupt from the ICP */
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r7, XICS_XIRR
+	cmpdi	r5, 0
+	beq-	ext_interrupt_to_host
+	lwzcix	r3, r5, r7
+	rlwinm.	r0, r3, 0, 0xffffff
+	sync
+	beq	3f		/* if nothing pending in the ICP */
+
+	/* We found something in the ICP...
+	 *
+	 * If it's not an IPI, stash it in the PACA and return to
+	 * the host, we don't (yet) handle directing real external
+	 * interrupts directly to the guest
+	 */
+	cmpwi	r0, XICS_IPI
+	bne	ext_stash_for_host
+
+	/* It's an IPI, clear the MFRR and EOI it */
+	li	r0, 0xff
+	li	r6, XICS_MFRR
+	stbcix	r0, r5, r6		/* clear the IPI */
+	stwcix	r3, r5, r7		/* EOI it */
+	sync
+
+	/* We need to re-check host IPI now in case it got set in the
+	 * meantime. If it's clear, we bounce the interrupt to the
+	 * guest
+	 */
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bne-	1f
+
+	/* All right, looks like an IPI for the guest, we need to set MER */
+3:
+	/* Check if any CPU is heading out to the host, if so head out too */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lwz	r0, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0, 0x100
+	bge	ext_interrupt_to_host
+
+	/* See if there is a pending interrupt for the guest */
+	mfspr	r8, SPRN_LPCR
+	ld	r0, VCPU_PENDING_EXC(r9)
+	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+	rldicl.	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
+	beq	2f
+
+	/* And if the guest EE is set, we can deliver immediately, else
+	 * we return to the guest with MER set
+	 */
+	andi.	r0, r11, MSR_EE
+	beq	2f
+	mtspr	SPRN_SRR0, r10
+	mtspr	SPRN_SRR1, r11
+	li	r10, BOOK3S_INTERRUPT_EXTERNAL
+	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11, r11, 63
+2:	mr	r4, r9
+	mtspr	SPRN_LPCR, r8
+	b	fast_guest_return
+
+	/* We raced with the host, we need to resend that IPI, bummer */
+1:	li	r0, IPI_PRIORITY
+	stbcix	r0, r5, r6		/* set the IPI */
+	sync
+	b	ext_interrupt_to_host
+
+ext_stash_for_host:
+	/* It's not an IPI and it's for the host, stash it in the PACA
+	 * before exit, it will be picked up by the host ICP driver
+	 */
+	stw	r3, HSTATE_SAVED_XIRR(r13)
+ext_interrupt_to_host:
 
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Save DEC */
@@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	beq	44f
 	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
 	li	r0,IPI_PRIORITY
-	li	r7,XICS_QIRR
+	li	r7,XICS_MFRR
 	stbcix	r0,r7,r8		/* trigger the IPI */
 44:	srdi.	r3,r3,1
 	addi	r6,r6,PACA_SIZE
@@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	lwz	r3, LPPACA_YIELDCOUNT(r8)
 	addi	r3, r3, 1
 	stw	r3, LPPACA_YIELDCOUNT(r8)
+	li	r3, 1
+	stb	r3, VCPU_VPA_DIRTY(r9)
 25:
 	/* Save PMU registers if requested */
 	/* r8 and cr0.eq are live here */
@@ -1350,11 +1433,19 @@ hcall_real_table:
 	.long	0		/* 0x58 */
 	.long	0		/* 0x5c */
 	.long	0		/* 0x60 */
-	.long	0		/* 0x64 */
-	.long	0		/* 0x68 */
-	.long	0		/* 0x6c */
-	.long	0		/* 0x70 */
-	.long	0		/* 0x74 */
+#ifdef CONFIG_KVM_XICS
+	.long	.kvmppc_rm_h_eoi - hcall_real_table
+	.long	.kvmppc_rm_h_cppr - hcall_real_table
+	.long	.kvmppc_rm_h_ipi - hcall_real_table
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	.kvmppc_rm_h_xirr - hcall_real_table
+#else
+	.long	0		/* 0x64 - H_EOI */
+	.long	0		/* 0x68 - H_CPPR */
+	.long	0		/* 0x6c - H_IPI */
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	0		/* 0x74 - H_XIRR */
+#endif
 	.long	0		/* 0x78 */
 	.long	0		/* 0x7c */
 	.long	0		/* 0x80 */
@@ -1405,15 +1496,6 @@ ignore_hdec:
 	mr	r4,r9
 	b	fast_guest_return
 
-bounce_ext_interrupt:
-	mr	r4,r9
-	mtspr	SPRN_SRR0,r10
-	mtspr	SPRN_SRR1,r11
-	li	r10,BOOK3S_INTERRUPT_EXTERNAL
-	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11,r11,63
-	b	fast_guest_return
-
 _GLOBAL(kvmppc_h_set_dabr)
 	std	r4,VCPU_DABR(r3)
 	/* Work around P7 bug where DABR can get corrupted on mtspr */
@@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 	b	.
 
 kvm_end_cede:
+	/* get vcpu pointer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+
 	/* Woken by external or decrementer interrupt */
 	ld	r1, HSTATE_HOST_R1(r13)
 
@@ -1558,6 +1643,16 @@ kvm_end_cede:
 	li	r0,0
 	stb	r0,HSTATE_NAPPING(r13)
 
+	/* Check the wake reason in SRR1 to see why we got here */
+	mfspr	r3, SPRN_SRR1
+	rlwinm	r3, r3, 44-31, 0x7	/* extract wake reason field */
+	cmpwi	r3, 4			/* was it an external interrupt? */
+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+	mr	r9, r4
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	beq	do_ext_interrupt	/* if so */
+
 	/* see if any other thread is already exiting */
 	lwz	r0,VCORE_ENTRY_EXIT(r5)
 	cmpwi	r0,0x100
@@ -1577,8 +1672,7 @@ kvm_cede_prodded:
 
 	/* we've ceded but we want to give control to the host */
 kvm_cede_exit:
-	li	r3,H_TOO_HARD
-	blr
+	b	hcall_real_fallback
 
 	/* Try to handle a machine check in real mode */
 machine_check_realmode:
@@ -1626,7 +1720,7 @@ secondary_nap:
 	beq	37f
 	sync
 	li	r0, 0xff
-	li	r6, XICS_QIRR
+	li	r6, XICS_MFRR
 	stbcix	r0, r5, r6		/* clear the IPI */
 	stwcix	r3, r5, r7		/* EOI it */
 37:	sync

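Several paths above build or crack an XIRR value by hand; the layout is simply the current CPPR in the top byte and the 24-bit interrupt source (XISR) below it, exactly as kvmppc_rm_h_xirr() packs it and kvmppc_rm_h_eoi() unpacks it. A tiny self-contained helper pair (nothing here is kernel API):

#include <assert.h>
#include <stdint.h>

#define XICS_IPI 2	/* interrupt source number used for IPIs */

static uint32_t xirr_pack(uint8_t cppr, uint32_t xisr)
{
	return ((uint32_t)cppr << 24) | (xisr & 0x00ffffff);
}

static uint8_t  xirr_cppr(uint32_t xirr)   { return xirr >> 24; }
static uint32_t xirr_source(uint32_t xirr) { return xirr & 0x00ffffff; }

int main(void)
{
	uint32_t xirr = xirr_pack(0xff, XICS_IPI);

	assert(xirr_cppr(xirr) == 0xff);
	assert(xirr_source(xirr) == XICS_IPI);
	return 0;
}
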
+ 2 - 3
arch/powerpc/kvm/book3s_pr.c

@@ -762,9 +762,7 @@ program_interrupt:
 			run->exit_reason = KVM_EXIT_MMIO;
 			r = RESUME_HOST_NV;
 			break;
-		case EMULATE_DO_PAPR:
-			run->exit_reason = KVM_EXIT_PAPR_HCALL;
-			vcpu->arch.hcall_needed = 1;
+		case EMULATE_EXIT_USER:
 			r = RESUME_HOST_NV;
 			break;
 		default:
@@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {

+ 21 - 0
arch/powerpc/kvm/book3s_pr_papr.c

@@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+	long rc = kvmppc_xics_hcall(vcpu, cmd);
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
 	switch (cmd) {
@@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu))
+			return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+		if (kvmppc_rtas_hcall(vcpu))
+			break;
+		kvmppc_set_gpr(vcpu, 3, 0);
+		return EMULATE_DONE;
 	}
 
 	return EMULATE_FAIL;

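Both the PR path above and the HV path route H_XIRR/H_CPPR/H_EOI/H_IPI into the common XICS emulation in book3s_xics.c further down, where delivery follows one rule: a source may be presented only if its priority is more favored (numerically lower) than the CPPR, the MFRR and any already-pending source. A sketch of that predicate, matching icp_try_to_deliver() below (the struct name is illustrative):

#include <stdbool.h>
#include <stdint.h>

struct icp_snapshot {
	uint8_t cppr;		/* current processor priority */
	uint8_t mfrr;		/* most favored request (IPI) priority */
	uint8_t pending_pri;	/* priority of an already-pending source */
};

/* 0 is the most favored priority; 0xff means masked/least favored. */
static bool icp_can_deliver(const struct icp_snapshot *s, uint8_t prio)
{
	return s->cppr > prio && s->mfrr > prio && s->pending_pri > prio;
}
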
+ 274 - 0
arch/powerpc/kvm/book3s_rtas.c

@@ -0,0 +1,274 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+
+#ifdef CONFIG_KVM_XICS
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 3 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+	server = args->args[1];
+	priority = args->args[2];
+
+	rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 3) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	server = priority = 0;
+	rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+	if (rc) {
+		rc = -3;
+		goto out;
+	}
+
+	args->rets[1] = server;
+	args->rets[2] = priority;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc;
+
+	if (args->nargs != 1 || args->nret != 1) {
+		rc = -3;
+		goto out;
+	}
+
+	irq = args->args[0];
+
+	rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+	if (rc)
+		rc = -3;
+out:
+	args->rets[0] = rc;
+}
+#endif /* CONFIG_KVM_XICS */
+
+struct rtas_handler {
+	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+	char *name;
+};
+
+static struct rtas_handler rtas_handlers[] = {
+#ifdef CONFIG_KVM_XICS
+	{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+	{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+	{ .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
+	{ .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
+#endif
+};
+
+struct rtas_token_definition {
+	struct list_head list;
+	struct rtas_handler *handler;
+	u64 token;
+};
+
+static int rtas_name_matches(char *s1, char *s2)
+{
+	struct kvm_rtas_token_args args;
+	return !strncmp(s1, s2, sizeof(args.name));
+}
+
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		if (rtas_name_matches(d->handler->name, name)) {
+			list_del(&d->list);
+			kfree(d);
+			return 0;
+		}
+	}
+
+	/* It's not an error to undefine an undefined token */
+	return 0;
+}
+
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+	struct rtas_token_definition *d;
+	struct rtas_handler *h = NULL;
+	bool found;
+	int i;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
+		if (d->token == token)
+			return -EEXIST;
+	}
+
+	found = false;
+	for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+		h = &rtas_handlers[i];
+		if (rtas_name_matches(h->name, name)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return -ENOENT;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->handler = h;
+	d->token = token;
+
+	list_add_tail(&d->list, &kvm->arch.rtas_tokens);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+	struct kvm_rtas_token_args args;
+	int rc;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	mutex_lock(&kvm->lock);
+
+	if (args.token)
+		rc = rtas_token_define(kvm, args.name, args.token);
+	else
+		rc = rtas_token_undefine(kvm, args.name);
+
+	mutex_unlock(&kvm->lock);
+
+	return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+	struct rtas_token_definition *d;
+	struct rtas_args args;
+	rtas_arg_t *orig_rets;
+	gpa_t args_phys;
+	int rc;
+
+	/* r4 contains the guest physical address of the RTAS args */
+	args_phys = kvmppc_get_gpr(vcpu, 4);
+
+	rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+	if (rc)
+		goto fail;
+
+	/*
+	 * args->rets is a pointer into args->args. Now that we've
+	 * copied args we need to fix it up to point into our copy,
+	 * not the guest args. We also need to save the original
+	 * value so we can restore it on the way out.
+	 */
+	orig_rets = args.rets;
+	args.rets = &args.args[args.nargs];
+
+	mutex_lock(&vcpu->kvm->lock);
+
+	rc = -ENOENT;
+	list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+		if (d->token == args.token) {
+			d->handler->handler(vcpu, &args);
+			rc = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&vcpu->kvm->lock);
+
+	if (rc == 0) {
+		args.rets = orig_rets;
+		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+		if (rc)
+			goto fail;
+	}
+
+	return rc;
+
+fail:
+	/*
+	 * We only get here if the guest has called RTAS with a bogus
+	 * args pointer. That means we can't get to the args, and so we
+	 * can't fail the RTAS call. So fail right out to userspace,
+	 * which should kill the guest.
+	 */
+	return rc;
+}
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		list_del(&d->list);
+		kfree(d);
+	}
+}

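Userspace arms these handlers by binding each RTAS service name to the token it advertised to the guest in the device tree. A sketch of that call, assuming the KVM_PPC_RTAS_DEFINE_TOKEN vm ioctl this series adds alongside kvm_vm_ioctl_rtas_define_token(); the struct fields match the args.name/args.token usage above, everything else is illustrative:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Bind the "ibm,set-xive" RTAS service to token 1.  Passing a zero
 * token undefines the binding again, as kvm_vm_ioctl_rtas_define_token()
 * shows above. */
static int define_set_xive(int vm_fd)
{
	struct kvm_rtas_token_args args;

	memset(&args, 0, sizeof(args));
	strncpy(args.name, "ibm,set-xive", sizeof(args.name) - 1);
	args.token = 1;

	return ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
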
+ 1130 - 0
arch/powerpc/kvm/book3s_xics.c

@@ -0,0 +1,1130 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#if 1
+#define XICS_DBG(fmt...) do { } while (0)
+#else
+#define XICS_DBG(fmt...) trace_printk(fmt)
+#endif
+
+#define ENABLE_REALMODE	true
+#define DEBUG_REALMODE	false
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries of the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ *   ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ *   locks array to improve scalability
+ *
+ * - ioctl's to save/restore the entire state for snapshot & migration
+ */
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq);
+
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u16 src;
+
+	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+		return -EINVAL;
+	}
+	state = &ics->irq_state[src];
+	if (!state->exists)
+		return -EINVAL;
+
+	/*
+	 * We set state->asserted locklessly. This should be fine as
+	 * we are the only setter, thus concurrent access is undefined
+	 * to begin with.
+	 */
+	if (level == KVM_INTERRUPT_SET_LEVEL)
+		state->asserted = 1;
+	else if (level == KVM_INTERRUPT_UNSET) {
+		state->asserted = 0;
+		return 0;
+	}
+
+	/* Attempt delivery */
+	icp_deliver_irq(xics, NULL, irq);
+
+	return 0;
+}
+
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+			     struct kvmppc_icp *icp)
+{
+	int i;
+
+	mutex_lock(&ics->lock);
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct ics_irq_state *state = &ics->irq_state[i];
+
+		if (!state->resend)
+			continue;
+
+		XICS_DBG("resend %#x prio %#x\n", state->number,
+			      state->priority);
+
+		mutex_unlock(&ics->lock);
+		icp_deliver_irq(xics, icp, state->number);
+		mutex_lock(&ics->lock);
+	}
+
+	mutex_unlock(&ics->lock);
+}
+
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+		       struct ics_irq_state *state,
+		       u32 server, u32 priority, u32 saved_priority)
+{
+	bool deliver;
+
+	mutex_lock(&ics->lock);
+
+	state->server = server;
+	state->priority = priority;
+	state->saved_priority = saved_priority;
+	deliver = false;
+	if ((state->masked_pending || state->resend) && priority != MASKED) {
+		state->masked_pending = 0;
+		deliver = true;
+	}
+
+	mutex_unlock(&ics->lock);
+
+	return deliver;
+}
+
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, server);
+	if (!icp)
+		return -EINVAL;
+
+	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+		 irq, server, priority,
+		 state->masked_pending, state->resend);
+
+	if (write_xive(xics, ics, state, server, priority, priority))
+		icp_deliver_irq(xics, icp, irq);
+
+	return 0;
+}
+
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	mutex_lock(&ics->lock);
+	*server = state->server;
+	*priority = state->priority;
+	mutex_unlock(&ics->lock);
+
+	return 0;
+}
+
+int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, state->server);
+	if (!icp)
+		return -EINVAL;
+
+	if (write_xive(xics, ics, state, state->server, state->saved_priority,
+		       state->saved_priority))
+		icp_deliver_irq(xics, icp, irq);
+
+	return 0;
+}
+
+int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	write_xive(xics, ics, state, state->server, MASKED, state->priority);
+
+	return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+				  union kvmppc_icp_state old,
+				  union kvmppc_icp_state new,
+				  bool change_self)
+{
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 icp->server_num,
+		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
+		 old.need_resend, old.out_ee);
+	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
+		 new.need_resend, new.out_ee);
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee) {
+		kvmppc_book3s_queue_irqprio(icp->vcpu,
+					    BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+		if (!change_self)
+			kvmppc_fast_vcpu_kick(icp->vcpu);
+	}
+ bail:
+	return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 icsid;
+
+	/* Order this load with the test for need_resend in the caller */
+	smp_rmb();
+	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!test_and_clear_bit(icsid, icp->resend_map))
+			continue;
+		if (!ics)
+			continue;
+		ics_check_resend(xics, ics, icp);
+	}
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+		 icp->server_num);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* See if we can deliver */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+
+ again:
+	/* Get the ICS state and lock it */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/* Get a lock on the ICS */
+	mutex_lock(&ics->lock);
+
+	/* Get our server */
+	if (!icp || state->server != icp->server_num) {
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+				new_irq, state->server);
+			goto out;
+		}
+	}
+
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differentiate normal deliveries and resends, this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		XICS_DBG("irq %#x masked pending\n", new_irq);
+		state->masked_pending = 1;
+		goto out;
+	}
+
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * icp mutex.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_try_to_deliver() the target
+	 * processor may well have already consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			mutex_unlock(&ics->lock);
+			new_irq = reject;
+			goto again;
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		set_bit(ics->icsid, icp->resend_map);
+		state->resend = 1;
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			mutex_unlock(&ics->lock);
+			goto again;
+		}
+	}
+ out:
+	mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			  u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			WARN_ON(new_state.xisr != XICS_IPI &&
+				new_state.xisr != 0);
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here too
+	 */
+	if (resend)
+		icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	/* First, remove EE from the processor */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+	return xirr;
+}
+
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+				 unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+		 vcpu->vcpu_id, server, mfrr);
+
+	icp = vcpu->arch.icp;
+	local = icp->server_num == server;
+	if (!local) {
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+		if (!icp)
+			return H_PARAMETER;
+	}
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be rejected as there can be no XISR to
+	 * reject.  If the MFRR is being made less favored then
+	 * there might be a previously-rejected interrupt needing
+	 * to be resent.
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it as in
+	 *
+	 * ICP state: Check_IPI
+	 */
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri)
+				reject = new_state.xisr;
+			new_state.pending_pri = mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, local));
+
+	/* Handle reject */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+
+	/* Handle resend */
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return H_SUCCESS;
+}
+
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr)
+		icp_down_cppr(xics, icp, cppr);
+	else if (cppr == icp->state.cppr)
+		return;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	do {
+		old_state = new_state = ACCESS_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_deliver_irq).
+	 */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq = xirr & 0x00ffffff;
+	u16 src;
+
+	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Note: If EOI is incorrectly used by SW to lower the CPPR
+	 * value (ie more favored), we do not check for rejection of
+	 * a pending interrupt, this is a SW error and PAPR specifies
+	 * that we don't have to deal with it.
+	 *
+	 * The sending of an EOI to the ICS is handled after the
+	 * CPPR update
+	 *
+	 * ICP State: Down_CPPR which we handle
+	 * in a separate function as it's shared with H_CPPR.
+	 */
+	icp_down_cppr(xics, icp, xirr >> 24);
+
+	/* IPIs have no EOI */
+	if (irq == XICS_IPI)
+		return H_SUCCESS;
+	/*
+	 * EOI handling: If the interrupt is still asserted, we need to
+	 * resend it. We can take a lockless "peek" at the ICS state here.
+	 *
+	 * "Message" interrupts will never have "asserted" set
+	 */
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
+		return H_PARAMETER;
+	}
+	state = &ics->irq_state[src];
+
+	/* Still asserted, resend it */
+	if (state->asserted)
+		icp_deliver_irq(xics, icp, irq);
+
+	return H_SUCCESS;
+}
+
+static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+
+	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
+		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
+
+	if (icp->rm_action & XICS_RM_KICK_VCPU)
+		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
+	if (icp->rm_action & XICS_RM_CHECK_RESEND)
+		icp_check_resend(xics, icp);
+	if (icp->rm_action & XICS_RM_REJECT)
+		icp_deliver_irq(xics, icp, icp->rm_reject);
+
+	icp->rm_action = 0;
+
+	return H_SUCCESS;
+}
+
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	unsigned long res;
+	int rc = H_SUCCESS;
+
+	/* Check if we have an ICP */
+	if (!xics || !vcpu->arch.icp)
+		return H_HARDWARE;
+
+	/* Check for real mode returning too hard */
+	if (xics->real_mode)
+		return kvmppc_xics_rm_complete(vcpu, req);
+
+	switch (req) {
+	case H_XIRR:
+		res = kvmppc_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 4, res);
+		break;
+	case H_CPPR:
+		kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_EOI:
+		rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_IPI:
+		rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+				  kvmppc_get_gpr(vcpu, 5));
+		break;
+	}
+
+	return rc;
+}
+
+
+/* -- Initialisation code etc. -- */
+
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xics *xics = m->private;
+	struct kvm *kvm = xics->kvm;
+	struct kvm_vcpu *vcpu;
+	int icsid, i;
+
+	if (!kvm)
+		return 0;
+
+	seq_printf(m, "=========\nICP state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_icp *icp = vcpu->arch.icp;
+		union kvmppc_icp_state state;
+
+		if (!icp)
+			continue;
+
+		state.raw = ACCESS_ONCE(icp->state.raw);
+		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
+			   icp->server_num, state.xisr,
+			   state.pending_pri, state.cppr, state.mfrr,
+			   state.out_ee, state.need_resend);
+	}
+
+	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!ics)
+			continue;
+
+		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
+			   icsid);
+
+		mutex_lock(&ics->lock);
+
+		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+			struct ics_irq_state *irq = &ics->irq_state[i];
+
+			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
+				   irq->number, irq->server, irq->priority,
+				   irq->saved_priority, irq->asserted,
+				   irq->resend, irq->masked_pending);
+
+		}
+		mutex_unlock(&ics->lock);
+	}
+	return 0;
+}
+
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xics_debug_show, inode->i_private);
+}
+
+static const struct file_operations xics_debug_fops = {
+	.open = xics_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+					   xics, &xics_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	kfree(name);
+}
+
+struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
+					  struct kvmppc_xics *xics, int irq)
+{
+	struct kvmppc_ics *ics;
+	int i, icsid;
+
+	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&kvm->lock);
+
+	/* ICS already exists - somebody else got here first */
+	if (xics->ics[icsid])
+		goto out;
+
+	/* Create the ICS */
+	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
+	if (!ics)
+		goto out;
+
+	mutex_init(&ics->lock);
+	ics->icsid = icsid;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
+		ics->irq_state[i].priority = MASKED;
+		ics->irq_state[i].saved_priority = MASKED;
+	}
+	smp_wmb();
+	xics->ics[icsid] = ics;
+
+	if (icsid > xics->max_icsid)
+		xics->max_icsid = icsid;
+
+ out:
+	mutex_unlock(&kvm->lock);
+	return xics->ics[icsid];
+}
+
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
+{
+	struct kvmppc_icp *icp;
+
+	if (!vcpu->kvm->arch.xics)
+		return -ENODEV;
+
+	if (kvmppc_xics_find_server(vcpu->kvm, server_num))
+		return -EEXIST;
+
+	icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
+	if (!icp)
+		return -ENOMEM;
+
+	icp->vcpu = vcpu;
+	icp->server_num = server_num;
+	icp->state.mfrr = MASKED;
+	icp->state.pending_pri = MASKED;
+	vcpu->arch.icp = icp;
+
+	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+	return 0;
+}
+
+u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	union kvmppc_icp_state state;
+
+	if (!icp)
+		return 0;
+	state = icp->state;
+	return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
+		((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
+		((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
+		((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
+}
+
+int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_ics *ics;
+	u8 cppr, mfrr, pending_pri;
+	u32 xisr;
+	u16 src;
+	bool resend;
+
+	if (!icp || !xics)
+		return -ENOENT;
+
+	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+		KVM_REG_PPC_ICP_XISR_MASK;
+	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+	/* Require the new state to be internally consistent */
+	if (xisr == 0) {
+		if (pending_pri != 0xff)
+			return -EINVAL;
+	} else if (xisr == XICS_IPI) {
+		if (pending_pri != mfrr || pending_pri >= cppr)
+			return -EINVAL;
+	} else {
+		if (pending_pri >= mfrr || pending_pri >= cppr)
+			return -EINVAL;
+		ics = kvmppc_xics_find_ics(xics, xisr, &src);
+		if (!ics)
+			return -EINVAL;
+	}
+
+	new_state.raw = 0;
+	new_state.cppr = cppr;
+	new_state.xisr = xisr;
+	new_state.mfrr = mfrr;
+	new_state.pending_pri = pending_pri;
+
+	/*
+	 * Deassert the CPU interrupt request.
+	 * icp_try_update will reassert it if necessary.
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+	/*
+	 * Note that if we displace an interrupt from old_state.xisr,
+	 * we don't mark it as rejected.  We expect userspace to set
+	 * the state of the interrupt sources to be consistent with
+	 * the ICP states (either before or afterwards, which doesn't
+	 * matter).  We do handle resends due to CPPR becoming less
+	 * favoured because that is necessary to end up with a
+	 * consistent state in the situation where userspace restores
+	 * the ICS states before the ICP states.
+	 */
+	do {
+		old_state = ACCESS_ONCE(icp->state);
+
+		if (new_state.mfrr <= old_state.mfrr) {
+			resend = false;
+			new_state.need_resend = old_state.need_resend;
+		} else {
+			resend = old_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return 0;
+}
+
+/* -- ioctls -- */
+
+int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
+{
+	struct kvmppc_xics *xics;
+	int r;
+
+	/* locking against multiple callers? */
+
+	xics = kvm->arch.xics;
+	if (!xics)
+		return -ENODEV;
+
+	switch (args->level) {
+	case KVM_INTERRUPT_SET:
+	case KVM_INTERRUPT_SET_LEVEL:
+	case KVM_INTERRUPT_UNSET:
+		r = ics_deliver_irq(xics, args->irq, args->level);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+void kvmppc_xics_free(struct kvmppc_xics *xics)
+{
+	int i;
+	struct kvm *kvm = xics->kvm;
+
+	debugfs_remove(xics->dentry);
+
+	if (kvm)
+		kvm->arch.xics = NULL;
+
+	for (i = 0; i <= xics->max_icsid; i++)
+		kfree(xics->ics[i]);
+	kfree(xics);
+}
+
+int kvm_xics_create(struct kvm *kvm, u32 type)
+{
+	struct kvmppc_xics *xics;
+	int ret = 0;
+
+	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+	if (!xics)
+		return -ENOMEM;
+
+	xics->kvm = kvm;
+
+	/* Already there ? */
+	mutex_lock(&kvm->lock);
+	if (kvm->arch.xics)
+		ret = -EEXIST;
+	else
+		kvm->arch.xics = xics;
+	mutex_unlock(&kvm->lock);
+
+	if (ret)
+		return ret;
+
+	xics_debugfs_init(xics);
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/* Enable real mode support */
+		xics->real_mode = ENABLE_REALMODE;
+		xics->real_mode_dbg = DEBUG_REALMODE;
+	}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
+	return 0;
+}
+
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+	if (!vcpu->arch.icp)
+		return;
+	kfree(vcpu->arch.icp);
+	vcpu->arch.icp = NULL;
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+}
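
The two accessors above pack CPPR, XISR, MFRR and PENDING_PRI into one
64-bit word so the whole presentation-controller state can be saved and
restored atomically. A minimal userspace sketch of that flow, assuming a
vcpu fd and that the word is exposed as the 64-bit one-reg id
KVM_REG_PPC_ICP_STATE (error handling elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static uint64_t icp_state_get(int vcpu_fd)
{
	uint64_t icpval = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_ICP_STATE,
		.addr = (uintptr_t)&icpval,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	/* -> kvmppc_xics_get_icp() */
	return icpval;
}

static void icp_state_set(int vcpu_fd, uint64_t icpval)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_ICP_STATE,
		.addr = (uintptr_t)&icpval,
	};

	/* kvmppc_xics_set_icp() rejects inconsistent values with -EINVAL */
	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}

Note that kvmppc_xics_set_icp() requires the packed fields to be
internally consistent (e.g. XISR == 0 implies PENDING_PRI == 0xff), so a
corrupted save image fails fast instead of wedging the guest.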

+ 129 - 0
arch/powerpc/kvm/book3s_xics.h

@@ -0,0 +1,129 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+/*
+ * We use a two-level tree to store interrupt source information.
+ * There are up to 1024 ICS nodes, each of which can represent
+ * 1024 sources.
+ */
+#define KVMPPC_XICS_MAX_ICS_ID	1023
+#define KVMPPC_XICS_ICS_SHIFT	10
+#define KVMPPC_XICS_IRQ_PER_ICS	(1 << KVMPPC_XICS_ICS_SHIFT)
+#define KVMPPC_XICS_SRC_MASK	(KVMPPC_XICS_IRQ_PER_ICS - 1)
+
+/*
+ * Interrupt source numbers below this are reserved, for example
+ * 0 is "no interrupt", and 2 is used for IPIs.
+ */
+#define KVMPPC_XICS_FIRST_IRQ	16
+#define KVMPPC_XICS_NR_IRQS	((KVMPPC_XICS_MAX_ICS_ID + 1) * \
+				 KVMPPC_XICS_IRQ_PER_ICS)
+
+/* Priority value to use for disabling an interrupt */
+#define MASKED	0xff
+
+/* State for one irq source */
+struct ics_irq_state {
+	u32 number;
+	u32 server;
+	u8  priority;
+	u8  saved_priority;
+	u8  resend;
+	u8  masked_pending;
+	u8  asserted; /* Only for LSI */
+	u8  exists;
+};
+
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+	unsigned long raw;
+	struct {
+		u8 out_ee:1;
+		u8 need_resend:1;
+		u8 cppr;
+		u8 mfrr;
+		u8 pending_pri;
+		u32 xisr;
+	};
+};
+
+/* One bit per ICS */
+#define ICP_RESEND_MAP_SIZE	(KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
+
+struct kvmppc_icp {
+	struct kvm_vcpu *vcpu;
+	unsigned long server_num;
+	union kvmppc_icp_state state;
+	unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+
+	/* Real mode might find something too hard, here's the action
+	 * it might request from virtual mode
+	 */
+#define XICS_RM_KICK_VCPU	0x1
+#define XICS_RM_CHECK_RESEND	0x2
+#define XICS_RM_REJECT		0x4
+	u32 rm_action;
+	struct kvm_vcpu *rm_kick_target;
+	u32  rm_reject;
+
+	/* Debug stuff for real mode */
+	union kvmppc_icp_state rm_dbgstate;
+	struct kvm_vcpu *rm_dbgtgt;
+};
+
+struct kvmppc_ics {
+	struct mutex lock;
+	u16 icsid;
+	struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+struct kvmppc_xics {
+	struct kvm *kvm;
+	struct dentry *dentry;
+	u32 max_icsid;
+	bool real_mode;
+	bool real_mode_dbg;
+	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+};
+
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+							 u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
+			return vcpu->arch.icp;
+	}
+	return NULL;
+}
+
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+						      u32 irq, u16 *source)
+{
+	u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+	u16 src = irq & KVMPPC_XICS_SRC_MASK;
+	struct kvmppc_ics *ics;
+
+	if (source)
+		*source = src;
+	if (icsid > KVMPPC_XICS_MAX_ICS_ID)
+		return NULL;
+	ics = xics->ics[icsid];
+	if (!ics)
+		return NULL;
+	return ics;
+}
+
+
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
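
To make the two-level layout concrete: a global source number splits into
an ICS index in the high bits and a per-ICS source offset in the low
KVMPPC_XICS_ICS_SHIFT bits, exactly as kvmppc_xics_find_ics() computes it.
A small standalone example:

#include <stdio.h>

#define KVMPPC_XICS_ICS_SHIFT	10
#define KVMPPC_XICS_SRC_MASK	((1 << KVMPPC_XICS_ICS_SHIFT) - 1)

int main(void)
{
	unsigned int irq = 0x1234;
	unsigned int icsid = irq >> KVMPPC_XICS_ICS_SHIFT;	/* 0x4 */
	unsigned int src = irq & KVMPPC_XICS_SRC_MASK;		/* 0x234 */

	printf("irq 0x%x -> ics %u src 0x%x\n", irq, icsid, src);
	return 0;
}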

+ 73 - 50
arch/powerpc/kvm/booke.c

@@ -346,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		keep_irq = true;
 	}
 
-	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
+	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
 		update_epr = true;
 
 	switch (priority) {
@@ -427,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 			set_guest_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
 			set_guest_dear(vcpu, vcpu->arch.queued_dear);
-		if (update_epr == true)
-			kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+		if (update_epr == true) {
+			if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
+				kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+			else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
+				BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
+				kvmppc_mpic_set_epr(vcpu);
+			}
+		}
 
 		new_msr &= msr_mask;
 #if defined(CONFIG_64BIT)
@@ -745,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		kvmppc_core_queue_program(vcpu, ESR_PIL);
 		return RESUME_HOST;
 
+	case EMULATE_EXIT_USER:
+		return RESUME_HOST;
+
 	default:
 		BUG();
 	}
@@ -1412,120 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	int r = -EINVAL;
+	int r = 0;
+	union kvmppc_one_reg val;
+	int size;
+	long int i;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
 	case KVM_REG_PPC_IAC2:
 	case KVM_REG_PPC_IAC3:
-	case KVM_REG_PPC_IAC4: {
-		int iac = reg->id - KVM_REG_PPC_IAC1;
-		r = copy_to_user((u64 __user *)(long)reg->addr,
-				 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+	case KVM_REG_PPC_IAC4:
+		i = reg->id - KVM_REG_PPC_IAC1;
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
 		break;
-	}
 	case KVM_REG_PPC_DAC1:
-	case KVM_REG_PPC_DAC2: {
-		int dac = reg->id - KVM_REG_PPC_DAC1;
-		r = copy_to_user((u64 __user *)(long)reg->addr,
-				 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+	case KVM_REG_PPC_DAC2:
+		i = reg->id - KVM_REG_PPC_DAC1;
+		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
 		break;
-	}
 	case KVM_REG_PPC_EPR: {
 		u32 epr = get_guest_epr(vcpu);
-		r = put_user(epr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, epr);
 		break;
 	}
 #if defined(CONFIG_64BIT)
 	case KVM_REG_PPC_EPCR:
-		r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, vcpu->arch.epcr);
 		break;
 #endif
 	case KVM_REG_PPC_TCR:
-		r = put_user(vcpu->arch.tcr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, vcpu->arch.tcr);
 		break;
 	case KVM_REG_PPC_TSR:
-		r = put_user(vcpu->arch.tsr, (u32 __user *)(long)reg->addr);
+		val = get_reg_val(reg->id, vcpu->arch.tsr);
 		break;
-	case KVM_REG_PPC_DEBUG_INST: {
-		u32 opcode = KVMPPC_INST_EHPRIV;
-		r = copy_to_user((u32 __user *)(long)reg->addr,
-				 &opcode, sizeof(u32));
+	case KVM_REG_PPC_DEBUG_INST:
+		val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
 		break;
-	}
 	default:
+		r = kvmppc_get_one_reg(vcpu, reg->id, &val);
 		break;
 	}
+
+	if (r)
+		return r;
+
+	if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+		r = -EFAULT;
+
 	return r;
 }
 
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	int r = -EINVAL;
+	int r = 0;
+	union kvmppc_one_reg val;
+	int size;
+	long int i;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
+
+	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+		return -EFAULT;
 
 	switch (reg->id) {
 	case KVM_REG_PPC_IAC1:
 	case KVM_REG_PPC_IAC2:
 	case KVM_REG_PPC_IAC3:
-	case KVM_REG_PPC_IAC4: {
-		int iac = reg->id - KVM_REG_PPC_IAC1;
-		r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
-			     (u64 __user *)(long)reg->addr, sizeof(u64));
+	case KVM_REG_PPC_IAC4:
+		i = reg->id - KVM_REG_PPC_IAC1;
+		vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
 		break;
-	}
 	case KVM_REG_PPC_DAC1:
-	case KVM_REG_PPC_DAC2: {
-		int dac = reg->id - KVM_REG_PPC_DAC1;
-		r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
-			     (u64 __user *)(long)reg->addr, sizeof(u64));
+	case KVM_REG_PPC_DAC2:
+		i = reg->id - KVM_REG_PPC_DAC1;
+		vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
 		break;
-	}
 	case KVM_REG_PPC_EPR: {
-		u32 new_epr;
-		r = get_user(new_epr, (u32 __user *)(long)reg->addr);
-		if (!r)
-			kvmppc_set_epr(vcpu, new_epr);
+		u32 new_epr = set_reg_val(reg->id, val);
+		kvmppc_set_epr(vcpu, new_epr);
 		break;
 	}
 #if defined(CONFIG_64BIT)
 	case KVM_REG_PPC_EPCR: {
-		u32 new_epcr;
-		r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
-		if (r == 0)
-			kvmppc_set_epcr(vcpu, new_epcr);
+		u32 new_epcr = set_reg_val(reg->id, val);
+		kvmppc_set_epcr(vcpu, new_epcr);
 		break;
 	}
 #endif
 	case KVM_REG_PPC_OR_TSR: {
-		u32 tsr_bits;
-		r = get_user(tsr_bits, (u32 __user *)(long)reg->addr);
+		u32 tsr_bits = set_reg_val(reg->id, val);
 		kvmppc_set_tsr_bits(vcpu, tsr_bits);
 		break;
 	}
 	case KVM_REG_PPC_CLEAR_TSR: {
-		u32 tsr_bits;
-		r = get_user(tsr_bits, (u32 __user *)(long)reg->addr);
+		u32 tsr_bits = set_reg_val(reg->id, val);
 		kvmppc_clr_tsr_bits(vcpu, tsr_bits);
 		break;
 	}
 	case KVM_REG_PPC_TSR: {
-		u32 tsr;
-		r = get_user(tsr, (u32 __user *)(long)reg->addr);
+		u32 tsr = set_reg_val(reg->id, val);
 		kvmppc_set_tsr(vcpu, tsr);
 		break;
 	}
 	case KVM_REG_PPC_TCR: {
-		u32 tcr;
-		r = get_user(tcr, (u32 __user *)(long)reg->addr);
+		u32 tcr = set_reg_val(reg->id, val);
 		kvmppc_set_tcr(vcpu, tcr);
 		break;
 	}
 	default:
+		r = kvmppc_set_one_reg(vcpu, reg->id, &val);
 		break;
 	}
+
 	return r;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					 struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -ENOTSUPP;

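The rework above routes every register through one_reg_size() and a
single user copy, so the per-register put_user()/get_user() calls go
away. From userspace nothing changes: the transfer size is already
encoded in the register id. A hedged sketch of reading one 32-bit
register this way, assuming a vcpu fd (error handling elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static uint32_t read_tcr(int vcpu_fd)
{
	uint32_t tcr = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_TCR,	/* 32-bit id: kernel copies 4 bytes */
		.addr = (uintptr_t)&tcr,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	return tcr;
}
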
+ 14 - 0
arch/powerpc/kvm/e500.c

@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;

+ 22 - 0
arch/powerpc/kvm/e500.h

@@ -23,6 +23,10 @@
 #include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
 
+enum vcpu_ftr {
+	VCPU_FTR_MMU_V2
+};
+
 #define E500_PID_NUM   3
 #define E500_TLB_NUM   2
 
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
 void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val);
 
 #ifdef CONFIG_KVM_E500V2
 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
 #define get_tlb_sts(gtlbe)              (MAS1_TS)
 #endif /* !BOOKE_HV */
 
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+			       enum vcpu_ftr ftr)
+{
+	bool has_ftr;
+	switch (ftr) {
+	case VCPU_FTR_MMU_V2:
+		has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+		break;
+	default:
+		return false;
+	}
+	return has_ftr;
+}
+
 #endif /* KVM_E500_H */

+ 19 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_TLB1CFG:
 		*spr_val = vcpu->arch.tlbcfg[1];
 		break;
+	case SPRN_TLB0PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[0];
+		break;
+	case SPRN_TLB1PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[1];
+		break;
 	case SPRN_L1CSR0:
 		*spr_val = vcpu_e500->l1csr0;
 		break;
@@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_MMUCFG:
 		*spr_val = vcpu->arch.mmucfg;
 		break;
+	case SPRN_EPTCFG:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		/*
+		 * Legacy Linux guests access EPTCFG register even if the E.PT
+		 * category is disabled in the VM. Give them a chance to live.
+		 */
+		*spr_val = vcpu->arch.eptcfg;
+		break;
 
 	/* extra exceptions */
 	case SPRN_IVOR32:

+ 170 - 22
arch/powerpc/kvm/e500_mmu.c

@@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return 0;
 }
 
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		*val = get_reg_val(id, vcpu->arch.shared->mas0);
+		break;
+	case KVM_REG_PPC_MAS1:
+		*val = get_reg_val(id, vcpu->arch.shared->mas1);
+		break;
+	case KVM_REG_PPC_MAS2:
+		*val = get_reg_val(id, vcpu->arch.shared->mas2);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		*val = get_reg_val(id, vcpu->arch.shared->mas7_3);
+		break;
+	case KVM_REG_PPC_MAS4:
+		*val = get_reg_val(id, vcpu->arch.shared->mas4);
+		break;
+	case KVM_REG_PPC_MAS6:
+		*val = get_reg_val(id, vcpu->arch.shared->mas6);
+		break;
+	case KVM_REG_PPC_MMUCFG:
+		*val = get_reg_val(id, vcpu->arch.mmucfg);
+		break;
+	case KVM_REG_PPC_EPTCFG:
+		*val = get_reg_val(id, vcpu->arch.eptcfg);
+		break;
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG:
+		i = id - KVM_REG_PPC_TLB0CFG;
+		*val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
+		break;
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS:
+		i = id - KVM_REG_PPC_TLB0PS;
+		*val = get_reg_val(id, vcpu->arch.tlbps[i]);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		vcpu->arch.shared->mas0 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS1:
+		vcpu->arch.shared->mas1 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS2:
+		vcpu->arch.shared->mas2 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS4:
+		vcpu->arch.shared->mas4 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS6:
+		vcpu->arch.shared->mas6 = set_reg_val(id, *val);
+		break;
+	/* Only allow MMU registers to be set to the config supported by KVM */
+	case KVM_REG_PPC_MMUCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.mmucfg)
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_EPTCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.eptcfg)
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG: {
+		/* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0CFG;
+		if (reg != vcpu->arch.tlbcfg[i])
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS: {
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0PS;
+		if (reg != vcpu->arch.tlbps[i])
+			r = -EINVAL;
+		break;
+	}
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+		struct kvm_book3e_206_tlb_params *params)
+{
+	vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	if (params->tlb_sizes[0] <= 2048)
+		vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
+	vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
+	vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+	return 0;
+}
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg)
 {
@@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 	vcpu_e500->gtlb_offset[0] = 0;
 	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
 
-	vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
-
-	vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	if (params.tlb_sizes[0] <= 2048)
-		vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
-	vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
-
-	vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
-	vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+	/* Update vcpu's MMU geometry based on SW_TLB input */
+	vcpu_mmu_geometry_update(vcpu, &params);
 
 	vcpu_e500->shared_tlb_pages = pages;
 	vcpu_e500->num_shared_tlb_pages = num_pages;
@@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+/* Vcpu's MMU default configuration */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+		       struct kvmppc_e500_tlb_params *params)
+{
+	/* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
+	vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+	/* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
+	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[0] |= params[0].entries;
+	vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params[1].entries;
+	vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+	if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
+		vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
+		vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+		vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
+
+		/* Guest mmu emulation currently doesn't handle E.PT */
+		vcpu->arch.eptcfg = 0;
+		vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
+		vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
+	}
+
+	return 0;
+}
+
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
 	struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 	if (!vcpu_e500->g2h_tlb1_map)
 		goto err;
 
-	/* Init TLB configuration register */
-	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
-			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
-	vcpu->arch.tlbcfg[0] |=
-		vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
-
-	vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
-			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
-	vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
-	vcpu->arch.tlbcfg[1] |=
-		vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
+	vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
 
 	kvmppc_recalc_tlb1map_range(vcpu_e500);
 	return 0;

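vcpu_mmu_geometry_update() above overlays the SW_TLB-supplied geometry
onto the guest-visible TLBnCFG values. A small sketch of that field
update; the mask and shift below are illustrative stand-ins for the
TLBnCFG_* definitions in asm/mmu-book3e.h:

#include <stdint.h>

#define TLBnCFG_N_ENTRY		0x00000fffu	/* assumed field layout */
#define TLBnCFG_ASSOC		0xff000000u
#define TLBnCFG_ASSOC_SHIFT	24

static uint32_t tlbcfg_update(uint32_t tlbcfg, uint32_t entries,
			      uint32_t ways)
{
	tlbcfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);	/* clear geometry */
	tlbcfg |= entries;				/* guest entry count */
	tlbcfg |= ways << TLBnCFG_ASSOC_SHIFT;		/* guest ways */
	return tlbcfg;
}
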
+ 16 - 0
arch/powerpc/kvm/e500mc.c

@@ -172,6 +172,8 @@ int kvmppc_core_check_processor_compat(void)
 		r = 0;
 	else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
 		r = 0;
+	else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+		r = 0;
 	else
 		r = -ENOTSUPP;
 
@@ -255,6 +257,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+		       union kvmppc_one_reg *val)
+{
+	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+	return r;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;

+ 17 - 0
arch/powerpc/kvm/irq.h

@@ -0,0 +1,17 @@
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	int ret = 0;
+
+#ifdef CONFIG_KVM_MPIC
+	ret = ret || (kvm->arch.mpic != NULL);
+#endif
+	smp_rmb();
+	return ret;
+}
+
+#endif

+ 1843 - 0
arch/powerpc/kvm/mpic.c

@@ -0,0 +1,1843 @@
+/*
+ * OpenPIC emulation
+ *
+ * Copyright (c) 2004 Jocelyn Mayer
+ *               2011 Alexander Graf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <asm/uaccess.h>
+#include <asm/mpic.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include "iodev.h"
+
+#define MAX_CPU     32
+#define MAX_SRC     256
+#define MAX_TMR     4
+#define MAX_IPI     4
+#define MAX_MSI     8
+#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
+#define VID         0x03	/* MPIC version ID */
+
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)
+#define OPENPIC_FLAG_ILR          (2 << 0)
+
+/* OpenPIC address map */
+#define OPENPIC_REG_SIZE             0x40000
+#define OPENPIC_GLB_REG_START        0x0
+#define OPENPIC_GLB_REG_SIZE         0x10F0
+#define OPENPIC_TMR_REG_START        0x10F0
+#define OPENPIC_TMR_REG_SIZE         0x220
+#define OPENPIC_MSI_REG_START        0x1600
+#define OPENPIC_MSI_REG_SIZE         0x200
+#define OPENPIC_SUMMARY_REG_START    0x3800
+#define OPENPIC_SUMMARY_REG_SIZE     0x800
+#define OPENPIC_SRC_REG_START        0x10000
+#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START        0x20000
+#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
+
+struct fsl_mpic_info {
+	int max_ext;
+};
+
+static struct fsl_mpic_info fsl_mpic_20 = {
+	.max_ext = 12,
+};
+
+static struct fsl_mpic_info fsl_mpic_42 = {
+	.max_ext = 12,
+};
+
+#define FRR_NIRQ_SHIFT    16
+#define FRR_NCPU_SHIFT     8
+#define FRR_VID_SHIFT      0
+
+#define VID_REVISION_1_2   2
+#define VID_REVISION_1_3   3
+
+#define VIR_GENERIC      0x00000000	/* Generic Vendor ID */
+
+#define GCR_RESET        0x80000000
+#define GCR_MODE_PASS    0x00000000
+#define GCR_MODE_MIXED   0x20000000
+#define GCR_MODE_PROXY   0x60000000
+
+#define TBCR_CI           0x80000000	/* count inhibit */
+#define TCCR_TOG          0x80000000	/* toggles when decrement to zero */
+
+#define IDR_EP_SHIFT      31
+#define IDR_EP_MASK       (1 << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT     30
+#define IDR_CI1_SHIFT     29
+#define IDR_P1_SHIFT      1
+#define IDR_P0_SHIFT      0
+
+#define ILR_INTTGT_MASK   0x000000ff
+#define ILR_INTTGT_INT    0x00
+#define ILR_INTTGT_CINT   0x01	/* critical */
+#define ILR_INTTGT_MCP    0x02	/* machine check */
+#define NUM_OUTPUTS       3
+
+#define MSIIR_OFFSET       0x140
+#define MSIIR_SRS_SHIFT    29
+#define MSIIR_SRS_MASK     (0x7 << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT    24
+#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
+
+static int get_current_cpu(void)
+{
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
+	return vcpu ? vcpu->arch.irq_cpu_id : -1;
+#else
+	/* XXX */
+	return -1;
+#endif
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx);
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx);
+
+enum irq_type {
+	IRQ_TYPE_NORMAL = 0,
+	IRQ_TYPE_FSLINT,	/* FSL internal interrupt -- level only */
+	IRQ_TYPE_FSLSPECIAL,	/* FSL timer/IPI interrupt, edge, no polarity */
+};
+
+struct irq_queue {
+	/* Round up to the nearest 64 IRQs so that the queue length
+	 * won't change when moving between 32 and 64 bit hosts.
+	 */
+	unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
+	int next;
+	int priority;
+};
+
+struct irq_source {
+	uint32_t ivpr;		/* IRQ vector/priority register */
+	uint32_t idr;		/* IRQ destination register */
+	uint32_t destmask;	/* bitmap of CPU destinations */
+	int last_cpu;
+	int output;		/* IRQ level, e.g. ILR_INTTGT_INT */
+	int pending;		/* TRUE if IRQ is pending */
+	enum irq_type type;
+	bool level:1;		/* level-triggered */
+	bool nomask:1;	/* critical interrupts ignore mask on some FSL MPICs */
+};
+
+#define IVPR_MASK_SHIFT       31
+#define IVPR_MASK_MASK        (1 << IVPR_MASK_SHIFT)
+#define IVPR_ACTIVITY_SHIFT   30
+#define IVPR_ACTIVITY_MASK    (1 << IVPR_ACTIVITY_SHIFT)
+#define IVPR_MODE_SHIFT       29
+#define IVPR_MODE_MASK        (1 << IVPR_MODE_SHIFT)
+#define IVPR_POLARITY_SHIFT   23
+#define IVPR_POLARITY_MASK    (1 << IVPR_POLARITY_SHIFT)
+#define IVPR_SENSE_SHIFT      22
+#define IVPR_SENSE_MASK       (1 << IVPR_SENSE_SHIFT)
+
+#define IVPR_PRIORITY_MASK     (0xF << 16)
+#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
+#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
+
+/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
+#define IDR_EP      0x80000000	/* external pin */
+#define IDR_CI      0x40000000	/* critical interrupt */
+
+struct irq_dest {
+	struct kvm_vcpu *vcpu;
+
+	int32_t ctpr;		/* CPU current task priority */
+	struct irq_queue raised;
+	struct irq_queue servicing;
+
+	/* Count of IRQ sources asserting on non-INT outputs */
+	uint32_t outputs_active[NUM_OUTPUTS];
+};
+
+struct openpic {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct kvm_io_device mmio;
+	struct list_head mmio_regions;
+	atomic_t users;
+
+	gpa_t reg_base;
+	spinlock_t lock;
+
+	/* Behavior control */
+	struct fsl_mpic_info *fsl;
+	uint32_t model;
+	uint32_t flags;
+	uint32_t nb_irqs;
+	uint32_t vid;
+	uint32_t vir;		/* Vendor identification register */
+	uint32_t vector_mask;
+	uint32_t tfrr_reset;
+	uint32_t ivpr_reset;
+	uint32_t idr_reset;
+	uint32_t brr1;
+	uint32_t mpic_mode_mask;
+
+	/* Global registers */
+	uint32_t frr;		/* Feature reporting register */
+	uint32_t gcr;		/* Global configuration register  */
+	uint32_t pir;		/* Processor initialization register */
+	uint32_t spve;		/* Spurious vector register */
+	uint32_t tfrr;		/* Timer frequency reporting register */
+	/* Source registers */
+	struct irq_source src[MAX_IRQ];
+	/* Local registers per output pin */
+	struct irq_dest dst[MAX_CPU];
+	uint32_t nb_cpus;
+	/* Timer registers */
+	struct {
+		uint32_t tccr;	/* Global timer current count register */
+		uint32_t tbcr;	/* Global timer base count register */
+	} timers[MAX_TMR];
+	/* Shared MSI registers */
+	struct {
+		uint32_t msir;	/* Shared Message Signaled Interrupt Register */
+	} msi[MAX_MSI];
+	uint32_t max_irq;
+	uint32_t irq_ipi0;
+	uint32_t irq_tim0;
+	uint32_t irq_msi;
+};
+
+
+static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	struct kvm_interrupt irq = {
+		.irq = KVM_INTERRUPT_SET_LEVEL,
+	};
+
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
+}
+
+static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvmppc_core_dequeue_external(dst->vcpu);
+}
+
+static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
+{
+	set_bit(n_IRQ, q->queue);
+}
+
+static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
+{
+	clear_bit(n_IRQ, q->queue);
+}
+
+static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
+{
+	return test_bit(n_IRQ, q->queue);
+}
+
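+/*
+ * Scan the queue's pending bitmap and cache the highest-priority raised
+ * IRQ (and its priority) in q->next/q->priority; -1 means the queue is
+ * empty.
+ */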
+static void IRQ_check(struct openpic *opp, struct irq_queue *q)
+{
+	int irq = -1;
+	int next = -1;
+	int priority = -1;
+
+	for (;;) {
+		irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
+		if (irq == opp->max_irq)
+			break;
+
+		pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
+			irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
+
+		if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
+			next = irq;
+			priority = IVPR_PRIORITY(opp->src[irq].ivpr);
+		}
+	}
+
+	q->next = next;
+	q->priority = priority;
+}
+
+static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
+{
+	/* XXX: optimize */
+	IRQ_check(opp, q);
+
+	return q->next;
+}
+
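+/*
+ * Propagate a source's new state to one destination CPU: non-INT
+ * outputs just track an assertion count, while INT delivery updates
+ * the raised queue and raises or lowers the output depending on ctpr
+ * and the interrupt currently being serviced.
+ */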
+static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
+			   bool active, bool was_active)
+{
+	struct irq_dest *dst;
+	struct irq_source *src;
+	int priority;
+
+	dst = &opp->dst[n_CPU];
+	src = &opp->src[n_IRQ];
+
+	pr_debug("%s: IRQ %d active %d was %d\n",
+		__func__, n_IRQ, active, was_active);
+
+	if (src->output != ILR_INTTGT_INT) {
+		pr_debug("%s: output %d irq %d active %d was %d count %d\n",
+			__func__, src->output, n_IRQ, active, was_active,
+			dst->outputs_active[src->output]);
+
+		/* On Freescale MPIC, critical interrupts ignore priority,
+		 * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
+		 * masking.
+		 */
+		if (active) {
+			if (!was_active &&
+			    dst->outputs_active[src->output]++ == 0) {
+				pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_raise(opp, dst, src->output);
+			}
+		} else {
+			if (was_active &&
+			    --dst->outputs_active[src->output] == 0) {
+				pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_lower(opp, dst, src->output);
+			}
+		}
+
+		return;
+	}
+
+	priority = IVPR_PRIORITY(src->ivpr);
+
+	/* Even if the interrupt doesn't have enough priority,
+	 * it is still raised, in case ctpr is lowered later.
+	 */
+	if (active)
+		IRQ_setbit(&dst->raised, n_IRQ);
+	else
+		IRQ_resetbit(&dst->raised, n_IRQ);
+
+	IRQ_check(opp, &dst->raised);
+
+	if (active && priority <= dst->ctpr) {
+		pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
+			__func__, n_IRQ, priority, dst->ctpr, n_CPU);
+		active = 0;
+	}
+
+	if (active) {
+		if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
+		    priority <= dst->servicing.priority) {
+			pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
+				__func__, n_IRQ, dst->servicing.next, n_CPU);
+		} else {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
+				__func__, n_CPU, n_IRQ, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+	} else {
+		IRQ_get_next(opp, &dst->servicing);
+		if (dst->raised.priority > dst->ctpr &&
+		    dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->raised.next,
+				dst->raised.priority, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			/* IRQ line stays asserted */
+		} else {
+			pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		}
+	}
+}
+
+/* update pic state because registers for n_IRQ have changed value */
+static void openpic_update_irq(struct openpic *opp, int n_IRQ)
+{
+	struct irq_source *src;
+	bool active, was_active;
+	int i;
+
+	src = &opp->src[n_IRQ];
+	active = src->pending;
+
+	if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
+		/* Interrupt source is disabled */
+		pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
+		active = false;
+	}
+
+	was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
+
+	/*
+	 * We don't have a similar check for already-active because
+	 * ctpr may have changed and we need to withdraw the interrupt.
+	 */
+	if (!active && !was_active) {
+		pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (active)
+		src->ivpr |= IVPR_ACTIVITY_MASK;
+	else
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+
+	if (src->destmask == 0) {
+		/* No target */
+		pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (src->destmask == (1 << src->last_cpu)) {
+		/* Only one CPU is allowed to receive this IRQ */
+		IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
+	} else if (!(src->ivpr & IVPR_MODE_MASK)) {
+		/* Directed delivery mode */
+		for (i = 0; i < opp->nb_cpus; i++) {
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+			}
+		}
+	} else {
+		/* Distributed delivery mode */
+		for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
+			if (i == opp->nb_cpus)
+				i = 0;
+
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+				src->last_cpu = i;
+				break;
+			}
+		}
+	}
+}
+
+static void openpic_set_irq(void *opaque, int n_IRQ, int level)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+
+	if (n_IRQ >= MAX_IRQ) {
+		WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
+		return;
+	}
+
+	src = &opp->src[n_IRQ];
+	pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
+		n_IRQ, level, src->ivpr);
+	if (src->level) {
+		/* level-sensitive irq */
+		src->pending = level;
+		openpic_update_irq(opp, n_IRQ);
+	} else {
+		/* edge-sensitive irq */
+		if (level) {
+			src->pending = 1;
+			openpic_update_irq(opp, n_IRQ);
+		}
+
+		if (src->output != ILR_INTTGT_INT) {
+			/* Edge-triggered interrupts shouldn't be used
+			 * with non-INT delivery, but just in case,
+			 * try to make it do something sane rather than
+			 * cause an interrupt storm.  This is close to
+			 * what you'd probably see happen in real hardware.
+			 */
+			src->pending = 0;
+			openpic_update_irq(opp, n_IRQ);
+		}
+	}
+}
+
+static void openpic_reset(struct openpic *opp)
+{
+	int i;
+
+	opp->gcr = GCR_RESET;
+	/* Initialise controller registers */
+	opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+	    (opp->vid << FRR_VID_SHIFT);
+
+	opp->pir = 0;
+	opp->spve = -1 & opp->vector_mask;
+	opp->tfrr = opp->tfrr_reset;
+	/* Initialise IRQ sources */
+	for (i = 0; i < opp->max_irq; i++) {
+		opp->src[i].ivpr = opp->ivpr_reset;
+		opp->src[i].idr = opp->idr_reset;
+
+		switch (opp->src[i].type) {
+		case IRQ_TYPE_NORMAL:
+			opp->src[i].level =
+			    !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+			break;
+
+		case IRQ_TYPE_FSLINT:
+			opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+			break;
+
+		case IRQ_TYPE_FSLSPECIAL:
+			break;
+		}
+	}
+	/* Initialise IRQ destinations */
+	for (i = 0; i < MAX_CPU; i++) {
+		opp->dst[i].ctpr = 15;
+		memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
+		opp->dst[i].raised.next = -1;
+		memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
+		opp->dst[i].servicing.next = -1;
+	}
+	/* Initialise timers */
+	for (i = 0; i < MAX_TMR; i++) {
+		opp->timers[i].tccr = 0;
+		opp->timers[i].tbcr = TBCR_CI;
+	}
+	/* Go out of RESET state */
+	opp->gcr = 0;
+}
+
+static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].idr;
+}
+
+static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
+{
+	if (opp->flags & OPENPIC_FLAG_ILR)
+		return opp->src[n_IRQ].output;
+
+	return 0xffffffff;
+}
+
+static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].ivpr;
+}
+
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	struct irq_source *src = &opp->src[n_IRQ];
+	uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
+	uint32_t crit_mask = 0;
+	uint32_t mask = normal_mask;
+	int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
+	int i;
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		crit_mask = mask << crit_shift;
+		mask |= crit_mask | IDR_EP;
+	}
+
+	src->idr = val & mask;
+	pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		if (src->idr & crit_mask) {
+			if (src->idr & normal_mask) {
+				pr_debug("%s: IRQ configured for multiple output types, using critical\n",
+					__func__);
+			}
+
+			src->output = ILR_INTTGT_CINT;
+			src->nomask = true;
+			src->destmask = 0;
+
+			for (i = 0; i < opp->nb_cpus; i++) {
+				int n_ci = IDR_CI0_SHIFT - i;
+
+				if (src->idr & (1UL << n_ci))
+					src->destmask |= 1UL << i;
+			}
+		} else {
+			src->output = ILR_INTTGT_INT;
+			src->nomask = false;
+			src->destmask = src->idr & normal_mask;
+		}
+	} else {
+		src->destmask = src->idr;
+	}
+}
+
+static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	if (opp->flags & OPENPIC_FLAG_ILR) {
+		struct irq_source *src = &opp->src[n_IRQ];
+
+		src->output = val & ILR_INTTGT_MASK;
+		pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
+			src->output);
+
+		/* TODO: on MPIC v4.0 only, set nomask for non-INT */
+	}
+}
+
+static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
+				     uint32_t val)
+{
+	uint32_t mask;
+
+	/* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+	 * the polarity bit is read-only on internal interrupts.
+	 */
+	mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
+	    IVPR_POLARITY_MASK | opp->vector_mask;
+
+	/* ACTIVITY bit is read-only */
+	opp->src[n_IRQ].ivpr =
+	    (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
+
+	/* For FSL internal interrupts, the sense bit is reserved and zero,
+	 * and the interrupt is always level-triggered.  Timers and IPIs
+	 * have no sense or polarity bits, and are edge-triggered.
+	 */
+	switch (opp->src[n_IRQ].type) {
+	case IRQ_TYPE_NORMAL:
+		opp->src[n_IRQ].level =
+		    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
+		break;
+
+	case IRQ_TYPE_FSLINT:
+		opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
+		break;
+
+	case IRQ_TYPE_FSLSPECIAL:
+		opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
+		break;
+	}
+
+	openpic_update_irq(opp, n_IRQ);
+	pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+		opp->src[n_IRQ].ivpr);
+}
+
+static void openpic_gcr_write(struct openpic *opp, uint64_t val)
+{
+	if (val & GCR_RESET) {
+		openpic_reset(opp);
+		return;
+	}
+
+	opp->gcr &= ~opp->mpic_mode_mask;
+	opp->gcr |= val & opp->mpic_mode_mask;
+}
+
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case 0x00:	/* Block Revision Register1 (BRR1) is read-only */
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		err = openpic_cpu_write_internal(opp, addr, val,
+						 get_current_cpu());
+		break;
+	case 0x1000:		/* FRR */
+		break;
+	case 0x1020:		/* GCR */
+		openpic_gcr_write(opp, val);
+		break;
+	case 0x1080:		/* VIR */
+		break;
+	case 0x1090:		/* PIR */
+		/*
+		 * This register is used to reset a CPU core --
+		 * let userspace handle it.
+		 */
+		err = -ENXIO;
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0: {
+		int idx;
+		idx = (addr - 0x10A0) >> 4;
+		write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
+		break;
+	}
+	case 0x10E0:		/* SPVE */
+		opp->spve = val & opp->vector_mask;
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	u32 retval;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+	if (addr & 0xF)
+		goto out;
+
+	switch (addr) {
+	case 0x1000:		/* FRR */
+		retval = opp->frr;
+		retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
+		break;
+	case 0x1020:		/* GCR */
+		retval = opp->gcr;
+		break;
+	case 0x1080:		/* VIR */
+		retval = opp->vir;
+		break;
+	case 0x1090:		/* PIR */
+		retval = 0x00000000;
+		break;
+	case 0x00:		/* Block Revision Register1 (BRR1) */
+		retval = opp->brr1;
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		err = openpic_cpu_read_internal(opp, addr,
+			&retval, get_current_cpu());
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0:
+		{
+			int idx;
+			idx = (addr - 0x10A0) >> 4;
+			retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
+		}
+		break;
+	case 0x10E0:		/* SPVE */
+		retval = opp->spve;
+		break;
+	default:
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return err;
+}
+
+static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	addr += 0x10f0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	if (addr == 0x10f0) {
+		/* TFRR */
+		opp->tfrr = val;
+		return 0;
+	}
+
+	idx = (addr >> 6) & 0x3;
+	addr = addr & 0x30;
+
+	switch (addr & 0x30) {
+	case 0x00:		/* TCCR */
+		break;
+	case 0x10:		/* TBCR */
+		if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
+		    (val & TBCR_CI) == 0 &&
+		    (opp->timers[idx].tbcr & TBCR_CI) != 0)
+			opp->timers[idx].tccr &= ~TCCR_TOG;
+
+		opp->timers[idx].tbcr = val;
+		break;
+	case 0x20:		/* TVPR */
+		write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
+		break;
+	case 0x30:		/* TDR */
+		write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval = -1;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		goto out;
+
+	idx = (addr >> 6) & 0x3;
+	if (addr == 0x0) {
+		/* TFRR */
+		retval = opp->tfrr;
+		goto out;
+	}
+
+	switch (addr & 0x30) {
+	case 0x00:		/* TCCR */
+		retval = opp->timers[idx].tccr;
+		break;
+	case 0x10:		/* TBCR */
+		retval = opp->timers[idx].tbcr;
+		break;
+	case 0x20:		/* TIPV */
+		retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
+		break;
+	case 0x30:		/* TIDE (TIDR) */
+		retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+
+	addr = addr & 0xffff;
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		write_IRQreg_ivpr(opp, idx, val);
+		break;
+	case 0x10:
+		write_IRQreg_idr(opp, idx, val);
+		break;
+	case 0x18:
+		write_IRQreg_ilr(opp, idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+
+	addr = addr & 0xffff;
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		retval = read_IRQreg_ivpr(opp, idx);
+		break;
+	case 0x10:
+		retval = read_IRQreg_idr(opp, idx);
+		break;
+	case 0x18:
+		retval = read_IRQreg_ilr(opp, idx);
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx = opp->irq_msi;
+	int srs, ibs;
+
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case MSIIR_OFFSET:
+		srs = val >> MSIIR_SRS_SHIFT;
+		idx += srs;
+		ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+		opp->msi[srs].msir |= 1 << ibs;
+		openpic_set_irq(opp, idx, 1);
+		break;
+	default:
+		/* most registers are read-only, thus ignored */
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t r = 0;
+	int i, srs;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		return -ENXIO;
+
+	srs = addr >> 4;
+
+	switch (addr) {
+	case 0x00:
+	case 0x10:
+	case 0x20:
+	case 0x30:
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:		/* MSIRs */
+		r = opp->msi[srs].msir;
+		/* Clear on read */
+		opp->msi[srs].msir = 0;
+		openpic_set_irq(opp, opp->irq_msi + srs, 0);
+		break;
+	case 0x120:		/* MSISR */
+		for (i = 0; i < MAX_MSI; i++)
+			r |= (opp->msi[i].msir ? 1 : 0) << i;
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, r);
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	uint32_t r = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+
+	/* TODO: EISR/EIMR */
+
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
+{
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+
+	/* TODO: EISR/EIMR */
+	return 0;
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+	struct irq_dest *dst;
+	int s_IRQ, n_IRQ;
+
+	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
+		addr, val);
+
+	if (idx < 0)
+		return 0;
+
+	if (addr & 0xF)
+		return 0;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x40:		/* IPIDR */
+	case 0x50:
+	case 0x60:
+	case 0x70:
+		idx = (addr - 0x40) >> 4;
+		/* we still use IDE as a mask of which CPUs to deliver the IPI to. */
+		opp->src[opp->irq_ipi0 + idx].destmask |= val;
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
+		break;
+	case 0x80:		/* CTPR */
+		dst->ctpr = val & 0x0000000F;
+
+		pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
+			__func__, idx, dst->ctpr, dst->raised.priority,
+			dst->servicing.priority);
+
+		if (dst->raised.priority <= dst->ctpr) {
+			pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
+				__func__, idx);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		} else if (dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
+				__func__, idx, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		break;
+	case 0x90:		/* WHOAMI */
+		/* Read-only register */
+		break;
+	case 0xA0:		/* IACK */
+		/* Read-only register */
+		break;
+	case 0xB0: {		/* EOI */
+		int notify_eoi;
+
+		pr_debug("EOI\n");
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+
+		if (s_IRQ < 0) {
+			pr_debug("%s: EOI with no interrupt in service\n",
+				__func__);
+			break;
+		}
+
+		IRQ_resetbit(&dst->servicing, s_IRQ);
+		/* Notify listeners that the IRQ is over */
+		notify_eoi = s_IRQ;
+		/* Set up next servicing IRQ */
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+		/* Check queued interrupts. */
+		n_IRQ = IRQ_get_next(opp, &dst->raised);
+		src = &opp->src[n_IRQ];
+		if (n_IRQ != -1 &&
+		    (s_IRQ == -1 ||
+		     IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
+			pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
+				idx, n_IRQ);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		spin_unlock(&opp->lock);
+		kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
+		spin_lock(&opp->lock);
+
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_write_internal(opp, addr, val,
+					 (addr & 0x1f000) >> 12);
+}
+
+static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
+			     int cpu)
+{
+	struct irq_source *src;
+	int retval, irq;
+
+	pr_debug("Lower OpenPIC INT output\n");
+	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+
+	irq = IRQ_get_next(opp, &dst->raised);
+	pr_debug("IACK: irq=%d\n", irq);
+
+	if (irq == -1)
+		/* No more interrupts pending */
+		return opp->spve;
+
+	src = &opp->src[irq];
+	if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
+	    !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
+		pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
+			__func__, irq, dst->ctpr, src->ivpr);
+		openpic_update_irq(opp, irq);
+		retval = opp->spve;
+	} else {
+		/* IRQ enters the servicing state */
+		IRQ_setbit(&dst->servicing, irq);
+		retval = IVPR_VECTOR(opp, src->ivpr);
+	}
+
+	if (!src->level) {
+		/* edge-sensitive IRQ */
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+		src->pending = 0;
+		IRQ_resetbit(&dst->raised, irq);
+	}
+
+	if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
+		src->destmask &= ~(1 << cpu);
+		if (src->destmask && !src->level) {
+			/* trigger on CPUs that didn't know about it yet */
+			openpic_set_irq(opp, irq, 1);
+			openpic_set_irq(opp, irq, 0);
+			/* if all CPUs knew about it, set active bit again */
+			src->ivpr |= IVPR_ACTIVITY_MASK;
+		}
+	}
+
+	return retval;
+}
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+	struct openpic *opp = vcpu->arch.mpic;
+	int cpu = vcpu->arch.irq_cpu_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
+		kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
+
+	spin_unlock_irqrestore(&opp->lock, flags);
+}
+
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_dest *dst;
+	uint32_t retval;
+
+	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
+	retval = 0xFFFFFFFF;
+
+	if (idx < 0)
+		goto out;
+
+	if (addr & 0xF)
+		goto out;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x80:		/* CTPR */
+		retval = dst->ctpr;
+		break;
+	case 0x90:		/* WHOAMI */
+		retval = idx;
+		break;
+	case 0xA0:		/* IACK */
+		retval = openpic_iack(opp, dst, idx);
+		break;
+	case 0xB0:		/* EOI */
+		retval = 0;
+		break;
+	default:
+		break;
+	}
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+
+out:
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_read_internal(opp, addr, ptr,
+					 (addr & 0x1f000) >> 12);
+}
+
+struct mem_reg {
+	struct list_head list;
+	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
+	int (*write)(void *opaque, gpa_t addr, u32 val);
+	gpa_t start_addr;
+	int size;
+};
+
+static struct mem_reg openpic_gbl_mmio = {
+	.write = openpic_gbl_write,
+	.read = openpic_gbl_read,
+	.start_addr = OPENPIC_GLB_REG_START,
+	.size = OPENPIC_GLB_REG_SIZE,
+};
+
+static struct mem_reg openpic_tmr_mmio = {
+	.write = openpic_tmr_write,
+	.read = openpic_tmr_read,
+	.start_addr = OPENPIC_TMR_REG_START,
+	.size = OPENPIC_TMR_REG_SIZE,
+};
+
+static struct mem_reg openpic_cpu_mmio = {
+	.write = openpic_cpu_write,
+	.read = openpic_cpu_read,
+	.start_addr = OPENPIC_CPU_REG_START,
+	.size = OPENPIC_CPU_REG_SIZE,
+};
+
+static struct mem_reg openpic_src_mmio = {
+	.write = openpic_src_write,
+	.read = openpic_src_read,
+	.start_addr = OPENPIC_SRC_REG_START,
+	.size = OPENPIC_SRC_REG_SIZE,
+};
+
+static struct mem_reg openpic_msi_mmio = {
+	.read = openpic_msi_read,
+	.write = openpic_msi_write,
+	.start_addr = OPENPIC_MSI_REG_START,
+	.size = OPENPIC_MSI_REG_SIZE,
+};
+
+static struct mem_reg openpic_summary_mmio = {
+	.read = openpic_summary_read,
+	.write = openpic_summary_write,
+	.start_addr = OPENPIC_SUMMARY_REG_START,
+	.size = OPENPIC_SUMMARY_REG_SIZE,
+};
+
+static void fsl_common_init(struct openpic *opp)
+{
+	int i;
+	int virq = MAX_SRC;
+
+	list_add(&openpic_msi_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_summary_mmio.list, &opp->mmio_regions);
+
+	opp->vid = VID_REVISION_1_2;
+	opp->vir = VIR_GENERIC;
+	opp->vector_mask = 0xFFFF;
+	opp->tfrr_reset = 0;
+	opp->ivpr_reset = IVPR_MASK_MASK;
+	opp->idr_reset = 1 << 0;
+	opp->max_irq = MAX_IRQ;
+
+	opp->irq_ipi0 = virq;
+	virq += MAX_IPI;
+	opp->irq_tim0 = virq;
+	virq += MAX_TMR;
+
+	BUG_ON(virq > MAX_IRQ);
+
+	opp->irq_msi = 224;
+
+	for (i = 0; i < opp->fsl->max_ext; i++)
+		opp->src[i].level = false;
+
+	/* Internal interrupts, including message and MSI */
+	for (i = 16; i < MAX_SRC; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLINT;
+		opp->src[i].level = true;
+	}
+
+	/* timers and IPIs */
+	for (i = MAX_SRC; i < virq; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
+		opp->src[i].level = false;
+	}
+}
+
+static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
+{
+	struct list_head *node;
+
+	list_for_each(node, &opp->mmio_regions) {
+		struct mem_reg *mr = list_entry(node, struct mem_reg, list);
+
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
+
+		return mr->read(opp, addr - mr->start_addr, ptr);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
+{
+	struct list_head *node;
+
+	list_for_each(node, &opp->mmio_regions) {
+		struct mem_reg *mr = list_entry(node, struct mem_reg, list);
+
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
+
+		return mr->write(opp, addr - mr->start_addr, val);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
+			 int len, void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+	union {
+		u32 val;
+		u8 bytes[4];
+	} u;
+
+	if (addr & (len - 1)) {
+		pr_debug("%s: bad alignment %llx/%d\n",
+			 __func__, addr, len);
+		return -EINVAL;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
+	spin_unlock_irq(&opp->lock);
+
+	/*
+	 * Technically only 32-bit accesses are allowed, but be nice to
+	 * people dumping registers a byte at a time -- it works in real
+	 * hardware (reads only, not writes).
+	 */
+	if (len == 4) {
+		*(u32 *)ptr = u.val;
+		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
+			 __func__, addr, ret, u.val);
+	} else if (len == 1) {
+		*(u8 *)ptr = u.bytes[addr & 3];
+		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
+			 __func__, addr, ret, u.bytes[addr & 3]);
+	} else {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
+			  int len, const void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+
+	if (len != 4) {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EOPNOTSUPP;
+	}
+	if (addr & 3) {
+		pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
+		return -EOPNOTSUPP;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
+				      *(const u32 *)ptr);
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: addr %llx ret %d val %x\n",
+		 __func__, addr, ret, *(const u32 *)ptr);
+
+	return ret;
+}
+
+static const struct kvm_io_device_ops mpic_mmio_ops = {
+	.read = kvm_mpic_read,
+	.write = kvm_mpic_write,
+};
+
+static void map_mmio(struct openpic *opp)
+{
+	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
+
+	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
+				opp->reg_base, OPENPIC_REG_SIZE,
+				&opp->mmio);
+}
+
+static void unmap_mmio(struct openpic *opp)
+{
+	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+}
+
+static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
+{
+	u64 base;
+
+	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
+		return -EFAULT;
+
+	if (base & 0x3ffff) {
+		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
+			 __func__, base);
+		return -EINVAL;
+	}
+
+	if (base == opp->reg_base)
+		return 0;
+
+	mutex_lock(&opp->kvm->slots_lock);
+
+	unmap_mmio(opp);
+	opp->reg_base = base;
+
+	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
+		 __func__, base);
+
+	if (base == 0)
+		goto out;
+
+	map_mmio(opp);
+
+out:
+	mutex_unlock(&opp->kvm->slots_lock);
+	return 0;
+}
+
+#define ATTR_SET		0
+#define ATTR_GET		1
+
+static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
+{
+	int ret;
+
+	if (addr & 3)
+		return -ENXIO;
+
+	spin_lock_irq(&opp->lock);
+
+	if (type == ATTR_SET)
+		ret = kvm_mpic_write_internal(opp, addr, *val);
+	else
+		ret = kvm_mpic_read_internal(opp, addr, val);
+
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
+
+	return ret;
+}
+
+static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u32 attr32;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return set_base_addr(opp, attr);
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return access_reg(opp, attr->attr, &attr32, ATTR_SET);
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		if (attr32 != 0 && attr32 != 1)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		openpic_set_irq(opp, attr->attr, attr32);
+		spin_unlock_irq(&opp->lock);
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u64 attr64;
+	u32 attr32;
+	int ret;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			mutex_lock(&opp->kvm->slots_lock);
+			attr64 = opp->reg_base;
+			mutex_unlock(&opp->kvm->slots_lock);
+
+			if (copy_to_user((u64 __user *)(long)attr->addr,
+					 &attr64, sizeof(u64)))
+				return -EFAULT;
+
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
+		if (ret)
+			return ret;
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		attr32 = opp->src[attr->attr].pending;
+		spin_unlock_irq(&opp->lock);
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			break;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static void mpic_destroy(struct kvm_device *dev)
+{
+	struct openpic *opp = dev->private;
+
+	dev->kvm->arch.mpic = NULL;
+	kfree(opp);
+	kfree(dev);	/* ops->destroy() is responsible for freeing dev */
+}
+
+static int mpic_set_default_irq_routing(struct openpic *opp)
+{
+	struct kvm_irq_routing_entry *routing;
+
+	/* Create a no-op default map so that dereferencing it still works */
+	routing = kzalloc(sizeof(*routing), GFP_KERNEL);
+	if (!routing)
+		return -ENOMEM;
+
+	kvm_set_irq_routing(opp->kvm, routing, 0, 0);
+
+	kfree(routing);
+	return 0;
+}
+
+static int mpic_create(struct kvm_device *dev, u32 type)
+{
+	struct openpic *opp;
+	int ret;
+
+	/* We only support one MPIC at a time for now */
+	if (dev->kvm->arch.mpic)
+		return -EINVAL;
+
+	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
+	if (!opp)
+		return -ENOMEM;
+
+	dev->private = opp;
+	opp->kvm = dev->kvm;
+	opp->dev = dev;
+	opp->model = type;
+	spin_lock_init(&opp->lock);
+
+	INIT_LIST_HEAD(&opp->mmio_regions);
+	list_add(&openpic_gbl_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_tmr_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_src_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_cpu_mmio.list, &opp->mmio_regions);
+
+	switch (opp->model) {
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+		opp->fsl = &fsl_mpic_20;
+		opp->brr1 = 0x00400200;
+		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
+		opp->nb_irqs = 80;
+		opp->mpic_mode_mask = GCR_MODE_MIXED;
+
+		fsl_common_init(opp);
+
+		break;
+
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		opp->fsl = &fsl_mpic_42;
+		opp->brr1 = 0x00400402;
+		opp->flags |= OPENPIC_FLAG_ILR;
+		opp->nb_irqs = 196;
+		opp->mpic_mode_mask = GCR_MODE_PROXY;
+
+		fsl_common_init(opp);
+
+		break;
+
+	default:
+		ret = -ENODEV;
+		goto err;
+	}
+
+	ret = mpic_set_default_irq_routing(opp);
+	if (ret)
+		goto err;
+
+	openpic_reset(opp);
+
+	smp_wmb();
+	dev->kvm->arch.mpic = opp;
+
+	return 0;
+
+err:
+	kfree(opp);
+	return ret;
+}
+
+struct kvm_device_ops kvm_mpic_ops = {
+	.name = "kvm-mpic",
+	.create = mpic_create,
+	.destroy = mpic_destroy,
+	.set_attr = mpic_set_attr,
+	.get_attr = mpic_get_attr,
+	.has_attr = mpic_has_attr,
+};
+
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu)
+{
+	struct openpic *opp = dev->private;
+	int ret = 0;
+
+	if (dev->ops != &kvm_mpic_ops)
+		return -EPERM;
+	if (opp->kvm != vcpu->kvm)
+		return -EPERM;
+	if (cpu >= MAX_CPU)	/* cpu is u32, so no negative check is needed */
+		return -EPERM;
+
+	spin_lock_irq(&opp->lock);
+
+	if (opp->dst[cpu].vcpu) {
+		ret = -EEXIST;
+		goto out;
+	}
+	if (vcpu->arch.irq_type) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	opp->dst[cpu].vcpu = vcpu;
+	opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
+
+	vcpu->arch.mpic = opp;
+	vcpu->arch.irq_cpu_id = cpu;
+	vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
+
+	/* This might need to be changed if GCR gets extended */
+	if (opp->mpic_mode_mask == GCR_MODE_PROXY)
+		vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
+
+out:
+	spin_unlock_irq(&opp->lock);
+	return ret;
+}
+
+/*
+ * This should only happen immediately before the mpic is destroyed,
+ * so we shouldn't need to worry about anything still trying to
+ * access the vcpu pointer.
+ */
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
+
+	opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
+}
+
+/*
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id, int level,
+			bool line_status)
+{
+	u32 irq = e->irqchip.pin;
+	struct openpic *opp = kvm->arch.mpic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+	openpic_set_irq(opp, irq, level);
+	spin_unlock_irqrestore(&opp->lock, flags);
+
+	/* None of the code paths we care about check the return value */
+	return 0;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+	struct openpic *opp = kvm->arch.mpic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	/*
+	 * XXX We ignore the target address for now, as we only support
+	 *     a single MSI bank.
+	 */
+	openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
+	spin_unlock_irqrestore(&opp->lock, flags);
+
+	/* None of the code paths we care about check the return value */
+	return 0;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		e->set = mpic_set_irq;
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin;
+		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
+			goto out;
+		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
+		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		break;
+	default:
+		goto out;
+	}
+
+	r = 0;
+out:
+	return r;
+}
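
For context, a minimal userspace sketch (not part of this diff) of how the pieces above fit together: the MPIC is instantiated with the new KVM_CREATE_DEVICE ioctl and mapped into guest physical space through KVM_DEV_MPIC_GRP_MISC / KVM_DEV_MPIC_BASE_ADDR. The vm_fd variable and all error handling are assumptions of the example.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int create_mpic(int vm_fd, __u64 base)
    {
    	struct kvm_create_device cd = {
    		.type = KVM_DEV_TYPE_FSL_MPIC_42,	/* or ..._MPIC_20 */
    	};
    	struct kvm_device_attr attr = { 0 };

    	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
    		return -1;

    	/* set_base_addr() rejects anything not 256 KiB aligned */
    	attr.group = KVM_DEV_MPIC_GRP_MISC;
    	attr.attr = KVM_DEV_MPIC_BASE_ADDR;
    	attr.addr = (__u64)(unsigned long)&base;
    	if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
    		return -1;

    	return cd.fd;	/* device fd, later passed via KVM_CAP_IRQ_MPIC */
    }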

+ 60 - 12
arch/powerpc/kvm/powerpc.c

@@ -25,6 +25,7 @@
 #include <linux/hrtimer.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
@@ -32,6 +33,7 @@
 #include <asm/cputhreads.h>
 #include <asm/irqflags.h>
 #include "timing.h"
+#include "irq.h"
 #include "../mm/mmu_decl.h"
 
 #define CREATE_TRACE_POINTS
@@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
 #ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -325,6 +328,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_GET_PVINFO:
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB:
+#endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC:
 #endif
 		r = 1;
 		break;
@@ -335,6 +341,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
+	case KVM_CAP_PPC_RTAS:
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -459,6 +466,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	tasklet_kill(&vcpu->arch.tasklet);
 
 	kvmppc_remove_vcpu_debugfs(vcpu);
+
+	switch (vcpu->arch.irq_type) {
+	case KVMPPC_IRQ_MPIC:
+		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
+		break;
+	case KVMPPC_IRQ_XICS:
+		kvmppc_xics_free_icp(vcpu);
+		break;
+	}
+
 	kvmppc_core_vcpu_free(vcpu);
 }
 
@@ -531,12 +548,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-                                        struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
@@ -768,7 +779,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_CAP_PPC_EPR:
 		r = 0;
-		vcpu->arch.epr_enabled = cap->args[0];
+		if (cap->args[0])
+			vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
+		else
+			vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
 		break;
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_WATCHDOG:
@@ -788,6 +802,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
 		break;
 	}
+#endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC: {
+		struct file *filp;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		filp = fget(cap->args[0]);
+		if (!filp)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(filp);
+		if (dev)
+			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fput(filp);
+		break;
+	}
 #endif
 	default:
 		r = -EINVAL;
@@ -911,9 +944,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	return 0;
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+			  bool line_status)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level,
+					line_status);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
+	struct kvm *kvm __maybe_unused = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	long r;
 
@@ -932,7 +978,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CREATE_SPAPR_TCE: {
 		struct kvm_create_spapr_tce create_tce;
-		struct kvm *kvm = filp->private_data;
 
 		r = -EFAULT;
 		if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@@ -944,8 +989,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_ALLOCATE_RMA: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_allocate_rma rma;
+		struct kvm *kvm = filp->private_data;
 
 		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
 		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -954,7 +999,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	case KVM_PPC_ALLOCATE_HTAB: {
-		struct kvm *kvm = filp->private_data;
 		u32 htab_order;
 
 		r = -EFAULT;
@@ -971,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	case KVM_PPC_GET_HTAB_FD: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_get_htab_fd ghf;
 
 		r = -EFAULT;
@@ -984,7 +1027,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_PPC_GET_SMMU_INFO: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_ppc_smmu_info info;
 
 		memset(&info, 0, sizeof(info));
@@ -993,6 +1035,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+	case KVM_PPC_RTAS_DEFINE_TOKEN: {
+		struct kvm *kvm = filp->private_data;
+
+		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+		break;
+	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;

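The KVM_CAP_IRQ_MPIC branch added above expects the device fd in args[0] and the MPIC cpu number in args[1]. A hedged userspace sketch of the corresponding KVM_ENABLE_CAP call (vcpu_fd and mpic_fd are assumed to already exist; kvmppc_mpic_connect_vcpu() performs the actual checks):

    struct kvm_enable_cap cap = {
    	.cap = KVM_CAP_IRQ_MPIC,
    	.args = { (__u64)mpic_fd, cpu_id },	/* mpic_fd, cpu_id assumed */
    };

    if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
    	/* fails with -EBADF, -EPERM, -EEXIST or -EBUSY per the code above */;
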
+ 8 - 0
arch/powerpc/sysdev/xics/icp-native.c

@@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
 static inline unsigned int icp_native_get_xirr(void)
 {
 	int cpu = smp_processor_id();
+	unsigned int xirr;
+
+	/* Handle an interrupt latched by KVM */
+	xirr = kvmppc_get_xics_latch();
+	if (xirr)
+		return xirr;
 
 	return in_be32(&icp_native_regs[cpu]->xirr.word);
 }
@@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void)
 
 static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
+	kvmppc_set_host_ipi(cpu, 1);
 	icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
 {
 	int cpu = smp_processor_id();
 
+	kvmppc_set_host_ipi(cpu, 0);
 	icp_native_set_qirr(cpu, 0xff);
 
 	return smp_ipi_demux();

+ 2 - 0
arch/x86/include/asm/kvm_host.h

@@ -43,6 +43,8 @@
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
+
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \

+ 1 - 0
arch/x86/kvm/Kconfig

@@ -29,6 +29,7 @@ config KVM
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
 	select KVM_ASYNC_PF

+ 1 - 1
arch/x86/kvm/Makefile

@@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I.
 
 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 				coalesced_mmio.o irq_comm.o eventfd.o \
-				assigned-dev.o)
+				assigned-dev.o irqchip.o)
 kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 

+ 0 - 1
arch/x86/kvm/x86.c

@@ -2522,7 +2522,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
-	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:

+ 45 - 9
include/linux/kvm_host.h

@@ -303,10 +303,10 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
 struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
 	struct kvm_kernel_irq_routing_entry *rt_entries;
 	u32 nr_rt_entries;
 	/*
@@ -392,6 +392,7 @@ struct kvm {
 	long mmu_notifier_count;
 #endif
 	long tlbs_dirty;
+	struct list_head devices;
 };
 
 #define kvm_err(fmt, ...) \
 
@@ -431,7 +432,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 int kvm_irqfd_init(void);
 void kvm_irqfd_exit(void);
 #else
@@ -718,11 +719,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
-#ifdef __KVM_HAVE_IOAPIC
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask);
-#endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
@@ -956,7 +952,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 }
 #endif
 
-#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
 #define KVM_MAX_IRQ_ROUTES 1024
 
@@ -965,6 +961,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned flags);
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
@@ -1065,6 +1064,43 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 
 extern bool kvm_rebooting;
 
+struct kvm_device_ops;
+
+struct kvm_device {
+	struct kvm_device_ops *ops;
+	struct kvm *kvm;
+	void *private;
+	struct list_head vm_node;
+};
+
+/* create, destroy, and name are mandatory */
+struct kvm_device_ops {
+	const char *name;
+	int (*create)(struct kvm_device *dev, u32 type);
+
+	/*
+	 * Destroy is responsible for freeing dev.
+	 *
+	 * Destroy may be called before or after destructors are called
+	 * on emulated I/O regions, depending on whether a reference is
+	 * held by a vcpu or other kvm component that gets destroyed
+	 * after the emulated I/O.
+	 */
+	void (*destroy)(struct kvm_device *dev);
+
+	int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
+		      unsigned long arg);
+};
+
+void kvm_device_get(struct kvm_device *dev);
+void kvm_device_put(struct kvm_device *dev);
+struct kvm_device *kvm_device_from_filp(struct file *filp);
+
+extern struct kvm_device_ops kvm_mpic_ops;
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)

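To illustrate the new interface, an assumed skeleton (not code from this commit): the comment above makes name, create and destroy mandatory, and destroy must free dev itself, since both kvm_destroy_devices() and the create-failure path in kvm_ioctl_create_device() end at ops->destroy().

    /* "foo" is a hypothetical device type used only for illustration */
    static int foo_create(struct kvm_device *dev, u32 type)
    {
    	dev->private = NULL;	/* allocate per-device state here */
    	return 0;
    }

    static void foo_destroy(struct kvm_device *dev)
    {
    	kfree(dev->private);
    	kfree(dev);		/* destroy() owns dev */
    }

    struct kvm_device_ops kvm_foo_ops = {
    	.name = "kvm-foo",
    	.create = foo_create,
    	.destroy = foo_destroy,
    };
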
+ 10 - 2
include/trace/events/kvm.h

@@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
 		  __entry->errno < 0 ? -__entry->errno : __entry->reason)
 );
 
-#if defined(__KVM_HAVE_IRQ_LINE)
+#if defined(CONFIG_HAVE_KVM_IRQCHIP)
 TRACE_EVENT(kvm_set_irq,
 	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
 	TP_ARGS(gsi, level, irq_source_id),
@@ -122,6 +122,10 @@ TRACE_EVENT(kvm_msi_set_irq,
 	{KVM_IRQCHIP_PIC_SLAVE,		"PIC slave"},		\
 	{KVM_IRQCHIP_IOAPIC,		"IOAPIC"}
 
+#endif /* defined(__KVM_HAVE_IOAPIC) */
+
+#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+
 TRACE_EVENT(kvm_ack_irq,
 	TP_PROTO(unsigned int irqchip, unsigned int pin),
 	TP_ARGS(irqchip, pin),
@@ -136,14 +140,18 @@ TRACE_EVENT(kvm_ack_irq,
 		__entry->pin		= pin;
 	),
 
+#ifdef kvm_irqchips
 	TP_printk("irqchip %s pin %u",
 		  __print_symbolic(__entry->irqchip, kvm_irqchips),
 		 __entry->pin)
+#else
+	TP_printk("irqchip %d pin %u", __entry->irqchip, __entry->pin)
+#endif
 );
 
+#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
 
 
-#endif /* defined(__KVM_HAVE_IOAPIC) */
 
 #define KVM_TRACE_MMIO_READ_UNSATISFIED 0
 #define KVM_TRACE_MMIO_READ 1

+ 34 - 2
include/uapi/linux/kvm.h

@@ -579,9 +579,7 @@ struct kvm_ppc_smmu_info {
 #ifdef __KVM_HAVE_PIT
 #define KVM_CAP_REINJECT_CONTROL 24
 #endif
-#ifdef __KVM_HAVE_IOAPIC
 #define KVM_CAP_IRQ_ROUTING 25
-#endif
 #define KVM_CAP_IRQ_INJECT_STATUS 26
 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_DEASSIGNMENT 27
@@ -668,6 +666,9 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_EPR 86
 #define KVM_CAP_ARM_PSCI 87
 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
+#define KVM_CAP_DEVICE_CTRL 89
+#define KVM_CAP_IRQ_MPIC 90
+#define KVM_CAP_PPC_RTAS 91
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -820,6 +821,27 @@ struct kvm_arm_device_addr {
 	__u64 addr;
 };
 
+/*
+ * Device control API, available with KVM_CAP_DEVICE_CTRL
+ */
+#define KVM_CREATE_DEVICE_TEST		1
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+#define KVM_DEV_TYPE_FSL_MPIC_20	1
+#define KVM_DEV_TYPE_FSL_MPIC_42	2
+
 /*
  * ioctls for VM fds
 */
@@ -907,6 +929,16 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
 /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */
 #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
+/* Available with KVM_CAP_PPC_RTAS */
+#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xac, struct kvm_rtas_token_args)
+
+/* ioctl for vm fd */
+#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
+
+/* ioctls for fds returned by KVM_CREATE_DEVICE */
+#define KVM_SET_DEVICE_ATTR	  _IOW(KVMIO,  0xe1, struct kvm_device_attr)
+#define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
+#define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
 
 /*
  * ioctls for vcpu fds

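With KVM_CREATE_DEVICE_TEST in flags, the ioctl only checks that the device type is known; nothing is instantiated (see kvm_ioctl_create_device() below). A short probing sketch, with vm_fd assumed:

    struct kvm_create_device cd = {
    	.type = KVM_DEV_TYPE_FSL_MPIC_20,
    	.flags = KVM_CREATE_DEVICE_TEST,
    };
    int have_mpic20 = (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0);
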
+ 3 - 0
virt/kvm/Kconfig

@@ -6,6 +6,9 @@ config HAVE_KVM
 config HAVE_KVM_IRQCHIP
        bool
 
+config HAVE_KVM_IRQ_ROUTING
+       bool
+
 config HAVE_KVM_EVENTFD
        bool
        select EVENTFD

+ 0 - 30
virt/kvm/assigned-dev.c

@@ -983,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
-#ifdef KVM_CAP_IRQ_ROUTING
-	case KVM_SET_GSI_ROUTING: {
-		struct kvm_irq_routing routing;
-		struct kvm_irq_routing __user *urouting;
-		struct kvm_irq_routing_entry *entries;
-
-		r = -EFAULT;
-		if (copy_from_user(&routing, argp, sizeof(routing)))
-			goto out;
-		r = -EINVAL;
-		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
-			goto out;
-		if (routing.flags)
-			goto out;
-		r = -ENOMEM;
-		entries = vmalloc(routing.nr * sizeof(*entries));
-		if (!entries)
-			goto out;
-		r = -EFAULT;
-		urouting = argp;
-		if (copy_from_user(entries, urouting->entries,
-				   routing.nr * sizeof(*entries)))
-			goto out_free_irq_routing;
-		r = kvm_set_irq_routing(kvm, entries, routing.nr,
-					routing.flags);
-	out_free_irq_routing:
-		vfree(entries);
-		break;
-	}
-#endif /* KVM_CAP_IRQ_ROUTING */
 #ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;

+ 3 - 3
virt/kvm/eventfd.c

@@ -35,7 +35,7 @@
 
 #include "iodev.h"
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -433,7 +433,7 @@ fail:
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -442,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 /*
  * shutdown any irqfd's that match fd+gsi
 */

+ 3 - 191
virt/kvm/irq_comm.c

@@ -151,59 +151,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
 		return -EWOULDBLOCK;
 }
 
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
-	struct kvm_kernel_irq_routing_entry route;
-
-	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
-		return -EINVAL;
-
-	route.msi.address_lo = msi->address_lo;
-	route.msi.address_hi = msi->address_hi;
-	route.msi.data = msi->data;
-
-	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
-}
-
-/*
- * Return value:
- *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
- *  = 0   Interrupt was coalesced (previous irq is still pending)
- *  > 0   Number of CPUs interrupt was delivered to
- */
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
-		bool line_status)
-{
-	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
-	struct kvm_irq_routing_table *irq_rt;
-
-	trace_kvm_set_irq(irq, level, irq_source_id);
-
-	/* Not possible to detect if the guest uses the PIC or the
-	 * IOAPIC.  So set the bit in both. The guest will ignore
-	 * writes to the unused one.
-	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link)
-			irq_set[i++] = *e;
-	rcu_read_unlock();
-
-	while(i--) {
-		int r;
-		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
-				line_status);
-		if (r < 0)
-			continue;
-
-		ret = r + ((ret < 0) ? 0 : ret);
-	}
-
-	return ret;
-}
-
 /*
  * Deliver an IRQ in an atomic context if we can, or return a failure,
  * user can retry in a process context.
@@ -241,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	return ret;
 }
 
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi;
-
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				rcu_read_unlock();
-				return true;
-			}
-
-	rcu_read_unlock();
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	rcu_read_unlock();
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-	kvm_vcpu_request_scan_ioapic(kvm);
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
-	kvm_vcpu_request_scan_ioapic(kvm);
-}
-
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
@@ -381,34 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	rcu_read_unlock();
 }
 
-void kvm_free_irq_routing(struct kvm *kvm)
-{
-	/* Called only during vm destruction. Nobody can use the pointer
-	   at this stage */
-	kfree(kvm->irq_routing);
-}
-
-static int setup_routing_entry(struct kvm_irq_routing_table *rt,
-			       struct kvm_kernel_irq_routing_entry *e,
-			       const struct kvm_irq_routing_entry *ue)
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
 	int delta;
 	unsigned max_pin;
-	struct kvm_kernel_irq_routing_entry *ei;
 
-	/*
-	 * Do not allow GSI to be mapped to the same irqchip more than once.
-	 * Allow only one to one mapping between GSI and MSI.
-	 */
-	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
-		if (ei->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
-			return r;
-
-	e->gsi = ue->gsi;
-	e->type = ue->type;
 	switch (ue->type) {
 	case KVM_IRQ_ROUTING_IRQCHIP:
 		delta = 0;
@@ -445,69 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 		goto out;
 	}
 
-	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;
 out:
 	return r;
 }
 
-
-int kvm_set_irq_routing(struct kvm *kvm,
-			const struct kvm_irq_routing_entry *ue,
-			unsigned nr,
-			unsigned flags)
-{
-	struct kvm_irq_routing_table *new, *old;
-	u32 i, j, nr_rt_entries = 0;
-	int r;
-
-	for (i = 0; i < nr; ++i) {
-		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
-			return -EINVAL;
-		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
-	}
-
-	nr_rt_entries += 1;
-
-	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
-		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
-		      GFP_KERNEL);
-
-	if (!new)
-		return -ENOMEM;
-
-	new->rt_entries = (void *)&new->map[nr_rt_entries];
-
-	new->nr_rt_entries = nr_rt_entries;
-	for (i = 0; i < 3; i++)
-		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
-			new->chip[i][j] = -1;
-
-	for (i = 0; i < nr; ++i) {
-		r = -EINVAL;
-		if (ue->flags)
-			goto out;
-		r = setup_routing_entry(new, &new->rt_entries[i], ue);
-		if (r)
-			goto out;
-		++ue;
-	}
-
-	mutex_lock(&kvm->irq_lock);
-	old = kvm->irq_routing;
-	kvm_irq_routing_update(kvm, new);
-	mutex_unlock(&kvm->irq_lock);
-
-	synchronize_rcu();
-
-	new = old;
-	r = 0;
-
-out:
-	kfree(new);
-	return r;
-}
-
 #define IOAPIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }

+ 237 - 0
virt/kvm/irqchip.c

@@ -0,0 +1,237 @@
+/*
+ * irqchip.c: Common API for in kernel interrupt controllers
+ * Copyright (c) 2007, Intel Corporation.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (c) 2013, Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * This file is derived from virt/kvm/irq_comm.c.
+ *
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *   Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <trace/events/kvm.h>
+#include "irq.h"
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi;
+
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				rcu_read_unlock();
+				return true;
+			}
+
+	rcu_read_unlock();
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	rcu_read_unlock();
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	struct kvm_kernel_irq_routing_entry route;
+
+	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+		return -EINVAL;
+
+	route.msi.address_lo = msi->address_lo;
+	route.msi.address_hi = msi->address_hi;
+	route.msi.data = msi->data;
+
+	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
+}
+
+/*
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
+	int ret = -1, i = 0;
+	struct kvm_irq_routing_table *irq_rt;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/* Not possible to detect if the guest uses the PIC or the
+	 * IOAPIC.  So set the bit in both. The guest will ignore
+	 * writes to the unused one.
+	 */
+	rcu_read_lock();
+	irq_rt = rcu_dereference(kvm->irq_routing);
+	if (irq < irq_rt->nr_rt_entries)
+		hlist_for_each_entry(e, &irq_rt->map[irq], link)
+			irq_set[i++] = *e;
+	rcu_read_unlock();
+
+	while (i--) {
+		int r;
+		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
+				   line_status);
+		if (r < 0)
+			continue;
+
+		ret = r + ((ret < 0) ? 0 : ret);
+	}
+
+	return ret;
+}
+
+void kvm_free_irq_routing(struct kvm *kvm)
+{
+	/* Called only during vm destruction. Nobody can use the pointer
+	   at this stage */
+	kfree(kvm->irq_routing);
+}
+
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+			       struct kvm_kernel_irq_routing_entry *e,
+			       const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+	struct kvm_kernel_irq_routing_entry *ei;
+
+	/*
+	 * Do not allow GSI to be mapped to the same irqchip more than once.
+	 * Allow only one to one mapping between GSI and MSI.
+	 */
+	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
+		if (ei->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
+			return r;
+
+	e->gsi = ue->gsi;
+	e->type = ue->type;
+	r = kvm_set_routing_entry(rt, e, ue);
+	if (r)
+		goto out;
+
+	hlist_add_head(&e->link, &rt->map[e->gsi]);
+	r = 0;
+out:
+	return r;
+}
+
+int kvm_set_irq_routing(struct kvm *kvm,
+			const struct kvm_irq_routing_entry *ue,
+			unsigned nr,
+			unsigned flags)
+{
+	struct kvm_irq_routing_table *new, *old;
+	u32 i, j, nr_rt_entries = 0;
+	int r;
+
+	for (i = 0; i < nr; ++i) {
+		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+			return -EINVAL;
+		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
+	}
+
+	nr_rt_entries += 1;
+
+	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+		      GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	new->rt_entries = (void *)&new->map[nr_rt_entries];
+
+	new->nr_rt_entries = nr_rt_entries;
+	for (i = 0; i < KVM_NR_IRQCHIPS; i++)
+		for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
+			new->chip[i][j] = -1;
+
+	for (i = 0; i < nr; ++i) {
+		r = -EINVAL;
+		if (ue->flags)
+			goto out;
+		r = setup_routing_entry(new, &new->rt_entries[i], ue);
+		if (r)
+			goto out;
+		++ue;
+	}
+
+	mutex_lock(&kvm->irq_lock);
+	old = kvm->irq_routing;
+	kvm_irq_routing_update(kvm, new);
+	mutex_unlock(&kvm->irq_lock);
+
+	synchronize_rcu();
+
+	new = old;
+	r = 0;
+
+out:
+	kfree(new);
+	return r;
+}

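kvm_set_irq_routing() is now reachable from any arch that selects HAVE_KVM_IRQ_ROUTING, through the KVM_SET_GSI_ROUTING handler moved into kvm_vm_ioctl() below. A hedged userspace sketch routing GSI 0 to pin 0 of in-kernel irqchip 0 (vm_fd assumed; struct layout relies on struct kvm_irq_routing's flexible entries[] array following the header directly):

    struct {
    	struct kvm_irq_routing hdr;
    	struct kvm_irq_routing_entry entries[1];
    } route = {
    	.hdr = { .nr = 1 },
    	.entries[0] = {
    		.gsi = 0,
    		.type = KVM_IRQ_ROUTING_IRQCHIP,
    		.u.irqchip = { .irqchip = 0, .pin = 0 },
    	},
    };

    if (ioctl(vm_fd, KVM_SET_GSI_ROUTING, &route) < 0)
    	/* handle error */;
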
+ 172 - 1
virt/kvm/kvm_main.c

@@ -504,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
+	INIT_LIST_HEAD(&kvm->devices);
 
 	r = kvm_init_mmu_notifier(kvm);
 	if (r)
@@ -581,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm)
 	kfree(kvm->memslots);
 }
 
+static void kvm_destroy_devices(struct kvm *kvm)
+{
+	struct list_head *node, *tmp;
+
+	list_for_each_safe(node, tmp, &kvm->devices) {
+		struct kvm_device *dev =
+			list_entry(node, struct kvm_device, vm_node);
+
+		list_del(node);
+		dev->ops->destroy(dev);
+	}
+}
+
 static void kvm_destroy_vm(struct kvm *kvm)
 {
 	int i;
@@ -600,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_flush_shadow_all(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	kvm_destroy_devices(kvm);
 	kvm_free_physmem(kvm);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
@@ -2159,6 +2174,111 @@ out:
 }
 #endif
 
+static int kvm_device_ioctl_attr(struct kvm_device *dev,
+				 int (*accessor)(struct kvm_device *dev,
+						 struct kvm_device_attr *attr),
+				 unsigned long arg)
+{
+	struct kvm_device_attr attr;
+
+	if (!accessor)
+		return -EPERM;
+
+	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+		return -EFAULT;
+
+	return accessor(dev, &attr);
+}
+
+static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
+			     unsigned long arg)
+{
+	struct kvm_device *dev = filp->private_data;
+
+	switch (ioctl) {
+	case KVM_SET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
+	case KVM_GET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
+	case KVM_HAS_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
+	default:
+		if (dev->ops->ioctl)
+			return dev->ops->ioctl(dev, ioctl, arg);
+
+		return -ENOTTY;
+	}
+}
+
+static int kvm_device_release(struct inode *inode, struct file *filp)
+{
+	struct kvm_device *dev = filp->private_data;
+	struct kvm *kvm = dev->kvm;
+
+	kvm_put_kvm(kvm);
+	return 0;
+}
+
+static const struct file_operations kvm_device_fops = {
+	.unlocked_ioctl = kvm_device_ioctl,
+	.release = kvm_device_release,
+};
+
+struct kvm_device *kvm_device_from_filp(struct file *filp)
+{
+	if (filp->f_op != &kvm_device_fops)
+		return NULL;
+
+	return filp->private_data;
+}
+
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	struct kvm_device_ops *ops = NULL;
+	struct kvm_device *dev;
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int ret;
+
+	switch (cd->type) {
+#ifdef CONFIG_KVM_MPIC
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		ops = &kvm_mpic_ops;
+		break;
+#endif
+	default:
+		return -ENODEV;
+	}
+
+	if (test)
+		return 0;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->ops = ops;
+	dev->kvm = kvm;
+
+	ret = ops->create(dev, cd->type);
+	if (ret < 0) {
+		kfree(dev);
+		return ret;
+	}
+
+	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+	if (ret < 0) {
+		ops->destroy(dev);
+		return ret;
+	}
+
+	list_add(&dev->vm_node, &kvm->devices);
+	kvm_get_kvm(kvm);
+	cd->fd = ret;
+	return 0;
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2274,6 +2394,54 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_SET_GSI_ROUTING: {
+		struct kvm_irq_routing routing;
+		struct kvm_irq_routing __user *urouting;
+		struct kvm_irq_routing_entry *entries;
+
+		r = -EFAULT;
+		if (copy_from_user(&routing, argp, sizeof(routing)))
+			goto out;
+		r = -EINVAL;
+		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+			goto out;
+		if (routing.flags)
+			goto out;
+		r = -ENOMEM;
+		entries = vmalloc(routing.nr * sizeof(*entries));
+		if (!entries)
+			goto out;
+		r = -EFAULT;
+		urouting = argp;
+		if (copy_from_user(entries, urouting->entries,
+				   routing.nr * sizeof(*entries)))
+			goto out_free_irq_routing;
+		r = kvm_set_irq_routing(kvm, entries, routing.nr,
+					routing.flags);
+	out_free_irq_routing:
+		vfree(entries);
+		break;
+	}
+#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
+	case KVM_CREATE_DEVICE: {
+		struct kvm_create_device cd;
+
+		r = -EFAULT;
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			goto out;
+
+		r = kvm_ioctl_create_device(kvm, &cd);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &cd, sizeof(cd)))
+			goto out;
+
+		r = 0;
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
@@ -2402,9 +2570,12 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	case KVM_CAP_INTERNAL_ERROR_DATA:
 #ifdef CONFIG_HAVE_KVM_MSI
 	case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQFD_RESAMPLE:
 #endif
 		return 1;
-#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	case KVM_CAP_IRQ_ROUTING:
 		return KVM_MAX_IRQ_ROUTES;
 #endif
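
On hosts with CONFIG_HAVE_KVM_IRQ_ROUTING, KVM_CHECK_EXTENSION on KVM_CAP_IRQ_ROUTING keeps its established meaning: the return value is the route-table capacity (KVM_MAX_IRQ_ROUTES), not a boolean. A one-line usage sketch, with kvm_fd assumed to be an open /dev/kvm descriptor:

    int max_routes = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQ_ROUTING);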