9 years ago · ad53e35ae5
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -183,15 +183,15 @@ struct kvm_vcpu_arch {
 
				 };
			
 
				 
			
 
				 struct kvm_vm_stat {
			
 
				-	u32 remote_tlb_flush;
			
 
				+	ulong remote_tlb_flush;
			
 
				 };
			
 
				 
			
 
				 struct kvm_vcpu_stat {
			
 
				-	u32 halt_successful_poll;
			
 
				-	u32 halt_attempted_poll;
			
 
				-	u32 halt_poll_invalid;
			
 
				-	u32 halt_wakeup;
			
 
				-	u32 hvc_exit_stat;
			
 
				+	u64 halt_successful_poll;
			
 
				+	u64 halt_attempted_poll;
			
 
				+	u64 halt_poll_invalid;
			
 
				+	u64 halt_wakeup;
			
 
				+	u64 hvc_exit_stat;
			
 
				 	u64 wfe_exit_stat;
			
 
				 	u64 wfi_exit_stat;
			
 
				 	u64 mmio_exit_user;
			
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -290,15 +290,15 @@ struct kvm_vcpu_arch {
 
				 #endif
			
 
				 
			
 
				 struct kvm_vm_stat {
			
 
				-	u32 remote_tlb_flush;
			
 
				+	ulong remote_tlb_flush;
			
 
				 };
			
 
				 
			
 
				 struct kvm_vcpu_stat {
			
 
				-	u32 halt_successful_poll;
			
 
				-	u32 halt_attempted_poll;
			
 
				-	u32 halt_poll_invalid;
			
 
				-	u32 halt_wakeup;
			
 
				-	u32 hvc_exit_stat;
			
 
				+	u64 halt_successful_poll;
			
 
				+	u64 halt_attempted_poll;
			
 
				+	u64 halt_poll_invalid;
			
 
				+	u64 halt_wakeup;
			
 
				+	u64 hvc_exit_stat;
			
 
				 	u64 wfe_exit_stat;
			
 
				 	u64 wfi_exit_stat;
			
 
				 	u64 mmio_exit_user;
			
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -110,32 +110,32 @@
 
				 extern atomic_t kvm_mips_instance;
			
 
				 
			
 
				 struct kvm_vm_stat {
			
 
				-	u32 remote_tlb_flush;
			
 
				+	ulong remote_tlb_flush;
			
 
				 };
			
 
				 
			
 
				 struct kvm_vcpu_stat {
			
 
				-	u32 wait_exits;
			
 
				-	u32 cache_exits;
			
 
				-	u32 signal_exits;
			
 
				-	u32 int_exits;
			
 
				-	u32 cop_unusable_exits;
			
 
				-	u32 tlbmod_exits;
			
 
				-	u32 tlbmiss_ld_exits;
			
 
				-	u32 tlbmiss_st_exits;
			
 
				-	u32 addrerr_st_exits;
			
 
				-	u32 addrerr_ld_exits;
			
 
				-	u32 syscall_exits;
			
 
				-	u32 resvd_inst_exits;
			
 
				-	u32 break_inst_exits;
			
 
				-	u32 trap_inst_exits;
			
 
				-	u32 msa_fpe_exits;
			
 
				-	u32 fpe_exits;
			
 
				-	u32 msa_disabled_exits;
			
 
				-	u32 flush_dcache_exits;
			
 
				-	u32 halt_successful_poll;
			
 
				-	u32 halt_attempted_poll;
			
 
				-	u32 halt_poll_invalid;
			
 
				-	u32 halt_wakeup;
			
 
				+	u64 wait_exits;
			
 
				+	u64 cache_exits;
			
 
				+	u64 signal_exits;
			
 
				+	u64 int_exits;
			
 
				+	u64 cop_unusable_exits;
			
 
				+	u64 tlbmod_exits;
			
 
				+	u64 tlbmiss_ld_exits;
			
 
				+	u64 tlbmiss_st_exits;
			
 
				+	u64 addrerr_st_exits;
			
 
				+	u64 addrerr_ld_exits;
			
 
				+	u64 syscall_exits;
			
 
				+	u64 resvd_inst_exits;
			
 
				+	u64 break_inst_exits;
			
 
				+	u64 trap_inst_exits;
			
 
				+	u64 msa_fpe_exits;
			
 
				+	u64 fpe_exits;
			
 
				+	u64 msa_disabled_exits;
			
 
				+	u64 flush_dcache_exits;
			
 
				+	u64 halt_successful_poll;
			
 
				+	u64 halt_attempted_poll;
			
 
				+	u64 halt_poll_invalid;
			
 
				+	u64 halt_wakeup;
			
 
				 };
			
 
				 
			
 
				 struct kvm_arch_memory_slot {
			
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -244,6 +244,43 @@ static inline int segment_shift(int ssize)
 
				 	return SID_SHIFT_1T;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * This array is indexed by the LP field of the HPTE second dword.
			
 
				+ * Since this field may contain some RPN bits, some entries are
			
 
				+ * replicated so that we get the same value irrespective of RPN.
			
 
				+ * The top 4 bits are the page size index (MMU_PAGE_*) for the
			
 
				+ * actual page size, the bottom 4 bits are the base page size.
			
 
				+ */
			
 
				+extern u8 hpte_page_sizes[1 << LP_BITS];
			
 
				+
			
 
				+static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
			
 
				+					     bool is_base_size)
			
 
				+{
			
 
				+	unsigned int i, lp;
			
 
				+
			
 
				+	if (!(h & HPTE_V_LARGE))
			
 
				+		return 1ul << 12;
			
 
				+
			
 
				+	/* Look at the 8 bit LP value */
			
 
				+	lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
			
 
				+	i = hpte_page_sizes[lp];
			
 
				+	if (!i)
			
 
				+		return 0;
			
 
				+	if (!is_base_size)
			
 
				+		i >>= 4;
			
 
				+	return 1ul << mmu_psize_defs[i & 0xf].shift;
			
 
				+}
			
 
				+
			
 
				+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
			
 
				+{
			
 
				+	return __hpte_page_size(h, l, false);	/* !is_base_size: actual page size */
			
 
				+}
			
 
				+
			
 
				+static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
			
 
				+{
			
 
				+	return __hpte_page_size(h, l, true);	/* is_base_size: base page size */
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * The current system page and segment sizes
			
 
				  */
			
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -21,7 +21,7 @@
 
				 #ifndef __ASM_PPC64_HMI_H__
			
 
				 #define __ASM_PPC64_HMI_H__
			
 
				 
			
 
				-#ifdef CONFIG_PPC_BOOK3S_64
			
 
				+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
			
 
				 
			
 
				 #define	CORE_TB_RESYNC_REQ_BIT		63
			
 
				 #define MAX_SUBCORE_PER_CORE		4
			
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
 
				 #endif
			
 
				 #endif /* __powerpc64__ */
			
 
				 
			
 
				+
			
 
				+/*
			
 
				+ * Simple Cache inhibited accessors
			
 
				+ * Unlike the DEF_MMIO_* macros, these don't include any h/w memory
			
 
				+ * barriers, callers need to manage memory barriers on their own.
			
 
				+ * These can only be used in hypervisor real mode.
			
 
				+ */
			
 
				+
			
 
				+static inline u32 _lwzcix(unsigned long addr)
			
 
				+{
			
 
				+	u32 ret;
			
 
				+
			
 
				+	__asm__ __volatile__("lwzcix %0,0, %1"
			
 
				+			     : "=r" (ret) : "r" (addr) : "memory");
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static inline void _stbcix(u64 addr, u8 val)
			
 
				+{
			
 
				+	__asm__ __volatile__("stbcix %0,0,%1"
			
 
				+		: : "r" (val), "r" (addr) : "memory");
			
 
				+}
			
 
				+
			
 
				+static inline void _stwcix(u64 addr, u32 val)
			
 
				+{
			
 
				+	__asm__ __volatile__("stwcix %0,0,%1"
			
 
				+		: : "r" (val), "r" (addr) : "memory");
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Low level IO stream instructions are defined out of line for now
			
 
				  */
			
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -105,6 +105,15 @@
 
				 #define BOOK3S_INTERRUPT_FAC_UNAVAIL	0xf60
			
 
				 #define BOOK3S_INTERRUPT_H_FAC_UNAVAIL	0xf80
			
 
				 
			
 
				+/* book3s_hv */
			
 
				+
			
 
				+/*
			
 
				+ * Special trap used to indicate to host that this is a
			
 
				+ * passthrough interrupt that could not be handled
			
 
				+ * completely in the guest.
			
 
				+ */
			
 
				+#define BOOK3S_INTERRUPT_HV_RM_HARD	0x5555
			
 
				+
			
 
				 #define BOOK3S_IRQPRIO_SYSTEM_RESET		0
			
 
				 #define BOOK3S_IRQPRIO_DATA_SEGMENT		1
			
 
				 #define BOOK3S_IRQPRIO_INST_SEGMENT		2
			
@@ -136,6 +145,7 @@
 
				 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
			
 
				 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
			
 
				 #define RESUME_FLAG_ARCH1	(1<<2)
			
 
				+#define RESUME_FLAG_ARCH2	(1<<3)
			
 
				 
			
 
				 #define RESUME_GUEST            0
			
 
				 #define RESUME_GUEST_NV         RESUME_FLAG_NV
			
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -69,6 +69,42 @@ struct hpte_cache {
 
				 	int pagesize;
			
 
				 };
			
 
				 
			
 
				+/*
			
 
				+ * Struct for a virtual core.
			
 
				+ * Note: entry_exit_map combines a bitmap of threads that have entered
			
 
				+ * in the bottom 8 bits and a bitmap of threads that have exited in the
			
 
				+ * next 8 bits.  This is so that we can atomically set the entry bit
			
 
				+ * iff the exit map is 0 without taking a lock.
			
 
				+ */
			
 
				+struct kvmppc_vcore {
			
 
				+	int n_runnable;
			
 
				+	int num_threads;
			
 
				+	int entry_exit_map;
			
 
				+	int napping_threads;
			
 
				+	int first_vcpuid;
			
 
				+	u16 pcpu;
			
 
				+	u16 last_cpu;
			
 
				+	u8 vcore_state;
			
 
				+	u8 in_guest;
			
 
				+	struct kvmppc_vcore *master_vcore;
			
 
				+	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
			
 
				+	struct list_head preempt_list;
			
 
				+	spinlock_t lock;
			
 
				+	struct swait_queue_head wq;
			
 
				+	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
			
 
				+	u64 stolen_tb;
			
 
				+	u64 preempt_tb;
			
 
				+	struct kvm_vcpu *runner;
			
 
				+	struct kvm *kvm;
			
 
				+	u64 tb_offset;		/* guest timebase - host timebase */
			
 
				+	ulong lpcr;
			
 
				+	u32 arch_compat;
			
 
				+	ulong pcr;
			
 
				+	ulong dpdes;		/* doorbell state (POWER8) */
			
 
				+	ulong conferring_threads;
			
 
				+	unsigned int halt_poll_ns;
			
 
				+};
			
 
				+
			
 
				 struct kvmppc_vcpu_book3s {
			
 
				 	struct kvmppc_sid_map sid_map[SID_MAP_NUM];
			
 
				 	struct {
			
@@ -191,6 +227,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 
				 				 struct kvm_vcpu *vcpu);
			
 
				 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
			
 
				 				   struct kvmppc_book3s_shadow_vcpu *svcpu);
			
 
				+extern int kvm_irq_bypass;
			
 
				 
			
 
				 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
			
 
				 {
			
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -20,6 +20,8 @@
 
				 #ifndef __ASM_KVM_BOOK3S_64_H__
			
 
				 #define __ASM_KVM_BOOK3S_64_H__
			
 
				 
			
 
				+#include <asm/book3s/64/mmu-hash.h>
			
 
				+
			
 
				 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
			
 
				 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
			
 
				 {
			
@@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
 
				 	hpte[0] = cpu_to_be64(hpte_v);
			
 
				 }
			
 
				 
			
 
				-static inline int __hpte_actual_psize(unsigned int lp, int psize)
			
 
				-{
			
 
				-	int i, shift;
			
 
				-	unsigned int mask;
			
 
				-
			
 
				-	/* start from 1 ignoring MMU_PAGE_4K */
			
 
				-	for (i = 1; i < MMU_PAGE_COUNT; i++) {
			
 
				-
			
 
				-		/* invalid penc */
			
 
				-		if (mmu_psize_defs[psize].penc[i] == -1)
			
 
				-			continue;
			
 
				-		/*
			
 
				-		 * encoding bits per actual page size
			
 
				-		 *        PTE LP     actual page size
			
 
				-		 *    rrrr rrrz		>=8KB
			
 
				-		 *    rrrr rrzz		>=16KB
			
 
				-		 *    rrrr rzzz		>=32KB
			
 
				-		 *    rrrr zzzz		>=64KB
			
 
				-		 * .......
			
 
				-		 */
			
 
				-		shift = mmu_psize_defs[i].shift - LP_SHIFT;
			
 
				-		if (shift > LP_BITS)
			
 
				-			shift = LP_BITS;
			
 
				-		mask = (1 << shift) - 1;
			
 
				-		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			
 
				-			return i;
			
 
				-	}
			
 
				-	return -1;
			
 
				-}
			
 
				-
			
 
				 static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
			
 
				 					     unsigned long pte_index)
			
 
				 {
			
 
				-	int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
			
 
				+	int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
			
 
				 	unsigned int penc;
			
 
				 	unsigned long rb = 0, va_low, sllp;
			
 
				 	unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
			
 
				 
			
 
				 	if (v & HPTE_V_LARGE) {
			
 
				-		for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
			
 
				-
			
 
				-			/* valid entries have a shift value */
			
 
				-			if (!mmu_psize_defs[b_psize].shift)
			
 
				-				continue;
			
 
				-
			
 
				-			a_psize = __hpte_actual_psize(lp, b_psize);
			
 
				-			if (a_psize != -1)
			
 
				-				break;
			
 
				-		}
			
 
				+		i = hpte_page_sizes[lp];
			
 
				+		b_psize = i & 0xf;
			
 
				+		a_psize = i >> 4;
			
 
				 	}
			
 
				+
			
 
				 	/*
			
 
				 	 * Ignore the top 14 bits of va
			
 
				 	 * v have top two bits covering segment size, hence move
			
@@ -215,45 +181,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 
				 	return rb;
			
 
				 }
			
 
				 
			
 
				-static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
			
 
				-					     bool is_base_size)
			
 
				-{
			
 
				-
			
 
				-	int size, a_psize;
			
 
				-	/* Look at the 8 bit LP value */
			
 
				-	unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
			
 
				-
			
 
				-	/* only handle 4k, 64k and 16M pages for now */
			
 
				-	if (!(h & HPTE_V_LARGE))
			
 
				-		return 1ul << 12;
			
 
				-	else {
			
 
				-		for (size = 0; size < MMU_PAGE_COUNT; size++) {
			
 
				-			/* valid entries have a shift value */
			
 
				-			if (!mmu_psize_defs[size].shift)
			
 
				-				continue;
			
 
				-
			
 
				-			a_psize = __hpte_actual_psize(lp, size);
			
 
				-			if (a_psize != -1) {
			
 
				-				if (is_base_size)
			
 
				-					return 1ul << mmu_psize_defs[size].shift;
			
 
				-				return 1ul << mmu_psize_defs[a_psize].shift;
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-	}
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				-static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
			
 
				-{
			
 
				-	return __hpte_page_size(h, l, 0);
			
 
				-}
			
 
				-
			
 
				-static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
			
 
				-{
			
 
				-	return __hpte_page_size(h, l, 1);
			
 
				-}
			
 
				-
			
 
				 static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
			
 
				 {
			
 
				 	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
			
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -43,6 +43,8 @@
 
				 #include <asm/cputhreads.h>
			
 
				 #define KVM_MAX_VCPU_ID                (threads_per_subcore * KVM_MAX_VCORES)
			
 
				 
			
 
				+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
			
 
				+
			
 
				 #ifdef CONFIG_KVM_MMIO
			
 
				 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
			
 
				 #endif
			
@@ -95,42 +97,49 @@ struct kvmppc_vcpu_book3s;
 
				 struct kvmppc_book3s_shadow_vcpu;
			
 
				 
			
 
				 struct kvm_vm_stat {
			
 
				-	u32 remote_tlb_flush;
			
 
				+	ulong remote_tlb_flush;
			
 
				 };
			
 
				 
			
 
				 struct kvm_vcpu_stat {
			
 
				-	u32 sum_exits;
			
 
				-	u32 mmio_exits;
			
 
				-	u32 signal_exits;
			
 
				-	u32 light_exits;
			
 
				+	u64 sum_exits;
			
 
				+	u64 mmio_exits;
			
 
				+	u64 signal_exits;
			
 
				+	u64 light_exits;
			
 
				 	/* Account for special types of light exits: */
			
 
				-	u32 itlb_real_miss_exits;
			
 
				-	u32 itlb_virt_miss_exits;
			
 
				-	u32 dtlb_real_miss_exits;
			
 
				-	u32 dtlb_virt_miss_exits;
			
 
				-	u32 syscall_exits;
			
 
				-	u32 isi_exits;
			
 
				-	u32 dsi_exits;
			
 
				-	u32 emulated_inst_exits;
			
 
				-	u32 dec_exits;
			
 
				-	u32 ext_intr_exits;
			
 
				-	u32 halt_successful_poll;
			
 
				-	u32 halt_attempted_poll;
			
 
				-	u32 halt_poll_invalid;
			
 
				-	u32 halt_wakeup;
			
 
				-	u32 dbell_exits;
			
 
				-	u32 gdbell_exits;
			
 
				-	u32 ld;
			
 
				-	u32 st;
			
 
				+	u64 itlb_real_miss_exits;
			
 
				+	u64 itlb_virt_miss_exits;
			
 
				+	u64 dtlb_real_miss_exits;
			
 
				+	u64 dtlb_virt_miss_exits;
			
 
				+	u64 syscall_exits;
			
 
				+	u64 isi_exits;
			
 
				+	u64 dsi_exits;
			
 
				+	u64 emulated_inst_exits;
			
 
				+	u64 dec_exits;
			
 
				+	u64 ext_intr_exits;
			
 
				+	u64 halt_poll_success_ns;
			
 
				+	u64 halt_poll_fail_ns;
			
 
				+	u64 halt_wait_ns;
			
 
				+	u64 halt_successful_poll;
			
 
				+	u64 halt_attempted_poll;
			
 
				+	u64 halt_successful_wait;
			
 
				+	u64 halt_poll_invalid;
			
 
				+	u64 halt_wakeup;
			
 
				+	u64 dbell_exits;
			
 
				+	u64 gdbell_exits;
			
 
				+	u64 ld;
			
 
				+	u64 st;
			
 
				 #ifdef CONFIG_PPC_BOOK3S
			
 
				-	u32 pf_storage;
			
 
				-	u32 pf_instruc;
			
 
				-	u32 sp_storage;
			
 
				-	u32 sp_instruc;
			
 
				-	u32 queue_intr;
			
 
				-	u32 ld_slow;
			
 
				-	u32 st_slow;
			
 
				+	u64 pf_storage;
			
 
				+	u64 pf_instruc;
			
 
				+	u64 sp_storage;
			
 
				+	u64 sp_instruc;
			
 
				+	u64 queue_intr;
			
 
				+	u64 ld_slow;
			
 
				+	u64 st_slow;
			
 
				 #endif
			
 
				+	u64 pthru_all;
			
 
				+	u64 pthru_host;
			
 
				+	u64 pthru_bad_aff;
			
 
				 };
			
 
				 
			
 
				 enum kvm_exit_types {
			
@@ -197,6 +206,8 @@ struct kvmppc_spapr_tce_table {
 
				 struct kvmppc_xics;
			
 
				 struct kvmppc_icp;
			
 
				 
			
 
				+struct kvmppc_passthru_irqmap;
			
 
				+
			
 
				 /*
			
 
				  * The reverse mapping array has one entry for each HPTE,
			
 
				  * which stores the guest's view of the second word of the HPTE
			
@@ -267,6 +278,7 @@ struct kvm_arch {
 
				 #endif
			
 
				 #ifdef CONFIG_KVM_XICS
			
 
				 	struct kvmppc_xics *xics;
			
 
				+	struct kvmppc_passthru_irqmap *pimap;
			
 
				 #endif
			
 
				 	struct kvmppc_ops *kvm_ops;
			
 
				 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
			
@@ -275,41 +287,6 @@ struct kvm_arch {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-/*
			
 
				- * Struct for a virtual core.
			
 
				- * Note: entry_exit_map combines a bitmap of threads that have entered
			
 
				- * in the bottom 8 bits and a bitmap of threads that have exited in the
			
 
				- * next 8 bits.  This is so that we can atomically set the entry bit
			
 
				- * iff the exit map is 0 without taking a lock.
			
 
				- */
			
 
				-struct kvmppc_vcore {
			
 
				-	int n_runnable;
			
 
				-	int num_threads;
			
 
				-	int entry_exit_map;
			
 
				-	int napping_threads;
			
 
				-	int first_vcpuid;
			
 
				-	u16 pcpu;
			
 
				-	u16 last_cpu;
			
 
				-	u8 vcore_state;
			
 
				-	u8 in_guest;
			
 
				-	struct kvmppc_vcore *master_vcore;
			
 
				-	struct list_head runnable_threads;
			
 
				-	struct list_head preempt_list;
			
 
				-	spinlock_t lock;
			
 
				-	struct swait_queue_head wq;
			
 
				-	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
			
 
				-	u64 stolen_tb;
			
 
				-	u64 preempt_tb;
			
 
				-	struct kvm_vcpu *runner;
			
 
				-	struct kvm *kvm;
			
 
				-	u64 tb_offset;		/* guest timebase - host timebase */
			
 
				-	ulong lpcr;
			
 
				-	u32 arch_compat;
			
 
				-	ulong pcr;
			
 
				-	ulong dpdes;		/* doorbell state (POWER8) */
			
 
				-	ulong conferring_threads;
			
 
				-};
			
 
				-
			
 
				 #define VCORE_ENTRY_MAP(vc)	((vc)->entry_exit_map & 0xff)
			
 
				 #define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
			
 
				 #define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
			
@@ -329,6 +306,7 @@ struct kvmppc_vcore {
 
				 #define VCORE_SLEEPING	3
			
 
				 #define VCORE_RUNNING	4
			
 
				 #define VCORE_EXITING	5
			
 
				+#define VCORE_POLLING	6
			
 
				 
			
 
				 /*
			
 
				  * Struct used to manage memory for a virtual processor area
			
@@ -397,6 +375,20 @@ struct kvmhv_tb_accumulator {
 
				 	u64	tb_max;		/* max time */
			
 
				 };
			
 
				 
			
 
				+#ifdef CONFIG_PPC_BOOK3S_64
			
 
				+struct kvmppc_irq_map {
			
 
				+	u32	r_hwirq;
			
 
				+	u32	v_hwirq;
			
 
				+	struct irq_desc *desc;
			
 
				+};
			
 
				+
			
 
				+#define	KVMPPC_PIRQ_MAPPED	1024
			
 
				+struct kvmppc_passthru_irqmap {
			
 
				+	int n_mapped;
			
 
				+	struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				 # ifdef CONFIG_PPC_FSL_BOOK3E
			
 
				 #define KVMPPC_BOOKE_IAC_NUM	2
			
 
				 #define KVMPPC_BOOKE_DAC_NUM	2
			
@@ -668,7 +660,6 @@ struct kvm_vcpu_arch {
 
				 	long pgfault_index;
			
 
				 	unsigned long pgfault_hpte[2];
			
 
				 
			
 
				-	struct list_head run_list;
			
 
				 	struct task_struct *run_task;
			
 
				 	struct kvm_run *kvm_run;
			
 
				 
			
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -287,6 +287,10 @@ struct kvmppc_ops {
 
				 	long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
			
 
				 			      unsigned long arg);
			
 
				 	int (*hcall_implemented)(unsigned long hcall);
			
 
				+	int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
			
 
				+				       struct irq_bypass_producer *);
			
 
				+	void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
			
 
				+					struct irq_bypass_producer *);
			
 
				 };
			
 
				 
			
 
				 extern struct kvmppc_ops *kvmppc_hv_ops;
			
@@ -453,8 +457,19 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 
				 {
			
 
				 	return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
			
 
				 }
			
 
				+
			
 
				+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
			
 
				+				struct kvm *kvm)
			
 
				+{
			
 
				+	if (kvm && kvm_irq_bypass)
			
 
				+		return kvm->arch.pimap;
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				 extern void kvmppc_alloc_host_rm_ops(void);
			
 
				 extern void kvmppc_free_host_rm_ops(void);
			
 
				+extern void kvmppc_free_pimap(struct kvm *kvm);
			
 
				+extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
			
 
				 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
			
 
				 extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
			
 
				 extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
			
@@ -464,10 +479,23 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 
				 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
			
 
				 			struct kvm_vcpu *vcpu, u32 cpu);
			
 
				 extern void kvmppc_xics_ipi_action(void);
			
 
				+extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
			
 
				+				   unsigned long host_irq);
			
 
				+extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
			
 
				+				   unsigned long host_irq);
			
 
				+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
			
 
				+				 struct kvmppc_irq_map *irq_map,
			
 
				+				 struct kvmppc_passthru_irqmap *pimap);
			
 
				 extern int h_ipi_redirect;
			
 
				 #else
			
 
				+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
			
 
				+				struct kvm *kvm)
			
 
				+	{ return NULL; }
			
 
				 static inline void kvmppc_alloc_host_rm_ops(void) {};
			
 
				 static inline void kvmppc_free_host_rm_ops(void) {};
			
 
				+static inline void kvmppc_free_pimap(struct kvm *kvm) {};
			
 
				+static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
			
 
				+	{ return 0; }
			
 
				 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
			
 
				 	{ return 0; }
			
 
				 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
			
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -271,6 +271,7 @@ static inline bool early_radix_enabled(void)
 
				 #define MMU_PAGE_16G	13
			
 
				 #define MMU_PAGE_64G	14
			
 
				 
			
 
				+/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
			
 
				 #define MMU_PAGE_COUNT	15
			
 
				 
			
 
				 #ifdef CONFIG_PPC_BOOK3S_64
			
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -67,6 +67,7 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
 
				 int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
			
 
				 				   uint64_t offset, uint32_t data);
			
 
				 int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
			
 
				+int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
			
 
				 int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
			
 
				 int64_t opal_register_exception_handler(uint64_t opal_exception,
			
 
				 					uint64_t handler_address,
			
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -183,11 +183,6 @@ struct paca_struct {
 
				 	 */
			
 
				 	u16 in_mce;
			
 
				 	u8 hmi_event_available;		 /* HMI event is available */
			
 
				-	/*
			
 
				-	 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
			
 
				-	 * more details
			
 
				-	 */
			
 
				-	struct sibling_subcore_state *sibling_subcore_state;
			
 
				 #endif
			
 
				 
			
 
				 	/* Stuff for accurate time accounting */
			
@@ -202,6 +197,13 @@ struct paca_struct {
 
				 	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
			
 
				 #endif
			
 
				 	struct kvmppc_host_state kvm_hstate;
			
 
				+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
			
 
				+	/*
			
 
				+	 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
			
 
				+	 * more details
			
 
				+	 */
			
 
				+	struct sibling_subcore_state *sibling_subcore_state;
			
 
				+#endif
			
 
				 #endif
			
 
				 };
			
 
				 
			
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -12,6 +12,7 @@
 
				 
			
 
				 #include <linux/pci.h>
			
 
				 #include <linux/pci_hotplug.h>
			
 
				+#include <linux/irq.h>
			
 
				 #include <misc/cxl-base.h>
			
 
				 #include <asm/opal-api.h>
			
 
				 
			
@@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
 
				 void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
			
 
				 int pnv_cxl_get_irq_count(struct pci_dev *dev);
			
 
				 struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
			
 
				+int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq);
			
 
				+bool is_pnv_opal_msi(struct irq_chip *chip);
			
 
				 
			
 
				 #ifdef CONFIG_CXL_BASE
			
 
				 int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
			
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32)		+= vdso32/
 
				 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
			
 
				 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
			
 
				 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
			
 
				-obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o hmi.o
			
 
				+obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
			
 
				 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
			
 
				 obj-$(CONFIG_PPC64)		+= vdso64/
			
 
				 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
			
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,6 +22,9 @@ config KVM
 
				 	select ANON_INODES
			
 
				 	select HAVE_KVM_EVENTFD
			
 
				 	select SRCU
			
 
				+	select KVM_VFIO
			
 
				+	select IRQ_BYPASS_MANAGER
			
 
				+	select HAVE_KVM_IRQ_BYPASS
			
 
				 
			
 
				 config KVM_BOOK3S_HANDLER
			
 
				 	bool
			
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -7,16 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
				 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
			
 
				 KVM := ../../../virt/kvm
			
 
				 
			
 
				-common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
			
 
				-		$(KVM)/eventfd.o
			
 
				+common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
			
 
				 common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
			
 
				+common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
			
 
				 
			
 
				 CFLAGS_e500_mmu.o := -I.
			
 
				 CFLAGS_e500_mmu_host.o := -I.
			
 
				 CFLAGS_emulate.o  := -I.
			
 
				 CFLAGS_emulate_loadstore.o  := -I.
			
 
				 
			
 
				-common-objs-y += powerpc.o emulate.o emulate_loadstore.o
			
 
				+common-objs-y += powerpc.o emulate_loadstore.o
			
 
				 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
			
 
				 obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
			
 
				 
			
@@ -24,6 +24,7 @@ AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
 
				 
			
 
				 kvm-e500-objs := \
			
 
				 	$(common-objs-y) \
			
 
				+	emulate.o \
			
 
				 	booke.o \
			
 
				 	booke_emulate.o \
			
 
				 	booke_interrupts.o \
			
@@ -35,6 +36,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
 
				 
			
 
				 kvm-e500mc-objs := \
			
 
				 	$(common-objs-y) \
			
 
				+	emulate.o \
			
 
				 	booke.o \
			
 
				 	booke_emulate.o \
			
 
				 	bookehv_interrupts.o \
			
@@ -61,9 +63,6 @@ kvm-pr-y := \
 
				 	book3s_32_mmu.o
			
 
				 
			
 
				 ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
			
 
				-kvm-book3s_64-module-objs := \
			
 
				-	$(KVM)/coalesced_mmio.o
			
 
				-
			
 
				 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
			
 
				 	book3s_rmhandlers.o
			
 
				 endif
			
@@ -78,6 +77,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
 
				 
			
 
				 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
			
 
				 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
			
 
				+	book3s_hv_hmi.o \
			
 
				 	book3s_hv_rmhandlers.o \
			
 
				 	book3s_hv_rm_mmu.o \
			
 
				 	book3s_hv_ras.o \
			
@@ -88,11 +88,8 @@ endif
 
				 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
			
 
				 	book3s_xics.o
			
 
				 
			
 
				-kvm-book3s_64-module-objs += \
			
 
				-	$(KVM)/kvm_main.o \
			
 
				-	$(KVM)/eventfd.o \
			
 
				-	powerpc.o \
			
 
				-	emulate_loadstore.o \
			
 
				+kvm-book3s_64-module-objs := \
			
 
				+	$(common-objs-y) \
			
 
				 	book3s.o \
			
 
				 	book3s_64_vio.o \
			
 
				 	book3s_rtas.o \
			
@@ -102,6 +99,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
 
				 
			
 
				 kvm-book3s_32-objs := \
			
 
				 	$(common-objs-y) \
			
 
				+	emulate.o \
			
 
				 	fpu.o \
			
 
				 	book3s_paired_singles.o \
			
 
				 	book3s.o \
			
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
				 	{ "dec",         VCPU_STAT(dec_exits) },
			
 
				 	{ "ext_intr",    VCPU_STAT(ext_intr_exits) },
			
 
				 	{ "queue_intr",  VCPU_STAT(queue_intr) },
			
 
				+	{ "halt_poll_success_ns",	VCPU_STAT(halt_poll_success_ns) },
			
 
				+	{ "halt_poll_fail_ns",		VCPU_STAT(halt_poll_fail_ns) },
			
 
				+	{ "halt_wait_ns",		VCPU_STAT(halt_wait_ns) },
			
 
				 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
			
 
				 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
			
 
				+	{ "halt_successful_wait",	VCPU_STAT(halt_successful_wait) },
			
 
				 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
			
 
				 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
			
 
				 	{ "pf_storage",  VCPU_STAT(pf_storage) },
			
@@ -64,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
				 	{ "ld_slow",     VCPU_STAT(ld_slow) },
			
 
				 	{ "st",          VCPU_STAT(st) },
			
 
				 	{ "st_slow",     VCPU_STAT(st_slow) },
			
 
				+	{ "pthru_all",       VCPU_STAT(pthru_all) },
			
 
				+	{ "pthru_host",      VCPU_STAT(pthru_host) },
			
 
				+	{ "pthru_bad_aff",   VCPU_STAT(pthru_bad_aff) },
			
 
				 	{ NULL }
			
 
				 };
			
 
				 
			
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -53,11 +53,15 @@
 
				 #include <asm/smp.h>
			
 
				 #include <asm/dbell.h>
			
 
				 #include <asm/hmi.h>
			
 
				+#include <asm/pnv-pci.h>
			
 
				 #include <linux/gfp.h>
			
 
				 #include <linux/vmalloc.h>
			
 
				 #include <linux/highmem.h>
			
 
				 #include <linux/hugetlb.h>
			
 
				+#include <linux/kvm_irqfd.h>
			
 
				+#include <linux/irqbypass.h>
			
 
				 #include <linux/module.h>
			
 
				+#include <linux/compiler.h>
			
 
				 
			
 
				 #include "book3s.h"
			
 
				 
			
@@ -70,6 +74,8 @@
 
				 
			
 
				 /* Used to indicate that a guest page fault needs to be handled */
			
 
				 #define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
			
 
				+/* Used to indicate that a guest passthrough interrupt needs to be handled */
			
 
				+#define RESUME_PASSTHROUGH	(RESUME_GUEST | RESUME_FLAG_ARCH2)
			
 
				 
			
 
				 /* Used as a "null" value for timebase values */
			
 
				 #define TB_NIL	(~(u64)0)
			
@@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = {
 
				 	.get = param_get_int,
			
 
				 };
			
 
				 
			
 
				+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
			
 
				+							S_IRUGO | S_IWUSR);
			
 
				+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
			
 
				+
			
 
				 module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
			
 
				 							S_IRUGO | S_IWUSR);
			
 
				 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
			
 
				 #endif
			
 
				 
			
 
				+/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
			
 
				+static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
			
 
				+module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
			
 
				+MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
			
 
				+
			
 
				+/* Factor by which the vcore halt poll interval is grown, default is to double
			
 
				+ * (registered as uint so the parameter parser matches the unsigned variable) */
			
 
				+static unsigned int halt_poll_ns_grow = 2;
			
 
				+module_param(halt_poll_ns_grow, uint, S_IRUGO);
			
 
				+MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
			
 
				+
			
 
				+/* Factor by which the vcore halt poll interval is shrunk, default is to reset
			
 
				+ * (registered as uint so the parameter parser matches the unsigned variable) */
			
 
				+static unsigned int halt_poll_ns_shrink;
			
 
				+module_param(halt_poll_ns_shrink, uint, S_IRUGO);
			
 
				+MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
			
 
				+
			
 
				 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
			
 
				 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
			
 
				 
			
 
				+static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
			
 
				+		int *ip)
			
 
				+{
			
 
				+	int i = *ip;
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				+
			
 
				+	while (++i < MAX_SMT_THREADS) {
			
 
				+		vcpu = READ_ONCE(vc->runnable_threads[i]);
			
 
				+		if (vcpu) {
			
 
				+			*ip = i;
			
 
				+			return vcpu;
			
 
				+		}
			
 
				+	}
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+/* Used to traverse the list of runnable threads for a given vcore */
			
 
				+#define for_each_runnable_thread(i, vcpu, vc) \
			
 
				+	for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
			
 
				+
			
 
				 static bool kvmppc_ipi_thread(int cpu)
			
 
				 {
			
 
				 	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
			
@@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
				 		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			
 
				 		r = RESUME_GUEST;
			
 
				 		break;
			
 
				+	case BOOK3S_INTERRUPT_HV_RM_HARD:
			
 
				+		r = RESUME_PASSTHROUGH;
			
 
				+		break;
			
 
				 	default:
			
 
				 		kvmppc_dump_regs(vcpu);
			
 
				 		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			
@@ -1493,7 +1543,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 
				 	if (vcore == NULL)
			
 
				 		return NULL;
			
 
				 
			
 
				-	INIT_LIST_HEAD(&vcore->runnable_threads);
			
 
				 	spin_lock_init(&vcore->lock);
			
 
				 	spin_lock_init(&vcore->stoltb_lock);
			
 
				 	init_swait_queue_head(&vcore->wq);
			
@@ -1802,7 +1851,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 
				 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
			
 
				 	spin_unlock_irq(&vcpu->arch.tbacct_lock);
			
 
				 	--vc->n_runnable;
			
 
				-	list_del(&vcpu->arch.run_list);
			
 
				+	WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
			
 
				 }
			
 
				 
			
 
				 static int kvmppc_grab_hwthread(int cpu)
			
@@ -2209,10 +2258,10 @@ static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
 
				 
			
 
				 static void prepare_threads(struct kvmppc_vcore *vc)
			
 
				 {
			
 
				-	struct kvm_vcpu *vcpu, *vnext;
			
 
				+	int i;
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				 
			
 
				-	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
			
 
				-				 arch.run_list) {
			
 
				+	for_each_runnable_thread(i, vcpu, vc) {
			
 
				 		if (signal_pending(vcpu->arch.run_task))
			
 
				 			vcpu->arch.ret = -EINTR;
			
 
				 		else if (vcpu->arch.vpa.update_pending ||
			
@@ -2259,15 +2308,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 
				 
			
 
				 static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
			
 
				 {
			
 
				-	int still_running = 0;
			
 
				+	int still_running = 0, i;
			
 
				 	u64 now;
			
 
				 	long ret;
			
 
				-	struct kvm_vcpu *vcpu, *vnext;
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				 
			
 
				 	spin_lock(&vc->lock);
			
 
				 	now = get_tb();
			
 
				-	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
			
 
				-				 arch.run_list) {
			
 
				+	for_each_runnable_thread(i, vcpu, vc) {
			
 
				 		/* cancel pending dec exception if dec is positive */
			
 
				 		if (now < vcpu->arch.dec_expires &&
			
 
				 		    kvmppc_core_pending_dec(vcpu))
			
@@ -2307,8 +2355,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 
				 		}
			
 
				 		if (vc->n_runnable > 0 && vc->runner == NULL) {
			
 
				 			/* make sure there's a candidate runner awake */
			
 
				-			vcpu = list_first_entry(&vc->runnable_threads,
			
 
				-						struct kvm_vcpu, arch.run_list);
			
 
				+			i = -1;
			
 
				+			vcpu = next_runnable_thread(vc, &i);
			
 
				 			wake_up(&vcpu->arch.cpu_run);
			
 
				 		}
			
 
				 	}
			
@@ -2361,7 +2409,7 @@ static inline void kvmppc_set_host_core(int cpu)
 
				  */
			
 
				 static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
			
 
				 {
			
 
				-	struct kvm_vcpu *vcpu, *vnext;
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				 	int i;
			
 
				 	int srcu_idx;
			
 
				 	struct core_info core_info;
			
@@ -2397,8 +2445,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
				 	 */
			
 
				 	if ((threads_per_core > 1) &&
			
 
				 	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
			
 
				-		list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
			
 
				-					 arch.run_list) {
			
 
				+		for_each_runnable_thread(i, vcpu, vc) {
			
 
				 			vcpu->arch.ret = -EBUSY;
			
 
				 			kvmppc_remove_runnable(vc, vcpu);
			
 
				 			wake_up(&vcpu->arch.cpu_run);
			
@@ -2477,8 +2524,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
				 		active |= 1 << thr;
			
 
				 		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
			
 
				 			pvc->pcpu = pcpu + thr;
			
 
				-			list_for_each_entry(vcpu, &pvc->runnable_threads,
			
 
				-					    arch.run_list) {
			
 
				+			for_each_runnable_thread(i, vcpu, pvc) {
			
 
				 				kvmppc_start_thread(vcpu, pvc);
			
 
				 				kvmppc_create_dtl_entry(vcpu, pvc);
			
 
				 				trace_kvm_guest_enter(vcpu);
			
@@ -2604,34 +2650,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
 
				 	finish_wait(&vcpu->arch.cpu_run, &wait);
			
 
				 }
			
 
				 
			
 
				+static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
			
 
				+{
			
 
				+	/* 10us base */
			
 
				+	if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
			
 
				+		vc->halt_poll_ns = 10000;
			
 
				+	else
			
 
				+		vc->halt_poll_ns *= halt_poll_ns_grow;
			
 
				+
			
 
				+	if (vc->halt_poll_ns > halt_poll_max_ns)
			
 
				+		vc->halt_poll_ns = halt_poll_max_ns;
			
 
				+}
			
 
				+
			
 
				+static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
			
 
				+{
			
 
				+	if (halt_poll_ns_shrink == 0)
			
 
				+		vc->halt_poll_ns = 0;
			
 
				+	else
			
 
				+		vc->halt_poll_ns /= halt_poll_ns_shrink;
			
 
				+}
			
 
				+
			
 
				+/* Check to see if any of the runnable vcpus on the vcore have pending
			
 
				+ * exceptions or are no longer ceded
			
 
				+ */
			
 
				+static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
			
 
				+{
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				+	int i;
			
 
				+
			
 
				+	for_each_runnable_thread(i, vcpu, vc) {
			
 
				+		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
			
 
				+			return 1;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * All the vcpus in this vcore are idle, so wait for a decrementer
			
 
				  * or external interrupt to one of the vcpus.  vc->lock is held.
			
 
				  */
			
 
				 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
			
 
				 {
			
 
				-	struct kvm_vcpu *vcpu;
			
 
				+	ktime_t cur, start_poll, start_wait;
			
 
				 	int do_sleep = 1;
			
 
				+	u64 block_ns;
			
 
				 	DECLARE_SWAITQUEUE(wait);
			
 
				 
			
 
				-	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
			
 
				+	/* Poll for pending exceptions and ceded state */
			
 
				+	cur = start_poll = ktime_get();
			
 
				+	if (vc->halt_poll_ns) {
			
 
				+		ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
			
 
				+		++vc->runner->stat.halt_attempted_poll;
			
 
				 
			
 
				-	/*
			
 
				-	 * Check one last time for pending exceptions and ceded state after
			
 
				-	 * we put ourselves on the wait queue
			
 
				-	 */
			
 
				-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
			
 
				-		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
			
 
				-			do_sleep = 0;
			
 
				-			break;
			
 
				+		vc->vcore_state = VCORE_POLLING;
			
 
				+		spin_unlock(&vc->lock);
			
 
				+
			
 
				+		do {
			
 
				+			if (kvmppc_vcore_check_block(vc)) {
			
 
				+				do_sleep = 0;
			
 
				+				break;
			
 
				+			}
			
 
				+			cur = ktime_get();
			
 
				+		} while (single_task_running() && ktime_before(cur, stop));
			
 
				+
			
 
				+		spin_lock(&vc->lock);
			
 
				+		vc->vcore_state = VCORE_INACTIVE;
			
 
				+
			
 
				+		if (!do_sleep) {
			
 
				+			++vc->runner->stat.halt_successful_poll;
			
 
				+			goto out;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (!do_sleep) {
			
 
				+	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
			
 
				+
			
 
				+	if (kvmppc_vcore_check_block(vc)) {
			
 
				 		finish_swait(&vc->wq, &wait);
			
 
				-		return;
			
 
				+		do_sleep = 0;
			
 
				+		/* If we polled, count this as a successful poll */
			
 
				+		if (vc->halt_poll_ns)
			
 
				+			++vc->runner->stat.halt_successful_poll;
			
 
				+		goto out;
			
 
				 	}
			
 
				 
			
 
				+	start_wait = ktime_get();
			
 
				+
			
 
				 	vc->vcore_state = VCORE_SLEEPING;
			
 
				 	trace_kvmppc_vcore_blocked(vc, 0);
			
 
				 	spin_unlock(&vc->lock);
			
@@ -2640,13 +2744,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 
				 	spin_lock(&vc->lock);
			
 
				 	vc->vcore_state = VCORE_INACTIVE;
			
 
				 	trace_kvmppc_vcore_blocked(vc, 1);
			
 
				+	++vc->runner->stat.halt_successful_wait;
			
 
				+
			
 
				+	cur = ktime_get();
			
 
				+
			
 
				+out:
			
 
				+	block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
			
 
				+
			
 
				+	/* Attribute wait time */
			
 
				+	if (do_sleep) {
			
 
				+		vc->runner->stat.halt_wait_ns +=
			
 
				+			ktime_to_ns(cur) - ktime_to_ns(start_wait);
			
 
				+		/* Attribute failed poll time */
			
 
				+		if (vc->halt_poll_ns)
			
 
				+			vc->runner->stat.halt_poll_fail_ns +=
			
 
				+				ktime_to_ns(start_wait) -
			
 
				+				ktime_to_ns(start_poll);
			
 
				+	} else {
			
 
				+		/* Attribute successful poll time */
			
 
				+		if (vc->halt_poll_ns)
			
 
				+			vc->runner->stat.halt_poll_success_ns +=
			
 
				+				ktime_to_ns(cur) -
			
 
				+				ktime_to_ns(start_poll);
			
 
				+	}
			
 
				+
			
 
				+	/* Adjust poll time */
			
 
				+	if (halt_poll_max_ns) {
			
 
				+		if (block_ns <= vc->halt_poll_ns)
			
 
				+			;
			
 
				+		/* We slept and blocked for longer than the max halt time */
			
 
				+		else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
			
 
				+			shrink_halt_poll_ns(vc);
			
 
				+		/* We slept and our poll time is too small */
			
 
				+		else if (vc->halt_poll_ns < halt_poll_max_ns &&
			
 
				+				block_ns < halt_poll_max_ns)
			
 
				+			grow_halt_poll_ns(vc);
			
 
				+	} else
			
 
				+		vc->halt_poll_ns = 0;
			
 
				+
			
 
				+	trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
			
 
				 }
			
 
				 
			
 
				 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
			
 
				 {
			
 
				-	int n_ceded;
			
 
				+	int n_ceded, i;
			
 
				 	struct kvmppc_vcore *vc;
			
 
				-	struct kvm_vcpu *v, *vn;
			
 
				+	struct kvm_vcpu *v;
			
 
				 
			
 
				 	trace_kvmppc_run_vcpu_enter(vcpu);
			
 
				 
			
@@ -2666,7 +2809,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
				 	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
			
 
				 	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
			
 
				 	vcpu->arch.busy_preempt = TB_NIL;
			
 
				-	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
			
 
				+	WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
			
 
				 	++vc->n_runnable;
			
 
				 
			
 
				 	/*
			
@@ -2706,8 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
				 			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
			
 
				 			continue;
			
 
				 		}
			
 
				-		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
			
 
				-					 arch.run_list) {
			
 
				+		for_each_runnable_thread(i, v, vc) {
			
 
				 			kvmppc_core_prepare_to_enter(v);
			
 
				 			if (signal_pending(v->arch.run_task)) {
			
 
				 				kvmppc_remove_runnable(vc, v);
			
@@ -2720,7 +2862,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
				 		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
			
 
				 			break;
			
 
				 		n_ceded = 0;
			
 
				-		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
			
 
				+		for_each_runnable_thread(i, v, vc) {
			
 
				 			if (!v->arch.pending_exceptions)
			
 
				 				n_ceded += v->arch.ceded;
			
 
				 			else
			
@@ -2759,8 +2901,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
				 
			
 
				 	if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
			
 
				 		/* Wake up some vcpu to run the core */
			
 
				-		v = list_first_entry(&vc->runnable_threads,
			
 
				-				     struct kvm_vcpu, arch.run_list);
			
 
				+		i = -1;
			
 
				+		v = next_runnable_thread(vc, &i);
			
 
				 		wake_up(&v->arch.cpu_run);
			
 
				 	}
			
 
				 
			
@@ -2818,7 +2960,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
				 			r = kvmppc_book3s_hv_page_fault(run, vcpu,
			
 
				 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
			
 
				 			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
			
 
				-		}
			
 
				+		} else if (r == RESUME_PASSTHROUGH)
			
 
				+			r = kvmppc_xics_rm_complete(vcpu, 0);
			
 
				 	} while (is_kvmppc_resume_guest(r));
			
 
				 
			
 
				  out:
			
@@ -3247,6 +3390,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 
				 	kvmppc_free_vcores(kvm);
			
 
				 
			
 
				 	kvmppc_free_hpt(kvm);
			
 
				+
			
 
				+	kvmppc_free_pimap(kvm);
			
 
				 }
			
 
				 
			
 
				 /* We don't need to emulate any privileged instructions or dcbz */
			
@@ -3282,6 +3427,184 @@ static int kvmppc_core_check_processor_compat_hv(void)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_KVM_XICS
			
 
				+
			
 
				+void kvmppc_free_pimap(struct kvm *kvm)
			
 
				+{
			
 
				+	kfree(kvm->arch.pimap);
			
 
				+}
			
 
				+
			
 
				+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
			
 
				+{
			
 
				+	return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
			
 
				+}
			
 
				+
			
 
				+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
			
 
				+{
			
 
				+	struct irq_desc *desc;
			
 
				+	struct kvmppc_irq_map *irq_map;
			
 
				+	struct kvmppc_passthru_irqmap *pimap;
			
 
				+	struct irq_chip *chip;
			
 
				+	int i;
			
 
				+
			
 
				+	if (!kvm_irq_bypass)
			
 
				+		return 1;
			
 
				+
			
 
				+	desc = irq_to_desc(host_irq);
			
 
				+	if (!desc)
			
 
				+		return -EIO;
			
 
				+
			
 
				+	mutex_lock(&kvm->lock);
			
 
				+
			
 
				+	pimap = kvm->arch.pimap;
			
 
				+	if (pimap == NULL) {
			
 
				+		/* First call, allocate structure to hold IRQ map */
			
 
				+		pimap = kvmppc_alloc_pimap();
			
 
				+		if (pimap == NULL) {
			
 
				+			mutex_unlock(&kvm->lock);
			
 
				+			return -ENOMEM;
			
 
				+		}
			
 
				+		kvm->arch.pimap = pimap;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * For now, we only support interrupts for which the EOI operation
			
 
				+	 * is an OPAL call followed by a write to XIRR, since that's
			
 
				+	 * what our real-mode EOI code does.
			
 
				+	 */
			
 
				+	chip = irq_data_get_irq_chip(&desc->irq_data);
			
 
				+	if (!chip || !is_pnv_opal_msi(chip)) {
			
 
				+		pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
			
 
				+			host_irq, guest_gsi);
			
 
				+		mutex_unlock(&kvm->lock);
			
 
				+		return -ENOENT;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * See if we already have an entry for this guest IRQ number.
			
 
				+	 * If it's mapped to a hardware IRQ number, that's an error,
			
 
				+	 * otherwise re-use this entry.
			
 
				+	 */
			
 
				+	for (i = 0; i < pimap->n_mapped; i++) {
			
 
				+		if (guest_gsi == pimap->mapped[i].v_hwirq) {
			
 
				+			if (pimap->mapped[i].r_hwirq) {
			
 
				+				mutex_unlock(&kvm->lock);
			
 
				+				return -EINVAL;
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (i == KVMPPC_PIRQ_MAPPED) {
			
 
				+		mutex_unlock(&kvm->lock);
			
 
				+		return -EAGAIN;		/* table is full */
			
 
				+	}
			
 
				+
			
 
				+	irq_map = &pimap->mapped[i];
			
 
				+
			
 
				+	irq_map->v_hwirq = guest_gsi;
			
 
				+	irq_map->desc = desc;
			
 
				+
			
 
				+	/*
			
 
				+	 * Order the above two stores before the next to serialize with
			
 
				+	 * the KVM real mode handler.
			
 
				+	 */
			
 
				+	smp_wmb();
			
 
				+	irq_map->r_hwirq = desc->irq_data.hwirq;
			
 
				+
			
 
				+	if (i == pimap->n_mapped)
			
 
				+		pimap->n_mapped++;
			
 
				+
			
 
				+	kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
			
 
				+
			
 
				+	mutex_unlock(&kvm->lock);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
			
 
				+{
			
 
				+	struct irq_desc *desc;
			
 
				+	struct kvmppc_passthru_irqmap *pimap;
			
 
				+	int i;
			
 
				+
			
 
				+	if (!kvm_irq_bypass)
			
 
				+		return 0;
			
 
				+
			
 
				+	desc = irq_to_desc(host_irq);
			
 
				+	if (!desc)
			
 
				+		return -EIO;
			
 
				+
			
 
				+	mutex_lock(&kvm->lock);
			
 
				+
			
 
				+	if (kvm->arch.pimap == NULL) {
			
 
				+		mutex_unlock(&kvm->lock);
			
 
				+		return 0;
			
 
				+	}
			
 
				+	pimap = kvm->arch.pimap;
			
 
				+
			
 
				+	for (i = 0; i < pimap->n_mapped; i++) {
			
 
				+		if (guest_gsi == pimap->mapped[i].v_hwirq)
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	if (i == pimap->n_mapped) {
			
 
				+		mutex_unlock(&kvm->lock);
			
 
				+		return -ENODEV;
			
 
				+	}
			
 
				+
			
 
				+	kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
			
 
				+
			
 
				+	/* invalidate the entry */
			
 
				+	pimap->mapped[i].r_hwirq = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * We don't free this structure even when the count goes to
			
 
				+	 * zero. The structure is freed when we destroy the VM.
			
 
				+	 */
			
 
				+
			
 
				+	mutex_unlock(&kvm->lock);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
			
 
				+					     struct irq_bypass_producer *prod)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+	struct kvm_kernel_irqfd *irqfd =
			
 
				+		container_of(cons, struct kvm_kernel_irqfd, consumer);
			
 
				+
			
 
				+	irqfd->producer = prod;
			
 
				+
			
 
				+	ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
			
 
				+	if (ret)
			
 
				+		pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
			
 
				+			prod->irq, irqfd->gsi, ret);
			
 
				+
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
			
 
				+					      struct irq_bypass_producer *prod)
			
 
				+{
			
 
				+	int ret;
			
 
				+	struct kvm_kernel_irqfd *irqfd =
			
 
				+		container_of(cons, struct kvm_kernel_irqfd, consumer);
			
 
				+
			
 
				+	irqfd->producer = NULL;
			
 
				+
			
 
				+	/*
			
 
				+	 * When the producer of a consumer is unregistered, we change back to
			
 
				+	 * default external interrupt handling mode - KVM real mode
			
 
				+	 * will switch back to host.
			
 
				+	 */
			
 
				+	ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
			
 
				+	if (ret)
			
 
				+		pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
			
 
				+			prod->irq, irqfd->gsi, ret);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 static long kvm_arch_vm_ioctl_hv(struct file *filp,
			
 
				 				 unsigned int ioctl, unsigned long arg)
			
 
				 {
			
@@ -3400,6 +3723,10 @@ static struct kvmppc_ops kvm_ops_hv = {
 
				 	.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
			
 
				 	.arch_vm_ioctl  = kvm_arch_vm_ioctl_hv,
			
 
				 	.hcall_implemented = kvmppc_hcall_impl_hv,
			
 
				+#ifdef CONFIG_KVM_XICS
			
 
				+	.irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
			
 
				+	.irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 static int kvm_init_subcore_bitmap(void)
			
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -25,6 +25,7 @@
 
				 #include <asm/xics.h>
			
 
				 #include <asm/dbell.h>
			
 
				 #include <asm/cputhreads.h>
			
 
				+#include <asm/io.h>
			
 
				 
			
 
				 #define KVM_CMA_CHUNK_ORDER	18
			
 
				 
			
@@ -286,3 +287,158 @@ void kvmhv_commence_exit(int trap)
 
				 
			
 
				 struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
			
 
				 EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
			
 
				+
			
 
				+#ifdef CONFIG_KVM_XICS
			
 
				+static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
			
 
				+					 u32 xisr)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	/*
			
 
				+	 * We access the mapped array here without a lock.  That
			
 
				+	 * is safe because we never reduce the number of entries
			
 
				+	 * in the array and we never change the v_hwirq field of
			
 
				+	 * an entry once it is set.
			
 
				+	 *
			
 
				+	 * We have also carefully ordered the stores in the writer
			
 
				+	 * and the loads here in the reader, so that if we find a matching
			
 
				+	 * hwirq here, the associated GSI and irq_desc fields are valid.
			
 
				+	 */
			
 
				+	for (i = 0; i < pimap->n_mapped; i++)  {
			
 
				+		if (xisr == pimap->mapped[i].r_hwirq) {
			
 
				+			/*
			
 
				+			 * Order subsequent reads in the caller to serialize
			
 
				+			 * with the writer.
			
 
				+			 */
			
 
				+			smp_rmb();
			
 
				+			return &pimap->mapped[i];
			
 
				+		}
			
 
				+	}
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * If we have an interrupt that's not an IPI, check if we have a
			
 
				+ * passthrough adapter and if so, check if this external interrupt
			
 
				+ * is for the adapter.
			
 
				+ * We will attempt to deliver the IRQ directly to the target VCPU's
			
 
				+ * ICP, the virtual ICP (based on affinity - the xive value in ICS).
			
 
				+ *
			
 
				+ * If the delivery fails or if this is not for a passthrough adapter,
			
 
				+ * return to the host to handle this interrupt. We earlier
			
 
				+ * saved a copy of the XIRR in the PACA, it will be picked up by
			
 
				+ * the host ICP driver.
			
 
				+ */
			
 
				+static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
			
 
				+{
			
 
				+	struct kvmppc_passthru_irqmap *pimap;
			
 
				+	struct kvmppc_irq_map *irq_map;
			
 
				+	struct kvm_vcpu *vcpu;
			
 
				+
			
 
				+	vcpu = local_paca->kvm_hstate.kvm_vcpu;
			
 
				+	if (!vcpu)
			
 
				+		return 1;
			
 
				+	pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
			
 
				+	if (!pimap)
			
 
				+		return 1;
			
 
				+	irq_map = get_irqmap(pimap, xisr);
			
 
				+	if (!irq_map)
			
 
				+		return 1;
			
 
				+
			
 
				+	/* We're handling this interrupt, generic code doesn't need to */
			
 
				+	local_paca->kvm_hstate.saved_xirr = 0;
			
 
				+
			
 
				+	return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap);
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
			
 
				+{
			
 
				+	return 1;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Determine what sort of external interrupt is pending (if any).
			
 
				+ * Returns:
			
 
				+ *	0 if no interrupt is pending
			
 
				+ *	1 if an interrupt is pending that needs to be handled by the host
			
 
				+ *	2 if a passthrough interrupt needs completion in the host
			
 
				+ *	-1 if there was a guest wakeup IPI (which has now been cleared)
			
 
				+ *	-2 if there was a PCI passthrough external interrupt that was handled
			
 
				+ */
			
 
				+
			
 
				+long kvmppc_read_intr(void)
			
 
				+{
			
 
				+	unsigned long xics_phys;
			
 
				+	u32 h_xirr;
			
 
				+	__be32 xirr;
			
 
				+	u32 xisr;
			
 
				+	u8 host_ipi;
			
 
				+
			
 
				+	/* see if a host IPI is pending */
			
 
				+	host_ipi = local_paca->kvm_hstate.host_ipi;
			
 
				+	if (host_ipi)
			
 
				+		return 1;
			
 
				+
			
 
				+	/* Now read the interrupt from the ICP */
			
 
				+	xics_phys = local_paca->kvm_hstate.xics_phys;
			
 
				+	if (unlikely(!xics_phys))
			
 
				+		return 1;
			
 
				+
			
 
				+	/*
			
 
				+	 * Save XIRR for later. Since we get control in reverse endian
			
 
				+	 * on LE systems, save it byte reversed and fetch it back in
			
 
				+	 * host endian. Note that xirr is the value read from the
			
 
				+	 * XIRR register, while h_xirr is the host endian version.
			
 
				+	 */
			
 
				+	xirr = _lwzcix(xics_phys + XICS_XIRR);
			
 
				+	h_xirr = be32_to_cpu(xirr);
			
 
				+	local_paca->kvm_hstate.saved_xirr = h_xirr;
			
 
				+	xisr = h_xirr & 0xffffff;
			
 
				+	/*
			
 
				+	 * Ensure that the store/load complete to guarantee all side
			
 
				+	 * effects of loading from XIRR have completed
			
 
				+	 */
			
 
				+	smp_mb();
			
 
				+
			
 
				+	/* if nothing pending in the ICP */
			
 
				+	if (!xisr)
			
 
				+		return 0;
			
 
				+
			
 
				+	/* We found something in the ICP...
			
 
				+	 *
			
 
				+	 * If it is an IPI, clear the MFRR and EOI it.
			
 
				+	 */
			
 
				+	if (xisr == XICS_IPI) {
			
 
				+		_stbcix(xics_phys + XICS_MFRR, 0xff);
			
 
				+		_stwcix(xics_phys + XICS_XIRR, xirr);
			
 
				+		/*
			
 
				+		 * Need to ensure side effects of above stores
			
 
				+		 * complete before proceeding.
			
 
				+		 */
			
 
				+		smp_mb();
			
 
				+
			
 
				+		/*
			
 
				+		 * We need to re-check host IPI now in case it got set in the
			
 
				+		 * meantime. If it's clear, we bounce the interrupt to the
			
 
				+		 * guest
			
 
				+		 */
			
 
				+		host_ipi = local_paca->kvm_hstate.host_ipi;
			
 
				+		if (unlikely(host_ipi != 0)) {
			
 
				+			/* We raced with the host,
			
 
				+			 * we need to resend that IPI, bummer
			
 
				+			 */
			
 
				+			_stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
			
 
				+			/* Let side effects complete */
			
 
				+			smp_mb();
			
 
				+			return 1;
			
 
				+		}
			
 
				+
			
 
				+		/* OK, it's an IPI for us */
			
 
				+		local_paca->kvm_hstate.saved_xirr = 0;
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				+	return kvmppc_check_passthru(xisr, xirr);
			
 
				+}
			
--- a/arch/powerpc/kvm/book3s_hv_hmi.c
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -10,6 +10,7 @@
 
				 #include <linux/kernel.h>
			
 
				 #include <linux/kvm_host.h>
			
 
				 #include <linux/err.h>
			
 
				+#include <linux/kernel_stat.h>
			
 
				 
			
 
				 #include <asm/kvm_book3s.h>
			
 
				 #include <asm/kvm_ppc.h>
			
@@ -18,7 +19,10 @@
 
				 #include <asm/debug.h>
			
 
				 #include <asm/synch.h>
			
 
				 #include <asm/cputhreads.h>
			
 
				+#include <asm/pgtable.h>
			
 
				 #include <asm/ppc-opcode.h>
			
 
				+#include <asm/pnv-pci.h>
			
 
				+#include <asm/opal.h>
			
 
				 
			
 
				 #include "book3s_xics.h"
			
 
				 
			
@@ -26,9 +30,12 @@
 
				 
			
 
				 int h_ipi_redirect = 1;
			
 
				 EXPORT_SYMBOL(h_ipi_redirect);
			
 
				+int kvm_irq_bypass = 1;
			
 
				+EXPORT_SYMBOL(kvm_irq_bypass);
			
 
				 
			
 
				 static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			
 
				 			    u32 new_irq);
			
 
				+static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu);
			
 
				 
			
 
				 /* -- ICS routines -- */
			
 
				 static void ics_rm_check_resend(struct kvmppc_xics *xics,
			
@@ -708,10 +715,123 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 
				 		icp->rm_action |= XICS_RM_NOTIFY_EOI;
			
 
				 		icp->rm_eoied_irq = irq;
			
 
				 	}
			
 
				+
			
 
				+	if (state->host_irq) {
			
 
				+		++vcpu->stat.pthru_all;
			
 
				+		if (state->intr_cpu != -1) {
			
 
				+			int pcpu = raw_smp_processor_id();
			
 
				+
			
 
				+			pcpu = cpu_first_thread_sibling(pcpu);
			
 
				+			++vcpu->stat.pthru_host;
			
 
				+			if (state->intr_cpu != pcpu) {
			
 
				+				++vcpu->stat.pthru_bad_aff;
			
 
				+				xics_opal_rm_set_server(state->host_irq, pcpu);
			
 
				+			}
			
 
				+			state->intr_cpu = -1;
			
 
				+		}
			
 
				+	}
			
 
				  bail:
			
 
				 	return check_too_hard(xics, icp);
			
 
				 }
			
 
				 
			
 
				+unsigned long eoi_rc;
			
 
				+
			
 
				+static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
			
 
				+{
			
 
				+	unsigned long xics_phys;
			
 
				+	int64_t rc;
			
 
				+
			
 
				+	rc = pnv_opal_pci_msi_eoi(c, hwirq);
			
 
				+
			
 
				+	if (rc)
			
 
				+		eoi_rc = rc;
			
 
				+
			
 
				+	iosync();
			
 
				+
			
 
				+	/* EOI it */
			
 
				+	xics_phys = local_paca->kvm_hstate.xics_phys;
			
 
				+	_stwcix(xics_phys + XICS_XIRR, xirr);
			
 
				+}
			
 
				+
			
 
				+static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
			
 
				+{
			
 
				+	unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
			
 
				+
			
 
				+	return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Increment a per-CPU 32-bit unsigned integer variable.
			
 
				+ * Safe to call in real-mode. Handles vmalloc'ed addresses.
			
 
				+ *
			
 
				+ * ToDo: Make this work for any integral type
			
 
				+ */
			
 
				+
			
 
				+static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
			
 
				+{
			
 
				+	unsigned long l;
			
 
				+	unsigned int *raddr;
			
 
				+	int cpu = smp_processor_id();
			
 
				+
			
 
				+	raddr = per_cpu_ptr(addr, cpu);
			
 
				+	l = (unsigned long)raddr;
			
 
				+
			
 
				+	if (REGION_ID(l) == VMALLOC_REGION_ID) {
			
 
				+		l = vmalloc_to_phys(raddr);
			
 
				+		raddr = (unsigned int *)l;
			
 
				+	}
			
 
				+	++*raddr;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * We don't try to update the flags in the irq_desc 'istate' field in
			
 
				+ * here as would happen in the normal IRQ handling path for several reasons:
			
 
				+ *  - state flags represent internal IRQ state and are not expected to be
			
 
				+ *    updated outside the IRQ subsystem
			
 
				+ *  - more importantly, these are useful for edge triggered interrupts,
			
 
				+ *    IRQ probing, etc., but we are only handling MSI/MSIx interrupts here
			
 
				+ *    and these states shouldn't apply to us.
			
 
				+ *
			
 
				+ * However, we do update irq_stats - we somewhat duplicate the code in
			
 
				+ * kstat_incr_irqs_this_cpu() for this since this function is defined
			
 
				+ * in irq/internal.h which we don't want to include here.
			
 
				+ * The only difference is that desc->kstat_irqs is an allocated per CPU
			
 
				+ * variable and could have been vmalloc'ed, so we can't directly
			
 
				+ * call __this_cpu_inc() on it. The kstat structure is a static
			
 
				+ * per CPU variable and it should be accessible by real-mode KVM.
			
 
				+ *
			
 
				+ */
			
 
				+static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
			
 
				+{
			
 
				+	this_cpu_inc_rm(desc->kstat_irqs);
			
 
				+	__this_cpu_inc(kstat.irqs_sum);
			
 
				+}
			
 
				+
			
 
				+long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
			
 
				+				 u32 xirr,
			
 
				+				 struct kvmppc_irq_map *irq_map,
			
 
				+				 struct kvmppc_passthru_irqmap *pimap)
			
 
				+{
			
 
				+	struct kvmppc_xics *xics;
			
 
				+	struct kvmppc_icp *icp;
			
 
				+	u32 irq;
			
 
				+
			
 
				+	irq = irq_map->v_hwirq;
			
 
				+	xics = vcpu->kvm->arch.xics;
			
 
				+	icp = vcpu->arch.icp;
			
 
				+
			
 
				+	kvmppc_rm_handle_irq_desc(irq_map->desc);
			
 
				+	icp_rm_deliver_irq(xics, icp, irq);
			
 
				+
			
 
				+	/* EOI the interrupt */
			
 
				+	icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr);
			
 
				+
			
 
				+	if (check_too_hard(xics, icp) == H_TOO_HARD)
			
 
				+		return 2;
			
 
				+	else
			
 
				+		return -2;
			
 
				+}
			
 
				+
			
 
				 /*  --- Non-real mode XICS-related built-in routines ---  */
			
 
				 
			
 
				 /**
			
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -221,6 +221,13 @@ kvmppc_primary_no_guest:
 
				 	li	r3, 0		/* Don't wake on privileged (OS) doorbell */
			
 
				 	b	kvm_do_nap
			
 
				 
			
 
				+/*
			
 
				+ * kvm_novcpu_wakeup
			
 
				+ *	Entered from kvm_start_guest if kvm_hstate.napping is set
			
 
				+ *	to NAPPING_NOVCPU
			
 
				+ *		r2 = kernel TOC
			
 
				+ *		r13 = paca
			
 
				+ */
			
 
				 kvm_novcpu_wakeup:
			
 
				 	ld	r1, HSTATE_HOST_R1(r13)
			
 
				 	ld	r5, HSTATE_KVM_VCORE(r13)
			
@@ -230,6 +237,13 @@ kvm_novcpu_wakeup:
 
				 	/* check the wake reason */
			
 
				 	bl	kvmppc_check_wake_reason
			
 
				 
			
 
				+	/*
			
 
				+	 * Restore volatile registers since we could have called
			
 
				+	 * a C routine in kvmppc_check_wake_reason.
			
 
				+	 *	r5 = VCORE
			
 
				+	 */
			
 
				+	ld	r5, HSTATE_KVM_VCORE(r13)
			
 
				+
			
 
				 	/* see if any other thread is already exiting */
			
 
				 	lwz	r0, VCORE_ENTRY_EXIT(r5)
			
 
				 	cmpwi	r0, 0x100
			
@@ -322,6 +336,11 @@ kvm_start_guest:
 
				 
			
 
				 	/* Check the wake reason in SRR1 to see why we got here */
			
 
				 	bl	kvmppc_check_wake_reason
			
 
				+	/*
			
 
				+	 * kvmppc_check_wake_reason could invoke a C routine, but we
			
 
				+	 * have no volatile registers to restore when we return.
			
 
				+	 */
			
 
				+
			
 
				 	cmpdi	r3, 0
			
 
				 	bge	kvm_no_guest
			
 
				 
			
@@ -881,6 +900,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
				 	cmpwi	r3, 512		/* 1 microsecond */
			
 
				 	blt	hdec_soon
			
 
				 
			
 
				+deliver_guest_interrupt:
			
 
				 	ld	r6, VCPU_CTR(r4)
			
 
				 	ld	r7, VCPU_XER(r4)
			
 
				 
			
@@ -895,7 +915,6 @@ kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 
				 	mtspr	SPRN_SRR0, r6
			
 
				 	mtspr	SPRN_SRR1, r7
			
 
				 
			
 
				-deliver_guest_interrupt:
			
 
				 	/* r11 = vcpu->arch.msr & ~MSR_HV */
			
 
				 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
			
 
				 	rotldi	r11, r11, 1 + MSR_HV_LG
			
@@ -1155,10 +1174,54 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
				 	 * set, we know the host wants us out so let's do it now
			
 
				 	 */
			
 
				 	bl	kvmppc_read_intr
			
 
				+
			
 
				+	/*
			
 
				+	 * Restore the active volatile registers after returning from
			
 
				+	 * a C function.
			
 
				+	 */
			
 
				+	ld	r9, HSTATE_KVM_VCPU(r13)
			
 
				+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
			
 
				+
			
 
				+	/*
			
 
				+	 * kvmppc_read_intr return codes:
			
 
				+	 *
			
 
				+	 * Exit to host (r3 > 0)
			
 
				+	 *   1 An interrupt is pending that needs to be handled by the host
			
 
				+	 *     Exit guest and return to host by branching to guest_exit_cont
			
 
				+	 *
			
 
				+	 *   2 Passthrough that needs completion in the host
			
 
				+	 *     Exit guest and return to host by branching to guest_exit_cont
			
 
				+	 *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
			
 
				+	 *     to indicate to the host to complete handling the interrupt
			
 
				+	 *
			
 
				+	 * Before returning to guest, we check if any CPU is heading out
			
 
				+	 * to the host and if so, we head out also. If no CPUs are heading
			
 
				+	 * out, check return values <= 0.
			
 
				+	 *
			
 
				+	 * Return to guest (r3 <= 0)
			
 
				+	 *  0 No external interrupt is pending
			
 
				+	 * -1 A guest wakeup IPI (which has now been cleared)
			
 
				+	 *    In either case, we return to guest to deliver any pending
			
 
				+	 *    guest interrupts.
			
 
				+	 *
			
 
				+	 * -2 A PCI passthrough external interrupt was handled
			
 
				+	 *    (interrupt was delivered directly to guest)
			
 
				+	 *    Return to guest to deliver any pending guest interrupts.
			
 
				+	 */
			
 
				+
			
 
				+	cmpdi	r3, 1
			
 
				+	ble	1f
			
 
				+
			
 
				+	/* Return code = 2 */
			
 
				+	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
			
 
				+	stw	r12, VCPU_TRAP(r9)
			
 
				+	b	guest_exit_cont
			
 
				+
			
 
				+1:	/* Return code <= 1 */
			
 
				 	cmpdi	r3, 0
			
 
				 	bgt	guest_exit_cont
			
 
				 
			
 
				-	/* Check if any CPU is heading out to the host, if so head out too */
			
 
				+	/* Return code <= 0 */
			
 
				 4:	ld	r5, HSTATE_KVM_VCORE(r13)
			
 
				 	lwz	r0, VCORE_ENTRY_EXIT(r5)
			
 
				 	cmpwi	r0, 0x100
			
@@ -2213,10 +2276,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 
				 	ld	r29, VCPU_GPR(R29)(r4)
			
 
				 	ld	r30, VCPU_GPR(R30)(r4)
			
 
				 	ld	r31, VCPU_GPR(R31)(r4)
			
 
				- 
			
 
				+
			
 
				 	/* Check the wake reason in SRR1 to see why we got here */
			
 
				 	bl	kvmppc_check_wake_reason
			
 
				 
			
 
				+	/*
			
 
				+	 * Restore volatile registers since we could have called a
			
 
				+	 * C routine in kvmppc_check_wake_reason
			
 
				+	 *	r4 = VCPU
			
 
				+	 * r3 tells us whether we need to return to host or not
			
 
				+	 * WARNING: it gets checked further down:
			
 
				+	 * should not modify r3 until this check is done.
			
 
				+	 */
			
 
				+	ld	r4, HSTATE_KVM_VCPU(r13)
			
 
				+
			
 
				 	/* clear our bit in vcore->napping_threads */
			
 
				 34:	ld	r5,HSTATE_KVM_VCORE(r13)
			
 
				 	lbz	r7,HSTATE_PTID(r13)
			
@@ -2230,7 +2303,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 
				 	li	r0,0
			
 
				 	stb	r0,HSTATE_NAPPING(r13)
			
 
				 
			
 
				-	/* See if the wake reason means we need to exit */
			
 
				+	/* See if the wake reason saved in r3 means we need to exit */
			
 
				 	stw	r12, VCPU_TRAP(r4)
			
 
				 	mr	r9, r4
			
 
				 	cmpdi	r3, 0
			
@@ -2297,10 +2370,14 @@ machine_check_realmode:
 
				  *	0 if nothing needs to be done
			
 
				  *	1 if something happened that needs to be handled by the host
			
 
				  *	-1 if there was a guest wakeup (IPI or msgsnd)
			
 
				+ *	-2 if we handled a PCI passthrough interrupt (returned by
			
 
				+ *		kvmppc_read_intr only)
			
 
				  *
			
 
				  * Also sets r12 to the interrupt vector for any interrupt that needs
			
 
				  * to be handled now by the host (0x500 for external interrupt), or zero.
			
 
				- * Modifies r0, r6, r7, r8.
			
 
				+ * Modifies all volatile registers (since it may call a C function).
			
 
				+ * This routine calls kvmppc_read_intr, a C function, if an external
			
 
				+ * interrupt is pending.
			
 
				  */
			
 
				 kvmppc_check_wake_reason:
			
 
				 	mfspr	r6, SPRN_SRR1
			
@@ -2310,8 +2387,7 @@ FTR_SECTION_ELSE
 
				 	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
			
 
				 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
			
 
				 	cmpwi	r6, 8			/* was it an external interrupt? */
			
 
				-	li	r12, BOOK3S_INTERRUPT_EXTERNAL
			
 
				-	beq	kvmppc_read_intr	/* if so, see what it was */
			
 
				+	beq	7f			/* if so, see what it was */
			
 
				 	li	r3, 0
			
 
				 	li	r12, 0
			
 
				 	cmpwi	r6, 6			/* was it the decrementer? */
			
@@ -2350,83 +2426,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
				 	li	r3, 1
			
 
				 	blr
			
 
				 
			
 
				-/*
			
 
				- * Determine what sort of external interrupt is pending (if any).
			
 
				- * Returns:
			
 
				- *	0 if no interrupt is pending
			
 
				- *	1 if an interrupt is pending that needs to be handled by the host
			
 
				- *	-1 if there was a guest wakeup IPI (which has now been cleared)
			
 
				- * Modifies r0, r6, r7, r8, returns value in r3.
			
 
				- */
			
 
				-kvmppc_read_intr:
			
 
				-	/* see if a host IPI is pending */
			
 
				-	li	r3, 1
			
 
				-	lbz	r0, HSTATE_HOST_IPI(r13)
			
 
				-	cmpwi	r0, 0
			
 
				-	bne	1f
			
 
				+	/* external interrupt - create a stack frame so we can call C */
			
 
				+7:	mflr	r0
			
 
				+	std	r0, PPC_LR_STKOFF(r1)
			
 
				+	stdu	r1, -PPC_MIN_STKFRM(r1)
			
 
				+	bl	kvmppc_read_intr
			
 
				+	nop
			
 
				+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
			
 
				+	cmpdi	r3, 1
			
 
				+	ble	1f
			
 
				 
			
 
				-	/* Now read the interrupt from the ICP */
			
 
				-	ld	r6, HSTATE_XICS_PHYS(r13)
			
 
				-	li	r7, XICS_XIRR
			
 
				-	cmpdi	r6, 0
			
 
				-	beq-	1f
			
 
				-	lwzcix	r0, r6, r7
			
 
				 	/*
			
 
				-	 * Save XIRR for later. Since we get in in reverse endian on LE
			
 
				-	 * systems, save it byte reversed and fetch it back in host endian.
			
 
				-	 */
			
 
				-	li	r3, HSTATE_SAVED_XIRR
			
 
				-	STWX_BE	r0, r3, r13
			
 
				-#ifdef __LITTLE_ENDIAN__
			
 
				-	lwz	r3, HSTATE_SAVED_XIRR(r13)
			
 
				-#else
			
 
				-	mr	r3, r0
			
 
				-#endif
			
 
				-	rlwinm.	r3, r3, 0, 0xffffff
			
 
				-	sync
			
 
				-	beq	1f			/* if nothing pending in the ICP */
			
 
				-
			
 
				-	/* We found something in the ICP...
			
 
				-	 *
			
 
				-	 * If it's not an IPI, stash it in the PACA and return to
			
 
				-	 * the host, we don't (yet) handle directing real external
			
 
				-	 * interrupts directly to the guest
			
 
				+	 * Return code of 2 means PCI passthrough interrupt, but
			
 
				+	 * we need to return back to host to complete handling the
			
 
				+	 * interrupt. Trap reason is expected in r12 by guest
			
 
				+	 * exit code.
			
 
				 	 */
			
 
				-	cmpwi	r3, XICS_IPI		/* if there is, is it an IPI? */
			
 
				-	bne	42f
			
 
				-
			
 
				-	/* It's an IPI, clear the MFRR and EOI it */
			
 
				-	li	r3, 0xff
			
 
				-	li	r8, XICS_MFRR
			
 
				-	stbcix	r3, r6, r8		/* clear the IPI */
			
 
				-	stwcix	r0, r6, r7		/* EOI it */
			
 
				-	sync
			
 
				-
			
 
				-	/* We need to re-check host IPI now in case it got set in the
			
 
				-	 * meantime. If it's clear, we bounce the interrupt to the
			
 
				-	 * guest
			
 
				-	 */
			
 
				-	lbz	r0, HSTATE_HOST_IPI(r13)
			
 
				-	cmpwi	r0, 0
			
 
				-	bne-	43f
			
 
				-
			
 
				-	/* OK, it's an IPI for us */
			
 
				-	li	r12, 0
			
 
				-	li	r3, -1
			
 
				-1:	blr
			
 
				-
			
 
				-42:	/* It's not an IPI and it's for the host. We saved a copy of XIRR in
			
 
				-	 * the PACA earlier, it will be picked up by the host ICP driver
			
 
				-	 */
			
 
				-	li	r3, 1
			
 
				-	b	1b
			
 
				-
			
 
				-43:	/* We raced with the host, we need to resend that IPI, bummer */
			
 
				-	li	r0, IPI_PRIORITY
			
 
				-	stbcix	r0, r6, r8		/* set the IPI */
			
 
				-	sync
			
 
				-	li	r3, 1
			
 
				-	b	1b
			
 
				+	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
			
 
				+1:
			
 
				+	ld	r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
			
 
				+	addi	r1, r1, PPC_MIN_STKFRM
			
 
				+	mtlr	r0
			
 
				+	blr
			
 
				 
			
 
				 /*
			
 
				  * Save away FP, VMX and VSX registers.
			
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
 
				 		return 0;
			
 
				 	}
			
 
				 
			
 
				+	/* Record which CPU this arrived on for passed-through interrupts */
			
 
				+	if (state->host_irq)
			
 
				+		state->intr_cpu = raw_smp_processor_id();
			
 
				+
			
 
				 	/* Attempt delivery */
			
 
				 	icp_deliver_irq(xics, NULL, irq);
			
 
				 
			
@@ -812,7 +816,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 
				 	return H_SUCCESS;
			
 
				 }
			
 
				 
			
 
				-static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
			
 
				+int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
			
 
				 {
			
 
				 	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
			
 
				 	struct kvmppc_icp *icp = vcpu->arch.icp;
			
@@ -841,6 +845,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
 
				 
			
 
				 	return H_SUCCESS;
			
 
				 }
			
 
				+EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
			
 
				 
			
 
				 int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
			
 
				 {
			
@@ -892,6 +897,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
 
				 
			
 
				 /* -- Initialisation code etc. -- */
			
 
				 
			
 
				+static void xics_debugfs_irqmap(struct seq_file *m,
			
 
				+				struct kvmppc_passthru_irqmap *pimap)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	if (!pimap)
			
 
				+		return;
			
 
				+	seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
			
 
				+				pimap->n_mapped);
			
 
				+	for (i = 0; i < pimap->n_mapped; i++)  {
			
 
				+		seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
			
 
				+			pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static int xics_debug_show(struct seq_file *m, void *private)
			
 
				 {
			
 
				 	struct kvmppc_xics *xics = m->private;
			
@@ -913,6 +933,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
 
				 	t_check_resend = 0;
			
 
				 	t_reject = 0;
			
 
				 
			
 
				+	xics_debugfs_irqmap(m, kvm->arch.pimap);
			
 
				+
			
 
				 	seq_printf(m, "=========\nICP state\n=========\n");
			
 
				 
			
 
				 	kvm_for_each_vcpu(i, vcpu, kvm) {
			
@@ -1252,6 +1274,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 
				 {
			
 
				 	struct kvmppc_xics *xics = kvm->arch.xics;
			
 
				 
			
 
				+	if (!xics)
			
 
				+		return -ENODEV;
			
 
				 	return ics_deliver_irq(xics, irq, level);
			
 
				 }
			
 
				 
			
@@ -1418,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
 
				 {
			
 
				 	return pin;
			
 
				 }
			
 
				+
			
 
				+void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
			
 
				+			    unsigned long host_irq)
			
 
				+{
			
 
				+	struct kvmppc_xics *xics = kvm->arch.xics;
			
 
				+	struct kvmppc_ics *ics;
			
 
				+	u16 idx;
			
 
				+
			
 
				+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
			
 
				+	if (!ics)
			
 
				+		return;
			
 
				+
			
 
				+	ics->irq_state[idx].host_irq = host_irq;
			
 
				+	ics->irq_state[idx].intr_cpu = -1;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
			
 
				+
			
 
				+void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
			
 
				+			    unsigned long host_irq)
			
 
				+{
			
 
				+	struct kvmppc_xics *xics = kvm->arch.xics;
			
 
				+	struct kvmppc_ics *ics;
			
 
				+	u16 idx;
			
 
				+
			
 
				+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
			
 
				+	if (!ics)
			
 
				+		return;
			
 
				+
			
 
				+	ics->irq_state[idx].host_irq = 0;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);
			
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -42,6 +42,8 @@ struct ics_irq_state {
 
				 	u8  lsi;		/* level-sensitive interrupt */
			
 
				 	u8  asserted; /* Only for LSI */
			
 
				 	u8  exists;
			
 
				+	int intr_cpu;
			
 
				+	u32 host_irq;
			
 
				 };
			
 
				 
			
 
				 /* Atomic ICP state, updated with a single compare & swap */
			
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -743,7 +743,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 
				 	char *virt;
			
 
				 	struct page **pages;
			
 
				 	struct tlbe_priv *privs[2] = {};
			
 
				-	u64 *g2h_bitmap = NULL;
			
 
				+	u64 *g2h_bitmap;
			
 
				 	size_t array_len;
			
 
				 	u32 sets;
			
 
				 	int num_pages, ret, i;
			
@@ -779,41 +779,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 
				 
			
 
				 	num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
			
 
				 		    cfg->array / PAGE_SIZE;
			
 
				-	pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
			
 
				+	pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
			
 
				 	if (!pages)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				 	ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
			
 
				 	if (ret < 0)
			
 
				-		goto err_pages;
			
 
				+		goto free_pages;
			
 
				 
			
 
				 	if (ret != num_pages) {
			
 
				 		num_pages = ret;
			
 
				 		ret = -EFAULT;
			
 
				-		goto err_put_page;
			
 
				+		goto put_pages;
			
 
				 	}
			
 
				 
			
 
				 	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
			
 
				 	if (!virt) {
			
 
				 		ret = -ENOMEM;
			
 
				-		goto err_put_page;
			
 
				+		goto put_pages;
			
 
				 	}
			
 
				 
			
 
				-	privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
			
 
				-			   GFP_KERNEL);
			
 
				-	privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
			
 
				-			   GFP_KERNEL);
			
 
				+	privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL);
			
 
				+	if (!privs[0]) {
			
 
				+		ret = -ENOMEM;
			
 
				+		goto put_pages;
			
 
				+	}
			
 
				 
			
 
				-	if (!privs[0] || !privs[1]) {
			
 
				+	privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL);
			
 
				+	if (!privs[1]) {
			
 
				 		ret = -ENOMEM;
			
 
				-		goto err_privs;
			
 
				+		goto free_privs_first;
			
 
				 	}
			
 
				 
			
 
				-	g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
			
 
				-	                     GFP_KERNEL);
			
 
				+	g2h_bitmap = kcalloc(params.tlb_sizes[1],
			
 
				+			     sizeof(*g2h_bitmap),
			
 
				+			     GFP_KERNEL);
			
 
				 	if (!g2h_bitmap) {
			
 
				 		ret = -ENOMEM;
			
 
				-		goto err_privs;
			
 
				+		goto free_privs_second;
			
 
				 	}
			
 
				 
			
 
				 	free_gtlb(vcpu_e500);
			
@@ -845,16 +848,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 
				 
			
 
				 	kvmppc_recalc_tlb1map_range(vcpu_e500);
			
 
				 	return 0;
			
 
				-
			
 
				-err_privs:
			
 
				-	kfree(privs[0]);
			
 
				+ free_privs_second:
			
 
				 	kfree(privs[1]);
			
 
				-
			
 
				-err_put_page:
			
 
				+ free_privs_first:
			
 
				+	kfree(privs[0]);
			
 
				+ put_pages:
			
 
				 	for (i = 0; i < num_pages; i++)
			
 
				 		put_page(pages[i]);
			
 
				-
			
 
				-err_pages:
			
 
				+ free_pages:
			
 
				 	kfree(pages);
			
 
				 	return ret;
			
 
				 }
			
@@ -904,11 +905,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
 
				 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
			
 
				 {
			
 
				 	struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
			
 
				-	int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
			
 
				-	int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
			
 
				 
			
 
				 	if (e500_mmu_host_init(vcpu_e500))
			
 
				-		goto err;
			
 
				+		goto free_vcpu;
			
 
				 
			
 
				 	vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
			
 
				 	vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
			
@@ -920,37 +919,39 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 
				 	vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
			
 
				 	vcpu_e500->gtlb_params[1].sets = 1;
			
 
				 
			
 
				-	vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
			
 
				+	vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE +
			
 
				+					     KVM_E500_TLB1_SIZE,
			
 
				+					     sizeof(*vcpu_e500->gtlb_arch),
			
 
				+					     GFP_KERNEL);
			
 
				 	if (!vcpu_e500->gtlb_arch)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				 	vcpu_e500->gtlb_offset[0] = 0;
			
 
				 	vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
			
 
				 
			
 
				-	vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
			
 
				-					  vcpu_e500->gtlb_params[0].entries,
			
 
				+	vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries,
			
 
				+					  sizeof(struct tlbe_ref),
			
 
				 					  GFP_KERNEL);
			
 
				 	if (!vcpu_e500->gtlb_priv[0])
			
 
				-		goto err;
			
 
				+		goto free_vcpu;
			
 
				 
			
 
				-	vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
			
 
				-					  vcpu_e500->gtlb_params[1].entries,
			
 
				+	vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries,
			
 
				+					  sizeof(struct tlbe_ref),
			
 
				 					  GFP_KERNEL);
			
 
				 	if (!vcpu_e500->gtlb_priv[1])
			
 
				-		goto err;
			
 
				+		goto free_vcpu;
			
 
				 
			
 
				-	vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
			
 
				-					  vcpu_e500->gtlb_params[1].entries,
			
 
				+	vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries,
			
 
				+					  sizeof(*vcpu_e500->g2h_tlb1_map),
			
 
				 					  GFP_KERNEL);
			
 
				 	if (!vcpu_e500->g2h_tlb1_map)
			
 
				-		goto err;
			
 
				+		goto free_vcpu;
			
 
				 
			
 
				 	vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
			
 
				 
			
 
				 	kvmppc_recalc_tlb1map_range(vcpu_e500);
			
 
				 	return 0;
			
 
				-
			
 
				-err:
			
 
				+ free_vcpu:
			
 
				 	free_gtlb(vcpu_e500);
			
 
				 	return -1;
			
 
				 }
			
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -27,6 +27,8 @@
 
				 #include <linux/slab.h>
			
 
				 #include <linux/file.h>
			
 
				 #include <linux/module.h>
			
 
				+#include <linux/irqbypass.h>
			
 
				+#include <linux/kvm_irqfd.h>
			
 
				 #include <asm/cputable.h>
			
 
				 #include <asm/uaccess.h>
			
 
				 #include <asm/kvm_ppc.h>
			
@@ -739,6 +741,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * irq_bypass_add_producer and irq_bypass_del_producer are only
			
 
				+ * useful if the architecture supports PCI passthrough.
			
 
				+ * irq_bypass_stop and irq_bypass_start are not needed and so
			
 
				+ * kvm_ops are not defined for them.
			
 
				+ */
			
 
				+bool kvm_arch_has_irq_bypass(void)
			
 
				+{
			
 
				+	return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
			
 
				+		(kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
			
 
				+}
			
 
				+
			
 
				+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
			
 
				+				     struct irq_bypass_producer *prod)
			
 
				+{
			
 
				+	struct kvm_kernel_irqfd *irqfd =
			
 
				+		container_of(cons, struct kvm_kernel_irqfd, consumer);
			
 
				+	struct kvm *kvm = irqfd->kvm;
			
 
				+
			
 
				+	if (kvm->arch.kvm_ops->irq_bypass_add_producer)
			
 
				+		return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
			
 
				+				      struct irq_bypass_producer *prod)
			
 
				+{
			
 
				+	struct kvm_kernel_irqfd *irqfd =
			
 
				+		container_of(cons, struct kvm_kernel_irqfd, consumer);
			
 
				+	struct kvm *kvm = irqfd->kvm;
			
 
				+
			
 
				+	if (kvm->arch.kvm_ops->irq_bypass_del_producer)
			
 
				+		kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
			
 
				+}
			
 
				+
			
 
				 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
			
 
				                                       struct kvm_run *run)
			
 
				 {
			
@@ -1167,6 +1205,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
				 	return r;
			
 
				 }
			
 
				 
			
 
				+bool kvm_arch_intc_initialized(struct kvm *kvm)
			
 
				+{
			
 
				+#ifdef CONFIG_KVM_MPIC
			
 
				+	if (kvm->arch.mpic)
			
 
				+		return true;
			
 
				+#endif
			
 
				+#ifdef CONFIG_KVM_XICS
			
 
				+	if (kvm->arch.xics)
			
 
				+		return true;
			
 
				+#endif
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
			
 
				                                     struct kvm_mp_state *mp_state)
			
 
				 {
			
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
 
				 		   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
			
 
				 );
			
 
				 
			
 
				+TRACE_EVENT(kvmppc_vcore_wakeup,
			
 
				+	TP_PROTO(int do_sleep, __u64 ns),
			
 
				+
			
 
				+	TP_ARGS(do_sleep, ns),
			
 
				+
			
 
				+	TP_STRUCT__entry(
			
 
				+		__field(__u64,  ns)
			
 
				+		__field(int,    waited)
			
 
				+		__field(pid_t,  tgid)
			
 
				+	),
			
 
				+
			
 
				+	TP_fast_assign(
			
 
				+		__entry->ns     = ns;
			
 
				+		__entry->waited = do_sleep;
			
 
				+		__entry->tgid   = current->tgid;
			
 
				+	),
			
 
				+
			
 
				+	TP_printk("%s time %lld ns, tgid=%d",
			
 
				+		__entry->waited ? "wait" : "poll",
			
 
				+		__entry->ns, __entry->tgid)
			
 
				+);
			
 
				+
			
 
				 TRACE_EVENT(kvmppc_run_vcpu_enter,
			
 
				 	TP_PROTO(struct kvm_vcpu *vcpu),
			
 
				 
			
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid,
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static inline int __hpte_actual_psize(unsigned int lp, int psize)
			
 
				-{
			
 
				-	int i, shift;
			
 
				-	unsigned int mask;
			
 
				-
			
 
				-	/* start from 1 ignoring MMU_PAGE_4K */
			
 
				-	for (i = 1; i < MMU_PAGE_COUNT; i++) {
			
 
				-
			
 
				-		/* invalid penc */
			
 
				-		if (mmu_psize_defs[psize].penc[i] == -1)
			
 
				-			continue;
			
 
				-		/*
			
 
				-		 * encoding bits per actual page size
			
 
				-		 *        PTE LP     actual page size
			
 
				-		 *    rrrr rrrz		>=8KB
			
 
				-		 *    rrrr rrzz		>=16KB
			
 
				-		 *    rrrr rzzz		>=32KB
			
 
				-		 *    rrrr zzzz		>=64KB
			
 
				-		 * .......
			
 
				-		 */
			
 
				-		shift = mmu_psize_defs[i].shift - LP_SHIFT;
			
 
				-		if (shift > LP_BITS)
			
 
				-			shift = LP_BITS;
			
 
				-		mask = (1 << shift) - 1;
			
 
				-		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			
 
				-			return i;
			
 
				-	}
			
 
				-	return -1;
			
 
				-}
			
 
				-
			
 
				 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			
 
				 			int *psize, int *apsize, int *ssize, unsigned long *vpn)
			
 
				 {
			
@@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 
				 		size   = MMU_PAGE_4K;
			
 
				 		a_size = MMU_PAGE_4K;
			
 
				 	} else {
			
 
				-		for (size = 0; size < MMU_PAGE_COUNT; size++) {
			
 
				-
			
 
				-			/* valid entries have a shift value */
			
 
				-			if (!mmu_psize_defs[size].shift)
			
 
				-				continue;
			
 
				-
			
 
				-			a_size = __hpte_actual_psize(lp, size);
			
 
				-			if (a_size != -1)
			
 
				-				break;
			
 
				-		}
			
 
				+		size = hpte_page_sizes[lp] & 0xf;
			
 
				+		a_size = hpte_page_sizes[lp] >> 4;
			
 
				 	}
			
 
				 	/* This works for all page sizes, and for 256M and 1T segments */
			
 
				 	if (cpu_has_feature(CPU_FTR_ARCH_300))
			
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -93,6 +93,9 @@ static unsigned long _SDR1;
 
				 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
			
 
				 EXPORT_SYMBOL_GPL(mmu_psize_defs);
			
 
				 
			
 
				+u8 hpte_page_sizes[1 << LP_BITS];
			
 
				+EXPORT_SYMBOL_GPL(hpte_page_sizes);
			
 
				+
			
 
				 struct hash_pte *htab_address;
			
 
				 unsigned long htab_size_bytes;
			
 
				 unsigned long htab_hash_mask;
			
@@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void)
 
				 #endif /* CONFIG_HUGETLB_PAGE */
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Fill in the hpte_page_sizes[] array.
			
 
				+ * We go through the mmu_psize_defs[] array looking for all the
			
 
				+ * supported base/actual page size combinations.  Each combination
			
 
				+ * has a unique pagesize encoding (penc) value in the low bits of
			
 
				+ * the LP field of the HPTE.  For actual page sizes less than 1MB,
			
 
				+ * some of the upper LP bits are used for RPN bits, meaning that
			
 
				+ * we need to fill in several entries in hpte_page_sizes[].
			
 
				+ *
			
 
				+ * In diagrammatic form, with r = RPN bits and z = page size bits:
			
 
				+ *        PTE LP     actual page size
			
 
				+ *    rrrr rrrz		>=8KB
			
 
				+ *    rrrr rrzz		>=16KB
			
 
				+ *    rrrr rzzz		>=32KB
			
 
				+ *    rrrr zzzz		>=64KB
			
 
				+ *    ...
			
 
				+ *
			
 
				+ * The zzzz bits are implementation-specific but are chosen so that
			
 
				+ * no encoding for a larger page size uses the same value in its
			
 
				+ * low-order N bits as the encoding for the 2^(12+N) byte page size
			
 
				+ * (if it exists).
			
 
				+ */
			
 
				+static void init_hpte_page_sizes(void)
			
 
				+{
			
 
				+	long int ap, bp;
			
 
				+	long int shift, penc;
			
 
				+
			
 
				+	for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
			
 
				+		if (!mmu_psize_defs[bp].shift)
			
 
				+			continue;	/* not a supported page size */
			
 
				+		for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
			
 
				+			penc = mmu_psize_defs[bp].penc[ap];
			
 
				+			if (penc == -1)
			
 
				+				continue;
			
 
				+			shift = mmu_psize_defs[ap].shift - LP_SHIFT;
			
 
				+			if (shift <= 0)
			
 
				+				continue;	/* should never happen */
			
 
				+			/*
			
 
				+			 * For page sizes less than 1MB, this loop
			
 
				+			 * replicates the entry for all possible values
			
 
				+			 * of the rrrr bits.
			
 
				+			 */
			
 
				+			while (penc < (1 << LP_BITS)) {
			
 
				+				hpte_page_sizes[penc] = (ap << 4) | bp;
			
 
				+				penc += 1 << shift;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static void __init htab_init_page_sizes(void)
			
 
				 {
			
 
				+	init_hpte_page_sizes();
			
 
				+
			
 
				 	if (!debug_pagealloc_enabled()) {
			
 
				 		/*
			
 
				 		 * Pick a size for the linear mapping. Currently, we only
			
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -208,6 +208,7 @@ OPAL_CALL(opal_pci_config_write_byte,		OPAL_PCI_CONFIG_WRITE_BYTE);
 
				 OPAL_CALL(opal_pci_config_write_half_word,	OPAL_PCI_CONFIG_WRITE_HALF_WORD);
			
 
				 OPAL_CALL(opal_pci_config_write_word,		OPAL_PCI_CONFIG_WRITE_WORD);
			
 
				 OPAL_CALL(opal_set_xive,			OPAL_SET_XIVE);
			
 
				+OPAL_CALL_REAL(opal_rm_set_xive,		OPAL_SET_XIVE);
			
 
				 OPAL_CALL(opal_get_xive,			OPAL_GET_XIVE);
			
 
				 OPAL_CALL(opal_register_exception_handler,	OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
			
 
				 OPAL_CALL(opal_pci_eeh_freeze_status,		OPAL_PCI_EEH_FREEZE_STATUS);
			
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2710,15 +2710,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 
				 }
			
 
				 
			
 
				 #ifdef CONFIG_PCI_MSI
			
 
				-static void pnv_ioda2_msi_eoi(struct irq_data *d)
			
 
				+int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
			
 
				 {
			
 
				-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
			
 
				-	struct irq_chip *chip = irq_data_get_irq_chip(d);
			
 
				 	struct pnv_phb *phb = container_of(chip, struct pnv_phb,
			
 
				 					   ioda.irq_chip);
			
 
				+
			
 
				+	return opal_pci_msi_eoi(phb->opal_id, hw_irq);
			
 
				+}
			
 
				+
			
 
				+static void pnv_ioda2_msi_eoi(struct irq_data *d)
			
 
				+{
			
 
				 	int64_t rc;
			
 
				+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
			
 
				+	struct irq_chip *chip = irq_data_get_irq_chip(d);
			
 
				 
			
 
				-	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
			
 
				+	rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
			
 
				 	WARN_ON_ONCE(rc);
			
 
				 
			
 
				 	icp_native_eoi(d);
			
@@ -2748,6 +2754,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
 
				 	irq_set_chip(virq, &phb->ioda.irq_chip);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Returns true iff chip is something that we could call
			
 
				+ * pnv_opal_pci_msi_eoi for.
			
 
				+ */
			
 
				+bool is_pnv_opal_msi(struct irq_chip *chip)
			
 
				+{
			
 
				+	return chip->irq_eoi == pnv_ioda2_msi_eoi;
			
 
				+}
			
 
				+EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
			
 
				+
			
 
				 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
			
 
				 				  unsigned int hwirq, unsigned int virq,
			
 
				 				  unsigned int is_64, struct msi_msg *msg)
			
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -245,72 +245,72 @@ struct sie_page {
 
				 } __packed;
			
 
				 
			
 
				 struct kvm_vcpu_stat {
			
 
				-	u32 exit_userspace;
			
 
				-	u32 exit_null;
			
 
				-	u32 exit_external_request;
			
 
				-	u32 exit_external_interrupt;
			
 
				-	u32 exit_stop_request;
			
 
				-	u32 exit_validity;
			
 
				-	u32 exit_instruction;
			
 
				-	u32 exit_pei;
			
 
				-	u32 halt_successful_poll;
			
 
				-	u32 halt_attempted_poll;
			
 
				-	u32 halt_poll_invalid;
			
 
				-	u32 halt_wakeup;
			
 
				-	u32 instruction_lctl;
			
 
				-	u32 instruction_lctlg;
			
 
				-	u32 instruction_stctl;
			
 
				-	u32 instruction_stctg;
			
 
				-	u32 exit_program_interruption;
			
 
				-	u32 exit_instr_and_program;
			
 
				-	u32 exit_operation_exception;
			
 
				-	u32 deliver_external_call;
			
 
				-	u32 deliver_emergency_signal;
			
 
				-	u32 deliver_service_signal;
			
 
				-	u32 deliver_virtio_interrupt;
			
 
				-	u32 deliver_stop_signal;
			
 
				-	u32 deliver_prefix_signal;
			
 
				-	u32 deliver_restart_signal;
			
 
				-	u32 deliver_program_int;
			
 
				-	u32 deliver_io_int;
			
 
				-	u32 exit_wait_state;
			
 
				-	u32 instruction_pfmf;
			
 
				-	u32 instruction_stidp;
			
 
				-	u32 instruction_spx;
			
 
				-	u32 instruction_stpx;
			
 
				-	u32 instruction_stap;
			
 
				-	u32 instruction_storage_key;
			
 
				-	u32 instruction_ipte_interlock;
			
 
				-	u32 instruction_stsch;
			
 
				-	u32 instruction_chsc;
			
 
				-	u32 instruction_stsi;
			
 
				-	u32 instruction_stfl;
			
 
				-	u32 instruction_tprot;
			
 
				-	u32 instruction_sie;
			
 
				-	u32 instruction_essa;
			
 
				-	u32 instruction_sthyi;
			
 
				-	u32 instruction_sigp_sense;
			
 
				-	u32 instruction_sigp_sense_running;
			
 
				-	u32 instruction_sigp_external_call;
			
 
				-	u32 instruction_sigp_emergency;
			
 
				-	u32 instruction_sigp_cond_emergency;
			
 
				-	u32 instruction_sigp_start;
			
 
				-	u32 instruction_sigp_stop;
			
 
				-	u32 instruction_sigp_stop_store_status;
			
 
				-	u32 instruction_sigp_store_status;
			
 
				-	u32 instruction_sigp_store_adtl_status;
			
 
				-	u32 instruction_sigp_arch;
			
 
				-	u32 instruction_sigp_prefix;
			
 
				-	u32 instruction_sigp_restart;
			
 
				-	u32 instruction_sigp_init_cpu_reset;
			
 
				-	u32 instruction_sigp_cpu_reset;
			
 
				-	u32 instruction_sigp_unknown;
			
 
				-	u32 diagnose_10;
			
 
				-	u32 diagnose_44;
			
 
				-	u32 diagnose_9c;
			
 
				-	u32 diagnose_258;
			
 
				-	u32 diagnose_308;
			
 
				-	u32 diagnose_500;
			
 
				+	u64 exit_userspace;
			
 
				+	u64 exit_null;
			
 
				+	u64 exit_external_request;
			
 
				+	u64 exit_external_interrupt;
			
 
				+	u64 exit_stop_request;
			
 
				+	u64 exit_validity;
			
 
				+	u64 exit_instruction;
			
 
				+	u64 exit_pei;
			
 
				+	u64 halt_successful_poll;
			
 
				+	u64 halt_attempted_poll;
			
 
				+	u64 halt_poll_invalid;
			
 
				+	u64 halt_wakeup;
			
 
				+	u64 instruction_lctl;
			
 
				+	u64 instruction_lctlg;
			
 
				+	u64 instruction_stctl;
			
 
				+	u64 instruction_stctg;
			
 
				+	u64 exit_program_interruption;
			
 
				+	u64 exit_instr_and_program;
			
 
				+	u64 exit_operation_exception;
			
 
				+	u64 deliver_external_call;
			
 
				+	u64 deliver_emergency_signal;
			
 
				+	u64 deliver_service_signal;
			
 
				+	u64 deliver_virtio_interrupt;
			
 
				+	u64 deliver_stop_signal;
			
 
				+	u64 deliver_prefix_signal;
			
 
				+	u64 deliver_restart_signal;
			
 
				+	u64 deliver_program_int;
			
 
				+	u64 deliver_io_int;
			
 
				+	u64 exit_wait_state;
			
 
				+	u64 instruction_pfmf;
			
 
				+	u64 instruction_stidp;
			
 
				+	u64 instruction_spx;
			
 
				+	u64 instruction_stpx;
			
 
				+	u64 instruction_stap;
			
 
				+	u64 instruction_storage_key;
			
 
				+	u64 instruction_ipte_interlock;
			
 
				+	u64 instruction_stsch;
			
 
				+	u64 instruction_chsc;
			
 
				+	u64 instruction_stsi;
			
 
				+	u64 instruction_stfl;
			
 
				+	u64 instruction_tprot;
			
 
				+	u64 instruction_sie;
			
 
				+	u64 instruction_essa;
			
 
				+	u64 instruction_sthyi;
			
 
				+	u64 instruction_sigp_sense;
			
 
				+	u64 instruction_sigp_sense_running;
			
 
				+	u64 instruction_sigp_external_call;
			
 
				+	u64 instruction_sigp_emergency;
			
 
				+	u64 instruction_sigp_cond_emergency;
			
 
				+	u64 instruction_sigp_start;
			
 
				+	u64 instruction_sigp_stop;
			
 
				+	u64 instruction_sigp_stop_store_status;
			
 
				+	u64 instruction_sigp_store_status;
			
 
				+	u64 instruction_sigp_store_adtl_status;
			
 
				+	u64 instruction_sigp_arch;
			
 
				+	u64 instruction_sigp_prefix;
			
 
				+	u64 instruction_sigp_restart;
			
 
				+	u64 instruction_sigp_init_cpu_reset;
			
 
				+	u64 instruction_sigp_cpu_reset;
			
 
				+	u64 instruction_sigp_unknown;
			
 
				+	u64 diagnose_10;
			
 
				+	u64 diagnose_44;
			
 
				+	u64 diagnose_9c;
			
 
				+	u64 diagnose_258;
			
 
				+	u64 diagnose_308;
			
 
				+	u64 diagnose_500;
			
 
				 };
			
 
				 
			
 
				 #define PGM_OPERATION			0x01
			
@@ -577,7 +577,7 @@ struct kvm_vcpu_arch {
 
				 };
			
 
				 
			
 
				 struct kvm_vm_stat {
			
 
				-	u32 remote_tlb_flush;
			
 
				+	ulong remote_tlb_flush;
			
 
				 };
			
 
				 
			
 
				 struct kvm_arch_memory_slot {
			
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -792,45 +792,45 @@ struct kvm_arch {
 
				 };
			
 
				 
			
 
				 struct kvm_vm_stat {
			
 
				-	u32 mmu_shadow_zapped;
			
 
				-	u32 mmu_pte_write;
			
 
				-	u32 mmu_pte_updated;
			
 
				-	u32 mmu_pde_zapped;
			
 
				-	u32 mmu_flooded;
			
 
				-	u32 mmu_recycled;
			
 
				-	u32 mmu_cache_miss;
			
 
				-	u32 mmu_unsync;
			
 
				-	u32 remote_tlb_flush;
			
 
				-	u32 lpages;
			
 
				+	ulong mmu_shadow_zapped;
			
 
				+	ulong mmu_pte_write;
			
 
				+	ulong mmu_pte_updated;
			
 
				+	ulong mmu_pde_zapped;
			
 
				+	ulong mmu_flooded;
			
 
				+	ulong mmu_recycled;
			
 
				+	ulong mmu_cache_miss;
			
 
				+	ulong mmu_unsync;
			
 
				+	ulong remote_tlb_flush;
			
 
				+	ulong lpages;
			
 
				 };
			
 
				 
			
 
				 struct kvm_vcpu_stat {
			
 
				-	u32 pf_fixed;
			
 
				-	u32 pf_guest;
			
 
				-	u32 tlb_flush;
			
 
				-	u32 invlpg;
			
 
				-
			
 
				-	u32 exits;
			
 
				-	u32 io_exits;
			
 
				-	u32 mmio_exits;
			
 
				-	u32 signal_exits;
			
 
				-	u32 irq_window_exits;
			
 
				-	u32 nmi_window_exits;
			
 
				-	u32 halt_exits;
			
 
				-	u32 halt_successful_poll;
			
 
				-	u32 halt_attempted_poll;
			
 
				-	u32 halt_poll_invalid;
			
 
				-	u32 halt_wakeup;
			
 
				-	u32 request_irq_exits;
			
 
				-	u32 irq_exits;
			
 
				-	u32 host_state_reload;
			
 
				-	u32 efer_reload;
			
 
				-	u32 fpu_reload;
			
 
				-	u32 insn_emulation;
			
 
				-	u32 insn_emulation_fail;
			
 
				-	u32 hypercalls;
			
 
				-	u32 irq_injections;
			
 
				-	u32 nmi_injections;
			
 
				+	u64 pf_fixed;
			
 
				+	u64 pf_guest;
			
 
				+	u64 tlb_flush;
			
 
				+	u64 invlpg;
			
 
				+
			
 
				+	u64 exits;
			
 
				+	u64 io_exits;
			
 
				+	u64 mmio_exits;
			
 
				+	u64 signal_exits;
			
 
				+	u64 irq_window_exits;
			
 
				+	u64 nmi_window_exits;
			
 
				+	u64 halt_exits;
			
 
				+	u64 halt_successful_poll;
			
 
				+	u64 halt_attempted_poll;
			
 
				+	u64 halt_poll_invalid;
			
 
				+	u64 halt_wakeup;
			
 
				+	u64 request_irq_exits;
			
 
				+	u64 irq_exits;
			
 
				+	u64 host_state_reload;
			
 
				+	u64 efer_reload;
			
 
				+	u64 fpu_reload;
			
 
				+	u64 insn_emulation;
			
 
				+	u64 insn_emulation_fail;
			
 
				+	u64 hypercalls;
			
 
				+	u64 irq_injections;
			
 
				+	u64 nmi_injections;
			
 
				 };
			
 
				 
			
 
				 struct x86_instruction_info;
			
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3619,7 +3619,7 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
 
				 {
			
 
				 	struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
			
 
				 
			
 
				-	*val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
			
 
				+	*val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -3649,7 +3649,7 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
 
				 	*val = 0;
			
 
				 
			
 
				 	kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
			
 
				-		*val += *(u32 *)((void *)vcpu + stat_data->offset);
			
 
				+		*val += *(u64 *)((void *)vcpu + stat_data->offset);
			
 
				 
			
 
				 	return 0;
			
 
				 }