Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

- Better machine check handling for HV KVM
- Ability to support guests with threads=2, 4 or 8 on POWER9
- Fix for a race that could cause delayed recognition of signals
- Fix for a bug where POWER9 guests could sleep with interrupts
  pending
Paolo Bonzini, 8 years ago
parent commit 8a53e7e572

+ 37 - 0
Documentation/virtual/kvm/api.txt

@@ -4131,6 +4131,34 @@ Parameters: none
 Allow use of adapter-interruption suppression.
 Returns: 0 on success; -EBUSY if a VCPU has already been created.
 
+7.11 KVM_CAP_PPC_SMT
+
+Architectures: ppc
+Parameters: vsmt_mode, flags
+
+Enabling this capability on a VM provides userspace with a way to set
+the desired virtual SMT mode (i.e. the number of virtual CPUs per
+virtual core).  The virtual SMT mode, vsmt_mode, must be a power of 2
+between 1 and 8.  On POWER8, vsmt_mode must also be no greater than
+the number of threads per subcore for the host.  Currently flags must
+be 0.  A successful call to enable this capability will result in
+vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is
+subsequently queried for the VM.  This capability is only supported by
+HV KVM, and can only be set before any VCPUs have been created.
+The KVM_CAP_PPC_SMT_POSSIBLE capability indicates which virtual SMT
+modes are available.
+
+7.12 KVM_CAP_PPC_FWNMI
+
+Architectures: ppc
+Parameters: none
+
+With this capability, a machine check exception in the guest address
+space will cause KVM to exit the guest with the KVM_EXIT_NMI exit
+reason.  This enables QEMU to build an error log and branch to the
+guest kernel's registered machine check handling routine.  Without
+this capability, KVM will branch to the guest's 0x200 interrupt vector.
+
 8. Other capabilities.
 ----------------------
 
@@ -4292,3 +4320,12 @@ Currently the following bits are defined for the device_irq_level bitmap:
 Future versions of kvm may implement additional events. These will get
 indicated by returning a higher number from KVM_CHECK_EXTENSION and will be
 listed above.
+
+8.10 KVM_CAP_PPC_SMT_POSSIBLE
+
+Architectures: ppc
+
+Querying this capability returns a bitmap indicating the possible
+virtual SMT modes that can be set using KVM_CAP_PPC_SMT.  If bit N
+(counting from the right) is set, then a virtual SMT mode of 2^N is
+available.
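
As a usage sketch (not part of this commit): userspace enables the new
capability through the generic KVM_ENABLE_CAP ioctl on the VM file
descriptor, with vsmt_mode in args[0] and flags in args[1].  The helper
name and vm_fd below are illustrative assumptions, not code from the
patch.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Request a virtual SMT mode of 4 for the VM.  Per the text above,
 * this must be done before any vCPUs are created. */
static int set_vsmt_mode4(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_PPC_SMT,
		.args = { 4, 0 },	/* args[0] = vsmt_mode, args[1] = flags (must be 0) */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}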

+ 0 - 1
arch/powerpc/include/asm/kvm_book3s.h

@@ -86,7 +86,6 @@ struct kvmppc_vcore {
 	u16 last_cpu;
 	u8 vcore_state;
 	u8 in_guest;
-	struct kvmppc_vcore *master_vcore;
 	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
 	struct list_head preempt_list;
 	spinlock_t lock;

+ 1 - 1
arch/powerpc/include/asm/kvm_book3s_asm.h

@@ -81,7 +81,7 @@ struct kvm_split_mode {
 	u8		subcore_size;
 	u8		do_nap;
 	u8		napped[MAX_SMT_THREADS];
-	struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+	struct kvmppc_vcore *vc[MAX_SUBCORES];
 };
 
 /*

+ 8 - 1
arch/powerpc/include/asm/kvm_host.h

@@ -35,6 +35,7 @@
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <asm/hvcall.h>
+#include <asm/mce.h>
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
@@ -267,6 +268,8 @@ struct kvm_resize_hpt;
 
 struct kvm_arch {
 	unsigned int lpid;
+	unsigned int smt_mode;		/* # vcpus per virtual core */
+	unsigned int emul_smt_mode;	/* emulated SMT mode, on P9 */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	unsigned int tlb_sets;
 	struct kvm_hpt_info hpt;
@@ -285,6 +288,7 @@ struct kvm_arch {
 	cpumask_t need_tlb_flush;
 	cpumask_t cpu_in_guest;
 	u8 radix;
+	u8 fwnmi_enabled;
 	pgd_t *pgtable;
 	u64 process_table;
 	struct dentry *debugfs_dir;
@@ -566,6 +570,7 @@ struct kvm_vcpu_arch {
 	ulong wort;
 	ulong tid;
 	ulong psscr;
+	ulong hfscr;
 	ulong shadow_srr1;
 #endif
 	u32 vrsave; /* also USPRG0 */
@@ -579,7 +584,7 @@ struct kvm_vcpu_arch {
 	ulong mcsrr0;
 	ulong mcsrr1;
 	ulong mcsr;
-	u32 dec;
+	ulong dec;
 #ifdef CONFIG_BOOKE
 	u32 decar;
 #endif
@@ -710,6 +715,7 @@ struct kvm_vcpu_arch {
 	unsigned long pending_exceptions;
 	u8 ceded;
 	u8 prodded;
+	u8 doorbell_request;
 	u32 last_inst;
 
 	struct swait_queue_head *wqp;
@@ -722,6 +728,7 @@ struct kvm_vcpu_arch {
 	int prev_cpu;
 	bool timer_running;
 	wait_queue_head_t cpu_run;
+	struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
 
 	struct kvm_vcpu_arch_shared *shared;
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)

+ 2 - 0
arch/powerpc/include/asm/kvm_ppc.h

@@ -315,6 +315,8 @@ struct kvmppc_ops {
 					struct irq_bypass_producer *);
 	int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
 	int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+	int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
+			    unsigned long flags);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;

+ 2 - 0
arch/powerpc/include/asm/ppc-opcode.h

@@ -103,6 +103,8 @@
 #define OP_31_XOP_STBUX     247
 #define OP_31_XOP_LHZX      279
 #define OP_31_XOP_LHZUX     311
+#define OP_31_XOP_MSGSNDP   142
+#define OP_31_XOP_MSGCLRP   174
 #define OP_31_XOP_MFSPR     339
 #define OP_31_XOP_LWAX      341
 #define OP_31_XOP_LHAX      343

+ 6 - 0
arch/powerpc/include/uapi/asm/kvm.h

@@ -60,6 +60,12 @@ struct kvm_regs {
 
 #define KVM_SREGS_E_FSL_PIDn	(1 << 0) /* PID1/PID2 */
 
+/* flags for kvm_run.flags */
+#define KVM_RUN_PPC_NMI_DISP_MASK		(3 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_FULLY_RECOV	(1 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV	(2 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_NOT_RECOV	(3 << 0)
+
 /*
  * Feature bits indicate which sections of the sregs struct are valid,
  * both in KVM_GET_SREGS and KVM_SET_SREGS.  On KVM_SET_SREGS, registers

+ 3 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -485,6 +485,7 @@ int main(void)
 	OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
 	OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
 	OFFSET(KVM_RADIX, kvm, arch.radix);
+	OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
 	OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
 	OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
 	OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
@@ -513,6 +514,7 @@ int main(void)
 	OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
 	OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
 	OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+	OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
 	OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
 	OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
 	OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
@@ -542,6 +544,7 @@ int main(void)
 	OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
 	OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
 	OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
+	OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
 	OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
 	OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
 	OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);

+ 1 - 0
arch/powerpc/kernel/mce.c

@@ -405,6 +405,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
 		break;
 	}
 }
+EXPORT_SYMBOL_GPL(machine_check_print_event_info);
 
 uint64_t get_mce_fault_addr(struct machine_check_event *evt)
 {

+ 450 - 112
arch/powerpc/kvm/book3s_hv.c

@@ -46,6 +46,8 @@
 #include <linux/of.h>
 
 #include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+#include <asm/disassemble.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	unsigned long stolen;
 	unsigned long core_stolen;
 	u64 now;
+	unsigned long flags;
 
 	dt = vcpu->arch.dtl_ptr;
 	vpa = vcpu->arch.vpa.pinned_addr;
@@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	core_stolen = vcore_stolen_time(vc, now);
 	stolen = core_stolen - vcpu->arch.stolen_logged;
 	vcpu->arch.stolen_logged = core_stolen;
-	spin_lock_irq(&vcpu->arch.tbacct_lock);
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	stolen += vcpu->arch.busy_stolen;
 	vcpu->arch.busy_stolen = 0;
-	spin_unlock_irq(&vcpu->arch.tbacct_lock);
+	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 	if (!dt || !vpa)
 		return;
 	memset(dt, 0, sizeof(struct dtl_entry));
@@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	vcpu->arch.dtl.dirty = true;
 }
 
+/* See if there is a doorbell interrupt pending for a vcpu */
+static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+{
+	int thr;
+	struct kvmppc_vcore *vc;
+
+	if (vcpu->arch.doorbell_request)
+		return true;
+	/*
+	 * Ensure that the read of vcore->dpdes comes after the read
+	 * of vcpu->doorbell_request.  This barrier matches the
+	 * lwsync in book3s_hv_rmhandlers.S just before the
+	 * fast_guest_return label.
+	 */
+	smp_rmb();
+	vc = vcpu->arch.vcore;
+	thr = vcpu->vcpu_id - vc->first_vcpuid;
+	return !!(vc->dpdes & (1 << thr));
+}
+
 static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
@@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
 	}
 }
 
+static void do_nothing(void *x)
+{
+}
+
+static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+{
+	int thr, cpu, pcpu, nthreads;
+	struct kvm_vcpu *v;
+	unsigned long dpdes;
+
+	nthreads = vcpu->kvm->arch.emul_smt_mode;
+	dpdes = 0;
+	cpu = vcpu->vcpu_id & ~(nthreads - 1);
+	for (thr = 0; thr < nthreads; ++thr, ++cpu) {
+		v = kvmppc_find_vcpu(vcpu->kvm, cpu);
+		if (!v)
+			continue;
+		/*
+		 * If the vcpu is currently running on a physical cpu thread,
+		 * interrupt it in order to pull it out of the guest briefly,
+		 * which will update its vcore->dpdes value.
+		 */
+		pcpu = READ_ONCE(v->cpu);
+		if (pcpu >= 0)
+			smp_call_function_single(pcpu, do_nothing, NULL, 1);
+		if (kvmppc_doorbell_pending(v))
+			dpdes |= 1 << thr;
+	}
+	return dpdes;
+}
+
+/*
+ * On POWER9, emulate doorbell-related instructions in order to
+ * give the guest the illusion of running on a multi-threaded core.
+ * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+ * and mfspr DPDES.
+ */
+static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+{
+	u32 inst, rb, thr;
+	unsigned long arg;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tvcpu;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return EMULATE_FAIL;
+	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
+		return RESUME_GUEST;
+	if (get_op(inst) != 31)
+		return EMULATE_FAIL;
+	rb = get_rb(inst);
+	thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+	switch (get_xop(inst)) {
+	case OP_31_XOP_MSGSNDP:
+		arg = kvmppc_get_gpr(vcpu, rb);
+		if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+			break;
+		arg &= 0x3f;
+		if (arg >= kvm->arch.emul_smt_mode)
+			break;
+		tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
+		if (!tvcpu)
+			break;
+		if (!tvcpu->arch.doorbell_request) {
+			tvcpu->arch.doorbell_request = 1;
+			kvmppc_fast_vcpu_kick_hv(tvcpu);
+		}
+		break;
+	case OP_31_XOP_MSGCLRP:
+		arg = kvmppc_get_gpr(vcpu, rb);
+		if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+			break;
+		vcpu->arch.vcore->dpdes = 0;
+		vcpu->arch.doorbell_request = 0;
+		break;
+	case OP_31_XOP_MFSPR:
+		switch (get_sprn(inst)) {
+		case SPRN_TIR:
+			arg = thr;
+			break;
+		case SPRN_DPDES:
+			arg = kvmppc_read_dpdes(vcpu);
+			break;
+		default:
+			return EMULATE_FAIL;
+		}
+		kvmppc_set_gpr(vcpu, get_rt(inst), arg);
+		break;
+	default:
+		return EMULATE_FAIL;
+	}
+	kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+	return RESUME_GUEST;
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				 struct task_struct *tsk)
 {
@@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		break;
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
-		/*
-		 * Deliver a machine check interrupt to the guest.
-		 * We have to do this, even if the host has handled the
-		 * machine check, because machine checks use SRR0/1 and
-		 * the interrupt might have trashed guest state in them.
-		 */
-		kvmppc_book3s_queue_irqprio(vcpu,
-					    BOOK3S_INTERRUPT_MACHINE_CHECK);
-		r = RESUME_GUEST;
+		/* Exit the guest with KVM_EXIT_NMI as the exit reason */
+		run->exit_reason = KVM_EXIT_NMI;
+		run->hw.hardware_exit_reason = vcpu->arch.trap;
+		/* Clear out the old NMI status from run->flags */
+		run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+		/* Now set the NMI status */
+		if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+			run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+		else
+			run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+		r = RESUME_HOST;
+		/* Print the MCE event to host console. */
+		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
 		break;
 	case BOOK3S_INTERRUPT_PROGRAM:
 	{
@@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	/*
 	 * This occurs if the guest (kernel or userspace) does something that
-	 * is prohibited by HFSCR.  We just generate a program interrupt to
-	 * the guest.
+	 * is prohibited by HFSCR.
+	 * On POWER9, this could be a doorbell instruction that we need
+	 * to emulate.
+	 * Otherwise, we just generate a program interrupt to the guest.
 	 */
 	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
-		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-		r = RESUME_GUEST;
+		r = EMULATE_FAIL;
+		if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+			r = kvmppc_emulate_doorbell_instr(vcpu);
+		if (r == EMULATE_FAIL) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			r = RESUME_GUEST;
+		}
 		break;
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		r = RESUME_PASSTHROUGH;
@@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
 	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
 		mask |= LPCR_AIL;
+	/*
+	 * On POWER9, allow userspace to enable the large decrementer for the
+	 * guest, whether or not the host has it enabled.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		mask |= LPCR_LD;
 
 	/* Broken 32-bit version of LPCR must not clear top bits */
 	if (preserve_top32)
@@ -1486,6 +1622,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
+		/*
+		 * POWER9 DD1 has an erratum where writing TBU40 causes
+		 * the timebase to lose ticks.  So we don't let the
+		 * timebase offset be changed on P9 DD1.  (It is
+		 * initialized to zero.)
+		 */
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			break;
 		/* round up to multiple of 2^24 */
 		vcpu->arch.vcore->tb_offset =
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -1603,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	init_swait_queue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
-	vcore->first_vcpuid = core * threads_per_vcore();
+	vcore->first_vcpuid = core * kvm->arch.smt_mode;
 	vcore->kvm = kvm;
 	INIT_LIST_HEAD(&vcore->preempt_list);
 
@@ -1762,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 						   unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
-	int err = -EINVAL;
+	int err;
 	int core;
 	struct kvmppc_vcore *vcore;
 
-	core = id / threads_per_vcore();
-	if (core >= KVM_MAX_VCORES)
-		goto out;
-
 	err = -ENOMEM;
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu)
@@ -1800,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	vcpu->arch.busy_preempt = TB_NIL;
 	vcpu->arch.intr_msr = MSR_SF | MSR_ME;
 
+	/*
+	 * Set the default HFSCR for the guest from the host value.
+	 * This value is only used on POWER9.
+	 * On POWER9 DD1, TM doesn't work, so we make sure to
+	 * prevent the guest from using it.
+	 * On POWER9, we want to virtualize the doorbell facility, so we
+	 * turn off the HFSCR bit, which causes those instructions to trap.
+	 */
+	vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+	if (!cpu_has_feature(CPU_FTR_TM))
+		vcpu->arch.hfscr &= ~HFSCR_TM;
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		vcpu->arch.hfscr &= ~HFSCR_MSGP;
+
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -1807,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	init_waitqueue_head(&vcpu->arch.cpu_run);
 
 	mutex_lock(&kvm->lock);
-	vcore = kvm->arch.vcores[core];
-	if (!vcore) {
-		vcore = kvmppc_vcore_create(kvm, core);
-		kvm->arch.vcores[core] = vcore;
-		kvm->arch.online_vcores++;
+	vcore = NULL;
+	err = -EINVAL;
+	core = id / kvm->arch.smt_mode;
+	if (core < KVM_MAX_VCORES) {
+		vcore = kvm->arch.vcores[core];
+		if (!vcore) {
+			err = -ENOMEM;
+			vcore = kvmppc_vcore_create(kvm, core);
+			kvm->arch.vcores[core] = vcore;
+			kvm->arch.online_vcores++;
+		}
 	}
 	mutex_unlock(&kvm->lock);
 
@@ -1839,6 +1999,43 @@ out:
 	return ERR_PTR(err);
 }
 
+static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+			      unsigned long flags)
+{
+	int err;
+	int esmt = 0;
+
+	if (flags)
+		return -EINVAL;
+	if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+		return -EINVAL;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/*
+		 * On POWER8 (or POWER7), the threading mode is "strict",
+		 * so we pack smt_mode vcpus per vcore.
+		 */
+		if (smt_mode > threads_per_subcore)
+			return -EINVAL;
+	} else {
+		/*
+		 * On POWER9, the threading mode is "loose",
+		 * so each vcpu gets its own vcore.
+		 */
+		esmt = smt_mode;
+		smt_mode = 1;
+	}
+	mutex_lock(&kvm->lock);
+	err = -EBUSY;
+	if (!kvm->arch.online_vcores) {
+		kvm->arch.smt_mode = smt_mode;
+		kvm->arch.emul_smt_mode = esmt;
+		err = 0;
+	}
+	mutex_unlock(&kvm->lock);
+
+	return err;
+}
+
 static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
 {
 	if (vpa->pinned_addr)
@@ -1889,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 	}
 }
 
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
@@ -1954,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu)
 	tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
-static void do_nothing(void *x)
-{
-}
-
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -1975,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
 }
 
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * With radix, the guest can do TLB invalidations itself,
+	 * and it could choose to use the local form (tlbiel) if
+	 * it is invalidating a translation that has only ever been
+	 * used on one vcpu.  However, that doesn't mean it has
+	 * only ever been used on one physical cpu, since vcpus
+	 * can move around between pcpus.  To cope with this, when
+	 * a vcpu moves from one pcpu to another, we need to tell
+	 * any vcpus running on the same core as this vcpu previously
+	 * ran to flush the TLB.  The TLB is shared between threads,
+	 * so we use a single bit in .need_tlb_flush for all 4 threads.
+	 */
+	if (vcpu->arch.prev_cpu != pcpu) {
+		if (vcpu->arch.prev_cpu >= 0 &&
+		    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+		    cpu_first_thread_sibling(pcpu))
+			radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+		vcpu->arch.prev_cpu = pcpu;
+	}
+}
+
 static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
 	struct paca_struct *tpaca;
-	struct kvmppc_vcore *mvc = vc->master_vcore;
 	struct kvm *kvm = vc->kvm;
 
 	cpu = vc->pcpu;
@@ -1989,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 			vcpu->arch.timer_running = 0;
 		}
 		cpu += vcpu->arch.ptid;
-		vcpu->cpu = mvc->pcpu;
+		vcpu->cpu = vc->pcpu;
 		vcpu->arch.thread_cpu = cpu;
-
-		/*
-		 * With radix, the guest can do TLB invalidations itself,
-		 * and it could choose to use the local form (tlbiel) if
-		 * it is invalidating a translation that has only ever been
-		 * used on one vcpu.  However, that doesn't mean it has
-		 * only ever been used on one physical cpu, since vcpus
-		 * can move around between pcpus.  To cope with this, when
-		 * a vcpu moves from one pcpu to another, we need to tell
-		 * any vcpus running on the same core as this vcpu previously
-		 * ran to flush the TLB.  The TLB is shared between threads,
-		 * so we use a single bit in .need_tlb_flush for all 4 threads.
-		 */
-		if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
-			if (vcpu->arch.prev_cpu >= 0 &&
-			    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
-			    cpu_first_thread_sibling(cpu))
-				radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
-			vcpu->arch.prev_cpu = cpu;
-		}
 		cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
 	}
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
-	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+	tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
 	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
 	smp_wmb();
-	tpaca->kvm_hstate.kvm_vcore = mvc;
+	tpaca->kvm_hstate.kvm_vcore = vc;
 	if (cpu != smp_processor_id())
 		kvmppc_ipi_thread(cpu);
 }
@@ -2147,8 +2344,7 @@ struct core_info {
 	int		max_subcore_threads;
 	int		total_threads;
 	int		subcore_threads[MAX_SUBCORES];
-	struct kvm	*subcore_vm[MAX_SUBCORES];
-	struct list_head vcs[MAX_SUBCORES];
+	struct kvmppc_vcore *vc[MAX_SUBCORES];
 };
 
 /*
@@ -2159,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
 
 static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
 {
-	int sub;
-
 	memset(cip, 0, sizeof(*cip));
 	cip->n_subcores = 1;
 	cip->max_subcore_threads = vc->num_threads;
 	cip->total_threads = vc->num_threads;
 	cip->subcore_threads[0] = vc->num_threads;
-	cip->subcore_vm[0] = vc->kvm;
-	for (sub = 0; sub < MAX_SUBCORES; ++sub)
-		INIT_LIST_HEAD(&cip->vcs[sub]);
-	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+	cip->vc[0] = vc;
 }
 
 static bool subcore_config_ok(int n_subcores, int n_threads)
@@ -2189,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
 	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
 }
 
-static void init_master_vcore(struct kvmppc_vcore *vc)
+static void init_vcore_to_run(struct kvmppc_vcore *vc)
 {
-	vc->master_vcore = vc;
 	vc->entry_exit_map = 0;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
@@ -2216,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
 	++cip->n_subcores;
 	cip->total_threads += vc->num_threads;
 	cip->subcore_threads[sub] = vc->num_threads;
-	cip->subcore_vm[sub] = vc->kvm;
-	init_master_vcore(vc);
-	list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
+	cip->vc[sub] = vc;
+	init_vcore_to_run(vc);
+	list_del_init(&vc->preempt_list);
 
 	return true;
 }
@@ -2286,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
 	spin_unlock(&lp->lock);
 }
 
+static bool recheck_signals(struct core_info *cip)
+{
+	int sub, i;
+	struct kvm_vcpu *vcpu;
+
+	for (sub = 0; sub < cip->n_subcores; ++sub)
+		for_each_runnable_thread(i, vcpu, cip->vc[sub])
+			if (signal_pending(vcpu->arch.run_task))
+				return true;
+	return false;
+}
+
 static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 {
 	int still_running = 0, i;
@@ -2323,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
-	list_del_init(&vc->preempt_list);
 	if (!is_master) {
 		if (still_running > 0) {
 			kvmppc_vcore_preempt(vc);
@@ -2385,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
 	return 0;
 }
 
+static void set_irq_happened(int trap)
+{
+	switch (trap) {
+	case BOOK3S_INTERRUPT_EXTERNAL:
+		local_paca->irq_happened |= PACA_IRQ_EE;
+		break;
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+		local_paca->irq_happened |= PACA_IRQ_DBELL;
+		break;
+	case BOOK3S_INTERRUPT_HMI:
+		local_paca->irq_happened |= PACA_IRQ_HMI;
+		break;
+	}
+}
+
 /*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
@@ -2395,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int i;
 	int srcu_idx;
 	struct core_info core_info;
-	struct kvmppc_vcore *pvc, *vcnext;
+	struct kvmppc_vcore *pvc;
 	struct kvm_split_mode split_info, *sip;
 	int split, subcore_size, active;
 	int sub;
@@ -2404,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int pcpu, thr;
 	int target_threads;
 	int controlled_threads;
+	int trap;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -2418,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/*
 	 * Initialize *vc.
 	 */
-	init_master_vcore(vc);
+	init_vcore_to_run(vc);
 	vc->preempt_tb = TB_NIL;
 
 	/*
@@ -2455,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	if (vc->num_threads < target_threads)
 		collect_piggybacks(&core_info, target_threads);
 
+	/*
+	 * On radix, arrange for TLB flushing if necessary.
+	 * This has to be done before disabling interrupts since
+	 * it uses smp_call_function().
+	 */
+	pcpu = smp_processor_id();
+	if (kvm_is_radix(vc->kvm)) {
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+	}
+
+	/*
+	 * Hard-disable interrupts, and check resched flag and signals.
+	 * If we need to reschedule or deliver a signal, clean up
+	 * and return without going into the guest(s).
+	 */
+	local_irq_disable();
+	hard_irq_disable();
+	if (lazy_irq_pending() || need_resched() ||
+	    recheck_signals(&core_info)) {
+		local_irq_enable();
+		vc->vcore_state = VCORE_INACTIVE;
+		/* Unlock all except the primary vcore */
+		for (sub = 1; sub < core_info.n_subcores; ++sub) {
+			pvc = core_info.vc[sub];
+			/* Put back on to the preempted vcores list */
+			kvmppc_vcore_preempt(pvc);
+			spin_unlock(&pvc->lock);
+		}
+		for (i = 0; i < controlled_threads; ++i)
+			kvmppc_release_hwthread(pcpu + i);
+		return;
+	}
+
+	kvmppc_clear_host_core(pcpu);
+
 	/* Decide on micro-threading (split-core) mode */
 	subcore_size = threads_per_subcore;
 	cmd_bit = stat_bit = 0;
@@ -2478,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		split_info.ldbar = mfspr(SPRN_LDBAR);
 		split_info.subcore_size = subcore_size;
 		for (sub = 0; sub < core_info.n_subcores; ++sub)
-			split_info.master_vcs[sub] =
-				list_first_entry(&core_info.vcs[sub],
-					struct kvmppc_vcore, preempt_list);
+			split_info.vc[sub] = core_info.vc[sub];
 		/* order writes to split_info before kvm_split_mode pointer */
 		smp_wmb();
 	}
-	pcpu = smp_processor_id();
 	for (thr = 0; thr < controlled_threads; ++thr)
 		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
 
@@ -2504,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		}
 	}
 
-	kvmppc_clear_host_core(pcpu);
-
 	/* Start all the threads */
 	active = 0;
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
 		thr = subcore_thread_map[sub];
 		thr0_done = false;
 		active |= 1 << thr;
-		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
-			pvc->pcpu = pcpu + thr;
-			for_each_runnable_thread(i, vcpu, pvc) {
-				kvmppc_start_thread(vcpu, pvc);
-				kvmppc_create_dtl_entry(vcpu, pvc);
-				trace_kvm_guest_enter(vcpu);
-				if (!vcpu->arch.ptid)
-					thr0_done = true;
-				active |= 1 << (thr + vcpu->arch.ptid);
-			}
-			/*
-			 * We need to start the first thread of each subcore
-			 * even if it doesn't have a vcpu.
-			 */
-			if (pvc->master_vcore == pvc && !thr0_done)
-				kvmppc_start_thread(NULL, pvc);
-			thr += pvc->num_threads;
+		pvc = core_info.vc[sub];
+		pvc->pcpu = pcpu + thr;
+		for_each_runnable_thread(i, vcpu, pvc) {
+			kvmppc_start_thread(vcpu, pvc);
+			kvmppc_create_dtl_entry(vcpu, pvc);
+			trace_kvm_guest_enter(vcpu);
+			if (!vcpu->arch.ptid)
+				thr0_done = true;
+			active |= 1 << (thr + vcpu->arch.ptid);
 		}
+		/*
+		 * We need to start the first thread of each subcore
+		 * even if it doesn't have a vcpu.
+		 */
+		if (!thr0_done)
+			kvmppc_start_thread(NULL, pvc);
+		thr += pvc->num_threads;
 	}
 
 	/*
@@ -2556,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 0);
 
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
-		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
-			spin_unlock(&pvc->lock);
+		spin_unlock(&core_info.vc[sub]->lock);
+
+	/*
+	 * Interrupts will be enabled once we get into the guest,
+	 * so tell lockdep that we're about to enable interrupts.
+	 */
+	trace_hardirqs_on();
 
 	guest_enter();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
-	__kvmppc_vcore_entry();
+	trap = __kvmppc_vcore_entry();
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
+	guest_exit();
+
+	trace_hardirqs_off();
+	set_irq_happened(trap);
+
 	spin_lock(&vc->lock);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
@@ -2594,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		split_info.do_nap = 0;
 	}
 
+	kvmppc_set_host_core(pcpu);
+
+	local_irq_enable();
+
 	/* Let secondaries go back to the offline loop */
 	for (i = 0; i < controlled_threads; ++i) {
 		kvmppc_release_hwthread(pcpu + i);
@@ -2602,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
 	}
 
-	kvmppc_set_host_core(pcpu);
-
 	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
-	guest_exit();
 
-	for (sub = 0; sub < core_info.n_subcores; ++sub)
-		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
-					 preempt_list)
-			post_guest_process(pvc, pvc == vc);
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		pvc = core_info.vc[sub];
+		post_guest_process(pvc, pvc == vc);
+	}
 
 	spin_lock(&vc->lock);
 	preempt_enable();
@@ -2658,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
 		vc->halt_poll_ns /= halt_poll_ns_shrink;
 }
 
+#ifdef CONFIG_KVM_XICS
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+	if (!xive_enabled())
+		return false;
+	return vcpu->arch.xive_saved_state.pipr <
+		vcpu->arch.xive_saved_state.cppr;
+}
+#else
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+#endif /* CONFIG_KVM_XICS */
+
+static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
+	    kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
+		return true;
+
+	return false;
+}
+
 /*
  * Check to see if any of the runnable vcpus on the vcore have pending
  * exceptions or are no longer ceded
@@ -2668,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
 	int i;
 
 	for_each_runnable_thread(i, vcpu, vc) {
-		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
-		    vcpu->arch.prodded)
+		if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
 			return 1;
 	}
 
@@ -2811,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 */
 	if (!signal_pending(current)) {
 		if (vc->vcore_state == VCORE_PIGGYBACK) {
-			struct kvmppc_vcore *mvc = vc->master_vcore;
-			if (spin_trylock(&mvc->lock)) {
-				if (mvc->vcore_state == VCORE_RUNNING &&
-				    !VCORE_IS_EXITING(mvc)) {
+			if (spin_trylock(&vc->lock)) {
+				if (vc->vcore_state == VCORE_RUNNING &&
+				    !VCORE_IS_EXITING(vc)) {
 					kvmppc_create_dtl_entry(vcpu, vc);
 					kvmppc_start_thread(vcpu, vc);
 					trace_kvm_guest_enter(vcpu);
 				}
-				spin_unlock(&mvc->lock);
+				spin_unlock(&vc->lock);
 			}
 		} else if (vc->vcore_state == VCORE_RUNNING &&
 			   !VCORE_IS_EXITING(vc)) {
@@ -2855,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			break;
 		n_ceded = 0;
 		for_each_runnable_thread(i, v, vc) {
-			if (!v->arch.pending_exceptions && !v->arch.prodded)
+			if (!kvmppc_vcpu_woken(v))
 				n_ceded += v->arch.ceded;
 			else
 				v->arch.ceded = 0;
@@ -2907,12 +3188,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 	int srcu_idx;
+	unsigned long ebb_regs[3] = {};	/* shut up GCC */
+	unsigned long user_tar = 0;
+	unsigned int user_vrsave;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return -EINVAL;
 	}
 
+	/*
+	 * Don't allow entry with a suspended transaction, because
+	 * the guest entry/exit code will lose it.
+	 * If the guest has TM enabled, save away their TM-related SPRs
+	 * (they will get restored by the TM unavailable interrupt).
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+	    (current->thread.regs->msr & MSR_TM)) {
+		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			return -EINVAL;
+		}
+		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+		current->thread.regs->msr &= ~MSR_TM;
+	}
+#endif
+
 	kvmppc_core_prepare_to_enter(vcpu);
 
 	/* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +3239,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	flush_all_to_thread(current);
 
+	/* Save userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		ebb_regs[0] = mfspr(SPRN_EBBHR);
+		ebb_regs[1] = mfspr(SPRN_EBBRR);
+		ebb_regs[2] = mfspr(SPRN_BESCR);
+		user_tar = mfspr(SPRN_TAR);
+	}
+	user_vrsave = mfspr(SPRN_VRSAVE);
+
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3274,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	} while (is_kvmppc_resume_guest(r));
 
+	/* Restore userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_EBBHR, ebb_regs[0]);
+		mtspr(SPRN_EBBRR, ebb_regs[1]);
+		mtspr(SPRN_BESCR, ebb_regs[2]);
+		mtspr(SPRN_TAR, user_tar);
+		mtspr(SPRN_FSCR, current->thread.fscr);
+	}
+	mtspr(SPRN_VRSAVE, user_vrsave);
+
  out:
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	atomic_dec(&vcpu->kvm->arch.vcpus_running);
@@ -3467,6 +3791,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		kvm_hv_vm_activated();
 
+	/*
+	 * Initialize smt_mode depending on processor.
+	 * POWER8 and earlier have to use "strict" threading, where
+	 * all vCPUs in a vcore have to run on the same (sub)core,
+	 * whereas on POWER9 the threads can each run a different
+	 * guest.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm->arch.smt_mode = threads_per_subcore;
+	else
+		kvm->arch.smt_mode = 1;
+	kvm->arch.emul_smt_mode = 1;
+
 	/*
 	 * Create a debugfs directory for the VM
 	 */
@@ -3896,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = {
 #endif
 	.configure_mmu = kvmhv_configure_mmu,
 	.get_rmmu_info = kvmhv_get_rmmu_info,
+	.set_smt_mode = kvmhv_set_smt_mode,
 };
 
 static int kvm_init_subcore_bitmap(void)
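
To show how the new KVM_EXIT_NMI path above is consumed, here is a
hedged sketch of a userspace run loop once KVM_CAP_PPC_FWNMI is
enabled.  vcpu_fd, the mmap'ed run structure, and the error-log step
are assumptions about the QEMU side, not something this commit adds.

#include <sys/ioctl.h>
#include <linux/kvm.h>

static void run_once(int vcpu_fd, struct kvm_run *run)
{
	ioctl(vcpu_fd, KVM_RUN, 0);

	if (run->exit_reason == KVM_EXIT_NMI) {
		if ((run->flags & KVM_RUN_PPC_NMI_DISP_MASK) ==
		    KVM_RUN_PPC_NMI_DISP_FULLY_RECOV) {
			/* recovered: build an error log and deliver a
			 * machine check to the guest's FWNMI handler */
		} else {
			/* not recovered: treat the error as fatal */
		}
	}
	/* ... handle other exit reasons ... */
}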

+ 1 - 1
arch/powerpc/kvm/book3s_hv_builtin.c

@@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
 		return;
 
 	for (i = 0; i < MAX_SUBCORES; ++i) {
-		vc = sip->master_vcs[i];
+		vc = sip->vc[i];
 		if (!vc)
 			break;
 		do {

+ 12 - 8
arch/powerpc/kvm/book3s_hv_interrupts.S

@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
 	std	r3, HSTATE_DABR(r13)
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
-	/* Hard-disable interrupts */
-	mfmsr   r10
-	std	r10, HSTATE_HOST_MSR(r13)
-	rldicl  r10,r10,48,1
-	rotldi  r10,r10,16
-	mtmsrd  r10,1
-
 	/* Save host PMU registers */
 BEGIN_FTR_SECTION
 	/* Work around P8 PMAE bug */
@@ -121,10 +114,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
+BEGIN_FTR_SECTION
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_KVM(r5)
+	ld	r9, KVM_HOST_LPCR(r6)
+	andis.	r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mfspr	r8,SPRN_DEC
 	mftb	r7
-	mtspr	SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+	/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+	bne	32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r8,r8
+32:	mtspr	SPRN_HDEC,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
@@ -143,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 *
 	 * R1       = host R1
 	 * R2       = host R2
+	 * R3       = trap number on this thread
 	 * R12      = exit handler id
 	 * R13      = PACA
 	 */

+ 17 - 1
arch/powerpc/kvm/book3s_hv_ras.c

@@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 
 out:
 	/*
+	 * For a guest that supports the FWNMI capability, hook the MCE event
+	 * into the vcpu structure. We are going to exit the guest with the
+	 * KVM_EXIT_NMI exit reason. On our way out we will pull this event
+	 * from the vcpu structure and print it from thread 0 of the core/subcore.
+	 *
+	 * For a guest that does not support the FWNMI capability (old QEMU):
 	 * We are now going to enter the guest either through a machine check
 	 * interrupt (for unhandled errors) or will continue from the current
 	 * HSRR0 (for handled errors) in the guest. Hence queue up the event
 	 * so that we can log it from the host console later.
 	 */
-	machine_check_queue_event();
+	if (vcpu->kvm->arch.fwnmi_enabled) {
+		/*
+		 * Hook the MCE event onto the vcpu structure.
+		 * First clear out any old event.
+		 */
+		memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
+		if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+			vcpu->arch.mce_evt = mce_evt;
+		}
+	} else
+		machine_check_queue_event();
 
 	return handled;
 }

+ 153 - 83
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -32,12 +32,30 @@
 #include <asm/opal.h>
 #include <asm/xive-regs.h>
 
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg)			\
+BEGIN_FTR_SECTION;				\
+	extsw	reg, reg;			\
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
 
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS			160
+#define STACK_SLOT_TRAP		(SFS-4)
+#define STACK_SLOT_TID		(SFS-16)
+#define STACK_SLOT_PSSCR	(SFS-24)
+#define STACK_SLOT_PID		(SFS-32)
+#define STACK_SLOT_IAMR		(SFS-40)
+#define STACK_SLOT_CIABR	(SFS-48)
+#define STACK_SLOT_DAWR		(SFS-56)
+#define STACK_SLOT_DAWRX	(SFS-64)
+#define STACK_SLOT_HFSCR	(SFS-72)
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -51,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	std	r0, PPC_LR_STKOFF(r1)
 	stdu	r1, -112(r1)
 	mfmsr	r10
+	std	r10, HSTATE_HOST_MSR(r13)
 	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
 	li	r0,MSR_RI
 	andc	r0,r10,r0
@@ -135,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	stb	r0, HSTATE_HWTHREAD_REQ(r13)
 
 	/*
-	 * For external and machine check interrupts, we need
-	 * to call the Linux handler to process the interrupt.
-	 * We do that by jumping to absolute address 0x500 for
-	 * external interrupts, or the machine_check_fwnmi label
-	 * for machine checks (since firmware might have patched
-	 * the vector area at 0x200).  The [h]rfid at the end of the
-	 * handler will return to the book3s_hv_interrupts.S code.
-	 * For other interrupts we do the rfid to get back
-	 * to the book3s_hv_interrupts.S code here.
+	 * For external interrupts we need to call the Linux
+	 * handler to process the interrupt. We do that by jumping
+	 * to absolute address 0x500 for external interrupts.
+	 * The [h]rfid at the end of the handler will return to
+	 * the book3s_hv_interrupts.S code. For other interrupts
+	 * we do the rfid to get back to the book3s_hv_interrupts.S
+	 * code here.
 	 */
 	ld	r8, 112+PPC_LR_STKOFF(r1)
 	addi	r1, r1, 112
 	ld	r7, HSTATE_HOST_MSR(r13)
 
+	/* Return the trap number on this thread as the return value */
+	mr	r3, r12
+
 	/*
 	 * If we came back from the guest via a relocation-on interrupt,
 	 * we will be in virtual mode at this point, which makes it a
@@ -158,62 +178,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	andi.	r0, r0, MSR_IR		/* in real mode? */
 	bne	.Lvirt_return
 
-	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	11f
-	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
-	beq 	15f	/* Invoke the H_DOORBELL handler */
-	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
-	beq	cr2, 14f			/* HMI check */
-
-	/* RFI into the highmem handler, or branch to interrupt handler */
+	/* RFI into the highmem handler */
 	mfmsr	r6
 	li	r0, MSR_RI
 	andc	r6, r6, r0
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	beq	cr1, 13f		/* machine check */
 	RFI
 
-	/* On POWER7, we have external interrupts set to use HSRR0/1 */
-11:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	ba	0x500
-
-13:	b	machine_check_fwnmi
-
-14:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	b	hmi_exception_after_realmode
-
-15:	mtspr SPRN_HSRR0, r8
-	mtspr SPRN_HSRR1, r7
-	ba    0xe80
-
-	/* Virtual-mode return - can't get here for HMI or machine check */
+	/* Virtual-mode return */
 .Lvirt_return:
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	16f
-	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
-	beq	17f
-	andi.	r0, r7, MSR_EE		/* were interrupts hard-enabled? */
-	beq	18f
-	mtmsrd	r7, 1			/* if so then re-enable them */
-18:	mtlr	r8
+	mtlr	r8
 	blr
 
-16:	mtspr	SPRN_HSRR0, r8		/* jump to reloc-on external vector */
-	mtspr	SPRN_HSRR1, r7
-	b	exc_virt_0x4500_hardware_interrupt
-
-17:	mtspr	SPRN_HSRR0, r8
-	mtspr	SPRN_HSRR1, r7
-	b	exc_virt_0x4e80_h_doorbell
-
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
+	/* HDEC value came from DEC in the first place, it will fit */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -295,8 +278,9 @@ kvm_novcpu_wakeup:
 
 	/* See if our timeslice has expired (HDEC is negative) */
 	mfspr	r0, SPRN_HDEC
+	EXTEND_HDEC(r0)
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +303,10 @@ kvm_novcpu_exit:
 	bl	kvmhv_accumulate_time
 #endif
 13:	mr	r3, r12
-	stw	r12, 112-4(r1)
+	stw	r12, STACK_SLOT_TRAP(r1)
 	bl	kvmhv_commence_exit
 	nop
-	lwz	r12, 112-4(r1)
+	lwz	r12, STACK_SLOT_TRAP(r1)
 	b	kvmhv_switch_to_host
 
 /*
@@ -390,8 +374,8 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+	LOAD_REG_ADDR(r6, decrementer_max)
+	ld	r6, 0(r6)
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +529,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  *                                                                            *
  *****************************************************************************/
 
-/* Stack frame offsets */
-#define STACK_SLOT_TID		(112-16)
-#define STACK_SLOT_PSSCR	(112-24)
-#define STACK_SLOT_PID		(112-32)
-
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -565,7 +544,7 @@ kvmppc_hv_entry:
 	 */
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
-	stdu	r1, -112(r1)
+	stdu	r1, -SFS(r1)
 
 	/* Save R1 in the PACA */
 	std	r1, HSTATE_HOST_R1(r13)
@@ -749,10 +728,22 @@ BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_TIDR
 	mfspr	r6, SPRN_PSSCR
 	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_IAMR
 	std	r5, STACK_SLOT_TID(r1)
 	std	r6, STACK_SLOT_PSSCR(r1)
 	std	r7, STACK_SLOT_PID(r1)
+	std	r8, STACK_SLOT_IAMR(r1)
+	mfspr	r5, SPRN_HFSCR
+	std	r5, STACK_SLOT_HFSCR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_CIABR
+	mfspr	r6, SPRN_DAWR
+	mfspr	r7, SPRN_DAWRX
+	std	r5, STACK_SLOT_CIABR(r1)
+	std	r6, STACK_SLOT_DAWR(r1)
+	std	r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -895,8 +886,10 @@ FTR_SECTION_ELSE
 	ld	r5, VCPU_TID(r4)
 	ld	r6, VCPU_PSSCR(r4)
 	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
+	ld	r7, VCPU_HFSCR(r4)
 	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
+	mtspr	SPRN_HFSCR, r7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 8:
 
@@ -911,7 +904,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	mftb	r7
 	subf	r3,r7,r8
 	mtspr	SPRN_DEC,r3
-	stw	r3,VCPU_DEC(r4)
+	std	r3,VCPU_DEC(r4)
 
 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
@@ -968,7 +961,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	EXTEND_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
 #ifdef CONFIG_KVM_XICS
@@ -1022,7 +1016,13 @@ kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	li	r0, BOOK3S_INTERRUPT_EXTERNAL
 	bne	cr1, 12f
 	mfspr	r0, SPRN_DEC
-	cmpwi	r0, 0
+BEGIN_FTR_SECTION
+	/* On POWER9 check whether the guest has large decrementer enabled */
+	andis.	r8, r8, LPCR_LD@h
+	bne	15f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	extsw	r0, r0
+15:	cmpdi	r0, 0
 	li	r0, BOOK3S_INTERRUPT_DECREMENTER
 	bge	5f
 
@@ -1032,6 +1032,23 @@ kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	mr	r9, r4
 	bl	kvmppc_msr_interrupt
 5:
+BEGIN_FTR_SECTION
+	b	fast_guest_return
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+	/* On POWER9, check for pending doorbell requests */
+	lbz	r0, VCPU_DBELL_REQ(r4)
+	cmpwi	r0, 0
+	beq	fast_guest_return
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	/* Set DPDES register so the CPU will take a doorbell interrupt */
+	li	r0, 1
+	mtspr	SPRN_DPDES, r0
+	std	r0, VCORE_DPDES(r5)
+	/* Make sure other cpus see vcore->dpdes set before dbell req clear */
+	lwsync
+	/* Clear the pending doorbell request */
+	li	r0, 0
+	stb	r0, VCPU_DBELL_REQ(r4)
 
 /*
  * Required state:
@@ -1206,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	stw	r12,VCPU_TRAP(r9)
 
+	/*
+	 * Now that we have saved away SRR0/1 and HSRR0/1,
+	 * interrupts are recoverable in principle, so set MSR_RI.
+	 * This becomes important for relocation-on interrupts from
+	 * the guest, which we can get in radix mode on POWER9.
+	 */
+	li	r0, MSR_RI
+	mtmsrd	r0, 1
+
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r9, VCPU_TB_RMINTR
 	mr	r4, r9
@@ -1262,6 +1288,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	beq	4f
 	b	guest_exit_cont
 3:
+	/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
+	cmpwi	r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
+	bne	14f
+	mfspr	r3, SPRN_HFSCR
+	std	r3, VCPU_HFSCR(r9)
+	b	guest_exit_cont
+14:
 	/* External interrupt ? */
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	bne+	guest_exit_cont
@@ -1449,12 +1482,18 @@ mc_cont:
 	mtspr	SPRN_SPURR,r4
 
 	/* Save DEC */
+	ld	r3, HSTATE_KVM_VCORE(r13)
 	mfspr	r5,SPRN_DEC
 	mftb	r6
+	/* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+	ld	r4, VCORE_LPCR(r3)
+	andis.	r4, r4, LPCR_LD@h
+	bne	16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r5,r5
-	add	r5,r5,r6
+16:	add	r5,r5,r6
 	/* r5 is a guest timebase value here, convert to host TB */
-	ld	r3,HSTATE_KVM_VCORE(r13)
 	ld	r4,VCORE_TB_OFFSET(r3)
 	subf	r5,r4,r5
 	std	r5,VCPU_DEC_EXPIRES(r9)
@@ -1499,17 +1538,19 @@ FTR_SECTION_ELSE
 	rldicl	r6, r6, 4, 50		/* r6 &= PSSCR_GUEST_VIS */
 	rotldi	r6, r6, 60
 	std	r6, VCPU_PSSCR(r9)
+	/* Restore host HFSCR value */
+	ld	r7, STACK_SLOT_HFSCR(r1)
+	mtspr	SPRN_HFSCR, r7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	/*
 	 * Restore various registers to 0, where non-zero values
 	 * set by the guest could disrupt the host.
 	 */
 	li	r0, 0
-	mtspr	SPRN_IAMR, r0
-	mtspr	SPRN_CIABR, r0
-	mtspr	SPRN_DAWRX, r0
+	mtspr	SPRN_PSPB, r0
 	mtspr	SPRN_WORT, r0
 BEGIN_FTR_SECTION
+	mtspr	SPRN_IAMR, r0
 	mtspr	SPRN_TCSCR, r0
 	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
 	li	r0, 1
@@ -1525,6 +1566,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
+	mtspr	SPRN_UAMOR, r6
 
 	/* Switch DSCR back to host value */
 	mfspr	r8, SPRN_DSCR
@@ -1669,13 +1711,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	ptesync
 
 	/* Restore host values of some registers */
+BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_CIABR(r1)
+	ld	r6, STACK_SLOT_DAWR(r1)
+	ld	r7, STACK_SLOT_DAWRX(r1)
+	mtspr	SPRN_CIABR, r5
+	mtspr	SPRN_DAWR, r6
+	mtspr	SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
 	ld	r6, STACK_SLOT_PSSCR(r1)
 	ld	r7, STACK_SLOT_PID(r1)
+	ld	r8, STACK_SLOT_IAMR(r1)
 	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
 	mtspr	SPRN_PID, r7
+	mtspr	SPRN_IAMR, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	PPC_INVALIDATE_ERAT
@@ -1819,8 +1871,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
 
-	ld	r0, 112+PPC_LR_STKOFF(r1)
-	addi	r1, r1, 112
+	ld	r0, SFS+PPC_LR_STKOFF(r1)
+	addi	r1, r1, SFS
 	mtlr	r0
 	blr
 
@@ -2366,12 +2418,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	mfspr	r3, SPRN_DEC
 	mfspr	r4, SPRN_HDEC
 	mftb	r5
-	cmpw	r3, r4
+BEGIN_FTR_SECTION
+	/* On P9 check whether the guest has large decrementer mode enabled */
+	ld	r6, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_LPCR(r6)
+	andis.	r6, r6, LPCR_LD@h
+	bne	68f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	extsw	r3, r3
+68:	EXTEND_HDEC(r4)
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
@@ -2552,22 +2612,32 @@ machine_check_realmode:
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	/*
-	 * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
-	 * machine check interrupt (set HSRR0 to 0x200). And for handled
-	 * errors (no-fatal), just go back to guest execution with current
-	 * HSRR0 instead of exiting guest. This new approach will inject
-	 * machine check to guest for fatal error causing guest to crash.
-	 *
-	 * The old code used to return to host for unhandled errors which
-	 * was causing guest to hang with soft lockups inside guest and
-	 * makes it difficult to recover guest instance.
+	 * For a guest that is FWNMI capable, deliver all MCE errors
+	 * (handled or unhandled) by exiting the guest with the KVM_EXIT_NMI
+	 * exit reason. This new approach injects machine check errors into
+	 * the guest's address space with additional information in the form
+	 * of an RTAS event, enabling the guest kernel to handle such
+	 * errors suitably.
+	 *
+	 * For a guest that is not FWNMI capable (old QEMU), fall back to
+	 * the old behaviour for backward compatibility:
+	 * Deliver unhandled/fatal (e.g. UE) MCE errors to the guest
+	 * through a machine check interrupt (set HSRR0 to 0x200).
+	 * For handled (non-fatal) errors, just go back to guest execution
+	 * with the current HSRR0.
 	 * If we receive a machine check with MSR(RI=0), then deliver it to
 	 * the guest as a machine check, causing the guest to crash.
 	 */
 	ld	r11, VCPU_MSR(r9)
 	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
 	bne	mc_cont			/* if so, exit to host */
+	/* Check if guest is capable of handling NMI exit */
+	ld	r10, VCPU_KVM(r9)
+	lbz	r10, KVM_FWNMI(r10)
+	cmpdi	r10, 1			/* FWNMI capable? */
+	beq	mc_cont			/* if so, exit with KVM_EXIT_NMI. */
+
+	/* if not, fall through for backward compatibility. */
 	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
 	beq	1f			/* Deliver a machine check to guest */
 	ld	r10, VCPU_PC(r9)

+ 2 - 2
arch/powerpc/kvm/emulate.c

@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	unsigned long dec_nsec;
 	unsigned long long dec_time;
 
-	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+	pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
 	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
 
 #ifdef CONFIG_PPC_BOOK3S
@@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_TBWU: break;
 
 	case SPRN_DEC:
-		vcpu->arch.dec = spr_val;
+		vcpu->arch.dec = (u32) spr_val;
 		kvmppc_emulate_dec(vcpu);
 		break;
 

+ 39 - 1
arch/powerpc/kvm/powerpc.c

@@ -553,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_SMT:
 		r = 0;
-		if (hv_enabled) {
+		if (kvm) {
+			if (kvm->arch.emul_smt_mode > 1)
+				r = kvm->arch.emul_smt_mode;
+			else
+				r = kvm->arch.smt_mode;
+		} else if (hv_enabled) {
 			if (cpu_has_feature(CPU_FTR_ARCH_300))
 				r = 1;
 			else
 				r = threads_per_subcore;
 		}
 		break;
+	case KVM_CAP_PPC_SMT_POSSIBLE:
+		r = 1;
+		if (hv_enabled) {
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				r = ((threads_per_subcore << 1) - 1);
+			else
+				/* P9 can emulate dbells, so allow any mode */
+				r = 8 | 4 | 2 | 1;
+		}
+		break;
 	case KVM_CAP_PPC_RMA:
 		r = 0;
 		break;
@@ -617,6 +632,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		/* Disable this on POWER9 until code handles new HPTE format */
 		r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
 		break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	case KVM_CAP_PPC_FWNMI:
+		r = hv_enabled;
+		break;
 #endif
 	case KVM_CAP_PPC_HTM:
 		r = cpu_has_feature(CPU_FTR_TM_COMP) &&
@@ -1537,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		break;
 	}
 #endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	case KVM_CAP_PPC_FWNMI:
+		r = -EINVAL;
+		if (!is_kvmppc_hv_enabled(vcpu->kvm))
+			break;
+		r = 0;
+		vcpu->kvm->arch.fwnmi_enabled = true;
+		break;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 	default:
 		r = -EINVAL;
 		break;
@@ -1711,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		r = 0;
 		break;
 	}
+	case KVM_CAP_PPC_SMT: {
+		unsigned long mode = cap->args[0];
+		unsigned long flags = cap->args[1];
+
+		r = -EINVAL;
+		if (kvm->arch.kvm_ops->set_smt_mode)
+			r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
+		break;
+	}
 #endif
 	default:
 		r = -EINVAL;
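
A small illustrative sketch (not in the patch) of how userspace might
decode the bitmap computed above, where bit N set means a virtual SMT
mode of 2^N is available; kvm_fd is an assumed open /dev/kvm descriptor.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void print_vsmt_modes(int kvm_fd)
{
	int modes = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT_POSSIBLE);
	int n;

	/* e.g. modes = 15 on a POWER8 SMT8 host, or 8|4|2|1 on POWER9 */
	for (n = 0; n < 4; n++)
		if (modes > 0 && (modes & (1 << n)))
			printf("virtual SMT mode %d supported\n", 1 << n);
}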

+ 2 - 0
include/uapi/linux/kvm.h

@@ -925,6 +925,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_GUEST_MWAIT 143
 #define KVM_CAP_ARM_USER_IRQ 144
 #define KVM_CAP_S390_CMMA_MIGRATION 145
+#define KVM_CAP_PPC_FWNMI 146
+#define KVM_CAP_PPC_SMT_POSSIBLE 147
 
 #ifdef KVM_CAP_IRQ_ROUTING