
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
 "Second batch of KVM updates.  Some minor x86 fixes, two s390 guest
  features that need some handling in the host, and all the PPC changes.

  The PPC changes include support for little-endian guests and
  enablement for new POWER8 features"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (45 commits)
  x86, kvm: correctly access the KVM_CPUID_FEATURES leaf at 0x40000101
  x86, kvm: cache the base of the KVM cpuid leaves
  kvm: x86: move KVM_CAP_HYPERV_TIME outside #ifdef
  KVM: PPC: Book3S PR: Cope with doorbell interrupts
  KVM: PPC: Book3S HV: Add software abort codes for transactional memory
  KVM: PPC: Book3S HV: Add new state for transactional memory
  powerpc/Kconfig: Make TM select VSX and VMX
  KVM: PPC: Book3S HV: Basic little-endian guest support
  KVM: PPC: Book3S HV: Add support for DABRX register on POWER7
  KVM: PPC: Book3S HV: Prepare for host using hypervisor doorbells
  KVM: PPC: Book3S HV: Handle new LPCR bits on POWER8
  KVM: PPC: Book3S HV: Handle guest using doorbells for IPIs
  KVM: PPC: Book3S HV: Consolidate code that checks reason for wake from nap
  KVM: PPC: Book3S HV: Implement architecture compatibility modes for POWER8
  KVM: PPC: Book3S HV: Add handler for HV facility unavailable
  KVM: PPC: Book3S HV: Flush the correct number of TLB sets on POWER8
  KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs
  KVM: PPC: Book3S HV: Align physical and virtual CPU thread numbers
  KVM: PPC: Book3S HV: Don't set DABR on POWER8
  kvm/ppc: IRQ disabling cleanup
  ...
Linus Torvalds, 11 years ago
Commit e2a0f813e0
53 changed files, 1715 insertions(+), 1122 deletions(-)
  1. Documentation/virtual/kvm/api.txt (+1, -0)
  2. arch/powerpc/Kconfig (+2, -0)
  3. arch/powerpc/include/asm/epapr_hcalls.h (+111, -0)
  4. arch/powerpc/include/asm/kvm_asm.h (+3, -0)
  5. arch/powerpc/include/asm/kvm_book3s.h (+13, -14)
  6. arch/powerpc/include/asm/kvm_book3s_asm.h (+1, -0)
  7. arch/powerpc/include/asm/kvm_booke.h (+6, -0)
  8. arch/powerpc/include/asm/kvm_host.h (+51, -10)
  9. arch/powerpc/include/asm/kvm_para.h (+1, -79)
 10. arch/powerpc/include/asm/kvm_ppc.h (+10, -3)
 11. arch/powerpc/include/asm/pgtable.h (+21, -0)
 12. arch/powerpc/include/asm/reg.h (+35, -8)
 13. arch/powerpc/include/asm/switch_to.h (+0, -2)
 14. arch/powerpc/include/uapi/asm/kvm.h (+3, -0)
 15. arch/powerpc/include/uapi/asm/tm.h (+2, -0)
 16. arch/powerpc/kernel/asm-offsets.c (+42, -8)
 17. arch/powerpc/kernel/kvm.c (+2, -39)
 18. arch/powerpc/kvm/44x.c (+4, -0)
 19. arch/powerpc/kvm/book3s.c (+38, -8)
 20. arch/powerpc/kvm/book3s_32_mmu_host.c (+5, -0)
 21. arch/powerpc/kvm/book3s_64_mmu_hv.c (+2, -2)
 22. arch/powerpc/kvm/book3s_exports.c (+0, -4)
 23. arch/powerpc/kvm/book3s_hv.c (+241, -78)
 24. arch/powerpc/kvm/book3s_hv_interrupts.S (+4, -4)
 25. arch/powerpc/kvm/book3s_hv_rm_mmu.c (+5, -3)
 26. arch/powerpc/kvm/book3s_hv_rmhandlers.S (+745, -468)
 27. arch/powerpc/kvm/book3s_paired_singles.c (+83, -86)
 28. arch/powerpc/kvm/book3s_pr.c (+34, -121)
 29. arch/powerpc/kvm/book3s_rmhandlers.S (+0, -47)
 30. arch/powerpc/kvm/book3s_segment.S (+2, -0)
 31. arch/powerpc/kvm/book3s_xics.c (+3, -1)
 32. arch/powerpc/kvm/booke.c (+6, -38)
 33. arch/powerpc/kvm/booke.h (+4, -1)
 34. arch/powerpc/kvm/bookehv_interrupts.S (+11, -0)
 35. arch/powerpc/kvm/e500.c (+4, -0)
 36. arch/powerpc/kvm/e500.h (+5, -3)
 37. arch/powerpc/kvm/e500_mmu.c (+1, -1)
 38. arch/powerpc/kvm/e500_mmu_host.c (+34, -25)
 39. arch/powerpc/kvm/e500mc.c (+4, -0)
 40. arch/powerpc/kvm/emulate.c (+0, -1)
 41. arch/powerpc/kvm/mpic.c (+1, -0)
 42. arch/powerpc/kvm/powerpc.c (+35, -23)
 43. arch/s390/include/asm/kvm_host.h (+14, -1)
 44. arch/s390/kvm/intercept.c (+11, -0)
 45. arch/s390/kvm/kvm-s390.c (+11, -6)
 46. arch/s390/kvm/kvm-s390.h (+6, -0)
 47. arch/x86/include/asm/kvm_para.h (+12, -21)
 48. arch/x86/kernel/kvm.c (+32, -0)
 49. arch/x86/kvm/cpuid.h (+8, -0)
 50. arch/x86/kvm/lapic.h (+1, -1)
 51. arch/x86/kvm/vmx.c (+5, -4)
 52. arch/x86/kvm/x86.c (+31, -10)
 53. drivers/s390/kvm/virtio_ccw.c (+9, -2)

+ 1 - 0
Documentation/virtual/kvm/api.txt

@@ -1838,6 +1838,7 @@ registers, find a list below:
  PPC   | KVM_REG_PPC_LPCR	| 64
  PPC   | KVM_REG_PPC_PPR	| 64
  PPC   | KVM_REG_PPC_ARCH_COMPAT | 32
+  PPC   | KVM_REG_PPC_DABRX     | 32
  PPC   | KVM_REG_PPC_TM_GPR0	| 64
          ...
  PPC   | KVM_REG_PPC_TM_GPR31	| 64

+ 2 - 0
arch/powerpc/Kconfig

@@ -342,6 +342,8 @@ config PPC_TRANSACTIONAL_MEM
        bool "Transactional Memory support for POWERPC"
        depends on PPC_BOOK3S_64
        depends on SMP
+       select ALTIVEC
+       select VSX
        default n
        ---help---
          Support user-mode Transactional Memory on POWERPC.

+ 111 - 0
arch/powerpc/include/asm/epapr_hcalls.h

@@ -460,5 +460,116 @@ static inline unsigned int ev_idle(void)

 	return r3;
 }
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+static inline unsigned long epapr_hypercall(unsigned long *in,
+			    unsigned long *out,
+			    unsigned long nr)
+{
+	unsigned long register r0 asm("r0");
+	unsigned long register r3 asm("r3") = in[0];
+	unsigned long register r4 asm("r4") = in[1];
+	unsigned long register r5 asm("r5") = in[2];
+	unsigned long register r6 asm("r6") = in[3];
+	unsigned long register r7 asm("r7") = in[4];
+	unsigned long register r8 asm("r8") = in[5];
+	unsigned long register r9 asm("r9") = in[6];
+	unsigned long register r10 asm("r10") = in[7];
+	unsigned long register r11 asm("r11") = nr;
+	unsigned long register r12 asm("r12");
+
+	asm volatile("bl	epapr_hypercall_start"
+		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
+		       "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
+		       "=r"(r12)
+		     : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
+		       "r"(r9), "r"(r10), "r"(r11)
+		     : "memory", "cc", "xer", "ctr", "lr");
+
+	out[0] = r4;
+	out[1] = r5;
+	out[2] = r6;
+	out[3] = r7;
+	out[4] = r8;
+	out[5] = r9;
+	out[6] = r10;
+	out[7] = r11;
+
+	return r3;
+}
+#else
+static unsigned long epapr_hypercall(unsigned long *in,
+				   unsigned long *out,
+				   unsigned long nr)
+{
+	return EV_UNIMPLEMENTED;
+}
+#endif
+
+static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
+{
+	unsigned long in[8];
+	unsigned long out[8];
+	unsigned long r;
+
+	r = epapr_hypercall(in, out, nr);
+	*r2 = out[0];
+
+	return r;
+}
+
+static inline long epapr_hypercall0(unsigned int nr)
+{
+	unsigned long in[8];
+	unsigned long out[8];
+
+	return epapr_hypercall(in, out, nr);
+}
+
+static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
+{
+	unsigned long in[8];
+	unsigned long out[8];
+
+	in[0] = p1;
+	return epapr_hypercall(in, out, nr);
+}
+
+static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
+				    unsigned long p2)
+{
+	unsigned long in[8];
+	unsigned long out[8];
+
+	in[0] = p1;
+	in[1] = p2;
+	return epapr_hypercall(in, out, nr);
+}
+
+static inline long epapr_hypercall3(unsigned int nr, unsigned long p1,
+				    unsigned long p2, unsigned long p3)
+{
+	unsigned long in[8];
+	unsigned long out[8];
+
+	in[0] = p1;
+	in[1] = p2;
+	in[2] = p3;
+	return epapr_hypercall(in, out, nr);
+}
+
+static inline long epapr_hypercall4(unsigned int nr, unsigned long p1,
+				    unsigned long p2, unsigned long p3,
+				    unsigned long p4)
+{
+	unsigned long in[8];
+	unsigned long out[8];
+
+	in[0] = p1;
+	in[1] = p2;
+	in[2] = p3;
+	in[3] = p4;
+	return epapr_hypercall(in, out, nr);
+}
 #endif /* !__ASSEMBLY__ */
 #endif /* _EPAPR_HCALLS_H */
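
A minimal usage sketch for the wrappers above, mirroring how kvm_arch_para_features() in the kvm_para.h diff below drives them (the function name here is hypothetical):

	/* Ask the hypervisor for its feature bitmap: the token goes in r11,
	 * out[0] comes back through the pointer argument. */
	static unsigned int query_hv_features(void)
	{
		unsigned long r;

		if (epapr_hypercall0_1(KVM_HCALL_TOKEN(KVM_HC_FEATURES), &r))
			return 0;	/* nonzero return: hypercall failed */
		return r;
	}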

+ 3 - 0
arch/powerpc/include/asm/kvm_asm.h

@@ -92,14 +92,17 @@
 #define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
 #define BOOK3S_INTERRUPT_DECREMENTER	0x900
 #define BOOK3S_INTERRUPT_HV_DECREMENTER	0x980
+#define BOOK3S_INTERRUPT_DOORBELL	0xa00
 #define BOOK3S_INTERRUPT_SYSCALL	0xc00
 #define BOOK3S_INTERRUPT_TRACE		0xd00
 #define BOOK3S_INTERRUPT_H_DATA_STORAGE	0xe00
 #define BOOK3S_INTERRUPT_H_INST_STORAGE	0xe20
 #define BOOK3S_INTERRUPT_H_EMUL_ASSIST	0xe40
+#define BOOK3S_INTERRUPT_H_DOORBELL	0xe80
 #define BOOK3S_INTERRUPT_PERFMON	0xf00
 #define BOOK3S_INTERRUPT_ALTIVEC	0xf20
 #define BOOK3S_INTERRUPT_VSX		0xf40
+#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL	0xf80

 #define BOOK3S_IRQPRIO_SYSTEM_RESET		0
 #define BOOK3S_IRQPRIO_DATA_SEGMENT		1

+ 13 - 14
arch/powerpc/include/asm/kvm_book3s.h

@@ -186,9 +186,6 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,

 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
-extern void kvmppc_load_up_fpu(void);
-extern void kvmppc_load_up_altivec(void);
-extern void kvmppc_load_up_vsx(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
 extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
 extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
@@ -271,16 +268,25 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
 	return vcpu->arch.pc;
 }

-static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
+static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
 {
-	ulong pc = kvmppc_get_pc(vcpu);
+	return (vcpu->arch.shared->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+}

+static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc)
+{
 	/* Load the instruction manually if it failed to do so in the
 	 * exit path */
 	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
 		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);

-	return vcpu->arch.last_inst;
+	return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) :
+		vcpu->arch.last_inst;
+}
+
+static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
+{
+	return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu));
 }

 /*
@@ -290,14 +296,7 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
  */
 static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
 {
-	ulong pc = kvmppc_get_pc(vcpu) - 4;
-
-	/* Load the instruction manually if it failed to do so in the
-	 * exit path */
-	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
-
-	return vcpu->arch.last_inst;
+	return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4);
 }

 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
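
The helpers above carry the little-endian guest support: kvmppc_need_byteswap() is true when the guest's MSR_LE differs from the host kernel's, and the last-fetched instruction is then byte-reversed before emulation. A one-line illustration of the rule (swab32() simply reverses the four bytes, e.g. swab32(0x12345678) == 0x78563412):

	u32 inst = vcpu->arch.last_inst;
	if (kvmppc_need_byteswap(vcpu))	/* guest and host byte order differ */
		inst = swab32(inst);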

+ 1 - 0
arch/powerpc/include/asm/kvm_book3s_asm.h

@@ -88,6 +88,7 @@ struct kvmppc_host_state {
 	u8 hwthread_req;
 	u8 hwthread_state;
 	u8 host_ipi;
+	u8 ptid;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	unsigned long xics_phys;

+ 6 - 0
arch/powerpc/include/asm/kvm_booke.h

@@ -63,6 +63,12 @@ static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
 	return vcpu->arch.xer;
 }

+static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
+{
+	/* XXX Would need to check TLB entry */
+	return false;
+}
+
 static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.last_inst;

+ 51 - 10
arch/powerpc/include/asm/kvm_host.h

@@ -288,6 +288,7 @@ struct kvmppc_vcore {
 	int n_woken;
 	int nap_count;
 	int napping_threads;
+	int first_vcpuid;
 	u16 pcpu;
 	u16 last_cpu;
 	u8 vcore_state;
@@ -298,10 +299,12 @@
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
+	struct kvm *kvm;
 	u64 tb_offset;		/* guest timebase - host timebase */
 	ulong lpcr;
 	u32 arch_compat;
 	ulong pcr;
+	ulong dpdes;		/* doorbell state (POWER8) */
 };

 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
@@ -410,8 +413,7 @@ struct kvm_vcpu_arch {

 	ulong gpr[32];

-	u64 fpr[32];
-	u64 fpscr;
+	struct thread_fp_state fp;

 #ifdef CONFIG_SPE
 	ulong evr[32];
@@ -420,12 +422,7 @@ struct kvm_vcpu_arch {
 	u64 acc;
 #endif
 #ifdef CONFIG_ALTIVEC
-	vector128 vr[32];
-	vector128 vscr;
-#endif
-
-#ifdef CONFIG_VSX
-	u64 vsr[64];
+	struct thread_vr_state vr;
 #endif

 #ifdef CONFIG_KVM_BOOKE_HV
@@ -452,6 +449,7 @@ struct kvm_vcpu_arch {
 	ulong pc;
 	ulong ctr;
 	ulong lr;
+	ulong tar;

 	ulong xer;
 	u32 cr;
@@ -461,13 +459,30 @@ struct kvm_vcpu_arch {
 	ulong guest_owned_ext;
 	ulong purr;
 	ulong spurr;
+	ulong ic;
+	ulong vtb;
 	ulong dscr;
 	ulong amr;
 	ulong uamor;
+	ulong iamr;
 	u32 ctrl;
+	u32 dabrx;
 	ulong dabr;
+	ulong dawr;
+	ulong dawrx;
+	ulong ciabr;
 	ulong cfar;
 	ulong ppr;
+	ulong pspb;
+	ulong fscr;
+	ulong ebbhr;
+	ulong ebbrr;
+	ulong bescr;
+	ulong csigr;
+	ulong tacr;
+	ulong tcscr;
+	ulong acop;
+	ulong wort;
 	ulong shadow_srr1;
 #endif
 	u32 vrsave; /* also USPRG0 */
@@ -502,10 +517,33 @@ struct kvm_vcpu_arch {
 	u32 ccr1;
 	u32 dbsr;

-	u64 mmcr[3];
+	u64 mmcr[5];
 	u32 pmc[8];
+	u32 spmc[2];
 	u64 siar;
 	u64 sdar;
+	u64 sier;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	u64 tfhar;
+	u64 texasr;
+	u64 tfiar;
+
+	u32 cr_tm;
+	u64 lr_tm;
+	u64 ctr_tm;
+	u64 amr_tm;
+	u64 ppr_tm;
+	u64 dscr_tm;
+	u64 tar_tm;
+
+	ulong gpr_tm[32];
+
+	struct thread_fp_state fp_tm;
+
+	struct thread_vr_state vr_tm;
+	u32 vrsave_tm; /* also USPRG0 */
+
+#endif

 #ifdef CONFIG_KVM_EXIT_TIMING
 	struct mutex exit_timing_lock;
@@ -546,6 +584,7 @@ struct kvm_vcpu_arch {
 #endif
 	gpa_t paddr_accessed;
 	gva_t vaddr_accessed;
+	pgd_t *pgdir;

 	u8 io_gpr; /* GPR used as IO source/target */
 	u8 mmio_is_bigendian;
@@ -603,7 +642,6 @@ struct kvm_vcpu_arch {
 	struct list_head run_list;
 	struct task_struct *run_task;
 	struct kvm_run *kvm_run;
-	pgd_t *pgdir;

 	spinlock_t vpa_update_lock;
 	struct kvmppc_vpa vpa;
@@ -616,9 +654,12 @@ struct kvm_vcpu_arch {
 	spinlock_t tbacct_lock;
 	u64 busy_stolen;
 	u64 busy_preempt;
+	unsigned long intr_msr;
 #endif
 };

+#define VCPU_FPR(vcpu, i)	(vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
+
 /* Values for vcpu->arch.state */
 #define KVMPPC_VCPU_NOTREADY		0
 #define KVMPPC_VCPU_RUNNABLE		1
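
The new VCPU_FPR() macro hides the layout change: each FPR now lives in one half of a 16-byte VSX slot inside struct thread_fp_state. The one_reg handlers in the book3s.c diff below use it exactly like the old fpr[] array:

	/* From kvm_vcpu_ioctl_get/set_one_reg: i = reg->id - KVM_REG_PPC_FPR0 */
	val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));	/* read FPR i */
	VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);	/* write FPR i */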

+ 1 - 79
arch/powerpc/include/asm/kvm_para.h

@@ -39,10 +39,6 @@ static inline int kvm_para_available(void)
 	return 1;
 }

-extern unsigned long kvm_hypercall(unsigned long *in,
-				   unsigned long *out,
-				   unsigned long nr);
-
 #else

 static inline int kvm_para_available(void)
@@ -50,82 +46,8 @@ static inline int kvm_para_available(void)
 	return 0;
 }

-static unsigned long kvm_hypercall(unsigned long *in,
-				   unsigned long *out,
-				   unsigned long nr)
-{
-	return EV_UNIMPLEMENTED;
-}
-
 #endif

-static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2)
-{
-	unsigned long in[8];
-	unsigned long out[8];
-	unsigned long r;
-
-	r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
-	*r2 = out[0];
-
-	return r;
-}
-
-static inline long kvm_hypercall0(unsigned int nr)
-{
-	unsigned long in[8];
-	unsigned long out[8];
-
-	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
-}
-
-static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
-{
-	unsigned long in[8];
-	unsigned long out[8];
-
-	in[0] = p1;
-	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
-}
-
-static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
-				  unsigned long p2)
-{
-	unsigned long in[8];
-	unsigned long out[8];
-
-	in[0] = p1;
-	in[1] = p2;
-	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
-}
-
-static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
-				  unsigned long p2, unsigned long p3)
-{
-	unsigned long in[8];
-	unsigned long out[8];
-
-	in[0] = p1;
-	in[1] = p2;
-	in[2] = p3;
-	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
-}
-
-static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
-				  unsigned long p2, unsigned long p3,
-				  unsigned long p4)
-{
-	unsigned long in[8];
-	unsigned long out[8];
-
-	in[0] = p1;
-	in[1] = p2;
-	in[2] = p3;
-	in[3] = p4;
-	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
-}
-
-
 static inline unsigned int kvm_arch_para_features(void)
 {
 	unsigned long r;
@@ -133,7 +55,7 @@ static inline unsigned int kvm_arch_para_features(void)
 	if (!kvm_para_available())
 		return 0;

-	if(kvm_hypercall0_1(KVM_HC_FEATURES, &r))
+	if(epapr_hypercall0_1(KVM_HCALL_TOKEN(KVM_HC_FEATURES), &r))
		return 0;

 	return r;

+ 10 - 3
arch/powerpc/include/asm/kvm_ppc.h

@@ -54,12 +54,13 @@ extern void kvmppc_handler_highmem(void);
 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
 extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
                               unsigned int rt, unsigned int bytes,
-                              int is_bigendian);
+			      int is_default_endian);
 extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                unsigned int rt, unsigned int bytes,
-                               int is_bigendian);
+			       int is_default_endian);
 extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                               u64 val, unsigned int bytes, int is_bigendian);
+			       u64 val, unsigned int bytes,
+			       int is_default_endian);

 extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
@@ -455,6 +456,12 @@ static inline void kvmppc_fix_ee_before_entry(void)
 	trace_hardirqs_on();

 #ifdef CONFIG_PPC64
+	/*
+	 * To avoid races, the caller must have gone directly from having
+	 * interrupts fully-enabled to hard-disabled.
+	 */
+	WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
+
 	/* Only need to enable IRQs by hard enabling them after this */
 	local_paca->irq_happened = 0;
 	local_paca->soft_enabled = 1;
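
The new WARN_ON documents the calling convention rather than changing behaviour; a sketch of the pattern it enforces (hard_irq_disable() is the powerpc primitive that sets PACA_IRQ_HARD_DIS; any path that soft-enables in between would trip the warning):

	hard_irq_disable();		/* irq_happened == PACA_IRQ_HARD_DIS */
	/* ... no soft-enable allowed here ... */
	kvmppc_fix_ee_before_entry();	/* now safe to clear irq_happened */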

+ 21 - 0
arch/powerpc/include/asm/pgtable.h

@@ -287,6 +287,27 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #endif
 pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 				 unsigned *shift);
+
+static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
+				     unsigned long *pte_sizep)
+{
+	pte_t *ptep;
+	unsigned long ps = *pte_sizep;
+	unsigned int shift;
+
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+	if (!ptep)
+		return NULL;
+	if (shift)
+		*pte_sizep = 1ul << shift;
+	else
+		*pte_sizep = PAGE_SIZE;
+
+	if (ps > *pte_sizep)
+		return NULL;
+
+	return ptep;
+}
 #endif /* __ASSEMBLY__ */

 #endif /* __KERNEL__ */
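
A sketch of how a caller is expected to use the new helper (hypothetical call site; the real users are in the KVM MMU code): pass in the smallest mapping size that is acceptable, and get back the PTE pointer plus the actual size, or NULL on failure:

	unsigned long pte_size = PAGE_SIZE;	/* in: minimum acceptable size */
	pte_t *ptep = lookup_linux_ptep(vcpu->arch.pgdir, hva, &pte_size);
	if (!ptep)
		return -EFAULT;	/* no PTE, or mapping smaller than requested */
	/* out: pte_size is the real mapping size, e.g. 16M for a hugepage */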

+ 35 - 8
arch/powerpc/include/asm/reg.h

@@ -223,17 +223,26 @@
 #define   CTRL_TE	0x00c00000	/* thread enable */
 #define   CTRL_RUNLATCH	0x1
 #define SPRN_DAWR	0xB4
+#define SPRN_CIABR	0xBB
+#define   CIABR_PRIV		0x3
+#define   CIABR_PRIV_USER	1
+#define   CIABR_PRIV_SUPER	2
+#define   CIABR_PRIV_HYPER	3
 #define SPRN_DAWRX	0xBC
-#define   DAWRX_USER	(1UL << 0)
-#define   DAWRX_KERNEL	(1UL << 1)
-#define   DAWRX_HYP	(1UL << 2)
+#define   DAWRX_USER	__MASK(0)
+#define   DAWRX_KERNEL	__MASK(1)
+#define   DAWRX_HYP	__MASK(2)
+#define   DAWRX_WTI	__MASK(3)
+#define   DAWRX_WT	__MASK(4)
+#define   DAWRX_DR	__MASK(5)
+#define   DAWRX_DW	__MASK(6)
 #define SPRN_DABR	0x3F5	/* Data Address Breakpoint Register */
 #define SPRN_DABR2	0x13D	/* e300 */
 #define SPRN_DABRX	0x3F7	/* Data Address Breakpoint Register Extension */
-#define   DABRX_USER	(1UL << 0)
-#define   DABRX_KERNEL	(1UL << 1)
-#define   DABRX_HYP	(1UL << 2)
-#define   DABRX_BTI	(1UL << 3)
+#define   DABRX_USER	__MASK(0)
+#define   DABRX_KERNEL	__MASK(1)
+#define   DABRX_HYP	__MASK(2)
+#define   DABRX_BTI	__MASK(3)
 #define   DABRX_ALL     (DABRX_BTI | DABRX_HYP | DABRX_KERNEL | DABRX_USER)
 #define SPRN_DAR	0x013	/* Data Address Register */
 #define SPRN_DBCR	0x136	/* e300 Data Breakpoint Control Reg */
@@ -260,6 +269,8 @@
 #define SPRN_HRMOR	0x139	/* Real mode offset register */
 #define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore 0 */
 #define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
+#define SPRN_IC		0x350	/* Virtual Instruction Count */
+#define SPRN_VTB	0x351	/* Virtual Time Base */
 /* HFSCR and FSCR bit numbers are the same */
 #define FSCR_TAR_LG	8	/* Enable Target Address Register */
 #define FSCR_EBB_LG	7	/* Enable Event Based Branching */
@@ -298,9 +309,13 @@
 #define   LPCR_RMLS    0x1C000000      /* impl dependent rmo limit sel */
 #define	  LPCR_RMLS_SH	(63-37)
 #define   LPCR_ILE     0x02000000      /* !HV irqs set MSR:LE */
+#define   LPCR_AIL	0x01800000	/* Alternate interrupt location */
 #define   LPCR_AIL_0	0x00000000	/* MMU off exception offset 0x0 */
 #define   LPCR_AIL_3	0x01800000	/* MMU on exception offset 0xc00...4xxx */
-#define   LPCR_PECE	0x00007000	/* powersave exit cause enable */
+#define   LPCR_ONL	0x00040000	/* online - PURR/SPURR count */
+#define   LPCR_PECE	0x0001f000	/* powersave exit cause enable */
+#define     LPCR_PECEDP	0x00010000	/* directed priv dbells cause exit */
+#define     LPCR_PECEDH	0x00008000	/* directed hyp dbells cause exit */
 #define     LPCR_PECE0	0x00004000	/* ext. exceptions can cause exit */
 #define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
 #define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
@@ -322,6 +337,8 @@
 #define SPRN_PCR	0x152	/* Processor compatibility register */
 #define   PCR_VEC_DIS	(1ul << (63-0))	/* Vec. disable (bit NA since POWER8) */
 #define   PCR_VSX_DIS	(1ul << (63-1))	/* VSX disable (bit NA since POWER8) */
+#define   PCR_TM_DIS	(1ul << (63-2))	/* Trans. memory disable (POWER8) */
+#define   PCR_ARCH_206	0x4		/* Architecture 2.06 */
 #define   PCR_ARCH_205	0x2		/* Architecture 2.05 */
 #define	SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
 #define SPRN_TLBINDEXR	0x154	/* P7 TLB control register */
@@ -368,6 +385,8 @@
 #define DER_EBRKE	0x00000002	/* External Breakpoint Interrupt */
 #define DER_DPIE	0x00000001	/* Dev. Port Nonmaskable Request */
 #define SPRN_DMISS	0x3D0		/* Data TLB Miss Register */
+#define SPRN_DHDES	0x0B1		/* Directed Hyp. Doorbell Exc. State */
+#define SPRN_DPDES	0x0B0		/* Directed Priv. Doorbell Exc. State */
 #define SPRN_EAR	0x11A		/* External Address Register */
 #define SPRN_HASH1	0x3D2		/* Primary Hash Address Register */
 #define SPRN_HASH2	0x3D3		/* Secondary Hash Address Resgister */
@@ -427,6 +446,7 @@
 #define SPRN_IABR	0x3F2	/* Instruction Address Breakpoint Register */
 #define SPRN_IABR2	0x3FA		/* 83xx */
 #define SPRN_IBCR	0x135		/* 83xx Insn Breakpoint Control Reg */
+#define SPRN_IAMR	0x03D		/* Instr. Authority Mask Reg */
 #define SPRN_HID4	0x3F4		/* 970 HID4 */
 #define  HID4_LPES0	 (1ul << (63-0)) /* LPAR env. sel. bit 0 */
 #define	 HID4_RMLS2_SH	 (63 - 2)	/* Real mode limit bottom 2 bits */
@@ -541,6 +561,7 @@
 #define SPRN_PIR	0x3FF	/* Processor Identification Register */
 #endif
 #define SPRN_TIR	0x1BE	/* Thread Identification Register */
+#define SPRN_PSPB	0x09F	/* Problem State Priority Boost reg */
 #define SPRN_PTEHI	0x3D5	/* 981 7450 PTE HI word (S/W TLB load) */
 #define SPRN_PTELO	0x3D6	/* 982 7450 PTE LO word (S/W TLB load) */
 #define SPRN_PURR	0x135	/* Processor Utilization of Resources Reg */
@@ -682,6 +703,7 @@
 #define SPRN_EBBHR	804	/* Event based branch handler register */
 #define SPRN_EBBRR	805	/* Event based branch return register */
 #define SPRN_BESCR	806	/* Branch event status and control register */
+#define SPRN_WORT	895	/* Workload optimization register - thread */

 #define SPRN_PMC1	787
 #define SPRN_PMC2	788
@@ -698,6 +720,11 @@
 #define   SIER_SIHV		0x1000000	/* Sampled MSR_HV */
 #define   SIER_SIAR_VALID	0x0400000	/* SIAR contents valid */
 #define   SIER_SDAR_VALID	0x0200000	/* SDAR contents valid */
+#define SPRN_TACR	888
+#define SPRN_TCSCR	889
+#define SPRN_CSIGR	890
+#define SPRN_SPMC1	892
+#define SPRN_SPMC2	893

 /* When EBB is enabled, some of MMCR0/MMCR2/SIER are user accessible */
 #define MMCR0_USER_MASK	(MMCR0_FC | MMCR0_PMXE | MMCR0_PMAO)
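
The switch from open-coded (1UL << n) to __MASK(n) is purely cosmetic; on powerpc __MASK(n) expands to a single-bit mask at bit position n. An illustrative composition using the new DAWRX bits (values as defined above; the particular combination shown is an assumption, not taken from this commit):

	/* Watchpoint that fires on data reads in user and kernel state. */
	unsigned long dawrx = DAWRX_USER | DAWRX_KERNEL | DAWRX_DR;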

+ 0 - 2
arch/powerpc/include/asm/switch_to.h

@@ -25,10 +25,8 @@ static inline void save_tar(struct thread_struct *prev)
 static inline void save_tar(struct thread_struct *prev) {}
 #endif

-extern void load_up_fpu(void);
 extern void enable_kernel_fp(void);
 extern void enable_kernel_altivec(void);
-extern void load_up_altivec(struct task_struct *);
 extern int emulate_altivec(struct pt_regs *);
 extern void __giveup_vsx(struct task_struct *);
 extern void giveup_vsx(struct task_struct *);

+ 3 - 0
arch/powerpc/include/uapi/asm/kvm.h

@@ -545,6 +545,7 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TCSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
 #define KVM_REG_PPC_PID		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
 #define KVM_REG_PPC_ACOP	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
+#define KVM_REG_PPC_WORT	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb4)

 #define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
 #define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
@@ -553,6 +554,8 @@ struct kvm_get_htab_header {
 /* Architecture compatibility level */
 #define KVM_REG_PPC_ARCH_COMPAT	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)

+#define KVM_REG_PPC_DABRX	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
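
Userspace reads the new register through the standard one_reg interface; a hedged sketch (needs <sys/ioctl.h> and <linux/kvm.h>; vcpu_fd is assumed to be an open vCPU file descriptor):

	struct kvm_one_reg reg;
	__u32 dabrx;

	reg.id = KVM_REG_PPC_DABRX;	/* a U32-sized register */
	reg.addr = (__u64)(unsigned long)&dabrx;
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		perror("KVM_GET_ONE_REG");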

+ 2 - 0
arch/powerpc/include/uapi/asm/tm.h

@@ -6,6 +6,8 @@
  * the failure is persistent.  PAPR saves 0xff-0xe0 for the hypervisor.
  */
 #define TM_CAUSE_PERSISTENT	0x01
+#define TM_CAUSE_KVM_RESCHED	0xe0  /* From PAPR */
+#define TM_CAUSE_KVM_FAC_UNAV	0xe2  /* From PAPR */
 #define TM_CAUSE_RESCHED	0xde
 #define TM_CAUSE_TLBI		0xdc
 #define TM_CAUSE_FAC_UNAV	0xda

+ 42 - 8
arch/powerpc/kernel/asm-offsets.c

@@ -438,18 +438,14 @@ int main(void)
 	DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
 	DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
-	DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
-	DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr));
+	DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr));
 #ifdef CONFIG_ALTIVEC
-	DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr));
-	DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr));
-#endif
-#ifdef CONFIG_VSX
-	DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr));
+	DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr));
 #endif
 	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
 	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+	DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
 	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
 	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -497,16 +493,24 @@ int main(void)
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
 	DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
 	DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
+	DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic));
+	DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb));
 	DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
 	DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
 	DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
+	DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr));
 	DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
 	DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
+	DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx));
+	DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr));
+	DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx));
+	DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr));
 	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
 	DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
 	DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
@@ -515,8 +519,10 @@ int main(void)
 	DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
 	DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
 	DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+	DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc));
 	DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
 	DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
+	DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier));
 	DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
 	DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
 	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
@@ -524,20 +530,47 @@ int main(void)
 	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
-	DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
 	DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
 	DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
+	DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
+	DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
+	DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
+	DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
+	DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr));
+	DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr));
+	DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr));
+	DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
+	DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
+	DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
 	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
 	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
+	DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
 	DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
 	DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
 	DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
+	DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
 	DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
 	DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar));
+	DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar));
+	DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr));
+	DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm));
+	DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr));
+	DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
+	DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
+	DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
+	DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
+	DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
+	DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
+	DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm));
+	DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm));
+	DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm));
+#endif

 #ifdef CONFIG_PPC_BOOK3S_64
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -602,6 +635,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
 	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
+	HSTATE_FIELD(HSTATE_PTID, ptid);
 	HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
 	HSTATE_FIELD(HSTATE_PMC, host_pmc);
 	HSTATE_FIELD(HSTATE_PURR, host_purr);
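
These DEFINE() entries exist so the assembly entry/exit code can address fields of C structures by constant offset: asm-offsets.c is compiled and the build scrapes the constants into asm-offsets.h. Roughly (the exact macro lives in include/linux/kbuild.h):

	/* DEFINE(sym, val) emits a marker line the build system greps out: */
	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))
	/* so the new VCPU_TAR constant lets assembly do, e.g.,
	 *   ld r6, VCPU_TAR(r4)
	 * with r4 holding the vcpu pointer (register choice illustrative). */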

+ 2 - 39
arch/powerpc/kernel/kvm.c

@@ -413,13 +413,13 @@ static void kvm_map_magic_page(void *data)
 {
 	u32 *features = data;

-	ulong in[8];
+	ulong in[8] = {0};
 	ulong out[8];

 	in[0] = KVM_MAGIC_PAGE;
 	in[1] = KVM_MAGIC_PAGE;

-	kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
+	epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));

 	*features = out[0];
 }
@@ -711,43 +711,6 @@ static void kvm_use_magic_page(void)
 			 kvm_patching_worked ? "worked" : "failed");
 }

-unsigned long kvm_hypercall(unsigned long *in,
-			    unsigned long *out,
-			    unsigned long nr)
-{
-	unsigned long register r0 asm("r0");
-	unsigned long register r3 asm("r3") = in[0];
-	unsigned long register r4 asm("r4") = in[1];
-	unsigned long register r5 asm("r5") = in[2];
-	unsigned long register r6 asm("r6") = in[3];
-	unsigned long register r7 asm("r7") = in[4];
-	unsigned long register r8 asm("r8") = in[5];
-	unsigned long register r9 asm("r9") = in[6];
-	unsigned long register r10 asm("r10") = in[7];
-	unsigned long register r11 asm("r11") = nr;
-	unsigned long register r12 asm("r12");
-
-	asm volatile("bl	epapr_hypercall_start"
-		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
-		       "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
-		       "=r"(r12)
-		     : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
-		       "r"(r9), "r"(r10), "r"(r11)
-		     : "memory", "cc", "xer", "ctr", "lr");
-
-	out[0] = r4;
-	out[1] = r5;
-	out[2] = r6;
-	out[3] = r7;
-	out[4] = r8;
-	out[5] = r9;
-	out[6] = r10;
-	out[7] = r11;
-
-	return r3;
-}
-EXPORT_SYMBOL_GPL(kvm_hypercall);
-
 static __init void kvm_free_tmp(void)
 {
 	free_reserved_area(&kvm_tmp[kvm_tmp_index],

+ 4 - 0
arch/powerpc/kvm/44x.c

@@ -21,6 +21,8 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/export.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>

 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -231,3 +233,5 @@ static void __exit kvmppc_44x_exit(void)

 module_init(kvmppc_44x_init);
 module_exit(kvmppc_44x_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");

+ 38 - 8
arch/powerpc/kvm/book3s.c

@@ -18,6 +18,8 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>

 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -575,10 +577,10 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			break;
 		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
 			i = reg->id - KVM_REG_PPC_FPR0;
-			val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
+			val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
 			break;
 		case KVM_REG_PPC_FPSCR:
-			val = get_reg_val(reg->id, vcpu->arch.fpscr);
+			val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
 			break;
 #ifdef CONFIG_ALTIVEC
 		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
@@ -586,19 +588,30 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 				r = -ENXIO;
 				break;
 			}
-			val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
+			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
+			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
 			break;
 		case KVM_REG_PPC_VRSAVE:
 			val = get_reg_val(reg->id, vcpu->arch.vrsave);
 			break;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+		case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+			if (cpu_has_feature(CPU_FTR_VSX)) {
+				long int i = reg->id - KVM_REG_PPC_VSR0;
+				val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
+				val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+			} else {
+				r = -ENXIO;
+			}
+			break;
+#endif /* CONFIG_VSX */
 		case KVM_REG_PPC_DEBUG_INST: {
 			u32 opcode = INS_TW;
 			r = copy_to_user((u32 __user *)(long)reg->addr,
@@ -654,10 +667,10 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			break;
 		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
 			i = reg->id - KVM_REG_PPC_FPR0;
-			vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
+			VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);
 			break;
 		case KVM_REG_PPC_FPSCR:
-			vcpu->arch.fpscr = set_reg_val(reg->id, val);
+			vcpu->arch.fp.fpscr = set_reg_val(reg->id, val);
 			break;
 #ifdef CONFIG_ALTIVEC
 		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
@@ -665,14 +678,14 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
+			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
 		case KVM_REG_PPC_VRSAVE:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
@@ -682,6 +695,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			vcpu->arch.vrsave = set_reg_val(reg->id, val);
 			break;
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+		case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+			if (cpu_has_feature(CPU_FTR_VSX)) {
+				long int i = reg->id - KVM_REG_PPC_VSR0;
+				vcpu->arch.fp.fpr[i][0] = val.vsxval[0];
+				vcpu->arch.fp.fpr[i][1] = val.vsxval[1];
+			} else {
+				r = -ENXIO;
+			}
+			break;
+#endif /* CONFIG_VSX */
 #ifdef CONFIG_KVM_XICS
 		case KVM_REG_PPC_ICP_STATE:
 			if (!vcpu->arch.icp) {
@@ -879,3 +903,9 @@ static void kvmppc_book3s_exit(void)

 module_init(kvmppc_book3s_init);
 module_exit(kvmppc_book3s_exit);
+
+/* On 32bit this is our one and only kernel module */
+#ifdef CONFIG_KVM_BOOK3S_32
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
+#endif

+ 5 - 0
arch/powerpc/kvm/book3s_32_mmu_host.c

@@ -243,6 +243,11 @@ next_pteg:
 	/* Now tell our Shadow PTE code about the new page */

 	pte = kvmppc_mmu_hpte_cache_next(vcpu);
+	if (!pte) {
+		kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+		r = -EAGAIN;
+		goto out;
+	}

 	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
 		    orig_pte->may_write ? 'w' : '-',

+ 2 - 2
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -262,7 +262,7 @@ int kvmppc_mmu_hv_init(void)

 static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
 {
-	kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
+	kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
 }

 /*
@@ -562,7 +562,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * we just return and retry the instruction.
 	 */

-	if (instruction_is_store(vcpu->arch.last_inst) != !!is_store)
+	if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store)
 		return RESUME_GUEST;

 	/*

+ 0 - 4
arch/powerpc/kvm/book3s_exports.c

@@ -25,9 +25,5 @@ EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
 #endif
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
-EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
-#ifdef CONFIG_ALTIVEC
-EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
-#endif
 #endif

+ 241 - 78
arch/powerpc/kvm/book3s_hv.c

@@ -31,6 +31,7 @@
 #include <linux/spinlock.h>
 #include <linux/spinlock.h>
 #include <linux/page-flags.h>
 #include <linux/page-flags.h>
 #include <linux/srcu.h>
 #include <linux/srcu.h>
+#include <linux/miscdevice.h>
 
 
 #include <asm/reg.h>
 #include <asm/reg.h>
 #include <asm/cputable.h>
 #include <asm/cputable.h>
@@ -85,10 +86,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 
 
 	/* CPU points to the first thread of the core */
 	/* CPU points to the first thread of the core */
 	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
 	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+#ifdef CONFIG_KVM_XICS
 		int real_cpu = cpu + vcpu->arch.ptid;
 		int real_cpu = cpu + vcpu->arch.ptid;
 		if (paca[real_cpu].kvm_hstate.xics_phys)
 		if (paca[real_cpu].kvm_hstate.xics_phys)
 			xics_wake_cpu(real_cpu);
 			xics_wake_cpu(real_cpu);
-		else if (cpu_online(cpu))
+		else
+#endif
+		if (cpu_online(cpu))
 			smp_send_reschedule(cpu);
 			smp_send_reschedule(cpu);
 	}
 	}
 	put_cpu();
 	put_cpu();
@@ -182,14 +186,28 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 
 
 		switch (arch_compat) {
 		switch (arch_compat) {
 		case PVR_ARCH_205:
 		case PVR_ARCH_205:
-			pcr = PCR_ARCH_205;
+			/*
+			 * If an arch bit is set in PCR, all the defined
+			 * higher-order arch bits also have to be set.
+			 */
+			pcr = PCR_ARCH_206 | PCR_ARCH_205;
 			break;
 			break;
 		case PVR_ARCH_206:
 		case PVR_ARCH_206:
 		case PVR_ARCH_206p:
 		case PVR_ARCH_206p:
+			pcr = PCR_ARCH_206;
+			break;
+		case PVR_ARCH_207:
 			break;
 			break;
 		default:
 		default:
 			return -EINVAL;
 			return -EINVAL;
 		}
 		}
+
+		if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
+			/* POWER7 can't emulate POWER8 */
+			if (!(pcr & PCR_ARCH_206))
+				return -EINVAL;
+			pcr &= ~PCR_ARCH_206;
+		}
 	}
 	}
 
 
 	spin_lock(&vc->lock);
 	spin_lock(&vc->lock);
@@ -637,6 +655,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		r = RESUME_GUEST;
 		break;
 		break;
 	case BOOK3S_INTERRUPT_EXTERNAL:
 	case BOOK3S_INTERRUPT_EXTERNAL:
+	case BOOK3S_INTERRUPT_H_DOORBELL:
 		vcpu->stat.ext_intr_exits++;
 		vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		r = RESUME_GUEST;
 		break;
 		break;
@@ -673,12 +692,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		/* hcall - punt to userspace */
 		/* hcall - punt to userspace */
 		int i;
 		int i;
 
 
-		if (vcpu->arch.shregs.msr & MSR_PR) {
-			/* sc 1 from userspace - reflect to guest syscall */
-			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
-			r = RESUME_GUEST;
-			break;
-		}
+		/* hypercall with MSR_PR has already been handled in rmode,
+		 * and never reaches here.
+		 */
+
 		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
 		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
 		for (i = 0; i < 9; ++i)
 		for (i = 0; i < 9; ++i)
 			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
 			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -708,7 +725,16 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * we don't emulate any guest instructions at this stage.
 	 */
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
-		kvmppc_core_queue_program(vcpu, 0x80000);
+		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+		r = RESUME_GUEST;
+		break;
+	/*
+	 * This occurs if the guest (kernel or userspace) does something that
+	 * is prohibited by HFSCR.  We just generate a program interrupt to
+	 * the guest.
+	 */
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 		r = RESUME_GUEST;
 		break;
 	default:
@@ -765,11 +791,35 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
 	u64 mask;
 
 	spin_lock(&vc->lock);
+	/*
+	 * If ILE (interrupt little-endian) has changed, update the
+	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
+	 */
+	if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
+		struct kvm *kvm = vcpu->kvm;
+		struct kvm_vcpu *vcpu;
+		int i;
+
+		mutex_lock(&kvm->lock);
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (vcpu->arch.vcore != vc)
+				continue;
+			if (new_lpcr & LPCR_ILE)
+				vcpu->arch.intr_msr |= MSR_LE;
+			else
+				vcpu->arch.intr_msr &= ~MSR_LE;
+		}
+		mutex_unlock(&kvm->lock);
+	}
+
 	/*
 	 * Userspace can only modify DPFD (default prefetch depth),
 	 * ILE (interrupt little-endian) and TC (translation control).
+	 * On POWER8 userspace can also modify AIL (alt. interrupt loc.)
 	 */
 	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		mask |= LPCR_AIL;
 	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
 	spin_unlock(&vc->lock);
 }
@@ -787,6 +837,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_DABR:
 		*val = get_reg_val(id, vcpu->arch.dabr);
 		break;
+	case KVM_REG_PPC_DABRX:
+		*val = get_reg_val(id, vcpu->arch.dabrx);
+		break;
 	case KVM_REG_PPC_DSCR:
 		*val = get_reg_val(id, vcpu->arch.dscr);
 		break;
@@ -802,7 +855,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_UAMOR:
 		*val = get_reg_val(id, vcpu->arch.uamor);
 		break;
-	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
 		i = id - KVM_REG_PPC_MMCR0;
 		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
 		break;
@@ -810,33 +863,87 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		i = id - KVM_REG_PPC_PMC1;
 		*val = get_reg_val(id, vcpu->arch.pmc[i]);
 		break;
+	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+		i = id - KVM_REG_PPC_SPMC1;
+		*val = get_reg_val(id, vcpu->arch.spmc[i]);
+		break;
 	case KVM_REG_PPC_SIAR:
 		*val = get_reg_val(id, vcpu->arch.siar);
 		break;
 	case KVM_REG_PPC_SDAR:
 		*val = get_reg_val(id, vcpu->arch.sdar);
 		break;
-#ifdef CONFIG_VSX
-	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
-		if (cpu_has_feature(CPU_FTR_VSX)) {
-			/* VSX => FP reg i is stored in arch.vsr[2*i] */
-			long int i = id - KVM_REG_PPC_FPR0;
-			*val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
-		} else {
-			/* let generic code handle it */
-			r = -EINVAL;
-		}
+	case KVM_REG_PPC_SIER:
+		*val = get_reg_val(id, vcpu->arch.sier);
 		break;
-	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
-		if (cpu_has_feature(CPU_FTR_VSX)) {
-			long int i = id - KVM_REG_PPC_VSR0;
-			val->vsxval[0] = vcpu->arch.vsr[2 * i];
-			val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
-		} else {
-			r = -ENXIO;
-		}
+	case KVM_REG_PPC_IAMR:
+		*val = get_reg_val(id, vcpu->arch.iamr);
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		*val = get_reg_val(id, vcpu->arch.tfhar);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		*val = get_reg_val(id, vcpu->arch.tfiar);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		*val = get_reg_val(id, vcpu->arch.texasr);
+		break;
+#endif
+	case KVM_REG_PPC_FSCR:
+		*val = get_reg_val(id, vcpu->arch.fscr);
+		break;
+	case KVM_REG_PPC_PSPB:
+		*val = get_reg_val(id, vcpu->arch.pspb);
+		break;
+	case KVM_REG_PPC_EBBHR:
+		*val = get_reg_val(id, vcpu->arch.ebbhr);
+		break;
+	case KVM_REG_PPC_EBBRR:
+		*val = get_reg_val(id, vcpu->arch.ebbrr);
+		break;
+	case KVM_REG_PPC_BESCR:
+		*val = get_reg_val(id, vcpu->arch.bescr);
+		break;
+	case KVM_REG_PPC_TAR:
+		*val = get_reg_val(id, vcpu->arch.tar);
+		break;
+	case KVM_REG_PPC_DPDES:
+		*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+		break;
+	case KVM_REG_PPC_DAWR:
+		*val = get_reg_val(id, vcpu->arch.dawr);
+		break;
+	case KVM_REG_PPC_DAWRX:
+		*val = get_reg_val(id, vcpu->arch.dawrx);
+		break;
+	case KVM_REG_PPC_CIABR:
+		*val = get_reg_val(id, vcpu->arch.ciabr);
+		break;
+	case KVM_REG_PPC_IC:
+		*val = get_reg_val(id, vcpu->arch.ic);
+		break;
+	case KVM_REG_PPC_VTB:
+		*val = get_reg_val(id, vcpu->arch.vtb);
+		break;
+	case KVM_REG_PPC_CSIGR:
+		*val = get_reg_val(id, vcpu->arch.csigr);
+		break;
+	case KVM_REG_PPC_TACR:
+		*val = get_reg_val(id, vcpu->arch.tacr);
+		break;
+	case KVM_REG_PPC_TCSCR:
+		*val = get_reg_val(id, vcpu->arch.tcscr);
+		break;
+	case KVM_REG_PPC_PID:
+		*val = get_reg_val(id, vcpu->arch.pid);
+		break;
+	case KVM_REG_PPC_ACOP:
+		*val = get_reg_val(id, vcpu->arch.acop);
+		break;
+	case KVM_REG_PPC_WORT:
+		*val = get_reg_val(id, vcpu->arch.wort);
 		break;
-#endif /* CONFIG_VSX */
 	case KVM_REG_PPC_VPA_ADDR:
 		spin_lock(&vcpu->arch.vpa_update_lock);
 		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
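
All of these registers travel over the same ONE_REG ioctl; a minimal userspace sketch of reading one back (vcpu_fd is assumed to be an open vcpu file descriptor, error handling elided):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* id can be any KVM_REG_PPC_* handled in the switch above
     * (DABRX, SPMC1/2, SIER, IAMR, CIABR, ...). */
    static int get_vcpu_reg(int vcpu_fd, uint64_t id, uint64_t *out)
    {
        struct kvm_one_reg reg;

        memset(&reg, 0, sizeof(reg));
        reg.id = id;
        reg.addr = (uintptr_t)out;
        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }
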
@@ -890,6 +997,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_DABR:
 		vcpu->arch.dabr = set_reg_val(id, *val);
 		break;
+	case KVM_REG_PPC_DABRX:
+		vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
+		break;
 	case KVM_REG_PPC_DSCR:
 		vcpu->arch.dscr = set_reg_val(id, *val);
 		break;
@@ -905,7 +1015,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_UAMOR:
 		vcpu->arch.uamor = set_reg_val(id, *val);
 		break;
-	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
 		i = id - KVM_REG_PPC_MMCR0;
 		vcpu->arch.mmcr[i] = set_reg_val(id, *val);
 		break;
@@ -913,33 +1023,90 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		i = id - KVM_REG_PPC_PMC1;
 		vcpu->arch.pmc[i] = set_reg_val(id, *val);
 		break;
+	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+		i = id - KVM_REG_PPC_SPMC1;
+		vcpu->arch.spmc[i] = set_reg_val(id, *val);
+		break;
 	case KVM_REG_PPC_SIAR:
 		vcpu->arch.siar = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_SDAR:
 		vcpu->arch.sdar = set_reg_val(id, *val);
 		break;
-#ifdef CONFIG_VSX
-	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
-		if (cpu_has_feature(CPU_FTR_VSX)) {
-			/* VSX => FP reg i is stored in arch.vsr[2*i] */
-			long int i = id - KVM_REG_PPC_FPR0;
-			vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
-		} else {
-			/* let generic code handle it */
-			r = -EINVAL;
-		}
+	case KVM_REG_PPC_SIER:
+		vcpu->arch.sier = set_reg_val(id, *val);
 		break;
-	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
-		if (cpu_has_feature(CPU_FTR_VSX)) {
-			long int i = id - KVM_REG_PPC_VSR0;
-			vcpu->arch.vsr[2 * i] = val->vsxval[0];
-			vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
-		} else {
-			r = -ENXIO;
-		}
+	case KVM_REG_PPC_IAMR:
+		vcpu->arch.iamr = set_reg_val(id, *val);
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		vcpu->arch.tfhar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		vcpu->arch.tfiar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		vcpu->arch.texasr = set_reg_val(id, *val);
+		break;
+#endif
+	case KVM_REG_PPC_FSCR:
+		vcpu->arch.fscr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_PSPB:
+		vcpu->arch.pspb = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_EBBHR:
+		vcpu->arch.ebbhr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_EBBRR:
+		vcpu->arch.ebbrr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_BESCR:
+		vcpu->arch.bescr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TAR:
+		vcpu->arch.tar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_DPDES:
+		vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_DAWR:
+		vcpu->arch.dawr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_DAWRX:
+		vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+		break;
+	case KVM_REG_PPC_CIABR:
+		vcpu->arch.ciabr = set_reg_val(id, *val);
+		/* Don't allow setting breakpoints in hypervisor code */
+		if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+			vcpu->arch.ciabr &= ~CIABR_PRIV;	/* disable */
+		break;
+	case KVM_REG_PPC_IC:
+		vcpu->arch.ic = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_VTB:
+		vcpu->arch.vtb = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_CSIGR:
+		vcpu->arch.csigr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TACR:
+		vcpu->arch.tacr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TCSCR:
+		vcpu->arch.tcscr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_PID:
+		vcpu->arch.pid = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_ACOP:
+		vcpu->arch.acop = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_WORT:
+		vcpu->arch.wort = set_reg_val(id, *val);
 		break;
-#endif /* CONFIG_VSX */
 	case KVM_REG_PPC_VPA_ADDR:
 		addr = set_reg_val(id, *val);
 		r = -EINVAL;
@@ -1017,6 +1184,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	spin_lock_init(&vcpu->arch.vpa_update_lock);
 	spin_lock_init(&vcpu->arch.tbacct_lock);
 	vcpu->arch.busy_preempt = TB_NIL;
+	vcpu->arch.intr_msr = MSR_SF | MSR_ME;
 
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
@@ -1034,6 +1202,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 			init_waitqueue_head(&vcore->wq);
 			vcore->preempt_tb = TB_NIL;
 			vcore->lpcr = kvm->arch.lpcr;
+			vcore->first_vcpuid = core * threads_per_core;
+			vcore->kvm = kvm;
 		}
 		kvm->arch.vcores[core] = vcore;
 		kvm->arch.online_vcores++;
@@ -1047,6 +1217,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	++vcore->num_threads;
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
+	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
 
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
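
With first_vcpuid recorded in the vcore, the physical thread id is now a pure function of the vcpu id rather than being reassigned on every core run. A worked example (the threads_per_core value is assumed for illustration):

    #include <stdio.h>

    int main(void)
    {
        int threads_per_core = 8;   /* assumed for the example */
        int vcpu_id = 13;

        int core = vcpu_id / threads_per_core;          /* 1 */
        int first_vcpuid = core * threads_per_core;     /* 8 */
        int ptid = vcpu_id - first_vcpuid;              /* 5 */

        printf("vcpu %d -> vcore %d, hw thread %d\n", vcpu_id, core, ptid);
        return 0;
    }
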
@@ -1110,7 +1281,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 	}
 }
 
-extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern void __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
@@ -1184,13 +1355,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	tpaca->kvm_hstate.kvm_vcore = vc;
-	tpaca->kvm_hstate.napping = 0;
+	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
 	vcpu->cpu = vc->pcpu;
 	smp_wmb();
 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
-	if (vcpu->arch.ptid) {
+	if (cpu != smp_processor_id()) {
+#ifdef CONFIG_KVM_XICS
 		xics_wake_cpu(cpu);
-		++vc->n_woken;
+#endif
+		if (vcpu->arch.ptid)
+			++vc->n_woken;
 	}
 #endif
 }
@@ -1247,10 +1421,10 @@ static int on_primary_thread(void)
  */
 static void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
+	struct kvm_vcpu *vcpu, *vnext;
 	long ret;
 	u64 now;
-	int ptid, i, need_vpa_update;
+	int i, need_vpa_update;
 	int srcu_idx;
 	struct kvm_vcpu *vcpus_to_update[threads_per_core];
 
@@ -1287,25 +1461,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		spin_lock(&vc->lock);
 	}
 
-	/*
-	 * Assign physical thread IDs, first to non-ceded vcpus
-	 * and then to ceded ones.
-	 */
-	ptid = 0;
-	vcpu0 = NULL;
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		if (!vcpu->arch.ceded) {
-			if (!ptid)
-				vcpu0 = vcpu;
-			vcpu->arch.ptid = ptid++;
-		}
-	}
-	if (!vcpu0)
-		goto out;	/* nothing to run; should never happen */
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-		if (vcpu->arch.ceded)
-			vcpu->arch.ptid = ptid++;
-
 	/*
 	 * Make sure we are running on thread 0, and that
 	 * secondary threads are offline.
@@ -1322,15 +1477,19 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		kvmppc_create_dtl_entry(vcpu, vc);
 	}
 
+	/* Set this explicitly in case thread 0 doesn't have a vcpu */
+	get_paca()->kvm_hstate.kvm_vcore = vc;
+	get_paca()->kvm_hstate.ptid = 0;
+
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
 	spin_unlock(&vc->lock);
 
 	kvm_guest_enter();
 
-	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
+	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
-	__kvmppc_vcore_entry(NULL, vcpu0);
+	__kvmppc_vcore_entry();
 
 	spin_lock(&vc->lock);
 	/* disable sending of IPIs on virtual external irqs */
@@ -1345,7 +1504,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_EXITING;
 	spin_unlock(&vc->lock);
 
-	srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
+	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
@@ -1453,7 +1612,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (!signal_pending(current)) {
 		if (vc->vcore_state == VCORE_RUNNING &&
 		    VCORE_EXIT_COUNT(vc) == 0) {
-			vcpu->arch.ptid = vc->n_runnable - 1;
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
@@ -2048,6 +2206,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 			LPCR_VPM0 | LPCR_VPM1;
 		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
 			(VRMA_VSID << SLB_VSID_SHIFT_1T);
+		/* On POWER8 turn on online bit to enable PURR/SPURR */
+		if (cpu_has_feature(CPU_FTR_ARCH_207S))
+			lpcr |= LPCR_ONL;
 	}
 	kvm->arch.lpcr = lpcr;
 
@@ -2222,3 +2383,5 @@ static void kvmppc_book3s_exit_hv(void)
 module_init(kvmppc_book3s_init_hv);
 module_exit(kvmppc_book3s_exit_hv);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
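
The two aliases let kmod load this module automatically on the first open of /dev/kvm. They refer to the misc device that common KVM code registers; a hedged sketch of that registration (the names and the minor value mirror the generic code but are illustrative here):

    #include <linux/fs.h>
    #include <linux/miscdevice.h>
    #include <linux/module.h>

    #define KVM_MINOR 232   /* the minor common KVM code uses; sketch only */

    static const struct file_operations kvm_chardev_ops = {
        .owner = THIS_MODULE,
    };

    static struct miscdevice kvm_dev = {
        .minor = KVM_MINOR,
        .name  = "kvm",
        .fops  = &kvm_chardev_ops,
    };

    static int __init kvm_sketch_init(void)  { return misc_register(&kvm_dev); }
    static void __exit kvm_sketch_exit(void) { misc_deregister(&kvm_dev); }
    module_init(kvm_sketch_init);
    module_exit(kvm_sketch_exit);
    MODULE_LICENSE("GPL");
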

+ 4 - 4
arch/powerpc/kvm/book3s_hv_interrupts.S

@@ -35,7 +35,7 @@
 ****************************************************************************/
 
 /* Registers:
- *  r4: vcpu pointer
+ *  none
  */
 _GLOBAL(__kvmppc_vcore_entry)
 
@@ -57,9 +57,11 @@ BEGIN_FTR_SECTION
 	std	r3, HSTATE_DSCR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
+BEGIN_FTR_SECTION
 	/* Save host DABR */
 	mfspr	r3, SPRN_DABR
 	std	r3, HSTATE_DABR(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 	/* Hard-disable interrupts */
 	mfmsr   r10
@@ -69,7 +71,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	mtmsrd  r10,1
 
 	/* Save host PMU registers */
-	/* R4 is live here (vcpu pointer) but not r3 or r5 */
 	li	r3, 1
 	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
 	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
@@ -134,16 +135,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 * enters the guest with interrupts enabled.
 	 */
 BEGIN_FTR_SECTION
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r0, VCPU_PENDING_EXC(r4)
 	li	r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
 	oris	r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
 	and.	r0, r0, r7
 	beq	32f
-	mr	r31, r4
 	lhz	r3, PACAPACAINDEX(r13)
 	bl	smp_send_reschedule
 	nop
-	mr	r4, r31
 32:
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 #endif /* CONFIG_SMP */
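
BEGIN_FTR_SECTION/END_FTR_SECTION_IFCLR bracket code that is patched out at boot when the named CPU feature is present, so the DABR save above is skipped on POWER8 (which replaces DABR with DAWR). The C equivalent of the guard is roughly this sketch, with stand-in helpers:

    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-ins for cpu_has_feature() and the SPR accessor. */
    static bool cpu_has_arch_207s;
    static uint64_t read_dabr(void) { return 0; }

    struct host_state { uint64_t dabr; };

    static void save_host_dabr(struct host_state *hstate)
    {
        if (!cpu_has_arch_207s)        /* POWER7 and earlier only */
            hstate->dabr = read_dabr();
    }
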

+ 5 - 3
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -134,7 +134,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
+static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
 			      int writing, unsigned long *pte_sizep)
 {
 	pte_t *ptep;
@@ -232,7 +232,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
+		pte = lookup_linux_pte_and_update(pgdir, hva, writing,
+						  &pte_size);
 		if (pte_present(pte)) {
 			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
@@ -672,7 +673,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
 			if (memslot) {
 				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
+				pte = lookup_linux_pte_and_update(pgdir, hva,
+								  1, &psize);
 				if (pte_present(pte) && !pte_write(pte))
 					r = hpte_make_readonly(r);
 			}
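
The rename makes the helper's side effect visible at both call sites: it does not just translate, it also folds the accessed bit (and the dirty bit for writes) into the Linux PTE before the HPTE is built from it. A sketch of that contract with placeholder types and bit names:

    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t pte_t;          /* stand-in, illustrative only */
    #define PTE_ACCESSED (1ull << 0) /* placeholder bit positions */
    #define PTE_DIRTY    (1ull << 1)

    static pte_t lookup_pte_and_update(pte_t *slot, bool writing)
    {
        pte_t pte = *slot;

        pte |= PTE_ACCESSED;
        if (writing)
            pte |= PTE_DIRTY;
        *slot = pte;    /* the real helper updates this atomically */
        return pte;
    }
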

+ 745 - 468
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -33,6 +33,10 @@
 #error Need to fix lppaca and SLB shadow accesses in little endian mode
 #endif
 
+/* Values in HSTATE_NAPPING(r13) */
+#define NAPPING_CEDE	1
+#define NAPPING_NOVCPU	2
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -57,29 +61,23 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
 	RFI
 
 kvmppc_call_hv_entry:
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_hv_entry
 
 	/* Back from guest - restore host state and return to caller */
 
+BEGIN_FTR_SECTION
 	/* Restore host DABR and DABRX */
 	ld	r5,HSTATE_DABR(r13)
 	li	r6,7
 	mtspr	SPRN_DABR,r5
 	mtspr	SPRN_DABRX,r6
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 	/* Restore SPRG3 */
 	ld	r3,PACA_SPRG3(r13)
 	mtspr	SPRN_SPRG3,r3
 
-	/*
-	 * Reload DEC.  HDEC interrupts were disabled when
-	 * we reloaded the host's LPCR value.
-	 */
-	ld	r3, HSTATE_DECEXP(r13)
-	mftb	r4
-	subf	r4, r4, r3
-	mtspr	SPRN_DEC, r4
-
 	/* Reload the host's PMU registers */
 	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */
 	lbz	r4, LPPACA_PMCINUSE(r3)
@@ -114,6 +112,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	isync
 23:
 
+	/*
+	 * Reload DEC.  HDEC interrupts were disabled when
+	 * we reloaded the host's LPCR value.
+	 */
+	ld	r3, HSTATE_DECEXP(r13)
+	mftb	r4
+	subf	r4, r4, r3
+	mtspr	SPRN_DEC, r4
+
 	/*
 	 * For external and machine check interrupts, we need
 	 * to call the Linux handler to process the interrupt.
@@ -153,15 +160,75 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 13:	b	machine_check_fwnmi
 
+kvmppc_primary_no_guest:
+	/* We handle this much like a ceded vcpu */
+	/* set our bit in napping_threads */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lbz	r7, HSTATE_PTID(r13)
+	li	r0, 1
+	sld	r0, r0, r7
+	addi	r6, r5, VCORE_NAPPING_THREADS
+1:	lwarx	r3, 0, r6
+	or	r3, r3, r0
+	stwcx.	r3, 0, r6
+	bne	1b
+	/* order napping_threads update vs testing entry_exit_count */
+	isync
+	li	r12, 0
+	lwz	r7, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r7, 0x100
+	bge	kvm_novcpu_exit	/* another thread already exiting */
+	li	r3, NAPPING_NOVCPU
+	stb	r3, HSTATE_NAPPING(r13)
+	li	r3, 1
+	stb	r3, HSTATE_HWTHREAD_REQ(r13)
+
+	b	kvm_do_nap
+
+kvm_novcpu_wakeup:
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	li	r0, 0
+	stb	r0, HSTATE_NAPPING(r13)
+	stb	r0, HSTATE_HWTHREAD_REQ(r13)
+
+	/* check the wake reason */
+	bl	kvmppc_check_wake_reason
+	
+	/* see if any other thread is already exiting */
+	lwz	r0, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0, 0x100
+	bge	kvm_novcpu_exit
+
+	/* clear our bit in napping_threads */
+	lbz	r7, HSTATE_PTID(r13)
+	li	r0, 1
+	sld	r0, r0, r7
+	addi	r6, r5, VCORE_NAPPING_THREADS
+4:	lwarx	r7, 0, r6
+	andc	r7, r7, r0
+	stwcx.	r7, 0, r6
+	bne	4b
+
+	/* See if the wake reason means we need to exit */
+	cmpdi	r3, 0
+	bge	kvm_novcpu_exit
+
+	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	cmpdi	r4, 0
+	bne	kvmppc_got_guest
+
+kvm_novcpu_exit:
+	b	hdec_soon
+
 /*
 /*
- * We come in here when wakened from nap mode on a secondary hw thread.
+ * We come in here when wakened from nap mode.
  * Relocation is off and most register values are lost.
  * r13 points to the PACA.
  */
 	.globl	kvm_start_guest
 kvm_start_guest:
-	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,STACK_FRAME_OVERHEAD
 	ld	r2,PACATOC(r13)
 
 	li	r0,KVM_HWTHREAD_IN_KVM
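
HSTATE_NAPPING(r13) now distinguishes two nap flavours, and the SRR1 decoding that used to be open-coded here moves into kvmppc_check_wake_reason, whose result follows the asm's cmpdi/bge convention (negative means keep going, zero or positive means bail). A rough C rendering of the dispatch, as a sketch:

    #include <stdio.h>

    #define NAPPING_CEDE   1    /* values from the #defines above */
    #define NAPPING_NOVCPU 2

    static const char *wake_dispatch(int napping, int wake_reason)
    {
        if (napping == NAPPING_CEDE)
            return "kvm_end_cede";
        if (napping == NAPPING_NOVCPU)
            return "kvm_novcpu_wakeup";
        if (wake_reason >= 0)       /* host IPI or stray interrupt */
            return "kvm_no_guest";
        return "run the vcpu in HSTATE_KVM_VCPU";
    }

    int main(void)
    {
        printf("%s\n", wake_dispatch(NAPPING_NOVCPU, -1));
        return 0;
    }
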
@@ -173,8 +240,13 @@ kvm_start_guest:
 
 	/* were we napping due to cede? */
 	lbz	r0,HSTATE_NAPPING(r13)
-	cmpwi	r0,0
-	bne	kvm_end_cede
+	cmpwi	r0,NAPPING_CEDE
+	beq	kvm_end_cede
+	cmpwi	r0,NAPPING_NOVCPU
+	beq	kvm_novcpu_wakeup
+
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_OVERHEAD
 
 	/*
 	 * We weren't napping due to cede, so this must be a secondary
@@ -184,40 +256,22 @@ kvm_start_guest:
 	 */
 
 	/* Check the wake reason in SRR1 to see why we got here */
-	mfspr	r3,SPRN_SRR1
-	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */
-	cmpwi	r3,4			/* was it an external interrupt? */
-	bne	27f			/* if not */
-	ld	r5,HSTATE_XICS_PHYS(r13)
-	li	r7,XICS_XIRR		/* if it was an external interrupt, */
-	lwzcix	r8,r5,r7		/* get and ack the interrupt */
-	sync
-	clrldi.	r9,r8,40		/* get interrupt source ID. */
-	beq	28f			/* none there? */
-	cmpwi	r9,XICS_IPI		/* was it an IPI? */
-	bne	29f
-	li	r0,0xff
-	li	r6,XICS_MFRR
-	stbcix	r0,r5,r6		/* clear IPI */
-	stwcix	r8,r5,r7		/* EOI the interrupt */
-	sync				/* order loading of vcpu after that */
+	bl	kvmppc_check_wake_reason
+	cmpdi	r3, 0
+	bge	kvm_no_guest
 
 	/* get vcpu pointer, NULL if we have no vcpu to run */
 	ld	r4,HSTATE_KVM_VCPU(r13)
 	cmpdi	r4,0
 	/* if we have no vcpu to run, go back to sleep */
 	beq	kvm_no_guest
-	b	30f
 
-27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
-	b	kvm_no_guest
-28:	/* SRR1 said external but ICP said nope?? */
-	b	kvm_no_guest
-29:	/* External non-IPI interrupt to offline secondary thread? help?? */
-	stw	r8,HSTATE_SAVED_XIRR(r13)
-	b	kvm_no_guest
+	/* Set HSTATE_DSCR(r13) to something sensible */
+	LOAD_REG_ADDR(r6, dscr_default)
+	ld	r6, 0(r6)
+	std	r6, HSTATE_DSCR(r13)
 
-30:	bl	kvmppc_hv_entry
+	bl	kvmppc_hv_entry
 
 	/* Back from the guest, go back to nap */
 	/* Clear our vcpu pointer so we don't come back in early */
@@ -229,18 +283,6 @@ kvm_start_guest:
 	 * visible we could be given another vcpu.
 	 */
 	lwsync
-	/* Clear any pending IPI - we're an offline thread */
-	ld	r5, HSTATE_XICS_PHYS(r13)
-	li	r7, XICS_XIRR
-	lwzcix	r3, r5, r7		/* ack any pending interrupt */
-	rlwinm.	r0, r3, 0, 0xffffff	/* any pending? */
-	beq	37f
-	sync
-	li	r0, 0xff
-	li	r6, XICS_MFRR
-	stbcix	r0, r5, r6		/* clear the IPI */
-	stwcix	r3, r5, r7		/* EOI it */
-37:	sync
 
 	/* increment the nap count and then go to nap mode */
 	ld	r4, HSTATE_KVM_VCORE(r13)
@@ -253,6 +295,7 @@ kvm_start_guest:
 kvm_no_guest:
 	li	r0, KVM_HWTHREAD_IN_NAP
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
+kvm_do_nap:
 	li	r3, LPCR_PECE0
 	mfspr	r4, SPRN_LPCR
 	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
@@ -277,7 +320,7 @@ kvmppc_hv_entry:
 
 	/* Required state:
 	 *
-	 * R4 = vcpu pointer
+	 * R4 = vcpu pointer (or NULL)
 	 * MSR = ~IR|DR
 	 * R13 = PACA
 	 * R1 = host R1
@@ -287,122 +330,12 @@ kvmppc_hv_entry:
 	std	r0, PPC_LR_STKOFF(r1)
 	stdu	r1, -112(r1)
 
-	/* Set partition DABR */
-	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
-	li	r5,3
-	ld	r6,VCPU_DABR(r4)
-	mtspr	SPRN_DABRX,r5
-	mtspr	SPRN_DABR,r6
-BEGIN_FTR_SECTION
-	isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
-	/* Load guest PMU registers */
-	/* R4 is live here (vcpu pointer) */
-	li	r3, 1
-	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
-	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
-	isync
-	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
-	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
-	lwz	r6, VCPU_PMC + 8(r4)
-	lwz	r7, VCPU_PMC + 12(r4)
-	lwz	r8, VCPU_PMC + 16(r4)
-	lwz	r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
-	lwz	r10, VCPU_PMC + 24(r4)
-	lwz	r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-	mtspr	SPRN_PMC1, r3
-	mtspr	SPRN_PMC2, r5
-	mtspr	SPRN_PMC3, r6
-	mtspr	SPRN_PMC4, r7
-	mtspr	SPRN_PMC5, r8
-	mtspr	SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-	mtspr	SPRN_PMC7, r10
-	mtspr	SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-	ld	r3, VCPU_MMCR(r4)
-	ld	r5, VCPU_MMCR + 8(r4)
-	ld	r6, VCPU_MMCR + 16(r4)
-	ld	r7, VCPU_SIAR(r4)
-	ld	r8, VCPU_SDAR(r4)
-	mtspr	SPRN_MMCR1, r5
-	mtspr	SPRN_MMCRA, r6
-	mtspr	SPRN_SIAR, r7
-	mtspr	SPRN_SDAR, r8
-	mtspr	SPRN_MMCR0, r3
-	isync
-
-	/* Load up FP, VMX and VSX registers */
-	bl	kvmppc_load_fp
-
-	ld	r14, VCPU_GPR(R14)(r4)
-	ld	r15, VCPU_GPR(R15)(r4)
-	ld	r16, VCPU_GPR(R16)(r4)
-	ld	r17, VCPU_GPR(R17)(r4)
-	ld	r18, VCPU_GPR(R18)(r4)
-	ld	r19, VCPU_GPR(R19)(r4)
-	ld	r20, VCPU_GPR(R20)(r4)
-	ld	r21, VCPU_GPR(R21)(r4)
-	ld	r22, VCPU_GPR(R22)(r4)
-	ld	r23, VCPU_GPR(R23)(r4)
-	ld	r24, VCPU_GPR(R24)(r4)
-	ld	r25, VCPU_GPR(R25)(r4)
-	ld	r26, VCPU_GPR(R26)(r4)
-	ld	r27, VCPU_GPR(R27)(r4)
-	ld	r28, VCPU_GPR(R28)(r4)
-	ld	r29, VCPU_GPR(R29)(r4)
-	ld	r30, VCPU_GPR(R30)(r4)
-	ld	r31, VCPU_GPR(R31)(r4)
-
-BEGIN_FTR_SECTION
-	/* Switch DSCR to guest value */
-	ld	r5, VCPU_DSCR(r4)
-	mtspr	SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
-	/*
-	 * Set the decrementer to the guest decrementer.
-	 */
-	ld	r8,VCPU_DEC_EXPIRES(r4)
-	mftb	r7
-	subf	r3,r7,r8
-	mtspr	SPRN_DEC,r3
-	stw	r3,VCPU_DEC(r4)
-
-	ld	r5, VCPU_SPRG0(r4)
-	ld	r6, VCPU_SPRG1(r4)
-	ld	r7, VCPU_SPRG2(r4)
-	ld	r8, VCPU_SPRG3(r4)
-	mtspr	SPRN_SPRG0, r5
-	mtspr	SPRN_SPRG1, r6
-	mtspr	SPRN_SPRG2, r7
-	mtspr	SPRN_SPRG3, r8
-
 	/* Save R1 in the PACA */
 	std	r1, HSTATE_HOST_R1(r13)
 
-	/* Load up DAR and DSISR */
-	ld	r5, VCPU_DAR(r4)
-	lwz	r6, VCPU_DSISR(r4)
-	mtspr	SPRN_DAR, r5
-	mtspr	SPRN_DSISR, r6
-
 	li	r6, KVM_GUEST_MODE_HOST_HV
 	stb	r6, HSTATE_IN_GUEST(r13)
 
-BEGIN_FTR_SECTION
-	/* Restore AMR and UAMOR, set AMOR to all 1s */
-	ld	r5,VCPU_AMR(r4)
-	ld	r6,VCPU_UAMOR(r4)
-	li	r7,-1
-	mtspr	SPRN_AMR,r5
-	mtspr	SPRN_UAMOR,r6
-	mtspr	SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
 	/* Clear out SLB */
 	li	r6,0
 	slbmte	r6,r6
@@ -428,8 +361,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	bne	21b
 
 	/* Primary thread switches to guest partition. */
-	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */
-	lwz	r6,VCPU_PTID(r4)
+	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */
+	lbz	r6,HSTATE_PTID(r13)
 	cmpwi	r6,0
 	bne	20f
 	ld	r6,KVM_SDR1(r9)
@@ -457,7 +390,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	andc	r7,r7,r0
 	stdcx.	r7,0,r6
 	bne	23b
-	li	r6,128			/* and flush the TLB */
+	/* Flush the TLB of any entries for this LPID */
+	/* use arch 2.07S as a proxy for POWER8 */
+BEGIN_FTR_SECTION
+	li	r6,512			/* POWER8 has 512 sets */
+FTR_SECTION_ELSE
+	li	r6,128			/* POWER7 has 128 sets */
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
 	mtctr	r6
 	li	r7,0x800		/* IS field = 0b10 */
 	ptesync
@@ -487,6 +426,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	beq	38f
 	mtspr	SPRN_PCR, r7
 38:
+
+BEGIN_FTR_SECTION
+	/* DPDES is shared between threads */
+	ld	r8, VCORE_DPDES(r5)
+	mtspr	SPRN_DPDES, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
 	li	r0,1
 	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
 	b	10f
@@ -503,32 +449,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_RMOR,r8
 	isync
 
-	/* Increment yield count if they have a VPA */
-	ld	r3, VCPU_VPA(r4)
-	cmpdi	r3, 0
-	beq	25f
-	lwz	r5, LPPACA_YIELDCOUNT(r3)
-	addi	r5, r5, 1
-	stw	r5, LPPACA_YIELDCOUNT(r3)
-	li	r6, 1
-	stb	r6, VCPU_VPA_DIRTY(r4)
-25:
 	/* Check if HDEC expires soon */
 	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,10
+	cmpwi	r3,512		/* 1 microsecond */
 	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-	mr	r9,r4
 	blt	hdec_soon
-
-	/* Save purr/spurr */
-	mfspr	r5,SPRN_PURR
-	mfspr	r6,SPRN_SPURR
-	std	r5,HSTATE_PURR(r13)
-	std	r6,HSTATE_SPURR(r13)
-	ld	r7,VCPU_PURR(r4)
-	ld	r8,VCPU_SPURR(r4)
-	mtspr	SPRN_PURR,r7
-	mtspr	SPRN_SPURR,r8
 	b	31f
 
 	/*
@@ -539,7 +464,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 * We also have to invalidate the TLB since its
 	 * entries aren't tagged with the LPID.
 	 */
-30:	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */
+30:	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */
 
 	/* first take native_tlbie_lock */
 	.section ".toc","aw"
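
The threshold in the HDEC check above changes from a raw 10 to 512, which the comment calibrates as one microsecond: the POWER7/POWER8 timebase that HDEC counts in runs at 512 MHz, so 512 ticks / 512 MHz = 1 us. As a quick check:

    #include <stdio.h>

    int main(void)
    {
        const double tb_hz = 512e6;  /* timebase frequency, 512 MHz */
        printf("%.3f us\n", 512.0 / tb_hz * 1e6);   /* prints 1.000 us */
        return 0;
    }
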
@@ -604,7 +530,6 @@ toc_tlbie_lock:
 	mfspr	r3,SPRN_HDEC
 	cmpwi	r3,10
 	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-	mr	r9,r4
 	blt	hdec_soon
 
 	/* Enable HDEC interrupts */
@@ -619,9 +544,14 @@ toc_tlbie_lock:
 	mfspr	r0,SPRN_HID0
 	mfspr	r0,SPRN_HID0
 	mfspr	r0,SPRN_HID0
+31:
+	/* Do we have a guest vcpu to run? */
+	cmpdi	r4, 0
+	beq	kvmppc_primary_no_guest
+kvmppc_got_guest:
 
 	/* Load up guest SLB entries */
-31:	lwz	r5,VCPU_SLB_MAX(r4)
+	lwz	r5,VCPU_SLB_MAX(r4)
 	cmpwi	r5,0
 	beq	9f
 	mtctr	r5
@@ -632,6 +562,209 @@ toc_tlbie_lock:
 	addi	r6,r6,VCPU_SLB_SIZE
 	bdnz	1b
 9:
+	/* Increment yield count if they have a VPA */
+	ld	r3, VCPU_VPA(r4)
+	cmpdi	r3, 0
+	beq	25f
+	lwz	r5, LPPACA_YIELDCOUNT(r3)
+	addi	r5, r5, 1
+	stw	r5, LPPACA_YIELDCOUNT(r3)
+	li	r6, 1
+	stb	r6, VCPU_VPA_DIRTY(r4)
+25:
+
+BEGIN_FTR_SECTION
+	/* Save purr/spurr */
+	mfspr	r5,SPRN_PURR
+	mfspr	r6,SPRN_SPURR
+	std	r5,HSTATE_PURR(r13)
+	std	r6,HSTATE_SPURR(r13)
+	ld	r7,VCPU_PURR(r4)
+	ld	r8,VCPU_SPURR(r4)
+	mtspr	SPRN_PURR,r7
+	mtspr	SPRN_SPURR,r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+	/* Set partition DABR */
+	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
+	lwz	r5,VCPU_DABRX(r4)
+	ld	r6,VCPU_DABR(r4)
+	mtspr	SPRN_DABRX,r5
+	mtspr	SPRN_DABR,r6
+ BEGIN_FTR_SECTION_NESTED(89)
+	isync
+ END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+	/* Load guest PMU registers */
+	/* R4 is live here (vcpu pointer) */
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
+	isync
+	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
+	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
+	lwz	r6, VCPU_PMC + 8(r4)
+	lwz	r7, VCPU_PMC + 12(r4)
+	lwz	r8, VCPU_PMC + 16(r4)
+	lwz	r9, VCPU_PMC + 20(r4)
+BEGIN_FTR_SECTION
+	lwz	r10, VCPU_PMC + 24(r4)
+	lwz	r11, VCPU_PMC + 28(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+	mtspr	SPRN_PMC1, r3
+	mtspr	SPRN_PMC2, r5
+	mtspr	SPRN_PMC3, r6
+	mtspr	SPRN_PMC4, r7
+	mtspr	SPRN_PMC5, r8
+	mtspr	SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+	mtspr	SPRN_PMC7, r10
+	mtspr	SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+	ld	r3, VCPU_MMCR(r4)
+	ld	r5, VCPU_MMCR + 8(r4)
+	ld	r6, VCPU_MMCR + 16(r4)
+	ld	r7, VCPU_SIAR(r4)
+	ld	r8, VCPU_SDAR(r4)
+	mtspr	SPRN_MMCR1, r5
+	mtspr	SPRN_MMCRA, r6
+	mtspr	SPRN_SIAR, r7
+	mtspr	SPRN_SDAR, r8
+BEGIN_FTR_SECTION
+	ld	r5, VCPU_MMCR + 24(r4)
+	ld	r6, VCPU_SIER(r4)
+	lwz	r7, VCPU_PMC + 24(r4)
+	lwz	r8, VCPU_PMC + 28(r4)
+	ld	r9, VCPU_MMCR + 32(r4)
+	mtspr	SPRN_MMCR2, r5
+	mtspr	SPRN_SIER, r6
+	mtspr	SPRN_SPMC1, r7
+	mtspr	SPRN_SPMC2, r8
+	mtspr	SPRN_MMCRS, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	mtspr	SPRN_MMCR0, r3
+	isync
+
+	/* Load up FP, VMX and VSX registers */
+	bl	kvmppc_load_fp
+
+	ld	r14, VCPU_GPR(R14)(r4)
+	ld	r15, VCPU_GPR(R15)(r4)
+	ld	r16, VCPU_GPR(R16)(r4)
+	ld	r17, VCPU_GPR(R17)(r4)
+	ld	r18, VCPU_GPR(R18)(r4)
+	ld	r19, VCPU_GPR(R19)(r4)
+	ld	r20, VCPU_GPR(R20)(r4)
+	ld	r21, VCPU_GPR(R21)(r4)
+	ld	r22, VCPU_GPR(R22)(r4)
+	ld	r23, VCPU_GPR(R23)(r4)
+	ld	r24, VCPU_GPR(R24)(r4)
+	ld	r25, VCPU_GPR(R25)(r4)
+	ld	r26, VCPU_GPR(R26)(r4)
+	ld	r27, VCPU_GPR(R27)(r4)
+	ld	r28, VCPU_GPR(R28)(r4)
+	ld	r29, VCPU_GPR(R29)(r4)
+	ld	r30, VCPU_GPR(R30)(r4)
+	ld	r31, VCPU_GPR(R31)(r4)
+
+BEGIN_FTR_SECTION
+	/* Switch DSCR to guest value */
+	ld	r5, VCPU_DSCR(r4)
+	mtspr	SPRN_DSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+	/* Skip next section on POWER7 or PPC970 */
+	b	8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+	/* Load up POWER8-specific registers */
+	ld	r5, VCPU_IAMR(r4)
+	lwz	r6, VCPU_PSPB(r4)
+	ld	r7, VCPU_FSCR(r4)
+	mtspr	SPRN_IAMR, r5
+	mtspr	SPRN_PSPB, r6
+	mtspr	SPRN_FSCR, r7
+	ld	r5, VCPU_DAWR(r4)
+	ld	r6, VCPU_DAWRX(r4)
+	ld	r7, VCPU_CIABR(r4)
+	ld	r8, VCPU_TAR(r4)
+	mtspr	SPRN_DAWR, r5
+	mtspr	SPRN_DAWRX, r6
+	mtspr	SPRN_CIABR, r7
+	mtspr	SPRN_TAR, r8
+	ld	r5, VCPU_IC(r4)
+	ld	r6, VCPU_VTB(r4)
+	mtspr	SPRN_IC, r5
+	mtspr	SPRN_VTB, r6
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	ld	r5, VCPU_TFHAR(r4)
+	ld	r6, VCPU_TFIAR(r4)
+	ld	r7, VCPU_TEXASR(r4)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+#endif
+	ld	r8, VCPU_EBBHR(r4)
+	mtspr	SPRN_EBBHR, r8
+	ld	r5, VCPU_EBBRR(r4)
+	ld	r6, VCPU_BESCR(r4)
+	ld	r7, VCPU_CSIGR(r4)
+	ld	r8, VCPU_TACR(r4)
+	mtspr	SPRN_EBBRR, r5
+	mtspr	SPRN_BESCR, r6
+	mtspr	SPRN_CSIGR, r7
+	mtspr	SPRN_TACR, r8
+	ld	r5, VCPU_TCSCR(r4)
+	ld	r6, VCPU_ACOP(r4)
+	lwz	r7, VCPU_GUEST_PID(r4)
+	ld	r8, VCPU_WORT(r4)
+	mtspr	SPRN_TCSCR, r5
+	mtspr	SPRN_ACOP, r6
+	mtspr	SPRN_PID, r7
+	mtspr	SPRN_WORT, r8
+8:
+
+	/*
+	 * Set the decrementer to the guest decrementer.
+	 */
+	ld	r8,VCPU_DEC_EXPIRES(r4)
+	mftb	r7
+	subf	r3,r7,r8
+	mtspr	SPRN_DEC,r3
+	stw	r3,VCPU_DEC(r4)
+
+	ld	r5, VCPU_SPRG0(r4)
+	ld	r6, VCPU_SPRG1(r4)
+	ld	r7, VCPU_SPRG2(r4)
+	ld	r8, VCPU_SPRG3(r4)
+	mtspr	SPRN_SPRG0, r5
+	mtspr	SPRN_SPRG1, r6
+	mtspr	SPRN_SPRG2, r7
+	mtspr	SPRN_SPRG3, r8
+
+	/* Load up DAR and DSISR */
+	ld	r5, VCPU_DAR(r4)
+	lwz	r6, VCPU_DSISR(r4)
+	mtspr	SPRN_DAR, r5
+	mtspr	SPRN_DSISR, r6
+
+BEGIN_FTR_SECTION
+	/* Restore AMR and UAMOR, set AMOR to all 1s */
+	ld	r5,VCPU_AMR(r4)
+	ld	r6,VCPU_UAMOR(r4)
+	li	r7,-1
+	mtspr	SPRN_AMR,r5
+	mtspr	SPRN_UAMOR,r6
+	mtspr	SPRN_AMOR,r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Restore state of CTRL run bit; assume 1 on entry */
 	lwz	r5,VCPU_CTRL(r4)
@@ -647,48 +780,53 @@ toc_tlbie_lock:
 	mtctr	r6
 	mtxer	r7
 
+kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	ld	r10, VCPU_PC(r4)
 	ld	r11, VCPU_MSR(r4)
-kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	ld	r6, VCPU_SRR0(r4)
 	ld	r7, VCPU_SRR1(r4)
+	mtspr	SPRN_SRR0, r6
+	mtspr	SPRN_SRR1, r7
 
+deliver_guest_interrupt:
 	/* r11 = vcpu->arch.msr & ~MSR_HV */
 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
 	rotldi	r11, r11, 1 + MSR_HV_LG
 	ori	r11, r11, MSR_ME
 
 	/* Check if we can deliver an external or decrementer interrupt now */
-	ld	r0,VCPU_PENDING_EXC(r4)
-	lis	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-	and	r0,r0,r8
-	cmpdi	cr1,r0,0
-	andi.	r0,r11,MSR_EE
-	beq	cr1,11f
+	ld	r0, VCPU_PENDING_EXC(r4)
+	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+	cmpdi	cr1, r0, 0
+	andi.	r8, r11, MSR_EE
 BEGIN_FTR_SECTION
-	mfspr	r8,SPRN_LPCR
-	ori	r8,r8,LPCR_MER
-	mtspr	SPRN_LPCR,r8
+	mfspr	r8, SPRN_LPCR
+	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
+	mtspr	SPRN_LPCR, r8
 	isync
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	beq	5f
-	li	r0,BOOK3S_INTERRUPT_EXTERNAL
-12:	mr	r6,r10
-	mr	r10,r0
-	mr	r7,r11
-	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11,r11,63
-	b	5f
-11:	beq	5f
-	mfspr	r0,SPRN_DEC
-	cmpwi	r0,0
-	li	r0,BOOK3S_INTERRUPT_DECREMENTER
-	blt	12b
+	li	r0, BOOK3S_INTERRUPT_EXTERNAL
+	bne	cr1, 12f
+	mfspr	r0, SPRN_DEC
+	cmpwi	r0, 0
+	li	r0, BOOK3S_INTERRUPT_DECREMENTER
+	bge	5f
 
-	/* Move SRR0 and SRR1 into the respective regs */
-5:	mtspr	SPRN_SRR0, r6
-	mtspr	SPRN_SRR1, r7
+12:	mtspr	SPRN_SRR0, r10
+	mr	r10,r0
+	mtspr	SPRN_SRR1, r11
+	ld	r11, VCPU_INTR_MSR(r4)
+5:
 
+/*
+ * Required state:
+ * R4 = vcpu
+ * R10: value for HSRR0
+ * R11: value for HSRR1
+ * R13 = PACA
+ */
 fast_guest_return:
 fast_guest_return:
 	li	r0,0
 	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
@@ -868,39 +1006,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 	/* External interrupt, first check for host_ipi. If this is
 	 * set, we know the host wants us out so let's do it now
 	 */
-do_ext_interrupt:
 	bl	kvmppc_read_intr
 	cmpdi	r3, 0
 	bgt	ext_interrupt_to_host
 
-	/* Allright, looks like an IPI for the guest, we need to set MER */
 	/* Check if any CPU is heading out to the host, if so head out too */
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	lwz	r0, VCORE_ENTRY_EXIT(r5)
 	cmpwi	r0, 0x100
 	bge	ext_interrupt_to_host
 
-	/* See if there is a pending interrupt for the guest */
-	mfspr	r8, SPRN_LPCR
-	ld	r0, VCPU_PENDING_EXC(r9)
-	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
-	rldicl.	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
-	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
-	beq	2f
-
-	/* And if the guest EE is set, we can deliver immediately, else
-	 * we return to the guest with MER set
-	 */
-	andi.	r0, r11, MSR_EE
-	beq	2f
-	mtspr	SPRN_SRR0, r10
-	mtspr	SPRN_SRR1, r11
-	li	r10, BOOK3S_INTERRUPT_EXTERNAL
-	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11, r11, 63
-2:	mr	r4, r9
-	mtspr	SPRN_LPCR, r8
-	b	fast_guest_return
+	/* Return to guest after delivering any pending interrupt */
+	mr	r4, r9
+	b	deliver_guest_interrupt
 
 ext_interrupt_to_host:
 
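deliver_guest_interrupt (reached from the external-interrupt path above) now computes LPCR[MER] directly: rldicl isolates the EXTERNAL_LEVEL pending bit as 0 or 1 and rldimi deposits it at the MER position, setting or clearing it in one pass instead of the old set-only sequence. The same deposit, sketched in C with an illustrative bit position:

    #include <stdint.h>
    #include <stdio.h>

    /* What rldimi does in one instruction: clear the target bit, then
     * insert the 0/1 source value at position pos. */
    static uint64_t deposit_bit(uint64_t reg, uint64_t src, unsigned pos)
    {
        return (reg & ~(1ull << pos)) | ((src & 1) << pos);
    }

    int main(void)
    {
        unsigned lpcr_mer_sh = 11;   /* illustrative position only */
        uint64_t lpcr = 0;

        lpcr = deposit_bit(lpcr, 1, lpcr_mer_sh);  /* pending -> MER set */
        lpcr = deposit_bit(lpcr, 0, lpcr_mer_sh);  /* none -> MER clear */
        printf("%#llx\n", (unsigned long long)lpcr);
        return 0;
    }
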
@@ -963,25 +1081,206 @@ BEGIN_FTR_SECTION
 	subf	r5,r7,r5
 	subf	r6,r8,r6
 
-	/*
-	 * Restore host PURR/SPURR and add guest times
-	 * so that the time in the guest gets accounted.
-	 */
-	ld	r3,HSTATE_PURR(r13)
-	ld	r4,HSTATE_SPURR(r13)
-	add	r3,r3,r5
-	add	r4,r4,r6
-	mtspr	SPRN_PURR,r3
-	mtspr	SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
+	/*
+	 * Restore host PURR/SPURR and add guest times
+	 * so that the time in the guest gets accounted.
+	 */
+	ld	r3,HSTATE_PURR(r13)
+	ld	r4,HSTATE_SPURR(r13)
+	add	r3,r3,r5
+	add	r4,r4,r6
+	mtspr	SPRN_PURR,r3
+	mtspr	SPRN_SPURR,r4
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
+
+	/* Save DEC */
+	mfspr	r5,SPRN_DEC
+	mftb	r6
+	extsw	r5,r5
+	add	r5,r5,r6
+	std	r5,VCPU_DEC_EXPIRES(r9)
+
+BEGIN_FTR_SECTION
+	b	8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+	/* Save POWER8-specific registers */
+	mfspr	r5, SPRN_IAMR
+	mfspr	r6, SPRN_PSPB
+	mfspr	r7, SPRN_FSCR
+	std	r5, VCPU_IAMR(r9)
+	stw	r6, VCPU_PSPB(r9)
+	std	r7, VCPU_FSCR(r9)
+	mfspr	r5, SPRN_IC
+	mfspr	r6, SPRN_VTB
+	mfspr	r7, SPRN_TAR
+	std	r5, VCPU_IC(r9)
+	std	r6, VCPU_VTB(r9)
+	std	r7, VCPU_TAR(r9)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	mfspr	r7, SPRN_TEXASR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+	std	r7, VCPU_TEXASR(r9)
+#endif
+	mfspr	r8, SPRN_EBBHR
+	std	r8, VCPU_EBBHR(r9)
+	mfspr	r5, SPRN_EBBRR
+	mfspr	r6, SPRN_BESCR
+	mfspr	r7, SPRN_CSIGR
+	mfspr	r8, SPRN_TACR
+	std	r5, VCPU_EBBRR(r9)
+	std	r6, VCPU_BESCR(r9)
+	std	r7, VCPU_CSIGR(r9)
+	std	r8, VCPU_TACR(r9)
+	mfspr	r5, SPRN_TCSCR
+	mfspr	r6, SPRN_ACOP
+	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_WORT
+	std	r5, VCPU_TCSCR(r9)
+	std	r6, VCPU_ACOP(r9)
+	stw	r7, VCPU_GUEST_PID(r9)
+	std	r8, VCPU_WORT(r9)
+8:
+
+	/* Save and reset AMR and UAMOR before turning on the MMU */
+BEGIN_FTR_SECTION
+	mfspr	r5,SPRN_AMR
+	mfspr	r6,SPRN_UAMOR
+	std	r5,VCPU_AMR(r9)
+	std	r6,VCPU_UAMOR(r9)
+	li	r6,0
+	mtspr	SPRN_AMR,r6
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+	/* Switch DSCR back to host value */
+BEGIN_FTR_SECTION
+	mfspr	r8, SPRN_DSCR
+	ld	r7, HSTATE_DSCR(r13)
+	std	r8, VCPU_DSCR(r9)
+	mtspr	SPRN_DSCR, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+	/* Save non-volatile GPRs */
+	std	r14, VCPU_GPR(R14)(r9)
+	std	r15, VCPU_GPR(R15)(r9)
+	std	r16, VCPU_GPR(R16)(r9)
+	std	r17, VCPU_GPR(R17)(r9)
+	std	r18, VCPU_GPR(R18)(r9)
+	std	r19, VCPU_GPR(R19)(r9)
+	std	r20, VCPU_GPR(R20)(r9)
+	std	r21, VCPU_GPR(R21)(r9)
+	std	r22, VCPU_GPR(R22)(r9)
+	std	r23, VCPU_GPR(R23)(r9)
+	std	r24, VCPU_GPR(R24)(r9)
+	std	r25, VCPU_GPR(R25)(r9)
+	std	r26, VCPU_GPR(R26)(r9)
+	std	r27, VCPU_GPR(R27)(r9)
+	std	r28, VCPU_GPR(R28)(r9)
+	std	r29, VCPU_GPR(R29)(r9)
+	std	r30, VCPU_GPR(R30)(r9)
+	std	r31, VCPU_GPR(R31)(r9)
+
+	/* Save SPRGs */
+	mfspr	r3, SPRN_SPRG0
+	mfspr	r4, SPRN_SPRG1
+	mfspr	r5, SPRN_SPRG2
+	mfspr	r6, SPRN_SPRG3
+	std	r3, VCPU_SPRG0(r9)
+	std	r4, VCPU_SPRG1(r9)
+	std	r5, VCPU_SPRG2(r9)
+	std	r6, VCPU_SPRG3(r9)
+
+	/* save FP state */
+	mr	r3, r9
+	bl	kvmppc_save_fp
 
 
+	/* Increment yield count if they have a VPA */
+	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
+	cmpdi	r8, 0
+	beq	25f
+	lwz	r3, LPPACA_YIELDCOUNT(r8)
+	addi	r3, r3, 1
+	stw	r3, LPPACA_YIELDCOUNT(r8)
+	li	r3, 1
+	stb	r3, VCPU_VPA_DIRTY(r9)
+25:
+	/* Save PMU registers if requested */
+	/* r8 and cr0.eq are live here */
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
+	mfspr	r6, SPRN_MMCRA
+BEGIN_FTR_SECTION
+	/* On P7, clear MMCRA in order to disable SDAR updates */
+	li	r7, 0
+	mtspr	SPRN_MMCRA, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+	isync
+	beq	21f			/* if no VPA, save PMU stuff anyway */
+	lbz	r7, LPPACA_PMCINUSE(r8)
+	cmpwi	r7, 0			/* did they ask for PMU stuff to be saved? */
+	bne	21f
+	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
+	b	22f
+21:	mfspr	r5, SPRN_MMCR1
+	mfspr	r7, SPRN_SIAR
+	mfspr	r8, SPRN_SDAR
+	std	r4, VCPU_MMCR(r9)
+	std	r5, VCPU_MMCR + 8(r9)
+	std	r6, VCPU_MMCR + 16(r9)
+	std	r7, VCPU_SIAR(r9)
+	std	r8, VCPU_SDAR(r9)
+	mfspr	r3, SPRN_PMC1
+	mfspr	r4, SPRN_PMC2
+	mfspr	r5, SPRN_PMC3
+	mfspr	r6, SPRN_PMC4
+	mfspr	r7, SPRN_PMC5
+	mfspr	r8, SPRN_PMC6
+BEGIN_FTR_SECTION
+	mfspr	r10, SPRN_PMC7
+	mfspr	r11, SPRN_PMC8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+	stw	r3, VCPU_PMC(r9)
+	stw	r4, VCPU_PMC + 4(r9)
+	stw	r5, VCPU_PMC + 8(r9)
+	stw	r6, VCPU_PMC + 12(r9)
+	stw	r7, VCPU_PMC + 16(r9)
+	stw	r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+	stw	r10, VCPU_PMC + 24(r9)
+	stw	r11, VCPU_PMC + 28(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+BEGIN_FTR_SECTION
+	mfspr	r4, SPRN_MMCR2
+	mfspr	r5, SPRN_SIER
+	mfspr	r6, SPRN_SPMC1
+	mfspr	r7, SPRN_SPMC2
+	mfspr	r8, SPRN_MMCRS
+	std	r4, VCPU_MMCR + 24(r9)
+	std	r5, VCPU_SIER(r9)
+	stw	r6, VCPU_PMC + 24(r9)
+	stw	r7, VCPU_PMC + 28(r9)
+	std	r8, VCPU_MMCR + 32(r9)
+	lis	r4, 0x8000
+	mtspr	SPRN_MMCRS, r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+22:
 	/* Clear out SLB */
 	/* Clear out SLB */
 	li	r5,0
 	slbmte	r5,r5
 	slbia
 	ptesync
 
+hdec_soon:			/* r12 = trap, r13 = paca */
 BEGIN_FTR_SECTION
 BEGIN_FTR_SECTION
 	b	32f
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
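
Both PMU save paths begin by freezing the counters through MMCR0[FC] so host execution cannot perturb the guest's counts; the POWER8 branch additionally parks MMCRS at its own freeze value (the lis r4,0x8000 above, i.e. 0x80000000). The freeze constant is built exactly as the asm builds it:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* li r3,1 ; sldi r3,r3,31  ->  MMCR0_FC (freeze counters) */
        uint64_t mmcr0_fc = 1ull << 31;

        printf("MMCR0_FC = %#llx\n", (unsigned long long)mmcr0_fc);
        return 0;
    }
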
@@ -1014,8 +1313,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 */
 	cmpwi	r3,0x100	/* Are we the first here? */
 	bge	43f
-	cmpwi	r3,1		/* Are any other threads in the guest? */
-	ble	43f
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	beq	40f
 	li	r0,0
@@ -1026,7 +1323,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 * doesn't wake CPUs up from nap.
 	 */
 	lwz	r3,VCORE_NAPPING_THREADS(r5)
-	lwz	r4,VCPU_PTID(r9)
+	lbz	r4,HSTATE_PTID(r13)
 	li	r0,1
 	sld	r0,r0,r4
 	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */
@@ -1045,10 +1342,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	addi	r6,r6,PACA_SIZE
 	bne	42b
 
+secondary_too_late:
 	/* Secondary threads wait for primary to do partition switch */
-43:	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	lwz	r3,VCPU_PTID(r9)
+43:	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
+	lbz	r3,HSTATE_PTID(r13)
 	cmpwi	r3,0
 	beq	15f
 	HMT_LOW
@@ -1076,6 +1374,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_LPID,r7
 	isync
 
+BEGIN_FTR_SECTION
+	/* DPDES is shared between threads */
+	mfspr	r7, SPRN_DPDES
+	std	r7, VCORE_DPDES(r5)
+	/* clear DPDES so we don't get guest doorbells in the host */
+	li	r8, 0
+	mtspr	SPRN_DPDES, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
 	/* Subtract timebase offset from timebase */
 	ld	r8,VCORE_TB_OFFSET(r5)
 	cmpdi	r8,0
@@ -1113,7 +1420,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 * We have to lock against concurrent tlbies, and
 	 * we have to flush the whole TLB.
 	 */
-32:	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */
+32:	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
 
 	/* Take the guest's tlbie_lock */
 #ifdef __BIG_ENDIAN__
@@ -1203,6 +1511,56 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	add	r5,r5,r6
 	std	r5,VCPU_DEC_EXPIRES(r9)
 
+BEGIN_FTR_SECTION
+	b	8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+	/* Save POWER8-specific registers */
+	mfspr	r5, SPRN_IAMR
+	mfspr	r6, SPRN_PSPB
+	mfspr	r7, SPRN_FSCR
+	std	r5, VCPU_IAMR(r9)
+	stw	r6, VCPU_PSPB(r9)
+	std	r7, VCPU_FSCR(r9)
+	mfspr	r5, SPRN_IC
+	mfspr	r6, SPRN_VTB
+	mfspr	r7, SPRN_TAR
+	std	r5, VCPU_IC(r9)
+	std	r6, VCPU_VTB(r9)
+	std	r7, VCPU_TAR(r9)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	mfspr	r7, SPRN_TEXASR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+	std	r7, VCPU_TEXASR(r9)
+#endif
+	mfspr	r8, SPRN_EBBHR
+	std	r8, VCPU_EBBHR(r9)
+	mfspr	r5, SPRN_EBBRR
+	mfspr	r6, SPRN_BESCR
+	mfspr	r7, SPRN_CSIGR
+	mfspr	r8, SPRN_TACR
+	std	r5, VCPU_EBBRR(r9)
+	std	r6, VCPU_BESCR(r9)
+	std	r7, VCPU_CSIGR(r9)
+	std	r8, VCPU_TACR(r9)
+	mfspr	r5, SPRN_TCSCR
+	mfspr	r6, SPRN_ACOP
+	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_WORT
+	std	r5, VCPU_TCSCR(r9)
+	std	r6, VCPU_ACOP(r9)
+	stw	r7, VCPU_GUEST_PID(r9)
+	std	r8, VCPU_WORT(r9)
+8:
+
 	/* Save and reset AMR and UAMOR before turning on the MMU */
 	/* Save and reset AMR and UAMOR before turning on the MMU */
 BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_AMR
@@ -1217,130 +1575,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
 
-	/* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
-	mfspr	r8, SPRN_DSCR
-	ld	r7, HSTATE_DSCR(r13)
-	std	r8, VCPU_DSCR(r9)
-	mtspr	SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
-	/* Save non-volatile GPRs */
-	std	r14, VCPU_GPR(R14)(r9)
-	std	r15, VCPU_GPR(R15)(r9)
-	std	r16, VCPU_GPR(R16)(r9)
-	std	r17, VCPU_GPR(R17)(r9)
-	std	r18, VCPU_GPR(R18)(r9)
-	std	r19, VCPU_GPR(R19)(r9)
-	std	r20, VCPU_GPR(R20)(r9)
-	std	r21, VCPU_GPR(R21)(r9)
-	std	r22, VCPU_GPR(R22)(r9)
-	std	r23, VCPU_GPR(R23)(r9)
-	std	r24, VCPU_GPR(R24)(r9)
-	std	r25, VCPU_GPR(R25)(r9)
-	std	r26, VCPU_GPR(R26)(r9)
-	std	r27, VCPU_GPR(R27)(r9)
-	std	r28, VCPU_GPR(R28)(r9)
-	std	r29, VCPU_GPR(R29)(r9)
-	std	r30, VCPU_GPR(R30)(r9)
-	std	r31, VCPU_GPR(R31)(r9)
-
-	/* Save SPRGs */
-	mfspr	r3, SPRN_SPRG0
-	mfspr	r4, SPRN_SPRG1
-	mfspr	r5, SPRN_SPRG2
-	mfspr	r6, SPRN_SPRG3
-	std	r3, VCPU_SPRG0(r9)
-	std	r4, VCPU_SPRG1(r9)
-	std	r5, VCPU_SPRG2(r9)
-	std	r6, VCPU_SPRG3(r9)
-
-	/* save FP state */
-	mr	r3, r9
-	bl	.kvmppc_save_fp
-
-	/* Increment yield count if they have a VPA */
-	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
-	cmpdi	r8, 0
-	beq	25f
-	lwz	r3, LPPACA_YIELDCOUNT(r8)
-	addi	r3, r3, 1
-	stw	r3, LPPACA_YIELDCOUNT(r8)
-	li	r3, 1
-	stb	r3, VCPU_VPA_DIRTY(r9)
-25:
-	/* Save PMU registers if requested */
-	/* r8 and cr0.eq are live here */
-	li	r3, 1
-	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
-	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
-	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
-	mfspr	r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-	/* On P7, clear MMCRA in order to disable SDAR updates */
-	li	r7, 0
-	mtspr	SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-	isync
-	beq	21f			/* if no VPA, save PMU stuff anyway */
-	lbz	r7, LPPACA_PMCINUSE(r8)
-	cmpwi	r7, 0			/* did they ask for PMU stuff to be saved? */
-	bne	21f
-	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
-	b	22f
-21:	mfspr	r5, SPRN_MMCR1
-	mfspr	r7, SPRN_SIAR
-	mfspr	r8, SPRN_SDAR
-	std	r4, VCPU_MMCR(r9)
-	std	r5, VCPU_MMCR + 8(r9)
-	std	r6, VCPU_MMCR + 16(r9)
-	std	r7, VCPU_SIAR(r9)
-	std	r8, VCPU_SDAR(r9)
-	mfspr	r3, SPRN_PMC1
-	mfspr	r4, SPRN_PMC2
-	mfspr	r5, SPRN_PMC3
-	mfspr	r6, SPRN_PMC4
-	mfspr	r7, SPRN_PMC5
-	mfspr	r8, SPRN_PMC6
-BEGIN_FTR_SECTION
-	mfspr	r10, SPRN_PMC7
-	mfspr	r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-	stw	r3, VCPU_PMC(r9)
-	stw	r4, VCPU_PMC + 4(r9)
-	stw	r5, VCPU_PMC + 8(r9)
-	stw	r6, VCPU_PMC + 12(r9)
-	stw	r7, VCPU_PMC + 16(r9)
-	stw	r8, VCPU_PMC + 20(r9)
-BEGIN_FTR_SECTION
-	stw	r10, VCPU_PMC + 24(r9)
-	stw	r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-22:
 	ld	r0, 112+PPC_LR_STKOFF(r1)
 	addi	r1, r1, 112
 	mtlr	r0
 	blr
-secondary_too_late:
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	HMT_LOW
-13:	lbz	r3,VCORE_IN_GUEST(r5)
-	cmpwi	r3,0
-	bne	13b
-	HMT_MEDIUM
-	li	r0, KVM_GUEST_MODE_NONE
-	stb	r0, HSTATE_IN_GUEST(r13)
-	ld	r11,PACA_SLBSHADOWPTR(r13)
-
-	.rept	SLB_NUM_BOLTED
-	ld	r5,SLBSHADOW_SAVEAREA(r11)
-	ld	r6,SLBSHADOW_SAVEAREA+8(r11)
-	andis.	r7,r5,SLB_ESID_V@h
-	beq	1f
-	slbmte	r6,r5
-1:	addi	r11,r11,16
-	.endr
-	b	22b

 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
@@ -1386,8 +1624,7 @@ kvmppc_hdsi:
 	mtspr	SPRN_SRR0, r10
 	mtspr	SPRN_SRR1, r11
 	li	r10, BOOK3S_INTERRUPT_DATA_STORAGE
-	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11, r11, 63
+	ld	r11, VCPU_INTR_MSR(r9)
 fast_interrupt_c_return:
 6:	ld	r7, VCPU_CTR(r9)
 	lwz	r8, VCPU_XER(r9)
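The deleted instruction pair built the guest's interrupt MSR by hand: loading (MSR_ME << 1) | 1 and rotating left by 63 bits (i.e. one bit right) lands the low 1 in bit 63 (MSR_SF) and puts MSR_ME back in place. The replacement loads a per-vcpu value from VCPU_INTR_MSR instead, so the MSR used when synthesizing guest interrupts is no longer hard-wired. A self-contained check of the old trick, as plain standalone C:

	#include <assert.h>
	#include <stdint.h>

	#define MSR_SF	(1ULL << 63)
	#define MSR_ME	(1ULL << 12)

	static uint64_t rotl64(uint64_t x, unsigned int n)
	{
		return (x << n) | (x >> (64 - n));	/* rotldi */
	}

	int main(void)
	{
		/* li r11,(MSR_ME << 1) | 1 ; rotldi r11,r11,63 */
		assert(rotl64((MSR_ME << 1) | 1, 63) == (MSR_SF | MSR_ME));
		return 0;
	}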
@@ -1456,8 +1693,7 @@ kvmppc_hisi:
 1:	mtspr	SPRN_SRR0, r10
 	mtspr	SPRN_SRR1, r11
 	li	r10, BOOK3S_INTERRUPT_INST_STORAGE
-	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11, r11, 63
+	ld	r11, VCPU_INTR_MSR(r9)
 	b	fast_interrupt_c_return

 3:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
@@ -1474,7 +1710,8 @@ kvmppc_hisi:
 hcall_try_real_mode:
 	ld	r3,VCPU_GPR(R3)(r9)
 	andi.	r0,r11,MSR_PR
-	bne	guest_exit_cont
+	/* sc 1 from userspace - reflect to guest syscall */
+	bne	sc_1_fast_return
 	clrrdi	r3,r3,2
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
@@ -1495,6 +1732,14 @@ hcall_try_real_mode:
 	ld	r11,VCPU_MSR(r4)
 	b	fast_guest_return

+sc_1_fast_return:
+	mtspr	SPRN_SRR0,r10
+	mtspr	SPRN_SRR1,r11
+	li	r10, BOOK3S_INTERRUPT_SYSCALL
+	ld	r11, VCPU_INTR_MSR(r9)
+	mr	r4,r9
+	b	fast_guest_return
+
 	/* We've attempted a real mode hcall, but it's punted it back
 	 * to userspace.  We need to restore some clobbered volatiles
 	 * before resuming the pass-it-to-qemu path */
@@ -1588,14 +1833,34 @@ hcall_real_table:
 	.long	0		/* 0x11c */
 	.long	0		/* 0x120 */
 	.long	.kvmppc_h_bulk_remove - hcall_real_table
+	.long	0		/* 0x128 */
+	.long	0		/* 0x12c */
+	.long	0		/* 0x130 */
+	.long	.kvmppc_h_set_xdabr - hcall_real_table
 hcall_real_table_end:

 ignore_hdec:
 	mr	r4,r9
 	b	fast_guest_return

+_GLOBAL(kvmppc_h_set_xdabr)
+	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
+	beq	6f
+	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
+	andc.	r0, r5, r0
+	beq	3f
+6:	li	r3, H_PARAMETER
+	blr
+
 _GLOBAL(kvmppc_h_set_dabr)
+	li	r5, DABRX_USER | DABRX_KERNEL
+3:
+BEGIN_FTR_SECTION
+	b	2f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	std	r4,VCPU_DABR(r3)
+	stw	r5, VCPU_DABRX(r3)
+	mtspr	SPRN_DABRX, r5
 	/* Work around P7 bug where DABR can get corrupted on mtspr */
 1:	mtspr	SPRN_DABR,r4
 	mfspr	r5, SPRN_DABR
@@ -1605,6 +1870,17 @@ _GLOBAL(kvmppc_h_set_dabr)
 	li	r3,0
 	blr

+	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
+2:	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
+	rlwimi	r5, r4, 1, DAWRX_WT
+	clrrdi	r4, r4, 3
+	std	r4, VCPU_DAWR(r3)
+	std	r5, VCPU_DAWRX(r3)
+	mtspr	SPRN_DAWR, r4
+	mtspr	SPRN_DAWRX, r5
+	li	r3, 0
+	blr
+
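On POWER8 the DABR/DABRX pair is gone and the equivalent facility is DAWR/DAWRX, so the path at label 2 above re-encodes the old-style arguments: the two rlwimi instructions shift the read/write and write-through bits into their DAWRX positions and clrrdi aligns the watch address to a doubleword. A hedged C sketch of that repacking; the shift amounts mirror the rlwimi operands, but the exact DAWRX_* bit assignments are the kernel's and are not restated here:

	/* sketch: emulate H_SET_DABR/H_SET_XDABR via the DAWR facility */
	dawrx  = dabrx;
	dawrx |= (dabr << 5) & (DAWRX_DR | DAWRX_DW);	/* rlwimi r5,r4,5,... */
	dawrx |= (dabr << 1) & DAWRX_WT;		/* rlwimi r5,r4,1,... */
	dawr   = dabr & ~7UL;				/* clrrdi r4,r4,3     */
	vcpu->arch.dawr  = dawr;
	vcpu->arch.dawrx = dawrx;
	mtspr(SPRN_DAWR, dawr);
	mtspr(SPRN_DAWRX, dawrx);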
 _GLOBAL(kvmppc_h_cede)
 	ori	r11,r11,MSR_EE
 	std	r11,VCPU_MSR(r3)
@@ -1628,7 +1904,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 	 * up to the host.
 	 */
 	ld	r5,HSTATE_KVM_VCORE(r13)
-	lwz	r6,VCPU_PTID(r3)
+	lbz	r6,HSTATE_PTID(r13)
 	lwz	r8,VCORE_ENTRY_EXIT(r5)
 	clrldi	r8,r8,56
 	li	r0,1
@@ -1643,9 +1919,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 	bne	31b
 	/* order napping_threads update vs testing entry_exit_count */
 	isync
-	li	r0,1
+	li	r0,NAPPING_CEDE
 	stb	r0,HSTATE_NAPPING(r13)
-	mr	r4,r3
 	lwz	r7,VCORE_ENTRY_EXIT(r5)
 	cmpwi	r7,0x100
 	bge	33f		/* another thread already exiting */
@@ -1677,16 +1952,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 	std	r31, VCPU_GPR(R31)(r3)

 	/* save FP state */
-	bl	.kvmppc_save_fp
+	bl	kvmppc_save_fp

 	/*
-	 * Take a nap until a decrementer or external interrupt occurs,
-	 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
+	 * Take a nap until a decrementer or external or doorbell interrupt
+	 * occurs, with PECE1, PECE0 and PECEDP set in LPCR
 	 */
 	li	r0,1
 	stb	r0,HSTATE_HWTHREAD_REQ(r13)
 	mfspr	r5,SPRN_LPCR
 	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
+BEGIN_FTR_SECTION
+	oris	r5,r5,LPCR_PECEDP@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_LPCR,r5
 	isync
 	li	r0, 0
@@ -1698,6 +1976,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 	nap
 	b	.

+33:	mr	r4, r3
+	li	r3, 0
+	li	r12, 0
+	b	34f
+
 kvm_end_cede:
 	/* get vcpu pointer */
 	ld	r4, HSTATE_KVM_VCPU(r13)
@@ -1727,12 +2010,15 @@ kvm_end_cede:
 	ld	r29, VCPU_GPR(R29)(r4)
 	ld	r30, VCPU_GPR(R30)(r4)
 	ld	r31, VCPU_GPR(R31)(r4)
+ 
+	/* Check the wake reason in SRR1 to see why we got here */
+	bl	kvmppc_check_wake_reason

 	/* clear our bit in vcore->napping_threads */
-33:	ld	r5,HSTATE_KVM_VCORE(r13)
-	lwz	r3,VCPU_PTID(r4)
+34:	ld	r5,HSTATE_KVM_VCORE(r13)
+	lbz	r7,HSTATE_PTID(r13)
 	li	r0,1
-	sld	r0,r0,r3
+	sld	r0,r0,r7
 	addi	r6,r5,VCORE_NAPPING_THREADS
 32:	lwarx	r7,0,r6
 	andc	r7,r7,r0
@@ -1741,23 +2027,18 @@ kvm_end_cede:
 	li	r0,0
 	stb	r0,HSTATE_NAPPING(r13)

-	/* Check the wake reason in SRR1 to see why we got here */
-	mfspr	r3, SPRN_SRR1
-	rlwinm	r3, r3, 44-31, 0x7	/* extract wake reason field */
-	cmpwi	r3, 4			/* was it an external interrupt? */
-	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+	/* See if the wake reason means we need to exit */
+	stw	r12, VCPU_TRAP(r4)
 	mr	r9, r4
-	ld	r10, VCPU_PC(r9)
-	ld	r11, VCPU_MSR(r9)
-	beq	do_ext_interrupt	/* if so */
+	cmpdi	r3, 0
+	bgt	guest_exit_cont

 	/* see if any other thread is already exiting */
 	lwz	r0,VCORE_ENTRY_EXIT(r5)
 	cmpwi	r0,0x100
-	blt	kvmppc_cede_reentry	/* if not go back to guest */
+	bge	guest_exit_cont
-	/* some threads are exiting, so go to the guest exit path */
-	b	hcall_real_fallback
+	b	kvmppc_cede_reentry	/* if not go back to guest */

 	/* cede when already previously prodded case */
 kvm_cede_prodded:
@@ -1783,10 +2064,47 @@ machine_check_realmode:
 	beq	mc_cont
 	/* If not, deliver a machine check.  SRR0/1 are already set */
 	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
-	rotldi	r11, r11, 63
+	ld	r11, VCPU_INTR_MSR(r9)
 	b	fast_interrupt_c_return

+/*
+ * Check the reason we woke from nap, and take appropriate action.
+ * Returns:
+ *	0 if nothing needs to be done
+ *	1 if something happened that needs to be handled by the host
+ *	-1 if there was a guest wakeup (IPI)
+ *
+ * Also sets r12 to the interrupt vector for any interrupt that needs
+ * to be handled now by the host (0x500 for external interrupt), or zero.
+ */
+kvmppc_check_wake_reason:
+	mfspr	r6, SPRN_SRR1
+BEGIN_FTR_SECTION
+	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
+FTR_SECTION_ELSE
+	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+	cmpwi	r6, 8			/* was it an external interrupt? */
+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+	beq	kvmppc_read_intr	/* if so, see what it was */
+	li	r3, 0
+	li	r12, 0
+	cmpwi	r6, 6			/* was it the decrementer? */
+	beq	0f
+BEGIN_FTR_SECTION
+	cmpwi	r6, 5			/* privileged doorbell? */
+	beq	0f
+	cmpwi	r6, 3			/* hypervisor doorbell? */
+	beq	3f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	li	r3, 1			/* anything else, return 1 */
+0:	blr
+
+	/* hypervisor doorbell */
+3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
+	li	r3, 1
+	blr
+
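Worth noting: the wake-reason field sits in different SRR1 bits on POWER7 (3 bits) and POWER8 (4 bits), which is why the routine carries the two rlwinm variants. A hedged C rendering of the dispatch, with the field values taken from the comparisons above (8 = external, 6 = decrementer, 5 = privileged doorbell, 3 = hypervisor doorbell); srr1_wake_reason() and read_intr() stand in for the rlwinm extract and the kvmppc_read_intr tail call:

	static int check_wake_reason(struct kvm_vcpu *vcpu, unsigned long srr1,
				     bool is_power8, int *trap)
	{
		unsigned int wr = srr1_wake_reason(srr1, is_power8);

		if (wr == 8) {			/* external interrupt */
			*trap = BOOK3S_INTERRUPT_EXTERNAL;
			return read_intr(vcpu);	/* 1, or -1 for a guest IPI */
		}
		*trap = 0;
		if (wr == 6)			/* decrementer */
			return 0;
		if (is_power8 && wr == 5)	/* privileged doorbell */
			return 0;
		if (is_power8 && wr == 3) {	/* hypervisor doorbell */
			*trap = BOOK3S_INTERRUPT_H_DOORBELL;
			return 1;
		}
		return 1;			/* anything else: host handles it */
	}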
 /*
  * Determine what sort of external interrupt is pending (if any).
  * Returns:
@@ -1818,7 +2136,6 @@ kvmppc_read_intr:
 	 * interrupts directly to the guest
 	 */
 	cmpwi	r3, XICS_IPI		/* if there is, is it an IPI? */
-	li	r3, 1
 	bne	42f

 	/* It's an IPI, clear the MFRR and EOI it */
@@ -1844,19 +2161,25 @@ kvmppc_read_intr:
 	 * before exit, it will be picked up by the host ICP driver
 	 */
 	stw	r0, HSTATE_SAVED_XIRR(r13)
+	li	r3, 1
 	b	1b

 43:	/* We raced with the host, we need to resend that IPI, bummer */
 	li	r0, IPI_PRIORITY
 	stbcix	r0, r6, r8		/* set the IPI */
 	sync
+	li	r3, 1
 	b	1b

 /*
  * Save away FP, VMX and VSX registers.
  * r3 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
  */
-_GLOBAL(kvmppc_save_fp)
+kvmppc_save_fp:
+	mflr	r30
+	mr	r31,r3
 	mfmsr	r5
 	ori	r8,r5,MSR_FP
 #ifdef CONFIG_ALTIVEC
@@ -1871,42 +2194,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
 	isync
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-	reg = 0
-	.rept	32
-	li	r6,reg*16+VCPU_VSRS
-	STXVD2X(reg,R6,R3)
-	reg = reg + 1
-	.endr
-FTR_SECTION_ELSE
-#endif
-	reg = 0
-	.rept	32
-	stfd	reg,reg*8+VCPU_FPRS(r3)
-	reg = reg + 1
-	.endr
-#ifdef CONFIG_VSX
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#endif
-	mffs	fr0
-	stfd	fr0,VCPU_FPSCR(r3)
-
+	addi	r3,r3,VCPU_FPRS
+	bl	.store_fp_state
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
-	reg = 0
-	.rept	32
-	li	r6,reg*16+VCPU_VRS
-	stvx	reg,r6,r3
-	reg = reg + 1
-	.endr
-	mfvscr	vr0
-	li	r6,VCPU_VSCR
-	stvx	vr0,r6,r3
+	addi	r3,r31,VCPU_VRS
+	bl	.store_vr_state
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
 	mfspr	r6,SPRN_VRSAVE
 	stw	r6,VCPU_VRSAVE(r3)
+	mtlr	r30
 	mtmsrd	r5
 	isync
 	blr
@@ -1914,9 +2212,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 /*
  * Load up FP, VMX and VSX registers
  * r4 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
  */
-	.globl	kvmppc_load_fp
 kvmppc_load_fp:
+	mflr	r30
+	mr	r31,r4
 	mfmsr	r9
 	ori	r8,r9,MSR_FP
 #ifdef CONFIG_ALTIVEC
@@ -1931,42 +2232,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
 	isync
-	lfd	fr0,VCPU_FPSCR(r4)
-	MTFSF_L(fr0)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-	reg = 0
-	.rept	32
-	li	r7,reg*16+VCPU_VSRS
-	LXVD2X(reg,R7,R4)
-	reg = reg + 1
-	.endr
-FTR_SECTION_ELSE
-#endif
-	reg = 0
-	.rept	32
-	lfd	reg,reg*8+VCPU_FPRS(r4)
-	reg = reg + 1
-	.endr
-#ifdef CONFIG_VSX
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#endif
-
+	addi	r3,r4,VCPU_FPRS
+	bl	.load_fp_state
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
-	li	r7,VCPU_VSCR
-	lvx	vr0,r7,r4
-	mtvscr	vr0
-	reg = 0
-	.rept	32
-	li	r7,reg*16+VCPU_VRS
-	lvx	reg,r7,r4
-	reg = reg + 1
-	.endr
+	addi	r3,r31,VCPU_VRS
+	bl	.load_vr_state
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
 	lwz	r7,VCPU_VRSAVE(r4)
 	mtspr	SPRN_VRSAVE,r7
+	mtlr	r30
+	mr	r4,r31
 	blr

 /*

+ 83 - 86
arch/powerpc/kvm/book3s_paired_singles.c

@@ -160,7 +160,7 @@

 static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
 {
-	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]);
+	kvm_cvt_df(&VCPU_FPR(vcpu, rt), &vcpu->arch.qpr[rt]);
 }

 static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
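Every change in this file is the same mechanical substitution: the vcpu's floating-point state now lives in a struct thread_fp_state at vcpu->arch.fp (the fpr array plus fpscr), and scalar FPRs are reached through a VCPU_FPR() accessor so the very same storage can be handed to load_fp_state()/store_fp_state(). The accessor is plausibly of this shape (a sketch; the real definition lives in the book3s headers):

	#define VCPU_FPR(vcpu, i)	((vcpu)->arch.fp.fpr[i][TS_FPROFFSET])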
@@ -207,11 +207,11 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* put in registers */
 	switch (ls_type) {
 	case FPU_LS_SINGLE:
-		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]);
+		kvm_cvt_fd((u32*)tmp, &VCPU_FPR(vcpu, rs));
 		vcpu->arch.qpr[rs] = *((u32*)tmp);
 		break;
 	case FPU_LS_DOUBLE:
-		vcpu->arch.fpr[rs] = *((u64*)tmp);
+		VCPU_FPR(vcpu, rs) = *((u64*)tmp);
 		break;
 	}

@@ -233,18 +233,18 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,

 	switch (ls_type) {
 	case FPU_LS_SINGLE:
-		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp);
+		kvm_cvt_df(&VCPU_FPR(vcpu, rs), (u32*)tmp);
 		val = *((u32*)tmp);
 		len = sizeof(u32);
 		break;
 	case FPU_LS_SINGLE_LOW:
-		*((u32*)tmp) = vcpu->arch.fpr[rs];
-		val = vcpu->arch.fpr[rs] & 0xffffffff;
+		*((u32*)tmp) = VCPU_FPR(vcpu, rs);
+		val = VCPU_FPR(vcpu, rs) & 0xffffffff;
 		len = sizeof(u32);
 		break;
 	case FPU_LS_DOUBLE:
-		*((u64*)tmp) = vcpu->arch.fpr[rs];
-		val = vcpu->arch.fpr[rs];
+		*((u64*)tmp) = VCPU_FPR(vcpu, rs);
+		val = VCPU_FPR(vcpu, rs);
 		len = sizeof(u64);
 		break;
 	default:
@@ -301,7 +301,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	emulated = EMULATE_DONE;

 	/* put in registers */
-	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]);
+	kvm_cvt_fd(&tmp[0], &VCPU_FPR(vcpu, rs));
 	vcpu->arch.qpr[rs] = tmp[1];

 	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
@@ -319,7 +319,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	u32 tmp[2];
 	int len = w ? sizeof(u32) : sizeof(u64);

-	kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]);
+	kvm_cvt_df(&VCPU_FPR(vcpu, rs), &tmp[0]);
 	tmp[1] = vcpu->arch.qpr[rs];

 	r = kvmppc_st(vcpu, &addr, len, tmp, true);
@@ -512,7 +512,6 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
 						 u32 *src2, u32 *src3))
 {
 	u32 *qpr = vcpu->arch.qpr;
-	u64 *fpr = vcpu->arch.fpr;
 	u32 ps0_out;
 	u32 ps0_in1, ps0_in2, ps0_in3;
 	u32 ps1_in1, ps1_in2, ps1_in3;
@@ -521,20 +520,20 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
 	WARN_ON(rc);

 	/* PS0 */
-	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
-	kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
-	kvm_cvt_df(&fpr[reg_in3], &ps0_in3);
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in3), &ps0_in3);

 	if (scalar & SCALAR_LOW)
 		ps0_in2 = qpr[reg_in2];

-	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
+	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);

 	dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
 			  ps0_in1, ps0_in2, ps0_in3, ps0_out);

 	if (!(scalar & SCALAR_NO_PS0))
-		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+		kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));

 	/* PS1 */
 	ps1_in1 = qpr[reg_in1];
@@ -545,7 +544,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
 		ps1_in2 = ps0_in2;

 	if (!(scalar & SCALAR_NO_PS1))
-		func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
+		func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);

 	dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
 			  ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]);
@@ -561,7 +560,6 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
 						 u32 *src2))
 {
 	u32 *qpr = vcpu->arch.qpr;
-	u64 *fpr = vcpu->arch.fpr;
 	u32 ps0_out;
 	u32 ps0_in1, ps0_in2;
 	u32 ps1_out;
@@ -571,20 +569,20 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
 	WARN_ON(rc);

 	/* PS0 */
-	kvm_cvt_df(&fpr[reg_in1], &ps0_in1);
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);

 	if (scalar & SCALAR_LOW)
 		ps0_in2 = qpr[reg_in2];
 	else
-		kvm_cvt_df(&fpr[reg_in2], &ps0_in2);
+		kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
-	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
+	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2);

 	if (!(scalar & SCALAR_NO_PS0)) {
 		dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
 				  ps0_in1, ps0_in2, ps0_out);

-		kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+		kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
 	}

 	/* PS1 */
@@ -594,7 +592,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
 	if (scalar & SCALAR_HIGH)
 		ps1_in2 = ps0_in2;

-	func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2);
+	func(&vcpu->arch.fp.fpscr, &ps1_out, &ps1_in1, &ps1_in2);

 	if (!(scalar & SCALAR_NO_PS1)) {
 		qpr[reg_out] = ps1_out;
@@ -612,7 +610,6 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
 						 u32 *dst, u32 *src1))
 {
 	u32 *qpr = vcpu->arch.qpr;
-	u64 *fpr = vcpu->arch.fpr;
 	u32 ps0_out, ps0_in;
 	u32 ps1_in;

@@ -620,17 +617,17 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
 	WARN_ON(rc);

 	/* PS0 */
-	kvm_cvt_df(&fpr[reg_in], &ps0_in);
-	func(&vcpu->arch.fpscr, &ps0_out, &ps0_in);
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in), &ps0_in);
+	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in);

 	dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
 			  ps0_in, ps0_out);

-	kvm_cvt_fd(&ps0_out, &fpr[reg_out]);
+	kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));

 	/* PS1 */
 	ps1_in = qpr[reg_in];
-	func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in);
+	func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in);

 	dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n",
 			  ps1_in, qpr[reg_out]);
@@ -649,10 +646,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	int ax_rc = inst_get_field(inst, 21, 25);
 	short full_d = inst_get_field(inst, 16, 31);

-	u64 *fpr_d = &vcpu->arch.fpr[ax_rd];
-	u64 *fpr_a = &vcpu->arch.fpr[ax_ra];
-	u64 *fpr_b = &vcpu->arch.fpr[ax_rb];
-	u64 *fpr_c = &vcpu->arch.fpr[ax_rc];
+	u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd);
+	u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra);
+	u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb);
+	u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc);

 	bool rcomp = (inst & 1) ? true : false;
 	u32 cr = kvmppc_get_cr(vcpu);
@@ -674,11 +671,11 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	/* Do we need to clear FE0 / FE1 here? Don't think so. */

 #ifdef DEBUG
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
 		u32 f;
-		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+		kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
 		dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx    QPR[%d] = 0x%x\n",
-			i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
+			i, f, VCPU_FPR(vcpu, i), i, vcpu->arch.qpr[i]);
 	}
 #endif

@@ -764,8 +761,8 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			break;
 		}
 		case OP_4X_PS_NEG:
-			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
-			vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL;
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+			VCPU_FPR(vcpu, ax_rd) ^= 0x8000000000000000ULL;
 			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
 			vcpu->arch.qpr[ax_rd] ^= 0x80000000;
 			break;
@@ -775,7 +772,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			break;
 		case OP_4X_PS_MR:
 			WARN_ON(rcomp);
-			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
 			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
 			break;
 		case OP_4X_PS_CMPO1:
@@ -784,44 +781,44 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			break;
 		case OP_4X_PS_NABS:
 			WARN_ON(rcomp);
-			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
-			vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL;
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+			VCPU_FPR(vcpu, ax_rd) |= 0x8000000000000000ULL;
 			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
 			vcpu->arch.qpr[ax_rd] |= 0x80000000;
 			break;
 		case OP_4X_PS_ABS:
 			WARN_ON(rcomp);
-			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
-			vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL;
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+			VCPU_FPR(vcpu, ax_rd) &= ~0x8000000000000000ULL;
 			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
 			vcpu->arch.qpr[ax_rd] &= ~0x80000000;
 			break;
 		case OP_4X_PS_MERGE00:
 			WARN_ON(rcomp);
-			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
-			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
-			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
+			/* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+			kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
 				   &vcpu->arch.qpr[ax_rd]);
 			break;
 		case OP_4X_PS_MERGE01:
 			WARN_ON(rcomp);
-			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
 			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
 			break;
 		case OP_4X_PS_MERGE10:
 			WARN_ON(rcomp);
-			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+			/* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
 			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
-				   &vcpu->arch.fpr[ax_rd]);
-			/* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
-			kvm_cvt_df(&vcpu->arch.fpr[ax_rb],
+				   &VCPU_FPR(vcpu, ax_rd));
+			/* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+			kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
 				   &vcpu->arch.qpr[ax_rd]);
 			break;
 		case OP_4X_PS_MERGE11:
 			WARN_ON(rcomp);
-			/* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
+			/* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
 			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
-				   &vcpu->arch.fpr[ax_rd]);
+				   &VCPU_FPR(vcpu, ax_rd));
 			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
 			break;
 		}
@@ -856,7 +853,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		case OP_4A_PS_SUM1:
 			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
 					ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds);
-			vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc];
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rc);
 			break;
 		case OP_4A_PS_SUM0:
 			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
@@ -1106,45 +1103,45 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	case 59:
 		switch (inst_get_field(inst, 21, 30)) {
 		case OP_59_FADDS:
-			fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			fpd_fadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FSUBS:
-			fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			fpd_fsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FDIVS:
-			fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			fpd_fdivs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FRES:
-			fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_fres(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FRSQRTES:
-			fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_frsqrtes(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		}
 		switch (inst_get_field(inst, 26, 30)) {
 		case OP_59_FMULS:
-			fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+			fpd_fmuls(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FMSUBS:
-			fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FMADDS:
-			fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FNMSUBS:
-			fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fnmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_59_FNMADDS:
-			fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fnmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		}
@@ -1159,12 +1156,12 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			break;
 		case OP_63_MFFS:
 			/* XXX missing CR */
-			*fpr_d = vcpu->arch.fpscr;
+			*fpr_d = vcpu->arch.fp.fpscr;
 			break;
 		case OP_63_MTFSF:
 			/* XXX missing fm bits */
 			/* XXX missing CR */
-			vcpu->arch.fpscr = *fpr_b;
+			vcpu->arch.fp.fpscr = *fpr_b;
 			break;
 		case OP_63_FCMPU:
 		{
@@ -1172,7 +1169,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			u32 cr0_mask = 0xf0000000;
 			u32 cr_shift = inst_get_field(inst, 6, 8) * 4;

-			fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+			fpd_fcmpu(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
 			cr &= ~(cr0_mask >> cr_shift);
 			cr |= (cr & cr0_mask) >> cr_shift;
 			break;
@@ -1183,40 +1180,40 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			u32 cr0_mask = 0xf0000000;
 			u32 cr_shift = inst_get_field(inst, 6, 8) * 4;

-			fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
+			fpd_fcmpo(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
 			cr &= ~(cr0_mask >> cr_shift);
 			cr |= (cr & cr0_mask) >> cr_shift;
 			break;
 		}
 		case OP_63_FNEG:
-			fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_fneg(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			break;
 		case OP_63_FMR:
 			*fpr_d = *fpr_b;
 			break;
 		case OP_63_FABS:
-			fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_fabs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			break;
 		case OP_63_FCPSGN:
-			fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			fpd_fcpsgn(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
 			break;
 		case OP_63_FDIV:
-			fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
 			break;
 		case OP_63_FADD:
-			fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			fpd_fadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
 			break;
 		case OP_63_FSUB:
-			fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			fpd_fsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
 			break;
 		case OP_63_FCTIW:
-			fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_fctiw(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			break;
 		case OP_63_FCTIWZ:
-			fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_fctiwz(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			break;
 		case OP_63_FRSP:
-			fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_frsp(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			kvmppc_sync_qpr(vcpu, ax_rd);
 			break;
 		case OP_63_FRSQRTE:
@@ -1224,39 +1221,39 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			double one = 1.0f;

 			/* fD = sqrt(fB) */
-			fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
+			fpd_fsqrt(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
 			/* fD = 1.0f / fD */
-			fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
+			fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
 			break;
 		}
 		}
 		switch (inst_get_field(inst, 26, 30)) {
 		case OP_63_FMUL:
-			fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+			fpd_fmul(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
 			break;
 		case OP_63_FSEL:
-			fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fsel(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			break;
 		case OP_63_FMSUB:
-			fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			break;
 		case OP_63_FMADD:
-			fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			break;
 		case OP_63_FNMSUB:
-			fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fnmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			break;
 		case OP_63_FNMADD:
-			fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			fpd_fnmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
 			break;
 		}
 		break;
 	}

 #ifdef DEBUG
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
 		u32 f;
-		kvm_cvt_df(&vcpu->arch.fpr[i], &f);
+		kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
 		dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
 	}
 #endif

+ 34 - 121
arch/powerpc/kvm/book3s_pr.c

@@ -41,6 +41,7 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
+#include <linux/miscdevice.h>

 #include "book3s.h"

@@ -566,12 +567,6 @@ static inline int get_fpr_index(int i)
 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {
 	struct thread_struct *t = &current->thread;
-	u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
-	u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
-	u64 *thread_fpr = &t->fp_state.fpr[0][0];
-	int i;

 	/*
 	 * VSX instructions can access FP and vector registers, so if
@@ -594,26 +589,16 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 		 * both the traditional FP registers and the added VSX
 		 * registers into thread.fp_state.fpr[].
 		 */
-		if (current->thread.regs->msr & MSR_FP)
+		if (t->regs->msr & MSR_FP)
 			giveup_fpu(current);
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
-			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
-
-		vcpu->arch.fpscr = t->fp_state.fpscr;
-
-#ifdef CONFIG_VSX
-		if (cpu_has_feature(CPU_FTR_VSX))
-			for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
-				vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
-#endif
+		t->fp_save_area = NULL;
 	}

 #ifdef CONFIG_ALTIVEC
 	if (msr & MSR_VEC) {
 		if (current->thread.regs->msr & MSR_VEC)
 			giveup_altivec(current);
-		memcpy(vcpu->arch.vr, t->vr_state.vr, sizeof(vcpu->arch.vr));
-		vcpu->arch.vscr = t->vr_state.vscr;
+		t->vr_save_area = NULL;
 	}
 #endif

@@ -661,12 +646,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 			     ulong msr)
 {
 	struct thread_struct *t = &current->thread;
-	u64 *vcpu_fpr = vcpu->arch.fpr;
-#ifdef CONFIG_VSX
-	u64 *vcpu_vsx = vcpu->arch.vsr;
-#endif
-	u64 *thread_fpr = &t->fp_state.fpr[0][0];
-	int i;

 	/* When we have paired singles, we emulate in software */
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
@@ -704,27 +683,20 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #endif

 	if (msr & MSR_FP) {
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
-			thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
-#ifdef CONFIG_VSX
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
-			thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
-#endif
-		t->fp_state.fpscr = vcpu->arch.fpscr;
-		t->fpexc_mode = 0;
-		kvmppc_load_up_fpu();
+		enable_kernel_fp();
+		load_fp_state(&vcpu->arch.fp);
+		t->fp_save_area = &vcpu->arch.fp;
 	}

 	if (msr & MSR_VEC) {
 #ifdef CONFIG_ALTIVEC
-		memcpy(t->vr_state.vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
-		t->vr_state.vscr = vcpu->arch.vscr;
-		t->vrsave = -1;
-		kvmppc_load_up_altivec();
+		enable_kernel_altivec();
+		load_vr_state(&vcpu->arch.vr);
+		t->vr_save_area = &vcpu->arch.vr;
 #endif
 	}

-	current->thread.regs->msr |= msr;
+	t->regs->msr |= msr;
 	vcpu->arch.guest_owned_ext |= msr;
 	kvmppc_recalc_shadow_msr(vcpu);

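The PR-KVM FP/Altivec handling above now rides on the host's lazy state-switching machinery: giving the guest a unit means claiming it with enable_kernel_fp()/enable_kernel_altivec(), loading the guest register image, and pointing the thread's save area at the vcpu so a later giveup_fpu()/giveup_altivec() spills straight back into vcpu->arch.fp / vcpu->arch.vr; taking the unit away is then just a matter of clearing the pointer. A minimal sketch of the two sides of that protocol, assuming exactly the semantics described:

	static void guest_takes_fp(struct kvm_vcpu *vcpu)
	{
		enable_kernel_fp();				/* claim the FPU */
		load_fp_state(&vcpu->arch.fp);			/* load guest FPRs */
		current->thread.fp_save_area = &vcpu->arch.fp;	/* save back here */
	}

	static void host_takes_fp(struct kvm_vcpu *vcpu)
	{
		if (current->thread.regs->msr & MSR_FP)
			giveup_fpu(current);	/* spills into fp_save_area */
		current->thread.fp_save_area = NULL;
	}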
@@ -743,11 +715,15 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 	if (!lost_ext)
 		return;

-	if (lost_ext & MSR_FP)
-		kvmppc_load_up_fpu();
+	if (lost_ext & MSR_FP) {
+		enable_kernel_fp();
+		load_fp_state(&vcpu->arch.fp);
+	}
 #ifdef CONFIG_ALTIVEC
-	if (lost_ext & MSR_VEC)
-		kvmppc_load_up_altivec();
+	if (lost_ext & MSR_VEC) {
+		enable_kernel_altivec();
+		load_vr_state(&vcpu->arch.vr);
+	}
 #endif
 	current->thread.regs->msr |= lost_ext;
 }
@@ -873,6 +849,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* We're good on these - the host merely wanted to get our attention */
 	case BOOK3S_INTERRUPT_DECREMENTER:
 	case BOOK3S_INTERRUPT_HV_DECREMENTER:
+	case BOOK3S_INTERRUPT_DOORBELL:
 		vcpu->stat.dec_exits++;
 		r = RESUME_GUEST;
 		break;
@@ -1045,14 +1022,14 @@ program_interrupt:
 		 * and if we really did time things so badly, then we just exit
 		 * again due to a host external interrupt.
 		 */
-		local_irq_disable();
 		s = kvmppc_prepare_to_enter(vcpu);
-		if (s <= 0) {
-			local_irq_enable();
+		if (s <= 0)
 			r = s;
-		} else {
+		else {
+			/* interrupts now hard-disabled */
 			kvmppc_fix_ee_before_entry();
 		}
+
 		kvmppc_handle_lost_ext(vcpu);
 	}

@@ -1133,19 +1110,6 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_HIOR:
 		*val = get_reg_val(id, to_book3s(vcpu)->hior);
 		break;
-#ifdef CONFIG_VSX
-	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
-		long int i = id - KVM_REG_PPC_VSR0;
-
-		if (!cpu_has_feature(CPU_FTR_VSX)) {
-			r = -ENXIO;
-			break;
-		}
-		val->vsxval[0] = vcpu->arch.fpr[i];
-		val->vsxval[1] = vcpu->arch.vsr[i];
-		break;
-	}
-#endif /* CONFIG_VSX */
 	default:
 		r = -EINVAL;
 		break;
@@ -1164,19 +1128,6 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 		to_book3s(vcpu)->hior = set_reg_val(id, *val);
 		to_book3s(vcpu)->hior_explicit = true;
 		break;
-#ifdef CONFIG_VSX
-	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
-		long int i = id - KVM_REG_PPC_VSR0;
-
-		if (!cpu_has_feature(CPU_FTR_VSX)) {
-			r = -ENXIO;
-			break;
-		}
-		vcpu->arch.fpr[i] = val->vsxval[0];
-		vcpu->arch.vsr[i] = val->vsxval[1];
-		break;
-	}
-#endif /* CONFIG_VSX */
 	default:
 		r = -EINVAL;
 		break;
@@ -1274,17 +1225,9 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
 static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
-	struct thread_fp_state fp;
-	int fpexc_mode;
 #ifdef CONFIG_ALTIVEC
-	struct thread_vr_state vr;
 	unsigned long uninitialized_var(vrsave);
-	int used_vr;
 #endif
-#ifdef CONFIG_VSX
-	int used_vsr;
-#endif
-	ulong ext_msr;

 	/* Check if we can run the vcpu at all */
 	if (!vcpu->arch.sane) {
@@ -1299,40 +1242,27 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * really did time things so badly, then we just exit again due to
 	 * a host external interrupt.
 	 */
-	local_irq_disable();
 	ret = kvmppc_prepare_to_enter(vcpu);
-	if (ret <= 0) {
-		local_irq_enable();
+	if (ret <= 0)
 		goto out;
-	}
+	/* interrupts now hard-disabled */
-	/* Save FPU state in stack */
+	/* Save FPU state in thread_struct */
 	if (current->thread.regs->msr & MSR_FP)
 		giveup_fpu(current);
-	fp = current->thread.fp_state;
-	fpexc_mode = current->thread.fpexc_mode;

 #ifdef CONFIG_ALTIVEC
-	/* Save Altivec state in stack */
-	used_vr = current->thread.used_vr;
-	if (used_vr) {
-		if (current->thread.regs->msr & MSR_VEC)
-			giveup_altivec(current);
-		vr = current->thread.vr_state;
-		vrsave = current->thread.vrsave;
-	}
+	/* Save Altivec state in thread_struct */
+	if (current->thread.regs->msr & MSR_VEC)
+		giveup_altivec(current);
 #endif

 #ifdef CONFIG_VSX
-	/* Save VSX state in stack */
-	used_vsr = current->thread.used_vsr;
-	if (used_vsr && (current->thread.regs->msr & MSR_VSX))
+	/* Save VSX state in thread_struct */
+	if (current->thread.regs->msr & MSR_VSX)
 		__giveup_vsx(current);
 #endif

-	/* Remember the MSR with disabled extensions */
-	ext_msr = current->thread.regs->msr;
-
 	/* Preload FPU if it's enabled */
 	if (vcpu->arch.shared->msr & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
@@ -1347,25 +1277,6 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* Make sure we save the guest FPU/Altivec/VSX state */
 	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);

-	current->thread.regs->msr = ext_msr;
-
-	/* Restore FPU/VSX state from stack */
-	current->thread.fp_state = fp;
-	current->thread.fpexc_mode = fpexc_mode;
-
-#ifdef CONFIG_ALTIVEC
-	/* Restore Altivec state from stack */
-	if (used_vr && current->thread.used_vr) {
-		current->thread.vr_state = vr;
-		current->thread.vrsave = vrsave;
-	}
-	current->thread.used_vr = used_vr;
-#endif
-
-#ifdef CONFIG_VSX
-	current->thread.used_vsr = used_vsr;
-#endif
-
 out:
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	return ret;
@@ -1606,4 +1517,6 @@ module_init(kvmppc_book3s_init_pr);
 module_exit(kvmppc_book3s_exit_pr);

 MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
 #endif

+ 0 - 47
arch/powerpc/kvm/book3s_rmhandlers.S

@@ -162,51 +162,4 @@ _GLOBAL(kvmppc_entry_trampoline)
 	mtsrr1	r6
 	RFI

-#if defined(CONFIG_PPC_BOOK3S_32)
-#define STACK_LR	INT_FRAME_SIZE+4
-
-/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */
-#define MSR_EXT_START						\
-	PPC_STL	r20, _NIP(r1);					\
-	mfmsr	r20;						\
-	LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE);			\
-	andc	r3,r20,r3;		/* Disable DR,EE */	\
-	mtmsr	r3;						\
-	sync
-
-#define MSR_EXT_END						\
-	mtmsr	r20;			/* Enable DR,EE */	\
-	sync;							\
-	PPC_LL	r20, _NIP(r1)
-
-#elif defined(CONFIG_PPC_BOOK3S_64)
-#define STACK_LR	_LINK
-#define MSR_EXT_START
-#define MSR_EXT_END
-#endif
-
-/*
- * Activate current's external feature (FPU/Altivec/VSX)
- */
-#define define_load_up(what) 					\
-								\
-_GLOBAL(kvmppc_load_up_ ## what);				\
-	PPC_STLU r1, -INT_FRAME_SIZE(r1);			\
-	mflr	r3;						\
-	PPC_STL	r3, STACK_LR(r1);				\
-	MSR_EXT_START;						\
-								\
-	bl	FUNC(load_up_ ## what);				\
-								\
-	MSR_EXT_END;						\
-	PPC_LL	r3, STACK_LR(r1);				\
-	mtlr	r3;						\
-	addi	r1, r1, INT_FRAME_SIZE;				\
-	blr
-
-define_load_up(fpu)
-#ifdef CONFIG_ALTIVEC
-define_load_up(altivec)
-#endif
-
 #include "book3s_segment.S"

+ 2 - 0
arch/powerpc/kvm/book3s_segment.S

@@ -361,6 +361,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	beqa	BOOK3S_INTERRUPT_DECREMENTER
 	cmpwi	r12, BOOK3S_INTERRUPT_PERFMON
 	beqa	BOOK3S_INTERRUPT_PERFMON
+	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL
+	beqa	BOOK3S_INTERRUPT_DOORBELL

 	RFI
 kvmppc_handler_trampoline_exit_end:

+ 3 - 1
arch/powerpc/kvm/book3s_xics.c

@@ -1246,8 +1246,10 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
 		kvm->arch.xics = xics;
 		kvm->arch.xics = xics;
 	mutex_unlock(&kvm->lock);
 	mutex_unlock(&kvm->lock);
 
 
-	if (ret)
+	if (ret) {
+		kfree(xics);
 		return ret;
 		return ret;

 	xics_debugfs_init(xics);

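The xics fix is the standard allocate/rollback pattern: once the create path can fail after the kzalloc, the error branch has to free what it allocated or the device is leaked. In outline, with do_create() standing in for the locked setup above:

	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
	if (!xics)
		return -ENOMEM;
	ret = do_create(xics);
	if (ret) {
		kfree(xics);	/* don't leak the half-built device */
		return ret;
	}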
 

+ 6 - 38
arch/powerpc/kvm/booke.c

@@ -643,7 +643,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 		local_irq_enable();
 		kvm_vcpu_block(vcpu);
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
-		local_irq_disable();
+		hard_irq_disable();

 		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
 		r = 1;
@@ -682,34 +682,22 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret, s;
 	struct debug_reg debug;
-#ifdef CONFIG_PPC_FPU
-	struct thread_fp_state fp;
-	int fpexc_mode;
-#endif

 	if (!vcpu->arch.sane) {
 		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return -EINVAL;
 	}

-	local_irq_disable();
 	s = kvmppc_prepare_to_enter(vcpu);
 	if (s <= 0) {
-		local_irq_enable();
 		ret = s;
 		goto out;
 	}
+	/* interrupts now hard-disabled */

 #ifdef CONFIG_PPC_FPU
 	/* Save userspace FPU state in stack */
 	enable_kernel_fp();
-	fp = current->thread.fp_state;
-	fpexc_mode = current->thread.fpexc_mode;
-
-	/* Restore guest FPU state to thread */
-	memcpy(current->thread.fp_state.fpr, vcpu->arch.fpr,
-	       sizeof(vcpu->arch.fpr));
-	current->thread.fp_state.fpscr = vcpu->arch.fpscr;

 	/*
 	 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
@@ -728,6 +716,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	debug = current->thread.debug;
 	current->thread.debug = vcpu->arch.shadow_dbg_reg;

+	vcpu->arch.pgdir = current->mm->pgd;
 	kvmppc_fix_ee_before_entry();

 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -743,15 +732,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvmppc_save_guest_fp(vcpu);

 	vcpu->fpu_active = 0;
-
-	/* Save guest FPU state from thread */
-	memcpy(vcpu->arch.fpr, current->thread.fp_state.fpr,
-	       sizeof(vcpu->arch.fpr));
-	vcpu->arch.fpscr = current->thread.fp_state.fpscr;
-
-	/* Restore userspace FPU state from stack */
-	current->thread.fp_state = fp;
-	current->thread.fpexc_mode = fpexc_mode;
 #endif

 out:
@@ -898,17 +878,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int s;
 	int idx;

-#ifdef CONFIG_PPC64
-	WARN_ON(local_paca->irq_happened != 0);
-#endif
-
-	/*
-	 * We enter with interrupts disabled in hardware, but
-	 * we need to call hard_irq_disable anyway to ensure that
-	 * the software state is kept in sync.
-	 */
-	hard_irq_disable();
-
 	/* update before a new last_exit_type is rewritten */
 	kvmppc_update_timing_stats(vcpu);

@@ -1217,12 +1186,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * aren't already exiting to userspace for some other reason.
 	 */
 	if (!(r & RESUME_HOST)) {
-		local_irq_disable();
 		s = kvmppc_prepare_to_enter(vcpu);
-		if (s <= 0) {
-			local_irq_enable();
+		if (s <= 0)
 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-		} else {
+		else {
+			/* interrupts now hard-disabled */
 			kvmppc_fix_ee_before_entry();
 		}
 	}
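All the call sites in this file now follow the same contract: kvmppc_prepare_to_enter() does its own interrupt disabling, returns a value <= 0 when the vcpu should bail out to the host, and returns positive with interrupts hard-disabled when it is safe to enter the guest (hence the "/* interrupts now hard-disabled */" comments above). In caller-side outline:

	s = kvmppc_prepare_to_enter(vcpu);	/* handles IRQ disabling itself */
	if (s <= 0)
		return s;			/* bail out to the host */
	/* interrupts now hard-disabled: enter the guest */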

+ 4 - 1
arch/powerpc/kvm/booke.h

@@ -136,7 +136,9 @@ static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_PPC_FPU
 	if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
-		load_up_fpu();
+		enable_kernel_fp();
+		load_fp_state(&vcpu->arch.fp);
+		current->thread.fp_save_area = &vcpu->arch.fp;
 		current->thread.regs->msr |= MSR_FP;
 	}
 #endif
@@ -151,6 +153,7 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_PPC_FPU
 	if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
 		giveup_fpu(current);
+	current->thread.fp_save_area = NULL;
 #endif
 }


+ 11 - 0
arch/powerpc/kvm/bookehv_interrupts.S

@@ -33,6 +33,8 @@

 #ifdef CONFIG_64BIT
 #include <asm/exception-64e.h>
+#include <asm/hw_irq.h>
+#include <asm/irqflags.h>
 #else
 #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
 #endif
@@ -467,6 +469,15 @@ _GLOBAL(kvmppc_resume_host)
 	mtspr	SPRN_EPCR, r3
 	isync

+#ifdef CONFIG_64BIT
+	/*
+	 * We enter with interrupts disabled in hardware, but
+	 * we need to call RECONCILE_IRQ_STATE to ensure
+	 * that the software state is kept in sync.
+	 */
+	RECONCILE_IRQ_STATE(r3,r5)
+#endif
+
 	/* Switch to kernel stack and jump to handler. */
 	PPC_LL	r3, HOST_RUN(r1)
 	mr	r5, r14 /* intno */

+ 4 - 0
arch/powerpc/kvm/e500.c

@@ -16,6 +16,8 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/export.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>

 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -573,3 +575,5 @@ static void __exit kvmppc_e500_exit(void)

 module_init(kvmppc_e500_init);
 module_exit(kvmppc_e500_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");

+ 5 - 3
arch/powerpc/kvm/e500.h

@@ -31,11 +31,13 @@ enum vcpu_ftr {
 #define E500_TLB_NUM   2

 /* entry is mapped somewhere in host TLB */
-#define E500_TLB_VALID		(1 << 0)
+#define E500_TLB_VALID		(1 << 31)
 /* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
 /* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
-#define E500_TLB_BITMAP		(1 << 1)
+#define E500_TLB_BITMAP		(1 << 30)
 /* TLB1 entry is mapped by host TLB0 */
 /* TLB1 entry is mapped by host TLB0 */
-#define E500_TLB_TLB0		(1 << 2)
+#define E500_TLB_TLB0		(1 << 29)
+/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */
+#define E500_TLB_MAS2_ATTR	(0x7f)
 
 
 struct tlbe_ref {
 struct tlbe_ref {
 	pfn_t pfn;		/* valid only for TLB0, except briefly */
 	pfn_t pfn;		/* valid only for TLB0, except briefly */
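
The flag renumbering above is not cosmetic: moving VALID/BITMAP/TLB0 to the top bits frees the low seven bits of ref->flags to carry the guest's MAS2 X0/X1 and WIMGE attributes verbatim, which E500_TLB_MAS2_ATTR then masks back out. A standalone sketch of the packing; the flag values are copied from the hunk, the sample attribute bits are made up:

#include <stdio.h>

#define E500_TLB_VALID     (1u << 31)
#define E500_TLB_BITMAP    (1u << 30)
#define E500_TLB_TLB0      (1u << 29)
#define E500_TLB_MAS2_ATTR 0x7fu

int main(void)
{
	unsigned int wimg = 0x08;	/* made-up sample: cache-inhibited */
	unsigned int guest_ge = 0x02;	/* made-up sample: guest G/E bits */
	unsigned int flags = E500_TLB_VALID | guest_ge | wimg;

	printf("valid=%d state=%#x attrs=%#x\n",
	       !!(flags & E500_TLB_VALID),
	       flags & ~E500_TLB_MAS2_ATTR,	/* state bits, top of word */
	       flags & E500_TLB_MAS2_ATTR);	/* attributes, bottom 7 bits */
	return 0;
}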

+ 1 - 1
arch/powerpc/kvm/e500_mmu.c

@@ -127,7 +127,7 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
 }
 }
 
 
 static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
-		unsigned int eaddr, int as)
+		gva_t eaddr, int as)
 {
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	unsigned int victim, tsized;
 	unsigned int victim, tsized;

+ 34 - 25
arch/powerpc/kvm/e500_mmu_host.c

@@ -65,15 +65,6 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
 	return mas3;
 }
 
-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
-{
-#ifdef CONFIG_SMP
-	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
-	return mas2 & MAS2_ATTRIB_MASK;
-#endif
-}
-
 /*
  * writing shadow tlb entry to host TLB
  */
@@ -231,15 +222,15 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
 		ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID);
 	}
 
-	/* Already invalidated in between */
-	if (!(ref->flags & E500_TLB_VALID))
-		return;
-
-	/* Guest tlbe is backed by at most one host tlbe per shadow pid. */
-	kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
+	/*
+	 * If TLB entry is still valid then it's a TLB0 entry, and thus
+	 * backed by at most one host tlbe per shadow pid
+	 */
+	if (ref->flags & E500_TLB_VALID)
+		kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
 
 	/* Mark the TLB as not backed by the host anymore */
-	ref->flags &= ~E500_TLB_VALID;
+	ref->flags = 0;
 }
 
 static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
@@ -249,10 +240,13 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
 
 static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 					 struct kvm_book3e_206_tlb_entry *gtlbe,
-					 pfn_t pfn)
+					 pfn_t pfn, unsigned int wimg)
 {
 	ref->pfn = pfn;
-	ref->flags |= E500_TLB_VALID;
+	ref->flags = E500_TLB_VALID;
+
+	/* Use guest supplied MAS2_G and MAS2_E */
+	ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
 
 	/* Mark the page accessed */
 	kvm_set_pfn_accessed(pfn);
@@ -316,8 +310,7 @@ static void kvmppc_e500_setup_stlbe(
 
 	/* Force IPROT=0 for all guest mappings. */
 	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
-	stlbe->mas2 = (gvaddr & MAS2_EPN) |
-		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+	stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
 	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
 			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
 
@@ -339,6 +332,10 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	int ret = 0;
 	unsigned long mmu_seq;
 	struct kvm *kvm = vcpu_e500->vcpu.kvm;
+	unsigned long tsize_pages = 0;
+	pte_t *ptep;
+	unsigned int wimg = 0;
+	pgd_t *pgdir;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_notifier_seq;
@@ -405,7 +402,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 			 */
 
 			for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
-				unsigned long gfn_start, gfn_end, tsize_pages;
+				unsigned long gfn_start, gfn_end;
 				tsize_pages = 1 << (tsize - 2);
 
 				gfn_start = gfn & ~(tsize_pages - 1);
@@ -447,11 +444,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	}
 
 	if (likely(!pfnmap)) {
-		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+		tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
 		pfn = gfn_to_pfn_memslot(slot, gfn);
 		if (is_error_noslot_pfn(pfn)) {
-			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
-					(long)gfn);
+			if (printk_ratelimit())
+				pr_err("%s: real page not found for gfn %lx\n",
+				       __func__, (long)gfn);
 			return -EINVAL;
 		}
 
@@ -466,7 +464,18 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 		goto out;
 	}
 
-	kvmppc_e500_ref_setup(ref, gtlbe, pfn);
+
+	pgdir = vcpu_e500->vcpu.arch.pgdir;
+	ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
+	if (pte_present(*ptep))
+		wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+	else {
+		if (printk_ratelimit())
+			pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
+				__func__, (long)gfn, pfn);
+		return -EINVAL;
+	}
+	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
 
 	kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
 				ref, gvaddr, stlbe);
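
The new lookup above derives the mapping attributes from the Linux pte instead of fabricating them, so the shadow TLB entry inherits the host's WIMGE bits for the page. A minimal userspace sketch of the extraction; the shift and mask values stand in for the kernel's PTE_WIMGE_SHIFT and MAS2_WIMGE_MASK and are not the real ones:

#include <stdio.h>

#define PTE_WIMGE_SHIFT 7	/* illustrative, not the kernel's value */
#define MAS2_WIMGE_MASK 0x1f	/* illustrative, not the kernel's value */

static unsigned int pte_wimge(unsigned long pte)
{
	return (pte >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
}

int main(void)
{
	unsigned long pte = 0x8ul << PTE_WIMGE_SHIFT;	/* pretend pte, I bit set */

	printf("wimge = %#x\n", pte_wimge(pte));
	return 0;
}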

+ 4 - 0
arch/powerpc/kvm/e500mc.c

@@ -16,6 +16,8 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/export.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -391,3 +393,5 @@ static void __exit kvmppc_e500mc_exit(void)
 
 module_init(kvmppc_e500mc_init);
 module_exit(kvmppc_e500mc_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");

+ 0 - 1
arch/powerpc/kvm/emulate.c

@@ -219,7 +219,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
  * lmw
  * stmw
  *
- * XXX is_bigendian should depend on MMU mapping or MSR[LE]
  */
 /* XXX Should probably auto-generate instruction decoding for a particular core
  * from opcode tables in the future. */

+ 1 - 0
arch/powerpc/kvm/mpic.c

@@ -1635,6 +1635,7 @@ static void mpic_destroy(struct kvm_device *dev)
 
 	dev->kvm->arch.mpic = NULL;
 	kfree(opp);
+	kfree(dev);
 }
 
 static int mpic_set_default_irq_routing(struct openpic *opp)

+ 35 - 23
arch/powerpc/kvm/powerpc.c

@@ -68,14 +68,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  */
 int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 {
-	int r = 1;
+	int r;
+
+	WARN_ON(irqs_disabled());
+	hard_irq_disable();
 
-	WARN_ON_ONCE(!irqs_disabled());
 	while (true) {
 		if (need_resched()) {
 			local_irq_enable();
 			cond_resched();
-			local_irq_disable();
+			hard_irq_disable();
 			continue;
 		}
 
@@ -101,7 +103,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			local_irq_enable();
 			trace_kvm_check_requests(vcpu);
 			r = kvmppc_core_check_requests(vcpu);
-			local_irq_disable();
+			hard_irq_disable();
 			if (r > 0)
 				continue;
 			break;
@@ -113,22 +115,12 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			continue;
 		}
 
-#ifdef CONFIG_PPC64
-		/* lazy EE magic */
-		hard_irq_disable();
-		if (lazy_irq_pending()) {
-			/* Got an interrupt in between, try again */
-			local_irq_enable();
-			local_irq_disable();
-			kvm_guest_exit();
-			continue;
-		}
-#endif
-
 		kvm_guest_enter();
-		break;
+		return 1;
 	}
 
+	/* return to host */
+	local_irq_enable();
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
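
After this rework the interrupt-disable dance lives entirely inside kvmppc_prepare_to_enter: callers invoke it with interrupts enabled; a return of 1 means hard interrupts are now off and the guest may be entered, while a return of <= 0 means interrupts were re-enabled and the caller should head back to the host, which is exactly what the booke.c hunk earlier relies on. A toy model of that contract, with all names illustrative:

#include <stdio.h>
#include <stdbool.h>

static bool irqs_on = true;

/* Stand-in for kvmppc_prepare_to_enter: 1 => irqs left hard-disabled,
 * enter the guest; <= 0 => irqs re-enabled, exit to the host. */
static int prepare_to_enter(int signal_pending)
{
	irqs_on = false;		/* hard_irq_disable() */
	if (signal_pending) {
		irqs_on = true;		/* local_irq_enable() on the bail-out path */
		return -4;		/* "go back to host" code */
	}
	return 1;
}

int main(void)
{
	int s = prepare_to_enter(0);
	printf("s=%d irqs_on=%d (enter guest)\n", s, irqs_on);
	s = prepare_to_enter(1);
	printf("s=%d irqs_on=%d (back to host)\n", s, irqs_on);
	return 0;
}
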
@@ -656,14 +648,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 		kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
 		break;
 	case KVM_MMIO_REG_FPR:
-		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
 		break;
 #ifdef CONFIG_PPC_BOOK3S
 	case KVM_MMIO_REG_QPR:
 		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
 		break;
 	case KVM_MMIO_REG_FQPR:
-		vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
 		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
 		break;
 #endif
@@ -673,9 +665,19 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 }
 
 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                       unsigned int rt, unsigned int bytes, int is_bigendian)
+		       unsigned int rt, unsigned int bytes,
+		       int is_default_endian)
 {
 	int idx, ret;
+	int is_bigendian;
+
+	if (kvmppc_need_byteswap(vcpu)) {
+		/* Default endianness is "little endian". */
+		is_bigendian = !is_default_endian;
+	} else {
+		/* Default endianness is "big endian". */
+		is_bigendian = is_default_endian;
+	}
 
 	if (bytes > sizeof(run->mmio.data)) {
 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
@@ -711,21 +713,31 @@ EXPORT_SYMBOL_GPL(kvmppc_handle_load);
 
 /* Same as above, but sign extends */
 int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                        unsigned int rt, unsigned int bytes, int is_bigendian)
+			unsigned int rt, unsigned int bytes,
+			int is_default_endian)
 {
 	int r;
 
 	vcpu->arch.mmio_sign_extend = 1;
-	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
+	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian);
 
 	return r;
 }
 
 int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                        u64 val, unsigned int bytes, int is_bigendian)
+			u64 val, unsigned int bytes, int is_default_endian)
 {
 	void *data = run->mmio.data;
 	int idx, ret;
+	int is_bigendian;
+
+	if (kvmppc_need_byteswap(vcpu)) {
+		/* Default endianness is "little endian". */
+		is_bigendian = !is_default_endian;
+	} else {
+		/* Default endianness is "big endian". */
+		is_bigendian = is_default_endian;
+	}
 
 	if (bytes > sizeof(run->mmio.data)) {
 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
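
Both kvmppc_handle_load and kvmppc_handle_store now derive the effective endianness from two inputs: the instruction's default endianness and whether the vCPU is currently cross-endian (kvmppc_need_byteswap()). The full truth table, as a standalone program:

#include <stdio.h>

static int effective_bigendian(int is_default_endian, int need_byteswap)
{
	/* Cross-endian flips the instruction's default; otherwise keep it. */
	return need_byteswap ? !is_default_endian : is_default_endian;
}

int main(void)
{
	int d, s;

	for (d = 0; d <= 1; d++)
		for (s = 0; s <= 1; s++)
			printf("default_endian=%d byteswap=%d -> bigendian=%d\n",
			       d, s, effective_bigendian(d, s));
	return 0;
}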

+ 14 - 1
arch/s390/include/asm/kvm_host.h

@@ -106,9 +106,22 @@ struct kvm_s390_sie_block {
 	__u64	gbea;			/* 0x0180 */
 	__u8	reserved188[24];	/* 0x0188 */
 	__u32	fac;			/* 0x01a0 */
-	__u8	reserved1a4[92];	/* 0x01a4 */
+	__u8	reserved1a4[68];	/* 0x01a4 */
+	__u64	itdba;			/* 0x01e8 */
+	__u8	reserved1f0[16];	/* 0x01f0 */
 } __attribute__((packed));
 
+struct kvm_s390_itdb {
+	__u8	data[256];
+} __packed;
+
+struct sie_page {
+	struct kvm_s390_sie_block sie_block;
+	__u8 reserved200[1024];		/* 0x0200 */
+	struct kvm_s390_itdb itdb;	/* 0x0600 */
+	__u8 reserved700[2304];		/* 0x0700 */
+} __packed;
+
 struct kvm_vcpu_stat {
 	u32 exit_userspace;
 	u32 exit_null;
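
The s390 layout above is offset-sensitive: the ITDB must sit at 0x600 inside the same 4K page as the SIE control block, which is why sie_block, itdba, and the padding are sized the way they are. A host-side mock with the same arithmetic; the mock sie_block is just 0x200 bytes of padding, not the real structure:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct mock_sie_block { uint8_t data[0x200]; };
struct mock_itdb { uint8_t data[256]; };

struct mock_sie_page {
	struct mock_sie_block sie_block;	/* 0x0000 */
	uint8_t reserved200[1024];		/* 0x0200 */
	struct mock_itdb itdb;			/* 0x0600 */
	uint8_t reserved700[2304];		/* 0x0700 */
};

int main(void)
{
	printf("itdb at %#zx, page size %zu\n",
	       offsetof(struct mock_sie_page, itdb),
	       sizeof(struct mock_sie_page));	/* expect 0x600 and 4096 */
	return 0;
}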

+ 11 - 0
arch/s390/kvm/intercept.c

@@ -112,6 +112,17 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
 	vcpu->stat.exit_program_interruption++;
+
+	/* Restore ITDB to Program-Interruption TDB in guest memory */
+	if (IS_TE_ENABLED(vcpu) &&
+	    !(current->thread.per_flags & PER_FLAG_NO_TE) &&
+	    IS_ITDB_VALID(vcpu)) {
+		copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba,
+			      sizeof(struct kvm_s390_itdb));
+		memset((void *) vcpu->arch.sie_block->itdba, 0,
+		       sizeof(struct kvm_s390_itdb));
+	}
+
 	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
 	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
 }

+ 11 - 6
arch/s390/kvm/kvm-s390.c

@@ -395,6 +395,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 						    CPUSTAT_STOPPED |
 						    CPUSTAT_GED);
 	vcpu->arch.sie_block->ecb   = 6;
+	if (test_vfacility(50) && test_vfacility(73))
+		vcpu->arch.sie_block->ecb |= 0x10;
+
 	vcpu->arch.sie_block->ecb2  = 8;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
@@ -411,6 +414,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 				      unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
+	struct sie_page *sie_page;
 	int rc = -EINVAL;
 
 	if (id >= KVM_MAX_VCPUS)
@@ -422,12 +426,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	if (!vcpu)
 		goto out;
 
-	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
-					get_zeroed_page(GFP_KERNEL);
-
-	if (!vcpu->arch.sie_block)
+	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+	if (!sie_page)
 		goto out_free_cpu;
 
+	vcpu->arch.sie_block = &sie_page->sie_block;
+	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
 		if (!kvm->arch.sca) {
@@ -1182,8 +1187,8 @@ static int __init kvm_s390_init(void)
 		return -ENOMEM;
 	}
 	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
-	vfacilities[0] &= 0xff82fff3f47c0000UL;
-	vfacilities[1] &= 0x001c000000000000UL;
+	vfacilities[0] &= 0xff82fff3f4fc2000UL;
+	vfacilities[1] &= 0x005c000000000000UL;
 	return 0;
 }
 

+ 6 - 0
arch/s390/kvm/kvm-s390.h

@@ -26,6 +26,12 @@ extern unsigned long *vfacilities;
 
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
+/* Transactional Memory Execution related macros */
+#define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
+#define TDB_ADDR		0x1800UL
+#define TDB_FORMAT1		1
+#define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
 do { \
 	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \

+ 12 - 21
arch/x86/include/asm/kvm_para.h

@@ -85,28 +85,9 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 	return ret;
 }
 
-static inline uint32_t kvm_cpuid_base(void)
-{
-	if (boot_cpu_data.cpuid_level < 0)
-		return 0;	/* So we don't blow up on old processors */
-
-	if (cpu_has_hypervisor)
-		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
-
-	return 0;
-}
-
-static inline bool kvm_para_available(void)
-{
-	return kvm_cpuid_base() != 0;
-}
-
-static inline unsigned int kvm_arch_para_features(void)
-{
-	return cpuid_eax(KVM_CPUID_FEATURES);
-}
-
 #ifdef CONFIG_KVM_GUEST
+bool kvm_para_available(void);
+unsigned int kvm_arch_para_features(void);
 void __init kvm_guest_init(void);
 void kvm_async_pf_task_wait(u32 token);
 void kvm_async_pf_task_wake(u32 token);
@@ -126,6 +107,16 @@ static inline void kvm_spinlock_init(void)
 #define kvm_async_pf_task_wait(T) do {} while(0)
 #define kvm_async_pf_task_wake(T) do {} while(0)
 
+static inline bool kvm_para_available(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
 static inline u32 kvm_read_and_reset_pf_reason(void)
 {
 	return 0;

+ 32 - 0
arch/x86/kernel/kvm.c

@@ -500,6 +500,38 @@ void __init kvm_guest_init(void)
 #endif
 }
 
+static noinline uint32_t __kvm_cpuid_base(void)
+{
+	if (boot_cpu_data.cpuid_level < 0)
+		return 0;	/* So we don't blow up on old processors */
+
+	if (cpu_has_hypervisor)
+		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
+
+	return 0;
+}
+
+static inline uint32_t kvm_cpuid_base(void)
+{
+	static int kvm_cpuid_base = -1;
+
+	if (kvm_cpuid_base == -1)
+		kvm_cpuid_base = __kvm_cpuid_base();
+
+	return kvm_cpuid_base;
+}
+
+bool kvm_para_available(void)
+{
+	return kvm_cpuid_base() != 0;
+}
+EXPORT_SYMBOL_GPL(kvm_para_available);
+
+unsigned int kvm_arch_para_features(void)
+{
+	return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
+}
+
 static uint32_t __init kvm_detect(void)
 {
 	return kvm_cpuid_base();
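
The pair of functions above replaces repeated CPUID probing with a one-time memoized lookup, and kvm_arch_para_features() can then OR the cached base into the features leaf instead of assuming 0x40000001 — which is how a leaf like 0x40000101 is reached when the KVM signature lives at an offset base. A userspace sketch of the same memoization; the probe function is a stand-in for hypervisor_cpuid_base():

#include <stdio.h>
#include <stdint.h>

#define KVM_CPUID_FEATURES 0x40000001

/* Stand-in for hypervisor_cpuid_base(): pretend the KVM signature was
 * found at an offset base rather than the usual 0x40000000. */
static uint32_t probe_cpuid_base(void)
{
	return 0x40000100;
}

static uint32_t kvm_cpuid_base(void)
{
	static int32_t base = -1;	/* probed once, then cached */

	if (base == -1)
		base = (int32_t)probe_cpuid_base();
	return (uint32_t)base;
}

int main(void)
{
	printf("features leaf: %#x\n", kvm_cpuid_base() | KVM_CPUID_FEATURES);
	/* prints 0x40000101, matching the commit subject in this pull */
	return 0;
}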

+ 8 - 0
arch/x86/kvm/cpuid.h

@@ -72,4 +72,12 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
 	return best && (best->ecx & bit(X86_FEATURE_PCID));
 }
 
+static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	return best && (best->ecx & bit(X86_FEATURE_X2APIC));
+}
+
 #endif

+ 1 - 1
arch/x86/kvm/lapic.h

@@ -65,7 +65,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);

+ 5 - 4
arch/x86/kvm/vmx.c

@@ -4392,7 +4392,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u64 msr;
+	struct msr_data apic_base_msr;
 
 	vmx->rmode.vm86_active = 0;
 
@@ -4400,10 +4400,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
-	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+	apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 	if (kvm_vcpu_is_bsp(&vmx->vcpu))
-		msr |= MSR_IA32_APICBASE_BSP;
-	kvm_set_apic_base(&vmx->vcpu, msr);
+		apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+	apic_base_msr.host_initiated = true;
+	kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
 
 	vmx_segment_cache_clear(vmx);
 

+ 31 - 10
arch/x86/kvm/x86.c

@@ -257,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
-{
-	/* TODO: reserve bits check */
-	kvm_lapic_set_base(vcpu, data);
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	u64 old_state = vcpu->arch.apic_base &
+		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+	u64 new_state = msr_info->data &
+		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
+		0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+
+	if (!msr_info->host_initiated &&
+	    ((msr_info->data & reserved_bits) != 0 ||
+	     new_state == X2APIC_ENABLE ||
+	     (new_state == MSR_IA32_APICBASE_ENABLE &&
+	      old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
+	     (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
+	      old_state == 0)))
+		return 1;
+
+	kvm_lapic_set_base(vcpu, msr_info->data);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
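The rewritten kvm_set_apic_base validates the xAPIC/x2APIC enable bits as a state machine: x2APIC without xAPIC is never legal, and the guest may not jump straight from x2APIC back to xAPIC, or from disabled into x2APIC. A standalone model of just that part (reserved-bit handling omitted; the bit positions match the architectural MSR):

#include <stdio.h>
#include <stdint.h>

#define APICBASE_ENABLE (1ull << 11)	/* MSR_IA32_APICBASE_ENABLE */
#define X2APIC_ENABLE   (1ull << 10)	/* x2APIC mode (EXTD) */

static int transition_ok(uint64_t old_state, uint64_t new_state)
{
	uint64_t mask = APICBASE_ENABLE | X2APIC_ENABLE;

	old_state &= mask;
	new_state &= mask;
	if (new_state == X2APIC_ENABLE)
		return 0;	/* x2APIC without xAPIC: never valid */
	if (new_state == APICBASE_ENABLE &&
	    old_state == (APICBASE_ENABLE | X2APIC_ENABLE))
		return 0;	/* x2APIC -> xAPIC without disabling first */
	if (new_state == (APICBASE_ENABLE | X2APIC_ENABLE) && old_state == 0)
		return 0;	/* disabled -> x2APIC directly */
	return 1;
}

int main(void)
{
	printf("disabled -> x2apic: %d\n",
	       transition_ok(0, APICBASE_ENABLE | X2APIC_ENABLE));
	printf("xapic    -> x2apic: %d\n",
	       transition_ok(APICBASE_ENABLE, APICBASE_ENABLE | X2APIC_ENABLE));
	return 0;
}
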
@@ -1840,6 +1856,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (__copy_to_user((void __user *)addr, instructions, 4))
 			return 1;
 		kvm->arch.hv_hypercall = data;
+		mark_page_dirty(kvm, gfn);
 		break;
 	}
 	case HV_X64_MSR_REFERENCE_TSC: {
@@ -1868,19 +1885,21 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
 	case HV_X64_MSR_APIC_ASSIST_PAGE: {
+		u64 gfn;
 		unsigned long addr;
 
 		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
 			vcpu->arch.hv_vapic = data;
 			break;
 		}
-		addr = gfn_to_hva(vcpu->kvm, data >>
-				  HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
+		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(vcpu->kvm, gfn);
 		if (kvm_is_error_hva(addr))
 			return 1;
 		if (__clear_user((void __user *)addr, PAGE_SIZE))
 			return 1;
 		vcpu->arch.hv_vapic = data;
+		mark_page_dirty(vcpu->kvm, gfn);
 		break;
 	}
 	case HV_X64_MSR_EOI:
@@ -2006,8 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case 0x200 ... 0x2ff:
 		return set_msr_mtrr(vcpu, msr, data);
 	case MSR_IA32_APICBASE:
-		kvm_set_apic_base(vcpu, data);
-		break;
+		return kvm_set_apic_base(vcpu, msr_info);
 	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
 		return kvm_x2apic_msr_write(vcpu, msr, data);
 	case MSR_IA32_TSCDEADLINE:
@@ -2598,10 +2616,10 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_GET_TSC_KHZ:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_HYPERV_TIME:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
-	case KVM_CAP_HYPERV_TIME:
 #endif
 		r = 1;
 		break;
@@ -6409,6 +6427,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
+	struct msr_data apic_base_msr;
 	int mmu_reset_needed = 0;
 	int pending_vec, max_bits, idx;
 	struct desc_ptr dt;
@@ -6432,7 +6451,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
 	kvm_x86_ops->set_efer(vcpu, sregs->efer);
-	kvm_set_apic_base(vcpu, sregs->apic_base);
+	apic_base_msr.data = sregs->apic_base;
+	apic_base_msr.host_initiated = true;
+	kvm_set_apic_base(vcpu, &apic_base_msr);
 
 	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
 	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);

+ 9 - 2
drivers/s390/kvm/virtio_ccw.c

@@ -642,8 +642,15 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev,
 	     (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))) {
 		/* OK */
 	}
-	if (irb_is_error(irb))
-		vcdev->err = -EIO; /* XXX - use real error */
+	if (irb_is_error(irb)) {
+		/* Command reject? */
+		if ((scsw_dstat(&irb->scsw) & DEV_STAT_UNIT_CHECK) &&
+		    (irb->ecw[0] & SNS0_CMD_REJECT))
+			vcdev->err = -EOPNOTSUPP;
+		else
+			/* Map everything else to -EIO. */
+			vcdev->err = -EIO;
+	}
 	if (vcdev->curr_io & activity) {
 		switch (activity) {
 		case VIRTIO_CCW_DOING_READ_FEAT:
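
The virtio_ccw change above stops collapsing every failed IRB into -EIO: a unit check carrying the command-reject sense bit becomes -EOPNOTSUPP, letting callers distinguish "device does not implement this command" from a genuine I/O failure. A sketch of the mapping with stand-in bit values (not the real DEV_STAT_UNIT_CHECK/SNS0_CMD_REJECT definitions):

#include <stdio.h>
#include <errno.h>

#define DEV_STAT_UNIT_CHECK 0x02	/* stand-in bit value */
#define SNS0_CMD_REJECT     0x80	/* stand-in bit value */

static int map_irb_error(unsigned char dstat, unsigned char ecw0)
{
	if ((dstat & DEV_STAT_UNIT_CHECK) && (ecw0 & SNS0_CMD_REJECT))
		return -EOPNOTSUPP;	/* device rejected the command */
	return -EIO;			/* everything else */
}

int main(void)
{
	printf("cmd reject -> %d, other error -> %d\n",
	       map_irb_error(DEV_STAT_UNIT_CHECK, SNS0_CMD_REJECT),
	       map_irb_error(0, 0));
	return 0;
}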