
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm into next

Pull KVM updates from Paolo Bonzini:
 "At over 200 commits, covering almost all supported architectures, this
  was a pretty active cycle for KVM.  Changes include:

   - a lot of s390 changes: optimizations, support for migration, GDB
     support and more

   - ARM changes are pretty small: support for the PSCI 0.2 hypercall
     interface on both the guest and the host (the latter acked by
     Catalin)

   - initial POWER8 and little-endian host support

   - support for running u-boot on embedded POWER targets

   - pretty large changes to MIPS too, completing the userspace
     interface and improving the handling of virtualized timer hardware

   - for x86, a larger set of changes is scheduled for 3.17.  Still, we
     have a few emulator bugfixes and support for running nested
     fully-virtualized Xen guests (para-virtualized Xen guests have
     always worked).  And some optimizations too.

  The only missing architecture here is ia64.  It's not a coincidence
  that support for KVM on ia64 is scheduled for removal in 3.17"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (203 commits)
  KVM: add missing cleanup_srcu_struct
  KVM: PPC: Book3S PR: Rework SLB switching code
  KVM: PPC: Book3S PR: Use SLB entry 0
  KVM: PPC: Book3S HV: Fix machine check delivery to guest
  KVM: PPC: Book3S HV: Work around POWER8 performance monitor bugs
  KVM: PPC: Book3S HV: Make sure we don't miss dirty pages
  KVM: PPC: Book3S HV: Fix dirty map for hugepages
  KVM: PPC: Book3S HV: Put huge-page HPTEs in rmap chain for base address
  KVM: PPC: Book3S HV: Fix check for running inside guest in global_invalidates()
  KVM: PPC: Book3S: Move KVM_REG_PPC_WORT to an unused register number
  KVM: PPC: Book3S: Add ONE_REG register names that were missed
  KVM: PPC: Add CAP to indicate hcall fixes
  KVM: PPC: MPIC: Reset IRQ source private members
  KVM: PPC: Graciously fail broken LE hypercalls
  PPC: ePAPR: Fix hypercall on LE guest
  KVM: PPC: BOOK3S: Remove open coded make_dsisr in alignment handler
  KVM: PPC: BOOK3S: Always use the saved DAR value
  PPC: KVM: Make NX bit available with magic page
  KVM: PPC: Disable NX for old magic page using guests
  KVM: PPC: BOOK3S: HV: Add mixed page-size support for guest
  ...
Linus Torvalds, 11 years ago
Commit: b05d59dfce
100 files changed, 5863 insertions(+), 1204 deletions(-)
  1. 36 1
      Documentation/devicetree/bindings/arm/psci.txt
  2. 29 4
      Documentation/virtual/kvm/api.txt
  3. 26 0
      Documentation/virtual/kvm/devices/vm.txt
  4. 14 0
      Documentation/virtual/kvm/ppc-pv.txt
  5. 2 0
      Documentation/virtual/kvm/s390-diag.txt
  6. 1 1
      arch/arm/include/asm/kvm_host.h
  7. 5 1
      arch/arm/include/asm/kvm_psci.h
  8. 5 2
      arch/arm/include/asm/psci.h
  9. 6 4
      arch/arm/include/uapi/asm/kvm.h
  10. 159 37
      arch/arm/kernel/psci.c
  11. 33 0
      arch/arm/kernel/psci_smp.c
  12. 1 0
      arch/arm/kvm/arm.c
  13. 7 3
      arch/arm/kvm/handle_exit.c
  14. 216 19
      arch/arm/kvm/psci.c
  15. 2 0
      arch/arm64/include/asm/cpu_ops.h
  16. 1 0
      arch/arm64/include/asm/cputype.h
  17. 1 1
      arch/arm64/include/asm/kvm_host.h
  18. 5 1
      arch/arm64/include/asm/kvm_psci.h
  19. 1 1
      arch/arm64/include/asm/psci.h
  20. 8 5
      arch/arm64/include/uapi/asm/kvm.h
  21. 194 37
      arch/arm64/kernel/psci.c
  22. 22 0
      arch/arm64/kernel/smp.c
  23. 2 0
      arch/arm64/kvm/guest.c
  24. 7 3
      arch/arm64/kvm/handle_exit.c
  25. 2 0
      arch/arm64/kvm/sys_regs_generic_v8.c
  26. 6 6
      arch/mips/Kconfig
  27. 148 35
      arch/mips/include/asm/kvm_host.h
  28. 35 0
      arch/mips/include/uapi/asm/kvm.h
  29. 0 32
      arch/mips/kvm/kvm_locore.S
  30. 73 72
      arch/mips/kvm/kvm_mips.c
  31. 9 6
      arch/mips/kvm/kvm_mips_dyntrans.c
  32. 532 25
      arch/mips/kvm/kvm_mips_emul.c
  33. 40 37
      arch/mips/kvm/kvm_tlb.c
  34. 74 12
      arch/mips/kvm/kvm_trap_emul.c
  35. 1 0
      arch/mips/mm/cache.c
  36. 2 12
      arch/mips/mti-malta/malta-time.c
  37. 34 0
      arch/powerpc/include/asm/disassemble.h
  38. 10 8
      arch/powerpc/include/asm/kvm_asm.h
  39. 2 1
      arch/powerpc/include/asm/kvm_book3s.h
  40. 123 23
      arch/powerpc/include/asm/kvm_book3s_64.h
  41. 2 0
      arch/powerpc/include/asm/kvm_book3s_asm.h
  42. 0 5
      arch/powerpc/include/asm/kvm_booke.h
  43. 8 1
      arch/powerpc/include/asm/kvm_host.h
  44. 79 1
      arch/powerpc/include/asm/kvm_ppc.h
  45. 7 5
      arch/powerpc/include/asm/reg.h
  46. 1 0
      arch/powerpc/include/asm/reg_booke.h
  47. 1 1
      arch/powerpc/include/uapi/asm/kvm.h
  48. 6 0
      arch/powerpc/include/uapi/asm/kvm_para.h
  49. 1 33
      arch/powerpc/kernel/align.c
  50. 10 1
      arch/powerpc/kernel/asm-offsets.c
  51. 3 2
      arch/powerpc/kernel/epapr_paravirt.c
  52. 1 1
      arch/powerpc/kernel/kvm.c
  53. 3 0
      arch/powerpc/kernel/paca.c
  54. 1 1
      arch/powerpc/kvm/Kconfig
  55. 70 36
      arch/powerpc/kvm/book3s.c
  56. 23 18
      arch/powerpc/kvm/book3s_32_mmu.c
  57. 2 2
      arch/powerpc/kvm/book3s_32_mmu_host.c
  58. 23 16
      arch/powerpc/kvm/book3s_64_mmu.c
  59. 6 9
      arch/powerpc/kvm/book3s_64_mmu_host.c
  60. 79 37
      arch/powerpc/kvm/book3s_64_mmu_hv.c
  61. 40 47
      arch/powerpc/kvm/book3s_64_slb.S
  62. 104 52
      arch/powerpc/kvm/book3s_emulate.c
  63. 1 0
      arch/powerpc/kvm/book3s_exports.c
  64. 18 30
      arch/powerpc/kvm/book3s_hv.c
  65. 2 1
      arch/powerpc/kvm/book3s_hv_rm_mmu.c
  66. 58 2
      arch/powerpc/kvm/book3s_hv_rmhandlers.S
  67. 21 2
      arch/powerpc/kvm/book3s_interrupts.S
  68. 9 7
      arch/powerpc/kvm/book3s_paired_singles.c
  69. 195 43
      arch/powerpc/kvm/book3s_pr.c
  70. 12 4
      arch/powerpc/kvm/book3s_pr_papr.c
  71. 29 0
      arch/powerpc/kvm/book3s_rtas.c
  72. 25 0
      arch/powerpc/kvm/book3s_segment.S
  73. 15 0
      arch/powerpc/kvm/e500_emulate.c
  74. 12 12
      arch/powerpc/kvm/emulate.c
  75. 4 1
      arch/powerpc/kvm/mpic.c
  76. 53 11
      arch/powerpc/kvm/powerpc.c
  77. 1 1
      arch/powerpc/kvm/trace_pr.h
  78. 1 1
      arch/powerpc/mm/slb.c
  79. 14 0
      arch/s390/include/asm/ctl_reg.h
  80. 149 14
      arch/s390/include/asm/kvm_host.h
  81. 6 4
      arch/s390/include/asm/lowcore.h
  82. 2 0
      arch/s390/include/asm/mmu.h
  83. 1 0
      arch/s390/include/asm/mmu_context.h
  84. 2 1
      arch/s390/include/asm/pgalloc.h
  85. 81 88
      arch/s390/include/asm/pgtable.h
  86. 44 0
      arch/s390/include/asm/ptrace.h
  87. 7 1
      arch/s390/include/asm/sclp.h
  88. 28 0
      arch/s390/include/uapi/asm/kvm.h
  89. 245 0
      arch/s390/include/uapi/asm/sie.h
  90. 11 3
      arch/s390/kernel/asm-offsets.c
  91. 2 2
      arch/s390/kernel/entry.S
  92. 2 2
      arch/s390/kernel/entry64.S
  93. 3 1
      arch/s390/kvm/Makefile
  94. 6 13
      arch/s390/kvm/diag.c
  95. 726 0
      arch/s390/kvm/gaccess.c
  96. 300 79
      arch/s390/kvm/gaccess.h
  97. 482 0
      arch/s390/kvm/guestdbg.c
  98. 206 16
      arch/s390/kvm/intercept.c
  99. 299 101
      arch/s390/kvm/interrupt.c
  100. 447 107
      arch/s390/kvm/kvm-s390.c

+ 36 - 1
Documentation/devicetree/bindings/arm/psci.txt

@@ -21,7 +21,15 @@ to #0.
 
 Main node required properties:
 
- - compatible    : Must be "arm,psci"
+ - compatible    : should contain at least one of:
+
+				 * "arm,psci" : for implementations complying to PSCI versions prior to
+					0.2. For these cases function IDs must be provided.
+
+				 * "arm,psci-0.2" : for implementations complying to PSCI 0.2. Function
+					IDs are not required and should be ignored by an OS with PSCI 0.2
+					support, but are permitted to be present for compatibility with
+					existing software when "arm,psci" is later in the compatible list.
 
  - method        : The method of calling the PSCI firmware. Permitted
                    values are:
@@ -45,6 +53,8 @@ Main node optional properties:
 
 Example:
 
+Case 1: PSCI v0.1 only.
+
 	psci {
 		compatible	= "arm,psci";
 		method		= "smc";
@@ -53,3 +63,28 @@ Example:
 		cpu_on		= <0x95c10002>;
 		migrate		= <0x95c10003>;
 	};
+
+
+Case 2: PSCI v0.2 only
+
+	psci {
+		compatible	= "arm,psci-0.2";
+		method		= "smc";
+	};
+
+Case 3: PSCI v0.2 and PSCI v0.1.
+
+	A DTB may provide IDs for use by kernels without PSCI 0.2 support,
+	enabling firmware and hypervisors to support existing and new kernels.
+	These IDs will be ignored by kernels with PSCI 0.2 support, which will
+	use the standard PSCI 0.2 IDs exclusively.
+
+	psci {
+		compatible = "arm,psci-0.2", "arm,psci";
+		method = "hvc";
+
+		cpu_on = < arbitrary value >;
+		cpu_off = < arbitrary value >;
+
+		...
+	};

+ 29 - 4
Documentation/virtual/kvm/api.txt

@@ -1794,6 +1794,11 @@ registers, find a list below:
  PPC   | KVM_REG_PPC_MMCR0     | 64
  PPC   | KVM_REG_PPC_MMCR1     | 64
  PPC   | KVM_REG_PPC_MMCRA     | 64
+  PPC   | KVM_REG_PPC_MMCR2     | 64
+  PPC   | KVM_REG_PPC_MMCRS     | 64
+  PPC   | KVM_REG_PPC_SIAR      | 64
+  PPC   | KVM_REG_PPC_SDAR      | 64
+  PPC   | KVM_REG_PPC_SIER      | 64
  PPC   | KVM_REG_PPC_PMC1      | 32
  PPC   | KVM_REG_PPC_PMC2      | 32
  PPC   | KVM_REG_PPC_PMC3      | 32
@@ -1868,6 +1873,7 @@ registers, find a list below:
  PPC   | KVM_REG_PPC_PPR	| 64
  PPC   | KVM_REG_PPC_ARCH_COMPAT 32
  PPC   | KVM_REG_PPC_DABRX     | 32
+  PPC   | KVM_REG_PPC_WORT      | 64
  PPC   | KVM_REG_PPC_TM_GPR0	| 64
          ...
  PPC   | KVM_REG_PPC_TM_GPR31	| 64
@@ -2211,6 +2217,8 @@ KVM_S390_SIGP_STOP (vcpu) - sigp restart
 KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
 KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
 KVM_S390_RESTART (vcpu) - restart
+KVM_S390_INT_CLOCK_COMP (vcpu) - clock comparator interrupt
+KVM_S390_INT_CPU_TIMER (vcpu) - CPU timer interrupt
 KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
 			   parameters in parm and parm64
 KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
@@ -2314,8 +2322,8 @@ struct kvm_create_device {
 
 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL
-Type: device ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
@@ -2340,8 +2348,8 @@ struct kvm_device_attr {
 
 4.81 KVM_HAS_DEVICE_ATTR
 
-Capability: KVM_CAP_DEVICE_CTRL
-Type: device ioctl
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
 Parameters: struct kvm_device_attr
 Returns: 0 on success, -1 on error
 Errors:
@@ -2376,6 +2384,8 @@ Possible features:
 	  Depends on KVM_CAP_ARM_PSCI.
 	- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
 	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
+	- KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
+	  Depends on KVM_CAP_ARM_PSCI_0_2.
 
 
 4.83 KVM_ARM_PREFERRED_TARGET
@@ -2738,6 +2748,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
 external interrupt has just been delivered into the guest. User space
 should put the acknowledged interrupt vector into the 'epr' field.
 
+		/* KVM_EXIT_SYSTEM_EVENT */
+		struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN       1
+#define KVM_SYSTEM_EVENT_RESET          2
+			__u32 type;
+			__u64 flags;
+		} system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In case of ARM/ARM64, this is triggered using
+HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
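
As a rough illustration of how the two ARM additions above fit together from
userspace, the sketch below enables the new PSCI v0.2 vcpu feature and handles
the new exit reason. This is not taken from any existing VMM; vcpu_fd and
target are assumed to have been obtained via the usual KVM_CREATE_VCPU and
KVM_ARM_PREFERRED_TARGET calls, and the helper names are invented for the
example.

/* Hedged sketch, not from the kernel tree: enable KVM_ARM_VCPU_PSCI_0_2
 * (after KVM_CHECK_EXTENSION reported KVM_CAP_ARM_PSCI_0_2) and react to
 * KVM_EXIT_SYSTEM_EVENT.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int vcpu_init_with_psci_0_2(int vcpu_fd, __u32 target)
{
	struct kvm_vcpu_init init;

	memset(&init, 0, sizeof(init));
	init.target = target;
	init.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
	return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
}

/* Returns 0 to shut the VM down, 1 to reset it, -1 for unhandled exits. */
static int handle_system_event(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_SYSTEM_EVENT)
		return -1;

	switch (run->system_event.type) {
	case KVM_SYSTEM_EVENT_SHUTDOWN:	/* guest called PSCI SYSTEM_OFF */
		return 0;
	case KVM_SYSTEM_EVENT_RESET:	/* guest called PSCI SYSTEM_RESET */
		return 1;
	default:
		return -1;
	}
}

A VMM would typically call vcpu_init_with_psci_0_2() once per vcpu before the
first KVM_RUN, and consult handle_system_event() whenever KVM_RUN returns.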

+ 26 - 0
Documentation/virtual/kvm/devices/vm.txt

@@ -0,0 +1,26 @@
+Generic vm interface
+====================================
+
+The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
+struct kvm_device_attr as other devices, but targets VM-wide settings
+and controls.
+
+The groups and attributes per virtual machine, if any, are architecture
+specific.
+
+1. GROUP: KVM_S390_VM_MEM_CTRL
+Architectures: s390
+
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
+Parameters: none
+Returns: -EBUSY if a vcpu has already been defined, otherwise 0
+
+Enables CMMA for the virtual machine
+
+1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
+Parameters: none
+Returns: 0
+
+Clear the CMMA status for all guest pages, so any pages the guest marked
+as unused are again used and may not be reclaimed by the host.
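
To make the flow concrete, here is a minimal, hedged sketch of driving this VM
"device" interface from userspace. It assumes vm_fd came from KVM_CREATE_VM
and that the s390 uapi header provides the group and attribute constants added
in this merge (KVM_S390_VM_MEM_CTRL and an enable-CMMA attribute; the attribute
name used below is an assumption and should be checked against the header).

/* Hedged sketch: enable CMMA for an s390 guest via the VM device-attr ioctls.
 * Must run before any vcpu is created, otherwise KVM returns -EBUSY.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int vm_enable_cmma(int vm_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_MEM_CTRL,
		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,	/* assumed attribute name */
	};

	/* KVM_HAS_DEVICE_ATTR returns 0 when the attribute is supported. */
	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
		return -1;

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}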

+ 14 - 0
Documentation/virtual/kvm/ppc-pv.txt

@@ -94,10 +94,24 @@ a bitmap of available features inside the magic page.
 The following enhancements to the magic page are currently available:
 
   KVM_MAGIC_FEAT_SR		Maps SR registers r/w in the magic page
+  KVM_MAGIC_FEAT_MAS0_TO_SPRG7	Maps MASn, ESR, PIR and high SPRGs
 
 For enhanced features in the magic page, please check for the existence of the
 feature before using them!
 
+Magic page flags
+================
+
+In addition to features that indicate whether a host is capable of a particular
+feature, we also have a channel for a guest to tell the host whether it's capable
+of something. This is what we call "flags".
+
+Flags are passed to the host in the low 12 bits of the Effective Address.
+
+The following flags are currently available for a guest to expose:
+
+  MAGIC_PAGE_FLAG_NOT_MAPPED_NX Guest handles NX bits correctly wrt magic page
+
 MSR bits
 ========
 
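The flag encoding above is easiest to see from the guest side. The fragment
below is purely illustrative: the hypercall wrapper name and the flag's bit
value are stand-ins rather than the real ePAPR plumbing, and only the step of
OR-ing the flag into the low 12 bits of the effective address comes from the
text above.

/* Illustrative sketch of a guest advertising NX-awareness when mapping the
 * magic page. epapr_map_magic_page() and the bit value chosen for
 * MAGIC_PAGE_FLAG_NOT_MAPPED_NX are assumptions for the example.
 */
#define MAGIC_PAGE_FLAG_NOT_MAPPED_NX	(1UL << 1)	/* assumed bit */

extern long epapr_map_magic_page(unsigned long ea, unsigned long pa);

static long map_magic_page_with_flags(unsigned long ea, unsigned long pa)
{
	/* Flags travel in the low 12 bits of the effective address. */
	ea &= ~0xfffUL;
	ea |= MAGIC_PAGE_FLAG_NOT_MAPPED_NX;

	return epapr_map_magic_page(ea, pa);
}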

+ 2 - 0
Documentation/virtual/kvm/s390-diag.txt

@@ -78,3 +78,5 @@ DIAGNOSE function code 'X'501 - KVM breakpoint
 
 If the function code specifies 0x501, breakpoint functions may be performed.
 This function code is handled by userspace.
+
+This diagnose function code has no subfunctions and uses no parameters.

+ 1 - 1
arch/arm/include/asm/kvm_host.h

@@ -36,7 +36,7 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 #define KVM_HAVE_ONE_REG
 
 
-#define KVM_VCPU_MAX_FEATURES 1
+#define KVM_VCPU_MAX_FEATURES 2
 
 
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_vgic.h>
 
 

+ 5 - 1
arch/arm/include/asm/kvm_psci.h

@@ -18,6 +18,10 @@
 #ifndef __ARM_KVM_PSCI_H__
 #ifndef __ARM_KVM_PSCI_H__
 #define __ARM_KVM_PSCI_H__
 #define __ARM_KVM_PSCI_H__
 
 
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 
 #endif /* __ARM_KVM_PSCI_H__ */
 #endif /* __ARM_KVM_PSCI_H__ */

+ 5 - 2
arch/arm/include/asm/psci.h

@@ -29,16 +29,19 @@ struct psci_operations {
 	int (*cpu_off)(struct psci_power_state state);
 	int (*cpu_off)(struct psci_power_state state);
 	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
 	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
 	int (*migrate)(unsigned long cpuid);
 	int (*migrate)(unsigned long cpuid);
+	int (*affinity_info)(unsigned long target_affinity,
+			unsigned long lowest_affinity_level);
+	int (*migrate_info_type)(void);
 };
 };
 
 
 extern struct psci_operations psci_ops;
 extern struct psci_operations psci_ops;
 extern struct smp_operations psci_smp_ops;
 extern struct smp_operations psci_smp_ops;
 
 
 #ifdef CONFIG_ARM_PSCI
 #ifdef CONFIG_ARM_PSCI
-void psci_init(void);
+int psci_init(void);
 bool psci_smp_available(void);
 bool psci_smp_available(void);
 #else
 #else
-static inline void psci_init(void) { }
+static inline int psci_init(void) { return 0; }
 static inline bool psci_smp_available(void) { return false; }
 static inline bool psci_smp_available(void) { return false; }
 #endif
 #endif
 
 

+ 6 - 4
arch/arm/include/uapi/asm/kvm.h

@@ -20,6 +20,7 @@
 #define __ARM_KVM_H__
 #define __ARM_KVM_H__
 
 
 #include <linux/types.h>
 #include <linux/types.h>
+#include <linux/psci.h>
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_GUEST_DEBUG
@@ -83,6 +84,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 
 
 struct kvm_vcpu_init {
 struct kvm_vcpu_init {
 	__u32 target;
 	__u32 target;
@@ -201,9 +203,9 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 
 
-#define KVM_PSCI_RET_SUCCESS		0
-#define KVM_PSCI_RET_NI			((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
 
 
 #endif /* __ARM_KVM_H__ */
 #endif /* __ARM_KVM_H__ */

+ 159 - 37
arch/arm/kernel/psci.c

@@ -17,63 +17,58 @@
 
 
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+#include <uapi/linux/psci.h>
 
 
 #include <asm/compiler.h>
 #include <asm/compiler.h>
 #include <asm/errno.h>
 #include <asm/errno.h>
 #include <asm/opcodes-sec.h>
 #include <asm/opcodes-sec.h>
 #include <asm/opcodes-virt.h>
 #include <asm/opcodes-virt.h>
 #include <asm/psci.h>
 #include <asm/psci.h>
+#include <asm/system_misc.h>
 
 
 struct psci_operations psci_ops;
 struct psci_operations psci_ops;
 
 
 static int (*invoke_psci_fn)(u32, u32, u32, u32);
 static int (*invoke_psci_fn)(u32, u32, u32, u32);
+typedef int (*psci_initcall_t)(const struct device_node *);
 
 
 enum psci_function {
 enum psci_function {
 	PSCI_FN_CPU_SUSPEND,
 	PSCI_FN_CPU_SUSPEND,
 	PSCI_FN_CPU_ON,
 	PSCI_FN_CPU_ON,
 	PSCI_FN_CPU_OFF,
 	PSCI_FN_CPU_OFF,
 	PSCI_FN_MIGRATE,
 	PSCI_FN_MIGRATE,
+	PSCI_FN_AFFINITY_INFO,
+	PSCI_FN_MIGRATE_INFO_TYPE,
 	PSCI_FN_MAX,
 	PSCI_FN_MAX,
 };
 };
 
 
 static u32 psci_function_id[PSCI_FN_MAX];
 static u32 psci_function_id[PSCI_FN_MAX];
 
 
-#define PSCI_RET_SUCCESS		0
-#define PSCI_RET_EOPNOTSUPP		-1
-#define PSCI_RET_EINVAL			-2
-#define PSCI_RET_EPERM			-3
-
 static int psci_to_linux_errno(int errno)
 static int psci_to_linux_errno(int errno)
 {
 {
 	switch (errno) {
 	switch (errno) {
 	case PSCI_RET_SUCCESS:
 	case PSCI_RET_SUCCESS:
 		return 0;
 		return 0;
-	case PSCI_RET_EOPNOTSUPP:
+	case PSCI_RET_NOT_SUPPORTED:
 		return -EOPNOTSUPP;
 		return -EOPNOTSUPP;
-	case PSCI_RET_EINVAL:
+	case PSCI_RET_INVALID_PARAMS:
 		return -EINVAL;
 		return -EINVAL;
-	case PSCI_RET_EPERM:
+	case PSCI_RET_DENIED:
 		return -EPERM;
 		return -EPERM;
 	};
 	};
 
 
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 
-#define PSCI_POWER_STATE_ID_MASK	0xffff
-#define PSCI_POWER_STATE_ID_SHIFT	0
-#define PSCI_POWER_STATE_TYPE_MASK	0x1
-#define PSCI_POWER_STATE_TYPE_SHIFT	16
-#define PSCI_POWER_STATE_AFFL_MASK	0x3
-#define PSCI_POWER_STATE_AFFL_SHIFT	24
-
 static u32 psci_power_state_pack(struct psci_power_state state)
 static u32 psci_power_state_pack(struct psci_power_state state)
 {
 {
-	return	((state.id & PSCI_POWER_STATE_ID_MASK)
-			<< PSCI_POWER_STATE_ID_SHIFT)	|
-		((state.type & PSCI_POWER_STATE_TYPE_MASK)
-			<< PSCI_POWER_STATE_TYPE_SHIFT)	|
-		((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK)
-			<< PSCI_POWER_STATE_AFFL_SHIFT);
+	return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
+			& PSCI_0_2_POWER_STATE_ID_MASK) |
+		((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+		 & PSCI_0_2_POWER_STATE_TYPE_MASK) |
+		((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+		 & PSCI_0_2_POWER_STATE_AFFL_MASK);
 }
 }
 
 
 /*
 /*
@@ -110,6 +105,14 @@ static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1,
 	return function_id;
 	return function_id;
 }
 }
 
 
+static int psci_get_version(void)
+{
+	int err;
+
+	err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+	return err;
+}
+
 static int psci_cpu_suspend(struct psci_power_state state,
 static int psci_cpu_suspend(struct psci_power_state state,
 			    unsigned long entry_point)
 			    unsigned long entry_point)
 {
 {
@@ -153,26 +156,36 @@ static int psci_migrate(unsigned long cpuid)
 	return psci_to_linux_errno(err);
 	return psci_to_linux_errno(err);
 }
 }
 
 
-static const struct of_device_id psci_of_match[] __initconst = {
-	{ .compatible = "arm,psci",	},
-	{},
-};
+static int psci_affinity_info(unsigned long target_affinity,
+		unsigned long lowest_affinity_level)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
+	err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
+	return err;
+}
 
 
-void __init psci_init(void)
+static int psci_migrate_info_type(void)
 {
 {
-	struct device_node *np;
-	const char *method;
-	u32 id;
+	int err;
+	u32 fn;
 
 
-	np = of_find_matching_node(NULL, psci_of_match);
-	if (!np)
-		return;
+	fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
+	err = invoke_psci_fn(fn, 0, 0, 0);
+	return err;
+}
+
+static int get_set_conduit_method(struct device_node *np)
+{
+	const char *method;
 
 
-	pr_info("probing function IDs from device-tree\n");
+	pr_info("probing for conduit method from DT.\n");
 
 
 	if (of_property_read_string(np, "method", &method)) {
 	if (of_property_read_string(np, "method", &method)) {
-		pr_warning("missing \"method\" property\n");
-		goto out_put_node;
+		pr_warn("missing \"method\" property\n");
+		return -ENXIO;
 	}
 	}
 
 
 	if (!strcmp("hvc", method)) {
 	if (!strcmp("hvc", method)) {
@@ -180,10 +193,99 @@ void __init psci_init(void)
 	} else if (!strcmp("smc", method)) {
 	} else if (!strcmp("smc", method)) {
 		invoke_psci_fn = __invoke_psci_fn_smc;
 		invoke_psci_fn = __invoke_psci_fn_smc;
 	} else {
 	} else {
-		pr_warning("invalid \"method\" property: %s\n", method);
+		pr_warn("invalid \"method\" property: %s\n", method);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+}
+
+static void psci_sys_poweroff(void)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+}
+
+/*
+ * PSCI Function IDs for v0.2+ are well defined so use
+ * standard values.
+ */
+static int psci_0_2_init(struct device_node *np)
+{
+	int err, ver;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+
+	ver = psci_get_version();
+
+	if (ver == PSCI_RET_NOT_SUPPORTED) {
+		/* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
+		pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+		err = -EOPNOTSUPP;
 		goto out_put_node;
 		goto out_put_node;
+	} else {
+		pr_info("PSCIv%d.%d detected in firmware.\n",
+				PSCI_VERSION_MAJOR(ver),
+				PSCI_VERSION_MINOR(ver));
+
+		if (PSCI_VERSION_MAJOR(ver) == 0 &&
+				PSCI_VERSION_MINOR(ver) < 2) {
+			err = -EINVAL;
+			pr_err("Conflicting PSCI version detected.\n");
+			goto out_put_node;
+		}
 	}
 	}
 
 
+	pr_info("Using standard PSCI v0.2 function IDs\n");
+	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND;
+	psci_ops.cpu_suspend = psci_cpu_suspend;
+
+	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
+	psci_ops.cpu_off = psci_cpu_off;
+
+	psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON;
+	psci_ops.cpu_on = psci_cpu_on;
+
+	psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE;
+	psci_ops.migrate = psci_migrate;
+
+	psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO;
+	psci_ops.affinity_info = psci_affinity_info;
+
+	psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
+		PSCI_0_2_FN_MIGRATE_INFO_TYPE;
+	psci_ops.migrate_info_type = psci_migrate_info_type;
+
+	arm_pm_restart = psci_sys_reset;
+
+	pm_power_off = psci_sys_poweroff;
+
+out_put_node:
+	of_node_put(np);
+	return err;
+}
+
+/*
+ * PSCI < v0.2 get PSCI Function IDs via DT.
+ */
+static int psci_0_1_init(struct device_node *np)
+{
+	u32 id;
+	int err;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+
+	pr_info("Using PSCI v0.1 Function IDs from DT\n");
+
 	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
 	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
 		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
 		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
 		psci_ops.cpu_suspend = psci_cpu_suspend;
 		psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -206,5 +308,25 @@ void __init psci_init(void)
 
 
 out_put_node:
 out_put_node:
 	of_node_put(np);
 	of_node_put(np);
-	return;
+	return err;
+}
+
+static const struct of_device_id psci_of_match[] __initconst = {
+	{ .compatible = "arm,psci", .data = psci_0_1_init},
+	{ .compatible = "arm,psci-0.2", .data = psci_0_2_init},
+	{},
+};
+
+int __init psci_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *matched_np;
+	psci_initcall_t init_fn;
+
+	np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
+	if (!np)
+		return -ENODEV;
+
+	init_fn = (psci_initcall_t)matched_np->data;
+	return init_fn(np);
 }
 }

+ 33 - 0
arch/arm/kernel/psci_smp.c

@@ -16,6 +16,8 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
 #include <linux/of.h>
 #include <linux/of.h>
+#include <linux/delay.h>
+#include <uapi/linux/psci.h>
 
 
 #include <asm/psci.h>
 #include <asm/psci.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_plat.h>
@@ -66,6 +68,36 @@ void __ref psci_cpu_die(unsigned int cpu)
        /* We should never return */
        /* We should never return */
        panic("psci: cpu %d failed to shutdown\n", cpu);
        panic("psci: cpu %d failed to shutdown\n", cpu);
 }
 }
+
+int __ref psci_cpu_kill(unsigned int cpu)
+{
+	int err, i;
+
+	if (!psci_ops.affinity_info)
+		return 1;
+	/*
+	 * cpu_kill could race with cpu_die and we can
+	 * potentially end up declaring this cpu undead
+	 * while it is dying. So, try again a few times.
+	 */
+
+	for (i = 0; i < 10; i++) {
+		err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
+		if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
+			pr_info("CPU%d killed.\n", cpu);
+			return 1;
+		}
+
+		msleep(10);
+		pr_info("Retrying again to check for CPU kill\n");
+	}
+
+	pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
+			cpu, err);
+	/* Make platform_cpu_kill() fail. */
+	return 0;
+}
+
 #endif
 #endif
 
 
 bool __init psci_smp_available(void)
 bool __init psci_smp_available(void)
@@ -78,5 +110,6 @@ struct smp_operations __initdata psci_smp_ops = {
 	.smp_boot_secondary	= psci_boot_secondary,
 	.smp_boot_secondary	= psci_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_die		= psci_cpu_die,
 	.cpu_die		= psci_cpu_die,
+	.cpu_kill		= psci_cpu_kill,
 #endif
 #endif
 };
 };

+ 1 - 0
arch/arm/kvm/arm.c

@@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ARM_PSCI:
 	case KVM_CAP_ARM_PSCI:
+	case KVM_CAP_ARM_PSCI_0_2:
 		r = 1;
 		r = 1;
 		break;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
 	case KVM_CAP_COALESCED_MMIO:

+ 7 - 3
arch/arm/kvm/handle_exit.c

@@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 {
+	int ret;
+
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 		      kvm_vcpu_hvc_get_imm(vcpu));
 		      kvm_vcpu_hvc_get_imm(vcpu));
 
 
-	if (kvm_psci_call(vcpu))
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
 		return 1;
 		return 1;
+	}
 
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return ret;
 }
 }
 
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)

+ 216 - 19
arch/arm/kvm/psci.c

@@ -27,6 +27,36 @@
  * as described in ARM document number ARM DEN 0022A.
  * as described in ARM document number ARM DEN 0022A.
  */
  */
 
 
+#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+	if (affinity_level <= 3)
+		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+	return 0;
+}
+
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * NOTE: For simplicity, we make VCPU suspend emulation the
+	 * same as WFI (Wait-for-interrupt) emulation.
+	 *
+	 * This means for KVM the wakeup events are interrupts and
+	 * this is consistent with intended use of StateID as described
+	 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
+	 *
+	 * Further, we also treat power-down request to be same as
+	 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2
+	 * specification (ARM DEN 0022A). This means all suspend states
+	 * for KVM will preserve the register state.
+	 */
+	kvm_vcpu_block(vcpu);
+
+	return PSCI_RET_SUCCESS;
+}
+
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
 {
 	vcpu->arch.pause = true;
 	vcpu->arch.pause = true;
@@ -38,6 +68,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	struct kvm_vcpu *vcpu = NULL, *tmp;
 	struct kvm_vcpu *vcpu = NULL, *tmp;
 	wait_queue_head_t *wq;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
 	unsigned long cpu_id;
+	unsigned long context_id;
 	unsigned long mpidr;
 	unsigned long mpidr;
 	phys_addr_t target_pc;
 	phys_addr_t target_pc;
 	int i;
 	int i;
@@ -58,10 +89,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * Make sure the caller requested a valid CPU and that the CPU is
 	 * Make sure the caller requested a valid CPU and that the CPU is
 	 * turned off.
 	 * turned off.
 	 */
 	 */
-	if (!vcpu || !vcpu->arch.pause)
-		return KVM_PSCI_RET_INVAL;
+	if (!vcpu)
+		return PSCI_RET_INVALID_PARAMS;
+	if (!vcpu->arch.pause) {
+		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+			return PSCI_RET_ALREADY_ON;
+		else
+			return PSCI_RET_INVALID_PARAMS;
+	}
 
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 	target_pc = *vcpu_reg(source_vcpu, 2);
+	context_id = *vcpu_reg(source_vcpu, 3);
 
 
 	kvm_reset_vcpu(vcpu);
 	kvm_reset_vcpu(vcpu);
 
 
@@ -76,26 +114,160 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		kvm_vcpu_set_be(vcpu);
 		kvm_vcpu_set_be(vcpu);
 
 
 	*vcpu_pc(vcpu) = target_pc;
 	*vcpu_pc(vcpu) = target_pc;
+	/*
+	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
+	 * the general purpose registers are undefined upon CPU_ON.
+	 */
+	*vcpu_reg(vcpu, 0) = context_id;
 	vcpu->arch.pause = false;
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 	smp_mb();		/* Make sure the above is visible */
 
 
 	wq = kvm_arch_vcpu_wq(vcpu);
 	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 	wake_up_interruptible(wq);
 
 
-	return KVM_PSCI_RET_SUCCESS;
+	return PSCI_RET_SUCCESS;
 }
 }
 
 
-/**
- * kvm_psci_call - handle PSCI call if r0 value is in range
- * @vcpu: Pointer to the VCPU struct
- *
- * Handle PSCI calls from guests through traps from HVC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
- */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+	int i;
+	unsigned long mpidr;
+	unsigned long target_affinity;
+	unsigned long target_affinity_mask;
+	unsigned long lowest_affinity_level;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tmp;
+
+	target_affinity = *vcpu_reg(vcpu, 1);
+	lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+	/* Determine target affinity mask */
+	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+	if (!target_affinity_mask)
+		return PSCI_RET_INVALID_PARAMS;
+
+	/* Ignore other bits of target affinity */
+	target_affinity &= target_affinity_mask;
+
+	/*
+	 * If one or more VCPU matching target affinity are running
+	 * then ON else OFF
+	 */
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if (((mpidr & target_affinity_mask) == target_affinity) &&
+		    !tmp->arch.pause) {
+			return PSCI_0_2_AFFINITY_LEVEL_ON;
+		}
+	}
+
+	return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+	vcpu->run->system_event.type = type;
+	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+		return KVM_ARM_PSCI_0_2;
+
+	return KVM_ARM_PSCI_0_1;
+}
+
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+	int ret = 1;
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case PSCI_0_2_FN_PSCI_VERSION:
+		/*
+		 * Bits[31:16] = Major Version = 0
+		 * Bits[15:0] = Minor Version = 2
+		 */
+		val = 2;
+		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		val = kvm_psci_vcpu_suspend(vcpu);
+		break;
+	case PSCI_0_2_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case PSCI_0_2_FN_CPU_ON:
+	case PSCI_0_2_FN64_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+		val = kvm_psci_vcpu_affinity_info(vcpu);
+		break;
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+		/*
+		 * Trusted OS is MP hence does not require migration
+	         * or
+		 * Trusted OS is not present
+		 */
+		val = PSCI_0_2_TOS_MP;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_SYSTEM_OFF:
+		kvm_psci_system_off(vcpu);
+		/*
+		 * We shouldn't be going back to guest VCPU after
+		 * receiving SYSTEM_OFF request.
+		 *
+		 * If user space accidentally/deliberately resumes
+		 * guest VCPU after SYSTEM_OFF request then guest
+		 * VCPU should see internal failure from PSCI return
+		 * value. To achieve this, we preload r0 (or x0) with
+		 * PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	case PSCI_0_2_FN_SYSTEM_RESET:
+		kvm_psci_system_reset(vcpu);
+		/*
+		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+		 * with PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return ret;
+}
+
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
 {
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
 	unsigned long val;
@@ -103,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
 	switch (psci_fn) {
 	switch (psci_fn) {
 	case KVM_PSCI_FN_CPU_OFF:
 	case KVM_PSCI_FN_CPU_OFF:
 		kvm_psci_vcpu_off(vcpu);
 		kvm_psci_vcpu_off(vcpu);
-		val = KVM_PSCI_RET_SUCCESS;
+		val = PSCI_RET_SUCCESS;
 		break;
 		break;
 	case KVM_PSCI_FN_CPU_ON:
 	case KVM_PSCI_FN_CPU_ON:
 		val = kvm_psci_vcpu_on(vcpu);
 		val = kvm_psci_vcpu_on(vcpu);
 		break;
 		break;
 	case KVM_PSCI_FN_CPU_SUSPEND:
 	case KVM_PSCI_FN_CPU_SUSPEND:
 	case KVM_PSCI_FN_MIGRATE:
 	case KVM_PSCI_FN_MIGRATE:
-		val = KVM_PSCI_RET_NI;
+		val = PSCI_RET_NOT_SUPPORTED;
 		break;
 		break;
-
 	default:
 	default:
-		return false;
+		return -EINVAL;
 	}
 	}
 
 
 	*vcpu_reg(vcpu, 0) = val;
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return 1;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC instructions.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
+ */
+int kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+	switch (kvm_psci_version(vcpu)) {
+	case KVM_ARM_PSCI_0_2:
+		return kvm_psci_0_2_call(vcpu);
+	case KVM_ARM_PSCI_0_1:
+		return kvm_psci_0_1_call(vcpu);
+	default:
+		return -EINVAL;
+	};
 }
 }

+ 2 - 0
arch/arm64/include/asm/cpu_ops.h

@@ -39,6 +39,7 @@ struct device_node;
  * 		from the cpu to be killed.
  * 		from the cpu to be killed.
  * @cpu_die:	Makes a cpu leave the kernel. Must not fail. Called from the
  * @cpu_die:	Makes a cpu leave the kernel. Must not fail. Called from the
  *		cpu being killed.
  *		cpu being killed.
+ * @cpu_kill:  Ensures a cpu has left the kernel. Called from another cpu.
  * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
  * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
  *               to wrong parameters or error conditions. Called from the
  *               to wrong parameters or error conditions. Called from the
  *               CPU being suspended. Must be called with IRQs disabled.
  *               CPU being suspended. Must be called with IRQs disabled.
@@ -52,6 +53,7 @@ struct cpu_operations {
 #ifdef CONFIG_HOTPLUG_CPU
 #ifdef CONFIG_HOTPLUG_CPU
 	int		(*cpu_disable)(unsigned int cpu);
 	int		(*cpu_disable)(unsigned int cpu);
 	void		(*cpu_die)(unsigned int cpu);
 	void		(*cpu_die)(unsigned int cpu);
+	int		(*cpu_kill)(unsigned int cpu);
 #endif
 #endif
 #ifdef CONFIG_ARM64_CPU_SUSPEND
 #ifdef CONFIG_ARM64_CPU_SUSPEND
 	int		(*cpu_suspend)(unsigned long);
 	int		(*cpu_suspend)(unsigned long);

+ 1 - 0
arch/arm64/include/asm/cputype.h

@@ -41,6 +41,7 @@
 
 
 #define ARM_CPU_PART_AEM_V8	0xD0F0
 #define ARM_CPU_PART_AEM_V8	0xD0F0
 #define ARM_CPU_PART_FOUNDATION	0xD000
 #define ARM_CPU_PART_FOUNDATION	0xD000
+#define ARM_CPU_PART_CORTEX_A53	0xD030
 #define ARM_CPU_PART_CORTEX_A57	0xD070
 #define ARM_CPU_PART_CORTEX_A57	0xD070
 
 
 #define APM_CPU_PART_POTENZA	0x0000
 #define APM_CPU_PART_POTENZA	0x0000

+ 1 - 1
arch/arm64/include/asm/kvm_host.h

@@ -39,7 +39,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 #include <kvm/arm_arch_timer.h>
 
 
-#define KVM_VCPU_MAX_FEATURES 2
+#define KVM_VCPU_MAX_FEATURES 3
 
 
 struct kvm_vcpu;
 struct kvm_vcpu;
 int kvm_target_cpu(void);
 int kvm_target_cpu(void);

+ 5 - 1
arch/arm64/include/asm/kvm_psci.h

@@ -18,6 +18,10 @@
 #ifndef __ARM64_KVM_PSCI_H__
 #ifndef __ARM64_KVM_PSCI_H__
 #define __ARM64_KVM_PSCI_H__
 #define __ARM64_KVM_PSCI_H__
 
 
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 
 #endif /* __ARM64_KVM_PSCI_H__ */
 #endif /* __ARM64_KVM_PSCI_H__ */

+ 1 - 1
arch/arm64/include/asm/psci.h

@@ -14,6 +14,6 @@
 #ifndef __ASM_PSCI_H
 #ifndef __ASM_PSCI_H
 #define __ASM_PSCI_H
 #define __ASM_PSCI_H
 
 
-void psci_init(void);
+int psci_init(void);
 
 
 #endif /* __ASM_PSCI_H */
 #endif /* __ASM_PSCI_H */

+ 8 - 5
arch/arm64/include/uapi/asm/kvm.h

@@ -31,6 +31,7 @@
 #define KVM_NR_SPSR	5
 #define KVM_NR_SPSR	5
 
 
 #ifndef __ASSEMBLY__
 #ifndef __ASSEMBLY__
+#include <linux/psci.h>
 #include <asm/types.h>
 #include <asm/types.h>
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 
 
@@ -56,8 +57,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
 #define KVM_ARM_TARGET_CORTEX_A57	2
 #define KVM_ARM_TARGET_XGENE_POTENZA	3
 #define KVM_ARM_TARGET_XGENE_POTENZA	3
+#define KVM_ARM_TARGET_CORTEX_A53	4
 
 
-#define KVM_ARM_NUM_TARGETS		4
+#define KVM_ARM_NUM_TARGETS		5
 
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
@@ -77,6 +79,7 @@ struct kvm_regs {
 
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
 
 
 struct kvm_vcpu_init {
 struct kvm_vcpu_init {
 	__u32 target;
 	__u32 target;
@@ -186,10 +189,10 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 
 
-#define KVM_PSCI_RET_SUCCESS		0
-#define KVM_PSCI_RET_NI			((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
 
 
 #endif
 #endif
 
 

+ 194 - 37
arch/arm64/kernel/psci.c

@@ -18,12 +18,17 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <uapi/linux/psci.h>
 
 
 #include <asm/compiler.h>
 #include <asm/compiler.h>
 #include <asm/cpu_ops.h>
 #include <asm/cpu_ops.h>
 #include <asm/errno.h>
 #include <asm/errno.h>
 #include <asm/psci.h>
 #include <asm/psci.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_plat.h>
+#include <asm/system_misc.h>
 
 
 #define PSCI_POWER_STATE_TYPE_STANDBY		0
 #define PSCI_POWER_STATE_TYPE_STANDBY		0
 #define PSCI_POWER_STATE_TYPE_POWER_DOWN	1
 #define PSCI_POWER_STATE_TYPE_POWER_DOWN	1
@@ -40,58 +45,52 @@ struct psci_operations {
 	int (*cpu_off)(struct psci_power_state state);
 	int (*cpu_off)(struct psci_power_state state);
 	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
 	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
 	int (*migrate)(unsigned long cpuid);
 	int (*migrate)(unsigned long cpuid);
+	int (*affinity_info)(unsigned long target_affinity,
+			unsigned long lowest_affinity_level);
+	int (*migrate_info_type)(void);
 };
 };
 
 
 static struct psci_operations psci_ops;
 static struct psci_operations psci_ops;
 
 
 static int (*invoke_psci_fn)(u64, u64, u64, u64);
 static int (*invoke_psci_fn)(u64, u64, u64, u64);
+typedef int (*psci_initcall_t)(const struct device_node *);
 
 
 enum psci_function {
 enum psci_function {
 	PSCI_FN_CPU_SUSPEND,
 	PSCI_FN_CPU_SUSPEND,
 	PSCI_FN_CPU_ON,
 	PSCI_FN_CPU_ON,
 	PSCI_FN_CPU_OFF,
 	PSCI_FN_CPU_OFF,
 	PSCI_FN_MIGRATE,
 	PSCI_FN_MIGRATE,
+	PSCI_FN_AFFINITY_INFO,
+	PSCI_FN_MIGRATE_INFO_TYPE,
 	PSCI_FN_MAX,
 	PSCI_FN_MAX,
 };
 };
 
 
 static u32 psci_function_id[PSCI_FN_MAX];
 static u32 psci_function_id[PSCI_FN_MAX];
 
 
-#define PSCI_RET_SUCCESS		0
-#define PSCI_RET_EOPNOTSUPP		-1
-#define PSCI_RET_EINVAL			-2
-#define PSCI_RET_EPERM			-3
-
 static int psci_to_linux_errno(int errno)
 static int psci_to_linux_errno(int errno)
 {
 {
 	switch (errno) {
 	switch (errno) {
 	case PSCI_RET_SUCCESS:
 	case PSCI_RET_SUCCESS:
 		return 0;
 		return 0;
-	case PSCI_RET_EOPNOTSUPP:
+	case PSCI_RET_NOT_SUPPORTED:
 		return -EOPNOTSUPP;
 		return -EOPNOTSUPP;
-	case PSCI_RET_EINVAL:
+	case PSCI_RET_INVALID_PARAMS:
 		return -EINVAL;
 		return -EINVAL;
-	case PSCI_RET_EPERM:
+	case PSCI_RET_DENIED:
 		return -EPERM;
 		return -EPERM;
 	};
 	};
 
 
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 
-#define PSCI_POWER_STATE_ID_MASK	0xffff
-#define PSCI_POWER_STATE_ID_SHIFT	0
-#define PSCI_POWER_STATE_TYPE_MASK	0x1
-#define PSCI_POWER_STATE_TYPE_SHIFT	16
-#define PSCI_POWER_STATE_AFFL_MASK	0x3
-#define PSCI_POWER_STATE_AFFL_SHIFT	24
-
 static u32 psci_power_state_pack(struct psci_power_state state)
 static u32 psci_power_state_pack(struct psci_power_state state)
 {
 {
-	return	((state.id & PSCI_POWER_STATE_ID_MASK)
-			<< PSCI_POWER_STATE_ID_SHIFT)	|
-		((state.type & PSCI_POWER_STATE_TYPE_MASK)
-			<< PSCI_POWER_STATE_TYPE_SHIFT)	|
-		((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK)
-			<< PSCI_POWER_STATE_AFFL_SHIFT);
+	return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
+			& PSCI_0_2_POWER_STATE_ID_MASK) |
+		((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+		 & PSCI_0_2_POWER_STATE_TYPE_MASK) |
+		((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+		 & PSCI_0_2_POWER_STATE_AFFL_MASK);
 }
 }
 
 
 /*
 /*
@@ -128,6 +127,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1,
 	return function_id;
 	return function_id;
 }
 }
 
 
+static int psci_get_version(void)
+{
+	int err;
+
+	err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+	return err;
+}
+
 static int psci_cpu_suspend(struct psci_power_state state,
 static int psci_cpu_suspend(struct psci_power_state state,
 			    unsigned long entry_point)
 			    unsigned long entry_point)
 {
 {
@@ -171,26 +178,36 @@ static int psci_migrate(unsigned long cpuid)
 	return psci_to_linux_errno(err);
 	return psci_to_linux_errno(err);
 }
 }
 
 
-static const struct of_device_id psci_of_match[] __initconst = {
-	{ .compatible = "arm,psci",	},
-	{},
-};
+static int psci_affinity_info(unsigned long target_affinity,
+		unsigned long lowest_affinity_level)
+{
+	int err;
+	u32 fn;
+
+	fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
+	err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
+	return err;
+}
 
 
-void __init psci_init(void)
+static int psci_migrate_info_type(void)
 {
 {
-	struct device_node *np;
-	const char *method;
-	u32 id;
+	int err;
+	u32 fn;
 
 
-	np = of_find_matching_node(NULL, psci_of_match);
-	if (!np)
-		return;
+	fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
+	err = invoke_psci_fn(fn, 0, 0, 0);
+	return err;
+}
 
 
-	pr_info("probing function IDs from device-tree\n");
+static int get_set_conduit_method(struct device_node *np)
+{
+	const char *method;
+
+	pr_info("probing for conduit method from DT.\n");
 
 
 	if (of_property_read_string(np, "method", &method)) {
 	if (of_property_read_string(np, "method", &method)) {
-		pr_warning("missing \"method\" property\n");
-		goto out_put_node;
+		pr_warn("missing \"method\" property\n");
+		return -ENXIO;
 	}
 	}
 
 
 	if (!strcmp("hvc", method)) {
 	if (!strcmp("hvc", method)) {
@@ -198,10 +215,99 @@ void __init psci_init(void)
 	} else if (!strcmp("smc", method)) {
 	} else if (!strcmp("smc", method)) {
 		invoke_psci_fn = __invoke_psci_fn_smc;
 		invoke_psci_fn = __invoke_psci_fn_smc;
 	} else {
 	} else {
-		pr_warning("invalid \"method\" property: %s\n", method);
+		pr_warn("invalid \"method\" property: %s\n", method);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
+}
+
+static void psci_sys_poweroff(void)
+{
+	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+}
+
+/*
+ * PSCI Function IDs for v0.2+ are well defined so use
+ * standard values.
+ */
+static int psci_0_2_init(struct device_node *np)
+{
+	int err, ver;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+
+	ver = psci_get_version();
+
+	if (ver == PSCI_RET_NOT_SUPPORTED) {
+		/* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
+		pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+		err = -EOPNOTSUPP;
 		goto out_put_node;
 		goto out_put_node;
+	} else {
+		pr_info("PSCIv%d.%d detected in firmware.\n",
+				PSCI_VERSION_MAJOR(ver),
+				PSCI_VERSION_MINOR(ver));
+
+		if (PSCI_VERSION_MAJOR(ver) == 0 &&
+				PSCI_VERSION_MINOR(ver) < 2) {
+			err = -EINVAL;
+			pr_err("Conflicting PSCI version detected.\n");
+			goto out_put_node;
+		}
 	}
 	}
 
 
+	pr_info("Using standard PSCI v0.2 function IDs\n");
+	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
+	psci_ops.cpu_suspend = psci_cpu_suspend;
+
+	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
+	psci_ops.cpu_off = psci_cpu_off;
+
+	psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON;
+	psci_ops.cpu_on = psci_cpu_on;
+
+	psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE;
+	psci_ops.migrate = psci_migrate;
+
+	psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO;
+	psci_ops.affinity_info = psci_affinity_info;
+
+	psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
+		PSCI_0_2_FN_MIGRATE_INFO_TYPE;
+	psci_ops.migrate_info_type = psci_migrate_info_type;
+
+	arm_pm_restart = psci_sys_reset;
+
+	pm_power_off = psci_sys_poweroff;
+
+out_put_node:
+	of_node_put(np);
+	return err;
+}
+
+/*
+ * PSCI < v0.2 get PSCI Function IDs via DT.
+ */
+static int psci_0_1_init(struct device_node *np)
+{
+	u32 id;
+	int err;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+
+	pr_info("Using PSCI v0.1 Function IDs from DT\n");
+
 	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
 	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
 		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
 		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
 		psci_ops.cpu_suspend = psci_cpu_suspend;
 		psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -224,7 +330,28 @@ void __init psci_init(void)
 
 
 out_put_node:
 out_put_node:
 	of_node_put(np);
 	of_node_put(np);
-	return;
+	return err;
+}
+
+static const struct of_device_id psci_of_match[] __initconst = {
+	{ .compatible = "arm,psci",	.data = psci_0_1_init},
+	{ .compatible = "arm,psci-0.2",	.data = psci_0_2_init},
+	{},
+};
+
+int __init psci_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *matched_np;
+	psci_initcall_t init_fn;
+
+	np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
+
+	if (!np)
+		return -ENODEV;
+
+	init_fn = (psci_initcall_t)matched_np->data;
+	return init_fn(np);
 }
 }
 
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
@@ -277,6 +404,35 @@ static void cpu_psci_cpu_die(unsigned int cpu)
 
 
 	pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
 	pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
 }
 }
+
+static int cpu_psci_cpu_kill(unsigned int cpu)
+{
+	int err, i;
+
+	if (!psci_ops.affinity_info)
+		return 1;
+	/*
+	 * cpu_kill could race with cpu_die and we can
+	 * potentially end up declaring this cpu undead
+	 * while it is dying. So, try again a few times.
+	 */
+
+	for (i = 0; i < 10; i++) {
+		err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
+		if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
+			pr_info("CPU%d killed.\n", cpu);
+			return 1;
+		}
+
+		msleep(10);
+		pr_info("Retrying again to check for CPU kill\n");
+	}
+
+	pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
+			cpu, err);
+	/* Make op_cpu_kill() fail. */
+	return 0;
+}
 #endif
 #endif
 
 
 const struct cpu_operations cpu_psci_ops = {
 const struct cpu_operations cpu_psci_ops = {
@@ -287,6 +443,7 @@ const struct cpu_operations cpu_psci_ops = {
 #ifdef CONFIG_HOTPLUG_CPU
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable	= cpu_psci_cpu_disable,
 	.cpu_disable	= cpu_psci_cpu_disable,
 	.cpu_die	= cpu_psci_cpu_die,
 	.cpu_die	= cpu_psci_cpu_die,
+	.cpu_kill	= cpu_psci_cpu_kill,
 #endif
 #endif
 };
 };
 
 

+ 22 - 0
arch/arm64/kernel/smp.c

@@ -228,6 +228,19 @@ int __cpu_disable(void)
 	return 0;
 	return 0;
 }
 }
 
 
+static int op_cpu_kill(unsigned int cpu)
+{
+	/*
+	 * If we have no means of synchronising with the dying CPU, then assume
+	 * that it is really dead. We can only wait for an arbitrary length of
+	 * time and hope that it's dead, so let's skip the wait and just hope.
+	 */
+	if (!cpu_ops[cpu]->cpu_kill)
+		return 1;
+
+	return cpu_ops[cpu]->cpu_kill(cpu);
+}
+
 static DECLARE_COMPLETION(cpu_died);
 static DECLARE_COMPLETION(cpu_died);
 
 
 /*
 /*
@@ -241,6 +254,15 @@ void __cpu_die(unsigned int cpu)
 		return;
 		return;
 	}
 	}
 	pr_notice("CPU%u: shutdown\n", cpu);
 	pr_notice("CPU%u: shutdown\n", cpu);
+
+	/*
+	 * Now that the dying CPU is beyond the point of no return w.r.t.
+	 * in-kernel synchronisation, try to get the firwmare to help us to
+	 * in-kernel synchronisation, try to get the firmware to help us to
+	 * clobbering anything it might still be using.
+	 */
+	if (!op_cpu_kill(cpu))
+		pr_warn("CPU%d may not have shut down cleanly\n", cpu);
 }
 }
 
 
 /*
 /*

+ 2 - 0
arch/arm64/kvm/guest.c

@@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void)
 			return KVM_ARM_TARGET_AEM_V8;
 			return KVM_ARM_TARGET_AEM_V8;
 		case ARM_CPU_PART_FOUNDATION:
 		case ARM_CPU_PART_FOUNDATION:
 			return KVM_ARM_TARGET_FOUNDATION_V8;
 			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A53:
+			return KVM_ARM_TARGET_CORTEX_A53;
 		case ARM_CPU_PART_CORTEX_A57:
 		case ARM_CPU_PART_CORTEX_A57:
 			return KVM_ARM_TARGET_CORTEX_A57;
 			return KVM_ARM_TARGET_CORTEX_A57;
 		};
 		};

+ 7 - 3
arch/arm64/kvm/handle_exit.c

@@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 {
-	if (kvm_psci_call(vcpu))
+	int ret;
+
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
 		return 1;
 		return 1;
+	}
 
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return ret;
 }
 }
 
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)

+ 2 - 0
arch/arm64/kvm/sys_regs_generic_v8.c

@@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,

+ 6 - 6
arch/mips/Kconfig

@@ -1756,14 +1756,14 @@ config KVM_GUEST
 	help
 	  Select this option if building a guest kernel for KVM (Trap & Emulate) mode
 
-config KVM_HOST_FREQ
-	int "KVM Host Processor Frequency (MHz)"
+config KVM_GUEST_TIMER_FREQ
+	int "Count/Compare Timer Frequency (MHz)"
 	depends on KVM_GUEST
-	default 500
+	default 100
 	help
-	  Select this option if building a guest kernel for KVM to skip
-	  RTC emulation when determining guest CPU Frequency.  Instead, the guest
-	  processor frequency is automatically derived from the host frequency.
+	  Set this to non-zero if building a guest kernel for KVM to skip RTC
+	  emulation when determining guest CPU Frequency. Instead, the guest's
+	  timer frequency is specified directly.
 
 choice
 	prompt "Kernel page size"

+ 148 - 35
arch/mips/include/asm/kvm_host.h

@@ -19,6 +19,38 @@
 #include <linux/threads.h>
 #include <linux/spinlock.h>
 
+/* MIPS KVM register ids */
+#define MIPS_CP0_32(_R, _S)					\
+	(KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
+
+#define MIPS_CP0_64(_R, _S)					\
+	(KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
+
+#define KVM_REG_MIPS_CP0_INDEX		MIPS_CP0_32(0, 0)
+#define KVM_REG_MIPS_CP0_ENTRYLO0	MIPS_CP0_64(2, 0)
+#define KVM_REG_MIPS_CP0_ENTRYLO1	MIPS_CP0_64(3, 0)
+#define KVM_REG_MIPS_CP0_CONTEXT	MIPS_CP0_64(4, 0)
+#define KVM_REG_MIPS_CP0_USERLOCAL	MIPS_CP0_64(4, 2)
+#define KVM_REG_MIPS_CP0_PAGEMASK	MIPS_CP0_32(5, 0)
+#define KVM_REG_MIPS_CP0_PAGEGRAIN	MIPS_CP0_32(5, 1)
+#define KVM_REG_MIPS_CP0_WIRED		MIPS_CP0_32(6, 0)
+#define KVM_REG_MIPS_CP0_HWRENA		MIPS_CP0_32(7, 0)
+#define KVM_REG_MIPS_CP0_BADVADDR	MIPS_CP0_64(8, 0)
+#define KVM_REG_MIPS_CP0_COUNT		MIPS_CP0_32(9, 0)
+#define KVM_REG_MIPS_CP0_ENTRYHI	MIPS_CP0_64(10, 0)
+#define KVM_REG_MIPS_CP0_COMPARE	MIPS_CP0_32(11, 0)
+#define KVM_REG_MIPS_CP0_STATUS		MIPS_CP0_32(12, 0)
+#define KVM_REG_MIPS_CP0_CAUSE		MIPS_CP0_32(13, 0)
+#define KVM_REG_MIPS_CP0_EPC		MIPS_CP0_64(14, 0)
+#define KVM_REG_MIPS_CP0_EBASE		MIPS_CP0_64(15, 1)
+#define KVM_REG_MIPS_CP0_CONFIG		MIPS_CP0_32(16, 0)
+#define KVM_REG_MIPS_CP0_CONFIG1	MIPS_CP0_32(16, 1)
+#define KVM_REG_MIPS_CP0_CONFIG2	MIPS_CP0_32(16, 2)
+#define KVM_REG_MIPS_CP0_CONFIG3	MIPS_CP0_32(16, 3)
+#define KVM_REG_MIPS_CP0_CONFIG7	MIPS_CP0_32(16, 7)
+#define KVM_REG_MIPS_CP0_XCONTEXT	MIPS_CP0_64(20, 0)
+#define KVM_REG_MIPS_CP0_ERROREPC	MIPS_CP0_64(30, 0)
+
 
 #define KVM_MAX_VCPUS		1
 #define KVM_USER_MEM_SLOTS	8
@@ -372,8 +404,19 @@ struct kvm_vcpu_arch {
 
 	u32 io_gpr;		/* GPR used as IO source/target */
 
-	/* Used to calibrate the virutal count register for the guest */
-	int32_t host_cp0_count;
+	struct hrtimer comparecount_timer;
+	/* Count timer control KVM register */
+	uint32_t count_ctl;
+	/* Count bias from the raw time */
+	uint32_t count_bias;
+	/* Frequency of timer in Hz */
+	uint32_t count_hz;
+	/* Dynamic nanosecond bias (multiple of count_period) to avoid overflow */
+	s64 count_dyn_bias;
+	/* Resume time */
+	ktime_t count_resume;
+	/* Period of timer tick in ns */
+	u64 count_period;
 
 	/* Bitmask of exceptions that are pending */
 	unsigned long pending_exceptions;
@@ -394,8 +437,6 @@ struct kvm_vcpu_arch {
 	uint32_t guest_kernel_asid[NR_CPUS];
 	struct mm_struct guest_kernel_mm, guest_user_mm;
 
-	struct hrtimer comparecount_timer;
-
 	int last_sched_cpu;
 
 	/* WAIT executed */
@@ -410,6 +451,7 @@ struct kvm_vcpu_arch {
 #define kvm_read_c0_guest_context(cop0)		(cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
 #define kvm_write_c0_guest_context(cop0, val)	(cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
 #define kvm_read_c0_guest_userlocal(cop0)	(cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
+#define kvm_write_c0_guest_userlocal(cop0, val)	(cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val))
 #define kvm_read_c0_guest_pagemask(cop0)	(cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
 #define kvm_write_c0_guest_pagemask(cop0, val)	(cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
 #define kvm_read_c0_guest_wired(cop0)		(cop0->reg[MIPS_CP0_TLB_WIRED][0])
@@ -449,15 +491,74 @@ struct kvm_vcpu_arch {
 #define kvm_read_c0_guest_errorepc(cop0)	(cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val)	(cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
 
+/*
+ * Some of the guest registers may be modified asynchronously (e.g. from a
+ * hrtimer callback in hard irq context) and therefore need stronger atomicity
+ * guarantees than other registers.
+ */
+
+static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
+						unsigned long val)
+{
+	unsigned long temp;
+	do {
+		__asm__ __volatile__(
+		"	.set	mips3				\n"
+		"	" __LL "%0, %1				\n"
+		"	or	%0, %2				\n"
+		"	" __SC	"%0, %1				\n"
+		"	.set	mips0				\n"
+		: "=&r" (temp), "+m" (*reg)
+		: "r" (val));
+	} while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
+						  unsigned long val)
+{
+	unsigned long temp;
+	do {
+		__asm__ __volatile__(
+		"	.set	mips3				\n"
+		"	" __LL "%0, %1				\n"
+		"	and	%0, %2				\n"
+		"	" __SC	"%0, %1				\n"
+		"	.set	mips0				\n"
+		: "=&r" (temp), "+m" (*reg)
+		: "r" (~val));
+	} while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
+						   unsigned long change,
+						   unsigned long val)
+{
+	unsigned long temp;
+	do {
+		__asm__ __volatile__(
+		"	.set	mips3				\n"
+		"	" __LL "%0, %1				\n"
+		"	and	%0, %2				\n"
+		"	or	%0, %3				\n"
+		"	" __SC	"%0, %1				\n"
+		"	.set	mips0				\n"
+		: "=&r" (temp), "+m" (*reg)
+		: "r" (~change), "r" (val & change));
+	} while (unlikely(!temp));
+}
+
 #define kvm_set_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] |= (val))
 #define kvm_clear_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
-#define kvm_set_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
-#define kvm_clear_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
+
+/* Cause can be modified asynchronously from hardirq hrtimer callback */
+#define kvm_set_c0_guest_cause(cop0, val)				\
+	_kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
+#define kvm_clear_c0_guest_cause(cop0, val)				\
+	_kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
 #define kvm_change_c0_guest_cause(cop0, change, val)			\
-{									\
-	kvm_clear_c0_guest_cause(cop0, change);				\
-	kvm_set_c0_guest_cause(cop0, ((val) & (change)));		\
-}
+	_kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0],	\
+					change, val)
+
 #define kvm_set_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] |= (val))
 #define kvm_clear_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
 #define kvm_change_c0_guest_ebase(cop0, change, val)			\
@@ -468,29 +569,33 @@ struct kvm_vcpu_arch {
 
 
 struct kvm_mips_callbacks {
-	int (*handle_cop_unusable) (struct kvm_vcpu *vcpu);
-	int (*handle_tlb_mod) (struct kvm_vcpu *vcpu);
-	int (*handle_tlb_ld_miss) (struct kvm_vcpu *vcpu);
-	int (*handle_tlb_st_miss) (struct kvm_vcpu *vcpu);
-	int (*handle_addr_err_st) (struct kvm_vcpu *vcpu);
-	int (*handle_addr_err_ld) (struct kvm_vcpu *vcpu);
-	int (*handle_syscall) (struct kvm_vcpu *vcpu);
-	int (*handle_res_inst) (struct kvm_vcpu *vcpu);
-	int (*handle_break) (struct kvm_vcpu *vcpu);
-	int (*vm_init) (struct kvm *kvm);
-	int (*vcpu_init) (struct kvm_vcpu *vcpu);
-	int (*vcpu_setup) (struct kvm_vcpu *vcpu);
-	 gpa_t(*gva_to_gpa) (gva_t gva);
-	void (*queue_timer_int) (struct kvm_vcpu *vcpu);
-	void (*dequeue_timer_int) (struct kvm_vcpu *vcpu);
-	void (*queue_io_int) (struct kvm_vcpu *vcpu,
-			      struct kvm_mips_interrupt *irq);
-	void (*dequeue_io_int) (struct kvm_vcpu *vcpu,
-				struct kvm_mips_interrupt *irq);
-	int (*irq_deliver) (struct kvm_vcpu *vcpu, unsigned int priority,
-			    uint32_t cause);
-	int (*irq_clear) (struct kvm_vcpu *vcpu, unsigned int priority,
-			  uint32_t cause);
+	int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
+	int (*handle_tlb_mod)(struct kvm_vcpu *vcpu);
+	int (*handle_tlb_ld_miss)(struct kvm_vcpu *vcpu);
+	int (*handle_tlb_st_miss)(struct kvm_vcpu *vcpu);
+	int (*handle_addr_err_st)(struct kvm_vcpu *vcpu);
+	int (*handle_addr_err_ld)(struct kvm_vcpu *vcpu);
+	int (*handle_syscall)(struct kvm_vcpu *vcpu);
+	int (*handle_res_inst)(struct kvm_vcpu *vcpu);
+	int (*handle_break)(struct kvm_vcpu *vcpu);
+	int (*vm_init)(struct kvm *kvm);
+	int (*vcpu_init)(struct kvm_vcpu *vcpu);
+	int (*vcpu_setup)(struct kvm_vcpu *vcpu);
+	gpa_t (*gva_to_gpa)(gva_t gva);
+	void (*queue_timer_int)(struct kvm_vcpu *vcpu);
+	void (*dequeue_timer_int)(struct kvm_vcpu *vcpu);
+	void (*queue_io_int)(struct kvm_vcpu *vcpu,
+			     struct kvm_mips_interrupt *irq);
+	void (*dequeue_io_int)(struct kvm_vcpu *vcpu,
+			       struct kvm_mips_interrupt *irq);
+	int (*irq_deliver)(struct kvm_vcpu *vcpu, unsigned int priority,
+			   uint32_t cause);
+	int (*irq_clear)(struct kvm_vcpu *vcpu, unsigned int priority,
+			 uint32_t cause);
+	int (*get_one_reg)(struct kvm_vcpu *vcpu,
+			   const struct kvm_one_reg *reg, s64 *v);
+	int (*set_one_reg)(struct kvm_vcpu *vcpu,
+			   const struct kvm_one_reg *reg, s64 v);
 };
 };
 extern struct kvm_mips_callbacks *kvm_mips_callbacks;
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
 extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 							 struct kvm_run *run);
 
+uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
+void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare);
+void kvm_mips_init_count(struct kvm_vcpu *vcpu);
+int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
+int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
+int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz);
+void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu);
+void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu);
+enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu);
 
 enum emulation_result kvm_mips_check_privilege(unsigned long cause,
 					       uint32_t *opc,
@@ -646,7 +760,6 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
 			       struct kvm_vcpu *vcpu);
 
 /* Misc */
-extern void mips32_SyncICache(unsigned long addr, unsigned long size);
 extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 

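Illustration (not part of the commit): the three _kvm_atomic_*_c0_guest_reg() helpers added above are LL/SC read-modify-write loops — set ORs bits in, clear ANDs them out, and change rewrites only the bits selected by the mask, i.e. (old & ~change) | (val & change). A portable sketch of the same semantics using C11 atomics, purely for reading along; the kernel itself uses the inline assembly shown in the hunk, and the mask/value constants below are arbitrary.

#include <stdatomic.h>
#include <stdio.h>

static void atomic_set_bits(_Atomic unsigned long *reg, unsigned long val)
{
	atomic_fetch_or(reg, val);		/* like the LL / OR / SC loop */
}

static void atomic_clear_bits(_Atomic unsigned long *reg, unsigned long val)
{
	atomic_fetch_and(reg, ~val);		/* like the LL / AND / SC loop */
}

static void atomic_change_bits(_Atomic unsigned long *reg,
			       unsigned long change, unsigned long val)
{
	unsigned long old = atomic_load(reg), newval;

	do {	/* CAS loop: only bits inside "change" may be altered */
		newval = (old & ~change) | (val & change);
	} while (!atomic_compare_exchange_weak(reg, &old, newval));
}

int main(void)
{
	_Atomic unsigned long cause = 0x00000100;

	atomic_set_bits(&cause, 0x08000000);
	atomic_change_bits(&cause, 0x08800300, 0x00000200);
	atomic_clear_bits(&cause, 0x08000000);
	printf("cause = %#lx\n", (unsigned long)cause);
	return 0;
}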
+ 35 - 0
arch/mips/include/uapi/asm/kvm.h

@@ -106,6 +106,41 @@ struct kvm_fpu {
 #define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
 #define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
 
+/* KVM specific control registers */
+
+/*
+ * CP0_Count control
+ * DC:    Set 0: Master disable CP0_Count and set COUNT_RESUME to now
+ *        Set 1: Master re-enable CP0_Count with unchanged bias, handling timer
+ *               interrupts since COUNT_RESUME
+ *        This can be used to freeze the timer to get a consistent snapshot of
+ *        the CP0_Count and timer interrupt pending state, while also resuming
+ *        safely without losing time or guest timer interrupts.
+ * Other: Reserved, do not change.
+ */
+#define KVM_REG_MIPS_COUNT_CTL		(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
+					 0x20000 | 0)
+#define KVM_REG_MIPS_COUNT_CTL_DC	0x00000001
+
+/*
+ * CP0_Count resume monotonic nanoseconds
+ * The monotonic nanosecond time of the last set of COUNT_CTL.DC (master
+ * disable). Any reads and writes of Count related registers while
+ * COUNT_CTL.DC=1 will appear to occur at this time. When COUNT_CTL.DC is
+ * cleared again (master enable) any timer interrupts since this time will be
+ * emulated.
+ * Modifications to times in the future are rejected.
+ */
+#define KVM_REG_MIPS_COUNT_RESUME	(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
+					 0x20000 | 1)
+/*
+ * CP0_Count rate in Hz
+ * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
+ * discontinuities in CP0_Count.
+ */
+#define KVM_REG_MIPS_COUNT_HZ		(KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
+					 0x20000 | 2)
+
 /*
  * KVM MIPS specific structures and definitions
  *

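Illustration (not part of the commit): the new COUNT_CTL/COUNT_RESUME/COUNT_HZ registers are reached through the ordinary KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Below is a minimal user-space sketch of the freeze-snapshot-resume sequence the COUNT_CTL.DC comment describes, assuming a MIPS host with these uapi headers installed and an already-created vCPU file descriptor; one_reg(), snapshot_timer() and the REG_CP0_* macros are invented for the example (the CP0 ids are not exported in the uapi header, so they are spelled out with the 0x10000 | (8 * reg + sel) encoding from the kvm_host.h hunk above).

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

#define REG_CP0_COUNT   (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * 9))
#define REG_CP0_COMPARE (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * 11))

static int one_reg(int vcpu_fd, unsigned long request, uint64_t id, void *addr)
{
	struct kvm_one_reg reg = { .id = id, .addr = (uintptr_t)addr };
	return ioctl(vcpu_fd, request, &reg);
}

/* Freeze CP0_Count, snapshot it together with CP0_Compare, then resume. */
int snapshot_timer(int vcpu_fd)
{
	uint64_t ctl;
	uint32_t count, compare;

	if (one_reg(vcpu_fd, KVM_GET_ONE_REG, KVM_REG_MIPS_COUNT_CTL, &ctl))
		return -1;

	ctl |= KVM_REG_MIPS_COUNT_CTL_DC;	/* master disable */
	if (one_reg(vcpu_fd, KVM_SET_ONE_REG, KVM_REG_MIPS_COUNT_CTL, &ctl))
		return -1;

	one_reg(vcpu_fd, KVM_GET_ONE_REG, REG_CP0_COUNT, &count);
	one_reg(vcpu_fd, KVM_GET_ONE_REG, REG_CP0_COMPARE, &compare);
	printf("count=%#x compare=%#x\n", count, compare);

	ctl &= ~KVM_REG_MIPS_COUNT_CTL_DC;	/* re-enable; KVM replays any
						 * timer ticks missed while
						 * the count was frozen */
	return one_reg(vcpu_fd, KVM_SET_ONE_REG, KVM_REG_MIPS_COUNT_CTL, &ctl);
}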
+ 0 - 32
arch/mips/kvm/kvm_locore.S

@@ -611,35 +611,3 @@ MIPSX(exceptions):
 	.word _C_LABEL(MIPSX(GuestException))	# 29
 	.word _C_LABEL(MIPSX(GuestException))	# 30
 	.word _C_LABEL(MIPSX(GuestException))	# 31
-
-
-/* This routine makes changes to the instruction stream effective to the hardware.
- * It should be called after the instruction stream is written.
- * On return, the new instructions are effective.
- * Inputs:
- * a0 = Start address of new instruction stream
- * a1 = Size, in bytes, of new instruction stream
- */
-
-#define HW_SYNCI_Step       $1
-LEAF(MIPSX(SyncICache))
-	.set	push
-	.set	mips32r2
-	beq	a1, zero, 20f
-	 nop
-	REG_ADDU a1, a0, a1
-	rdhwr	v0, HW_SYNCI_Step
-	beq	v0, zero, 20f
-	 nop
-10:
-	synci	0(a0)
-	REG_ADDU a0, a0, v0
-	sltu	v1, a0, a1
-	bne	v1, zero, 10b
-	 nop
-	sync
-20:
-	jr.hb	ra
-	 nop
-	.set	pop
-END(MIPSX(SyncICache))

+ 73 - 72
arch/mips/kvm/kvm_mips.c

@@ -61,11 +61,6 @@ static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu)
 	return 0;
 	return 0;
 }
 }
 
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-	return gfn;
-}
-
 /* XXXKYMA: We are simulatoring a processor that has the WII bit set in Config7, so we
 /* XXXKYMA: We are simulatoring a processor that has the WII bit set in Config7, so we
  * are "runnable" if interrupts are pending
  * are "runnable" if interrupts are pending
  */
  */
@@ -130,8 +125,8 @@ static void kvm_mips_init_vm_percpu(void *arg)
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 {
 	if (atomic_inc_return(&kvm_mips_instance) == 1) {
 	if (atomic_inc_return(&kvm_mips_instance) == 1) {
-		kvm_info("%s: 1st KVM instance, setup host TLB parameters\n",
-			 __func__);
+		kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n",
+			  __func__);
 		on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1);
 		on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1);
 	}
 	}
 
 
@@ -149,9 +144,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
 		if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
 		if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
 			kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]);
 			kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]);
 	}
 	}
-
-	if (kvm->arch.guest_pmap)
-		kfree(kvm->arch.guest_pmap);
+	kfree(kvm->arch.guest_pmap);
 
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		kvm_arch_vcpu_free(vcpu);
 		kvm_arch_vcpu_free(vcpu);
@@ -186,8 +179,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 
 
 	/* If this is the last instance, restore wired count */
 	/* If this is the last instance, restore wired count */
 	if (atomic_dec_return(&kvm_mips_instance) == 0) {
 	if (atomic_dec_return(&kvm_mips_instance) == 0) {
-		kvm_info("%s: last KVM instance, restoring TLB parameters\n",
-			 __func__);
+		kvm_debug("%s: last KVM instance, restoring TLB parameters\n",
+			  __func__);
 		on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1);
 		on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1);
 	}
 	}
 }
 }
@@ -249,9 +242,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				goto out;
 				goto out;
 			}
 			}
 
 
-			kvm_info
-			    ("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
-			     npages, kvm->arch.guest_pmap);
+			kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
+				  npages, kvm->arch.guest_pmap);
 
 
 			/* Now setup the page table */
 			/* Now setup the page table */
 			for (i = 0; i < npages; i++) {
 			for (i = 0; i < npages; i++) {
@@ -296,7 +288,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err)
 	if (err)
 		goto out_free_cpu;
 		goto out_free_cpu;
 
 
-	kvm_info("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu);
+	kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu);
 
 
 	/* Allocate space for host mode exception handlers that handle
 	/* Allocate space for host mode exception handlers that handle
 	 * guest mode exits
 	 * guest mode exits
@@ -304,7 +296,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (cpu_has_veic || cpu_has_vint) {
 	if (cpu_has_veic || cpu_has_vint) {
 		size = 0x200 + VECTORSPACING * 64;
 		size = 0x200 + VECTORSPACING * 64;
 	} else {
 	} else {
-		size = 0x200;
+		size = 0x4000;
 	}
 	}
 
 
 	/* Save Linux EBASE */
 	/* Save Linux EBASE */
@@ -316,8 +308,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 		err = -ENOMEM;
 		err = -ENOMEM;
 		goto out_free_cpu;
 		goto out_free_cpu;
 	}
 	}
-	kvm_info("Allocated %d bytes for KVM Exception Handlers @ %p\n",
-		 ALIGN(size, PAGE_SIZE), gebase);
+	kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
+		  ALIGN(size, PAGE_SIZE), gebase);
 
 
 	/* Save new ebase */
 	/* Save new ebase */
 	vcpu->arch.guest_ebase = gebase;
 	vcpu->arch.guest_ebase = gebase;
@@ -342,15 +334,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 
 
 	/* General handler, relocate to unmapped space for sanity's sake */
 	/* General handler, relocate to unmapped space for sanity's sake */
 	offset = 0x2000;
 	offset = 0x2000;
-	kvm_info("Installing KVM Exception handlers @ %p, %#x bytes\n",
-		 gebase + offset,
-		 mips32_GuestExceptionEnd - mips32_GuestException);
+	kvm_debug("Installing KVM Exception handlers @ %p, %#x bytes\n",
+		  gebase + offset,
+		  mips32_GuestExceptionEnd - mips32_GuestException);
 
 
 	memcpy(gebase + offset, mips32_GuestException,
 	memcpy(gebase + offset, mips32_GuestException,
 	       mips32_GuestExceptionEnd - mips32_GuestException);
 	       mips32_GuestExceptionEnd - mips32_GuestException);
 
 
 	/* Invalidate the icache for these ranges */
 	/* Invalidate the icache for these ranges */
-	mips32_SyncICache((unsigned long) gebase, ALIGN(size, PAGE_SIZE));
+	local_flush_icache_range((unsigned long)gebase,
+				(unsigned long)gebase + ALIGN(size, PAGE_SIZE));
 
 
 	/* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */
 	/* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */
 	vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL);
 	vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL);
@@ -360,14 +353,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 		goto out_free_gebase;
 		goto out_free_gebase;
 	}
 	}
 
 
-	kvm_info("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage);
+	kvm_debug("Allocated COMM page @ %p\n", vcpu->arch.kseg0_commpage);
 	kvm_mips_commpage_init(vcpu);
 	kvm_mips_commpage_init(vcpu);
 
 
 	/* Init */
 	/* Init */
 	vcpu->arch.last_sched_cpu = -1;
 	vcpu->arch.last_sched_cpu = -1;
 
 
 	/* Start off the timer */
 	/* Start off the timer */
-	kvm_mips_emulate_count(vcpu);
+	kvm_mips_init_count(vcpu);
 
 
 	return vcpu;
 	return vcpu;
 
 
@@ -389,12 +382,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 
 
 	kvm_mips_dump_stats(vcpu);
 	kvm_mips_dump_stats(vcpu);
 
 
-	if (vcpu->arch.guest_ebase)
-		kfree(vcpu->arch.guest_ebase);
-
-	if (vcpu->arch.kseg0_commpage)
-		kfree(vcpu->arch.kseg0_commpage);
-
+	kfree(vcpu->arch.guest_ebase);
+	kfree(vcpu->arch.kseg0_commpage);
 }
 }
 
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -423,11 +412,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		vcpu->mmio_needed = 0;
 		vcpu->mmio_needed = 0;
 	}
 	}
 
 
+	local_irq_disable();
 	/* Check if we have any exceptions/interrupts pending */
 	/* Check if we have any exceptions/interrupts pending */
 	kvm_mips_deliver_interrupts(vcpu,
 	kvm_mips_deliver_interrupts(vcpu,
 				    kvm_read_c0_guest_cause(vcpu->arch.cop0));
 				    kvm_read_c0_guest_cause(vcpu->arch.cop0));
 
 
-	local_irq_disable();
 	kvm_guest_enter();
 	kvm_guest_enter();
 
 
 	r = __kvm_mips_vcpu_run(run, vcpu);
 	r = __kvm_mips_vcpu_run(run, vcpu);
@@ -490,36 +479,6 @@ kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return -ENOIOCTLCMD;
 	return -ENOIOCTLCMD;
 }
 }
 
 
-#define MIPS_CP0_32(_R, _S)					\
-	(KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
-
-#define MIPS_CP0_64(_R, _S)					\
-	(KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
-
-#define KVM_REG_MIPS_CP0_INDEX		MIPS_CP0_32(0, 0)
-#define KVM_REG_MIPS_CP0_ENTRYLO0	MIPS_CP0_64(2, 0)
-#define KVM_REG_MIPS_CP0_ENTRYLO1	MIPS_CP0_64(3, 0)
-#define KVM_REG_MIPS_CP0_CONTEXT	MIPS_CP0_64(4, 0)
-#define KVM_REG_MIPS_CP0_USERLOCAL	MIPS_CP0_64(4, 2)
-#define KVM_REG_MIPS_CP0_PAGEMASK	MIPS_CP0_32(5, 0)
-#define KVM_REG_MIPS_CP0_PAGEGRAIN	MIPS_CP0_32(5, 1)
-#define KVM_REG_MIPS_CP0_WIRED		MIPS_CP0_32(6, 0)
-#define KVM_REG_MIPS_CP0_HWRENA		MIPS_CP0_32(7, 0)
-#define KVM_REG_MIPS_CP0_BADVADDR	MIPS_CP0_64(8, 0)
-#define KVM_REG_MIPS_CP0_COUNT		MIPS_CP0_32(9, 0)
-#define KVM_REG_MIPS_CP0_ENTRYHI	MIPS_CP0_64(10, 0)
-#define KVM_REG_MIPS_CP0_COMPARE	MIPS_CP0_32(11, 0)
-#define KVM_REG_MIPS_CP0_STATUS		MIPS_CP0_32(12, 0)
-#define KVM_REG_MIPS_CP0_CAUSE		MIPS_CP0_32(13, 0)
-#define KVM_REG_MIPS_CP0_EBASE		MIPS_CP0_64(15, 1)
-#define KVM_REG_MIPS_CP0_CONFIG		MIPS_CP0_32(16, 0)
-#define KVM_REG_MIPS_CP0_CONFIG1	MIPS_CP0_32(16, 1)
-#define KVM_REG_MIPS_CP0_CONFIG2	MIPS_CP0_32(16, 2)
-#define KVM_REG_MIPS_CP0_CONFIG3	MIPS_CP0_32(16, 3)
-#define KVM_REG_MIPS_CP0_CONFIG7	MIPS_CP0_32(16, 7)
-#define KVM_REG_MIPS_CP0_XCONTEXT	MIPS_CP0_64(20, 0)
-#define KVM_REG_MIPS_CP0_ERROREPC	MIPS_CP0_64(30, 0)
-
 static u64 kvm_mips_get_one_regs[] = {
 static u64 kvm_mips_get_one_regs[] = {
 	KVM_REG_MIPS_R0,
 	KVM_REG_MIPS_R0,
 	KVM_REG_MIPS_R1,
 	KVM_REG_MIPS_R1,
@@ -560,25 +519,34 @@ static u64 kvm_mips_get_one_regs[] = {
 
 
 	KVM_REG_MIPS_CP0_INDEX,
 	KVM_REG_MIPS_CP0_INDEX,
 	KVM_REG_MIPS_CP0_CONTEXT,
 	KVM_REG_MIPS_CP0_CONTEXT,
+	KVM_REG_MIPS_CP0_USERLOCAL,
 	KVM_REG_MIPS_CP0_PAGEMASK,
 	KVM_REG_MIPS_CP0_PAGEMASK,
 	KVM_REG_MIPS_CP0_WIRED,
 	KVM_REG_MIPS_CP0_WIRED,
+	KVM_REG_MIPS_CP0_HWRENA,
 	KVM_REG_MIPS_CP0_BADVADDR,
 	KVM_REG_MIPS_CP0_BADVADDR,
+	KVM_REG_MIPS_CP0_COUNT,
 	KVM_REG_MIPS_CP0_ENTRYHI,
 	KVM_REG_MIPS_CP0_ENTRYHI,
+	KVM_REG_MIPS_CP0_COMPARE,
 	KVM_REG_MIPS_CP0_STATUS,
 	KVM_REG_MIPS_CP0_STATUS,
 	KVM_REG_MIPS_CP0_CAUSE,
 	KVM_REG_MIPS_CP0_CAUSE,
-	/* EPC set via kvm_regs, et al. */
+	KVM_REG_MIPS_CP0_EPC,
 	KVM_REG_MIPS_CP0_CONFIG,
 	KVM_REG_MIPS_CP0_CONFIG,
 	KVM_REG_MIPS_CP0_CONFIG1,
 	KVM_REG_MIPS_CP0_CONFIG1,
 	KVM_REG_MIPS_CP0_CONFIG2,
 	KVM_REG_MIPS_CP0_CONFIG2,
 	KVM_REG_MIPS_CP0_CONFIG3,
 	KVM_REG_MIPS_CP0_CONFIG3,
 	KVM_REG_MIPS_CP0_CONFIG7,
 	KVM_REG_MIPS_CP0_CONFIG7,
-	KVM_REG_MIPS_CP0_ERROREPC
+	KVM_REG_MIPS_CP0_ERROREPC,
+
+	KVM_REG_MIPS_COUNT_CTL,
+	KVM_REG_MIPS_COUNT_RESUME,
+	KVM_REG_MIPS_COUNT_HZ,
 };
 };
 
 
 static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 			    const struct kvm_one_reg *reg)
 			    const struct kvm_one_reg *reg)
 {
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	int ret;
 	s64 v;
 	s64 v;
 
 
 	switch (reg->id) {
 	switch (reg->id) {
@@ -601,24 +569,36 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_CONTEXT:
 	case KVM_REG_MIPS_CP0_CONTEXT:
 		v = (long)kvm_read_c0_guest_context(cop0);
 		v = (long)kvm_read_c0_guest_context(cop0);
 		break;
 		break;
+	case KVM_REG_MIPS_CP0_USERLOCAL:
+		v = (long)kvm_read_c0_guest_userlocal(cop0);
+		break;
 	case KVM_REG_MIPS_CP0_PAGEMASK:
 	case KVM_REG_MIPS_CP0_PAGEMASK:
 		v = (long)kvm_read_c0_guest_pagemask(cop0);
 		v = (long)kvm_read_c0_guest_pagemask(cop0);
 		break;
 		break;
 	case KVM_REG_MIPS_CP0_WIRED:
 	case KVM_REG_MIPS_CP0_WIRED:
 		v = (long)kvm_read_c0_guest_wired(cop0);
 		v = (long)kvm_read_c0_guest_wired(cop0);
 		break;
 		break;
+	case KVM_REG_MIPS_CP0_HWRENA:
+		v = (long)kvm_read_c0_guest_hwrena(cop0);
+		break;
 	case KVM_REG_MIPS_CP0_BADVADDR:
 	case KVM_REG_MIPS_CP0_BADVADDR:
 		v = (long)kvm_read_c0_guest_badvaddr(cop0);
 		v = (long)kvm_read_c0_guest_badvaddr(cop0);
 		break;
 		break;
 	case KVM_REG_MIPS_CP0_ENTRYHI:
 	case KVM_REG_MIPS_CP0_ENTRYHI:
 		v = (long)kvm_read_c0_guest_entryhi(cop0);
 		v = (long)kvm_read_c0_guest_entryhi(cop0);
 		break;
 		break;
+	case KVM_REG_MIPS_CP0_COMPARE:
+		v = (long)kvm_read_c0_guest_compare(cop0);
+		break;
 	case KVM_REG_MIPS_CP0_STATUS:
 	case KVM_REG_MIPS_CP0_STATUS:
 		v = (long)kvm_read_c0_guest_status(cop0);
 		v = (long)kvm_read_c0_guest_status(cop0);
 		break;
 		break;
 	case KVM_REG_MIPS_CP0_CAUSE:
 	case KVM_REG_MIPS_CP0_CAUSE:
 		v = (long)kvm_read_c0_guest_cause(cop0);
 		v = (long)kvm_read_c0_guest_cause(cop0);
 		break;
 		break;
+	case KVM_REG_MIPS_CP0_EPC:
+		v = (long)kvm_read_c0_guest_epc(cop0);
+		break;
 	case KVM_REG_MIPS_CP0_ERROREPC:
 	case KVM_REG_MIPS_CP0_ERROREPC:
 		v = (long)kvm_read_c0_guest_errorepc(cop0);
 		v = (long)kvm_read_c0_guest_errorepc(cop0);
 		break;
 		break;
@@ -637,6 +617,15 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_CONFIG7:
 	case KVM_REG_MIPS_CP0_CONFIG7:
 		v = (long)kvm_read_c0_guest_config7(cop0);
 		v = (long)kvm_read_c0_guest_config7(cop0);
 		break;
 		break;
+	/* registers to be handled specially */
+	case KVM_REG_MIPS_CP0_COUNT:
+	case KVM_REG_MIPS_COUNT_CTL:
+	case KVM_REG_MIPS_COUNT_RESUME:
+	case KVM_REG_MIPS_COUNT_HZ:
+		ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v);
+		if (ret)
+			return ret;
+		break;
 	default:
 	default:
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
@@ -697,12 +686,18 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_CONTEXT:
 	case KVM_REG_MIPS_CP0_CONTEXT:
 		kvm_write_c0_guest_context(cop0, v);
 		kvm_write_c0_guest_context(cop0, v);
 		break;
 		break;
+	case KVM_REG_MIPS_CP0_USERLOCAL:
+		kvm_write_c0_guest_userlocal(cop0, v);
+		break;
 	case KVM_REG_MIPS_CP0_PAGEMASK:
 	case KVM_REG_MIPS_CP0_PAGEMASK:
 		kvm_write_c0_guest_pagemask(cop0, v);
 		kvm_write_c0_guest_pagemask(cop0, v);
 		break;
 		break;
 	case KVM_REG_MIPS_CP0_WIRED:
 	case KVM_REG_MIPS_CP0_WIRED:
 		kvm_write_c0_guest_wired(cop0, v);
 		kvm_write_c0_guest_wired(cop0, v);
 		break;
 		break;
+	case KVM_REG_MIPS_CP0_HWRENA:
+		kvm_write_c0_guest_hwrena(cop0, v);
+		break;
 	case KVM_REG_MIPS_CP0_BADVADDR:
 	case KVM_REG_MIPS_CP0_BADVADDR:
 		kvm_write_c0_guest_badvaddr(cop0, v);
 		kvm_write_c0_guest_badvaddr(cop0, v);
 		break;
 		break;
@@ -712,12 +707,20 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_STATUS:
 	case KVM_REG_MIPS_CP0_STATUS:
 		kvm_write_c0_guest_status(cop0, v);
 		kvm_write_c0_guest_status(cop0, v);
 		break;
 		break;
-	case KVM_REG_MIPS_CP0_CAUSE:
-		kvm_write_c0_guest_cause(cop0, v);
+	case KVM_REG_MIPS_CP0_EPC:
+		kvm_write_c0_guest_epc(cop0, v);
 		break;
 		break;
 	case KVM_REG_MIPS_CP0_ERROREPC:
 	case KVM_REG_MIPS_CP0_ERROREPC:
 		kvm_write_c0_guest_errorepc(cop0, v);
 		kvm_write_c0_guest_errorepc(cop0, v);
 		break;
 		break;
+	/* registers to be handled specially */
+	case KVM_REG_MIPS_CP0_COUNT:
+	case KVM_REG_MIPS_CP0_COMPARE:
+	case KVM_REG_MIPS_CP0_CAUSE:
+	case KVM_REG_MIPS_COUNT_CTL:
+	case KVM_REG_MIPS_COUNT_RESUME:
+	case KVM_REG_MIPS_COUNT_HZ:
+		return kvm_mips_callbacks->set_one_reg(vcpu, reg, v);
 	default:
 	default:
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
@@ -920,7 +923,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
 		return -1;
 		return -1;
 
 
 	printk("VCPU Register Dump:\n");
 	printk("VCPU Register Dump:\n");
-	printk("\tpc = 0x%08lx\n", vcpu->arch.pc);;
+	printk("\tpc = 0x%08lx\n", vcpu->arch.pc);
 	printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions);
 	printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions);
 
 
 	for (i = 0; i < 32; i += 4) {
 	for (i = 0; i < 32; i += 4) {
@@ -969,7 +972,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 	return 0;
 }
 }
 
 
-void kvm_mips_comparecount_func(unsigned long data)
+static void kvm_mips_comparecount_func(unsigned long data)
 {
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 
 
@@ -984,15 +987,13 @@ void kvm_mips_comparecount_func(unsigned long data)
 /*
 /*
  * low level hrtimer wake routine.
  * low level hrtimer wake routine.
  */
  */
-enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
+static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
 {
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 
 
 	vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer);
 	vcpu = container_of(timer, struct kvm_vcpu, arch.comparecount_timer);
 	kvm_mips_comparecount_func((unsigned long) vcpu);
 	kvm_mips_comparecount_func((unsigned long) vcpu);
-	hrtimer_forward_now(&vcpu->arch.comparecount_timer,
-			    ktime_set(0, MS_TO_NS(10)));
-	return HRTIMER_RESTART;
+	return kvm_mips_count_timeout(vcpu);
 }
 }
 
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

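Illustration (not part of the commit): the point of growing kvm_mips_get_one_regs[] and routing Count/Compare/Cause plus the COUNT_* controls through the new get_one_reg/set_one_reg callbacks is that user space can now save and restore complete vCPU state, e.g. for migration. A hedged sketch of the save half, assuming the caller already has the list of register ids (for instance via KVM_GET_REG_LIST where supported); save_regs() is invented for the example.

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Save every register named in ids[] into vals[]; 32-bit ids are widened
 * so one u64 array can hold the whole snapshot. */
static int save_regs(int vcpu_fd, const uint64_t *ids, uint64_t *vals, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		uint32_t v32 = 0;
		uint64_t v64 = 0;
		int is32 = (ids[i] & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32;
		struct kvm_one_reg reg = {
			.id   = ids[i],
			.addr = (uintptr_t)(is32 ? (void *)&v32 : (void *)&v64),
		};

		if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
			return -1;
		vals[i] = is32 ? v32 : v64;
	}
	return 0;
}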
+ 9 - 6
arch/mips/kvm/kvm_mips_dyntrans.c

@@ -16,6 +16,7 @@
 #include <linux/vmalloc.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
 #include <linux/bootmem.h>
+#include <asm/cacheflush.h>
 
 
 #include "kvm_mips_comm.h"
 #include "kvm_mips_comm.h"
 
 
@@ -40,7 +41,7 @@ kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
 	    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 	    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 		       (vcpu, (unsigned long) opc));
 		       (vcpu, (unsigned long) opc));
 	memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
 	memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
-	mips32_SyncICache(kseg0_opc, 32);
+	local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
 
 
 	return result;
 	return result;
 }
 }
@@ -66,7 +67,7 @@ kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc,
 	    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 	    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 		       (vcpu, (unsigned long) opc));
 		       (vcpu, (unsigned long) opc));
 	memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
 	memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
-	mips32_SyncICache(kseg0_opc, 32);
+	local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
 
 
 	return result;
 	return result;
 }
 }
@@ -99,11 +100,12 @@ kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
 		    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 		    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 			       (vcpu, (unsigned long) opc));
 			       (vcpu, (unsigned long) opc));
 		memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t));
 		memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t));
-		mips32_SyncICache(kseg0_opc, 32);
+		local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
 	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
 	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
 		local_irq_save(flags);
 		local_irq_save(flags);
 		memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t));
 		memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t));
-		mips32_SyncICache((unsigned long) opc, 32);
+		local_flush_icache_range((unsigned long)opc,
+					 (unsigned long)opc + 32);
 		local_irq_restore(flags);
 		local_irq_restore(flags);
 	} else {
 	} else {
 		kvm_err("%s: Invalid address: %p\n", __func__, opc);
 		kvm_err("%s: Invalid address: %p\n", __func__, opc);
@@ -134,11 +136,12 @@ kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
 		    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 		    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
 			       (vcpu, (unsigned long) opc));
 			       (vcpu, (unsigned long) opc));
 		memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t));
 		memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t));
-		mips32_SyncICache(kseg0_opc, 32);
+		local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
 	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
 	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
 		local_irq_save(flags);
 		local_irq_save(flags);
 		memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t));
 		memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t));
-		mips32_SyncICache((unsigned long) opc, 32);
+		local_flush_icache_range((unsigned long)opc,
+					 (unsigned long)opc + 32);
 		local_irq_restore(flags);
 		local_irq_restore(flags);
 	} else {
 	} else {
 		kvm_err("%s: Invalid address: %p\n", __func__, opc);
 		kvm_err("%s: Invalid address: %p\n", __func__, opc);

+ 532 - 25
arch/mips/kvm/kvm_mips_emul.c

@@ -11,6 +11,7 @@
 
 
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/err.h>
+#include <linux/ktime.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/vmalloc.h>
@@ -228,25 +229,520 @@ enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause)
 	return er;
 	return er;
 }
 }
 
 
-/* Everytime the compare register is written to, we need to decide when to fire
- * the timer that represents timer ticks to the GUEST.
+/**
+ * kvm_mips_count_disabled() - Find whether the CP0_Count timer is disabled.
+ * @vcpu:	Virtual CPU.
  *
  *
+ * Returns:	1 if the CP0_Count timer is disabled by either the guest
+ *		CP0_Cause.DC bit or the count_ctl.DC bit.
+ *		0 otherwise (in which case CP0_Count timer is running).
  */
  */
-enum emulation_result kvm_mips_emulate_count(struct kvm_vcpu *vcpu)
+static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
 {
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	enum emulation_result er = EMULATE_DONE;
+	return	(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) ||
+		(kvm_read_c0_guest_cause(cop0) & CAUSEF_DC);
+}
+
+/**
+ * kvm_mips_ktime_to_count() - Scale ktime_t to a 32-bit count.
+ *
+ * Caches the dynamic nanosecond bias in vcpu->arch.count_dyn_bias.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ */
+static uint32_t kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now)
+{
+	s64 now_ns, periods;
+	u64 delta;
+
+	now_ns = ktime_to_ns(now);
+	delta = now_ns + vcpu->arch.count_dyn_bias;
+
+	if (delta >= vcpu->arch.count_period) {
+		/* If delta is out of safe range the bias needs adjusting */
+		periods = div64_s64(now_ns, vcpu->arch.count_period);
+		vcpu->arch.count_dyn_bias = -periods * vcpu->arch.count_period;
+		/* Recalculate delta with new bias */
+		delta = now_ns + vcpu->arch.count_dyn_bias;
+	}
+
+	/*
+	 * We've ensured that:
+	 *   delta < count_period
+	 *
+	 * Therefore the intermediate delta*count_hz will never overflow since
+	 * at the boundary condition:
+	 *   delta = count_period
+	 *   delta = NSEC_PER_SEC * 2^32 / count_hz
+	 *   delta * count_hz = NSEC_PER_SEC * 2^32
+	 */
+	return div_u64(delta * vcpu->arch.count_hz, NSEC_PER_SEC);
+}
+
+/**
+ * kvm_mips_count_time() - Get effective current time.
+ * @vcpu:	Virtual CPU.
+ *
+ * Get effective monotonic ktime. This is usually a straightforward ktime_get(),
+ * except when the master disable bit is set in count_ctl, in which case it is
+ * count_resume, i.e. the time that the count was disabled.
+ *
+ * Returns:	Effective monotonic ktime for CP0_Count.
+ */
+static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC))
+		return vcpu->arch.count_resume;
+
+	return ktime_get();
+}
+
+/**
+ * kvm_mips_read_count_running() - Read the current count value as if running.
+ * @vcpu:	Virtual CPU.
+ * @now:	Kernel time to read CP0_Count at.
+ *
+ * Returns the current guest CP0_Count register at time @now and handles if the
+ * timer interrupt is pending and hasn't been handled yet.
+ *
+ * Returns:	The current value of the guest CP0_Count register.
+ */
+static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
+{
+	ktime_t expires;
+	int running;
+
+	/* Is the hrtimer pending? */
+	expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer);
+	if (ktime_compare(now, expires) >= 0) {
+		/*
+		 * Cancel it while we handle it so there's no chance of
+		 * interference with the timeout handler.
+		 */
+		running = hrtimer_cancel(&vcpu->arch.comparecount_timer);
+
+		/* Nothing should be waiting on the timeout */
+		kvm_mips_callbacks->queue_timer_int(vcpu);
+
+		/*
+		 * Restart the timer if it was running based on the expiry time
+		 * we read, so that we don't push it back 2 periods.
+		 */
+		if (running) {
+			expires = ktime_add_ns(expires,
+					       vcpu->arch.count_period);
+			hrtimer_start(&vcpu->arch.comparecount_timer, expires,
+				      HRTIMER_MODE_ABS);
+		}
+	}
+
+	/* Return the biased and scaled guest CP0_Count */
+	return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+}
+
+/**
+ * kvm_mips_read_count() - Read the current count value.
+ * @vcpu:	Virtual CPU.
+ *
+ * Read the current guest CP0_Count value, taking into account whether the timer
+ * is stopped.
+ *
+ * Returns:	The current guest CP0_Count value.
+ */
+uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+	/* If count disabled just read static copy of count */
+	if (kvm_mips_count_disabled(vcpu))
+		return kvm_read_c0_guest_count(cop0);
+
+	return kvm_mips_read_count_running(vcpu, ktime_get());
+}
+
+/**
+ * kvm_mips_freeze_hrtimer() - Safely stop the hrtimer.
+ * @vcpu:	Virtual CPU.
+ * @count:	Output pointer for CP0_Count value at point of freeze.
+ *
+ * Freeze the hrtimer safely and return both the ktime and the CP0_Count value
+ * at the point it was frozen. It is guaranteed that any pending interrupts at
+ * the point it was frozen are handled, and none after that point.
+ *
+ * This is useful where the time/CP0_Count is needed in the calculation of the
+ * new parameters.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ *
+ * Returns:	The ktime at the point of freeze.
+ */
+static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu,
+				       uint32_t *count)
+{
+	ktime_t now;
+
+	/* stop hrtimer before finding time */
+	hrtimer_cancel(&vcpu->arch.comparecount_timer);
+	now = ktime_get();
+
+	/* find count at this point and handle pending hrtimer */
+	*count = kvm_mips_read_count_running(vcpu, now);
+
+	return now;
+}
+
 
 
-	/* If COUNT is enabled */
-	if (!(kvm_read_c0_guest_cause(cop0) & CAUSEF_DC)) {
-		hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer);
-		hrtimer_start(&vcpu->arch.comparecount_timer,
-			      ktime_set(0, MS_TO_NS(10)), HRTIMER_MODE_REL);
+/**
+ * kvm_mips_resume_hrtimer() - Resume hrtimer, updating expiry.
+ * @vcpu:	Virtual CPU.
+ * @now:	ktime at point of resume.
+ * @count:	CP0_Count at point of resume.
+ *
+ * Resumes the timer and updates the timer expiry based on @now and @count.
+ * This can be used in conjunction with kvm_mips_freeze_timer() when timer
+ * parameters need to be changed.
+ *
+ * It is guaranteed that a timer interrupt immediately after resume will be
+ * handled, but not if CP_Compare is exactly at @count. That case is already
+ * handled by kvm_mips_freeze_timer().
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ */
+static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
+				    ktime_t now, uint32_t count)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	uint32_t compare;
+	u64 delta;
+	ktime_t expire;
+
+	/* Calculate timeout (wrap 0 to 2^32) */
+	compare = kvm_read_c0_guest_compare(cop0);
+	delta = (u64)(uint32_t)(compare - count - 1) + 1;
+	delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
+	expire = ktime_add_ns(now, delta);
+
+	/* Update hrtimer to use new timeout */
+	hrtimer_cancel(&vcpu->arch.comparecount_timer);
+	hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS);
+}
+
+/**
+ * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer.
+ * @vcpu:	Virtual CPU.
+ *
+ * Recalculates and updates the expiry time of the hrtimer. This can be used
+ * after timer parameters have been altered which do not depend on the time that
+ * the change occurs (in those cases kvm_mips_freeze_hrtimer() and
+ * kvm_mips_resume_hrtimer() are used directly).
+ *
+ * It is guaranteed that no timer interrupts will be lost in the process.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+ */
+static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu)
+{
+	ktime_t now;
+	uint32_t count;
+
+	/*
+	 * freeze_hrtimer takes care of a timer interrupts <= count, and
+	 * resume_hrtimer the hrtimer takes care of a timer interrupts > count.
+	 */
+	now = kvm_mips_freeze_hrtimer(vcpu, &count);
+	kvm_mips_resume_hrtimer(vcpu, now, count);
+}
+
+/**
+ * kvm_mips_write_count() - Modify the count and update timer.
+ * @vcpu:	Virtual CPU.
+ * @count:	Guest CP0_Count value to set.
+ *
+ * Sets the CP0_Count value and updates the timer accordingly.
+ */
+void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	ktime_t now;
+
+	/* Calculate bias */
+	now = kvm_mips_count_time(vcpu);
+	vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now);
+
+	if (kvm_mips_count_disabled(vcpu))
+		/* The timer's disabled, adjust the static count */
+		kvm_write_c0_guest_count(cop0, count);
+	else
+		/* Update timeout */
+		kvm_mips_resume_hrtimer(vcpu, now, count);
+}
+
+/**
+ * kvm_mips_init_count() - Initialise timer.
+ * @vcpu:	Virtual CPU.
+ *
+ * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set
+ * it going if it's enabled.
+ */
+void kvm_mips_init_count(struct kvm_vcpu *vcpu)
+{
+	/* 100 MHz */
+	vcpu->arch.count_hz = 100*1000*1000;
+	vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32,
+					  vcpu->arch.count_hz);
+	vcpu->arch.count_dyn_bias = 0;
+
+	/* Starting at 0 */
+	kvm_mips_write_count(vcpu, 0);
+}
+
+/**
+ * kvm_mips_set_count_hz() - Update the frequency of the timer.
+ * @vcpu:	Virtual CPU.
+ * @count_hz:	Frequency of CP0_Count timer in Hz.
+ *
+ * Change the frequency of the CP0_Count timer. This is done atomically so that
+ * CP0_Count is continuous and no timer interrupt is lost.
+ *
+ * Returns:	-EINVAL if @count_hz is out of range.
+ *		0 on success.
+ */
+int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	int dc;
+	ktime_t now;
+	u32 count;
+
+	/* ensure the frequency is in a sensible range... */
+	if (count_hz <= 0 || count_hz > NSEC_PER_SEC)
+		return -EINVAL;
+	/* ... and has actually changed */
+	if (vcpu->arch.count_hz == count_hz)
+		return 0;
+
+	/* Safely freeze timer so we can keep it continuous */
+	dc = kvm_mips_count_disabled(vcpu);
+	if (dc) {
+		now = kvm_mips_count_time(vcpu);
+		count = kvm_read_c0_guest_count(cop0);
 	} else {
 	} else {
-		hrtimer_try_to_cancel(&vcpu->arch.comparecount_timer);
+		now = kvm_mips_freeze_hrtimer(vcpu, &count);
 	}
 	}
 
 
-	return er;
+	/* Update the frequency */
+	vcpu->arch.count_hz = count_hz;
+	vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz);
+	vcpu->arch.count_dyn_bias = 0;
+
+	/* Calculate adjusted bias so dynamic count is unchanged */
+	vcpu->arch.count_bias = count - kvm_mips_ktime_to_count(vcpu, now);
+
+	/* Update and resume hrtimer */
+	if (!dc)
+		kvm_mips_resume_hrtimer(vcpu, now, count);
+	return 0;
+}
+
+/**
+ * kvm_mips_write_compare() - Modify compare and update timer.
+ * @vcpu:	Virtual CPU.
+ * @compare:	New CP0_Compare value.
+ *
+ * Update CP0_Compare to a new value and update the timeout.
+ */
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+	/* if unchanged, must just be an ack */
+	if (kvm_read_c0_guest_compare(cop0) == compare)
+		return;
+
+	/* Update compare */
+	kvm_write_c0_guest_compare(cop0, compare);
+
+	/* Update timeout if count enabled */
+	if (!kvm_mips_count_disabled(vcpu))
+		kvm_mips_update_hrtimer(vcpu);
+}
+
+/**
+ * kvm_mips_count_disable() - Disable count.
+ * @vcpu:	Virtual CPU.
+ *
+ * Disable the CP0_Count timer. A timer interrupt on or before the final stop
+ * time will be handled but not after.
+ *
+ * Assumes CP0_Count was previously enabled but now Guest.CP0_Cause.DC or
+ * count_ctl.DC has been set (count disabled).
+ *
+ * Returns:	The time that the timer was stopped.
+ */
+static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	uint32_t count;
+	ktime_t now;
+
+	/* Stop hrtimer */
+	hrtimer_cancel(&vcpu->arch.comparecount_timer);
+
+	/* Set the static count from the dynamic count, handling pending TI */
+	now = ktime_get();
+	count = kvm_mips_read_count_running(vcpu, now);
+	kvm_write_c0_guest_count(cop0, count);
+
+	return now;
+}
+
+/**
+ * kvm_mips_count_disable_cause() - Disable count using CP0_Cause.DC.
+ * @vcpu:	Virtual CPU.
+ *
+ * Disable the CP0_Count timer and set CP0_Cause.DC. A timer interrupt on or
+ * before the final stop time will be handled if the timer isn't disabled by
+ * count_ctl.DC, but not after.
+ *
+ * Assumes CP0_Cause.DC is clear (count enabled).
+ */
+void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+	kvm_set_c0_guest_cause(cop0, CAUSEF_DC);
+	if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC))
+		kvm_mips_count_disable(vcpu);
+}
+
+/**
+ * kvm_mips_count_enable_cause() - Enable count using CP0_Cause.DC.
+ * @vcpu:	Virtual CPU.
+ *
+ * Enable the CP0_Count timer and clear CP0_Cause.DC. A timer interrupt after
+ * the start time will be handled if the timer isn't disabled by count_ctl.DC,
+ * potentially before even returning, so the caller should be careful with
+ * ordering of CP0_Cause modifications so as not to lose it.
+ *
+ * Assumes CP0_Cause.DC is set (count disabled).
+ */
+void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	uint32_t count;
+
+	kvm_clear_c0_guest_cause(cop0, CAUSEF_DC);
+
+	/*
+	 * Set the dynamic count to match the static count.
+	 * This starts the hrtimer if count_ctl.DC allows it.
+	 * Otherwise it conveniently updates the biases.
+	 */
+	count = kvm_read_c0_guest_count(cop0);
+	kvm_mips_write_count(vcpu, count);
+}
+
+/**
+ * kvm_mips_set_count_ctl() - Update the count control KVM register.
+ * @vcpu:	Virtual CPU.
+ * @count_ctl:	Count control register new value.
+ *
+ * Set the count control KVM register. The timer is updated accordingly.
+ *
+ * Returns:	-EINVAL if reserved bits are set.
+ *		0 on success.
+ */
+int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	s64 changed = count_ctl ^ vcpu->arch.count_ctl;
+	s64 delta;
+	ktime_t expire, now;
+	uint32_t count, compare;
+
+	/* Only allow defined bits to be changed */
+	if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC))
+		return -EINVAL;
+
+	/* Apply new value */
+	vcpu->arch.count_ctl = count_ctl;
+
+	/* Master CP0_Count disable */
+	if (changed & KVM_REG_MIPS_COUNT_CTL_DC) {
+		/* Is CP0_Cause.DC already disabling CP0_Count? */
+		if (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC) {
+			if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)
+				/* Just record the current time */
+				vcpu->arch.count_resume = ktime_get();
+		} else if (count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) {
+			/* disable timer and record current time */
+			vcpu->arch.count_resume = kvm_mips_count_disable(vcpu);
+		} else {
+			/*
+			 * Calculate timeout relative to static count at resume
+			 * time (wrap 0 to 2^32).
+			 */
+			count = kvm_read_c0_guest_count(cop0);
+			compare = kvm_read_c0_guest_compare(cop0);
+			delta = (u64)(uint32_t)(compare - count - 1) + 1;
+			delta = div_u64(delta * NSEC_PER_SEC,
+					vcpu->arch.count_hz);
+			expire = ktime_add_ns(vcpu->arch.count_resume, delta);
+
+			/* Handle pending interrupt */
+			now = ktime_get();
+			if (ktime_compare(now, expire) >= 0)
+				/* Nothing should be waiting on the timeout */
+				kvm_mips_callbacks->queue_timer_int(vcpu);
+
+			/* Resume hrtimer without changing bias */
+			count = kvm_mips_read_count_running(vcpu, now);
+			kvm_mips_resume_hrtimer(vcpu, now, count);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * kvm_mips_set_count_resume() - Update the count resume KVM register.
+ * @vcpu:		Virtual CPU.
+ * @count_resume:	Count resume register new value.
+ *
+ * Set the count resume KVM register.
+ *
+ * Returns:	-EINVAL if out of valid range (0..now).
+ *		0 on success.
+ */
+int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume)
+{
+	/*
+	 * It doesn't make sense for the resume time to be in the future, as it
+	 * would be possible for the next interrupt to be more than a full
+	 * period in the future.
+	 */
+	if (count_resume < 0 || count_resume > ktime_to_ns(ktime_get()))
+		return -EINVAL;
+
+	vcpu->arch.count_resume = ns_to_ktime(count_resume);
+	return 0;
+}
+
+/**
+ * kvm_mips_count_timeout() - Push timer forward on timeout.
+ * @vcpu:	Virtual CPU.
+ *
+ * Handle an hrtimer event by push the hrtimer forward a period.
+ *
+ * Returns:	The hrtimer_restart value to return to the hrtimer subsystem.
+ */
+enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu)
+{
+	/* Add the Count period to the current expiry time */
+	hrtimer_add_expires_ns(&vcpu->arch.comparecount_timer,
+			       vcpu->arch.count_period);
+	return HRTIMER_RESTART;
 }
 }
 
 
 enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
 enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
@@ -471,8 +967,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
 #endif
 #endif
 			/* Get reg */
 			/* Get reg */
 			if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
 			if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
-				/* XXXKYMA: Run the Guest count register @ 1/4 the rate of the host */
-				vcpu->arch.gprs[rt] = (read_c0_count() >> 2);
+				vcpu->arch.gprs[rt] = kvm_mips_read_count(vcpu);
 			} else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) {
 			} else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) {
 				vcpu->arch.gprs[rt] = 0x0;
 				vcpu->arch.gprs[rt] = 0x0;
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -539,10 +1034,7 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
 			}
 			}
 			/* Are we writing to COUNT */
 			/* Are we writing to COUNT */
 			else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
 			else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
-				/* Linux doesn't seem to write into COUNT, we throw an error
-				 * if we notice a write to COUNT
-				 */
-				/*er = EMULATE_FAIL; */
+				kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
 				goto done;
 				goto done;
 			} else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) {
 			} else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) {
 				kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n",
 				kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n",
@@ -552,8 +1044,8 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
 				/* If we are writing to COMPARE */
 				/* If we are writing to COMPARE */
 				/* Clear pending timer interrupt, if any */
 				/* Clear pending timer interrupt, if any */
 				kvm_mips_callbacks->dequeue_timer_int(vcpu);
 				kvm_mips_callbacks->dequeue_timer_int(vcpu);
-				kvm_write_c0_guest_compare(cop0,
-							   vcpu->arch.gprs[rt]);
+				kvm_mips_write_compare(vcpu,
+						       vcpu->arch.gprs[rt]);
 			} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
 			} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
 				kvm_write_c0_guest_status(cop0,
 				kvm_write_c0_guest_status(cop0,
 							  vcpu->arch.gprs[rt]);
 							  vcpu->arch.gprs[rt]);
@@ -564,6 +1056,20 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
 				kvm_mips_trans_mtc0(inst, opc, vcpu);
 				kvm_mips_trans_mtc0(inst, opc, vcpu);
 #endif
 #endif
+			} else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
+				uint32_t old_cause, new_cause;
+				old_cause = kvm_read_c0_guest_cause(cop0);
+				new_cause = vcpu->arch.gprs[rt];
+				/* Update R/W bits */
+				kvm_change_c0_guest_cause(cop0, 0x08800300,
+							  new_cause);
+				/* DC bit enabling/disabling timer? */
+				if ((old_cause ^ new_cause) & CAUSEF_DC) {
+					if (new_cause & CAUSEF_DC)
+						kvm_mips_count_disable_cause(vcpu);
+					else
+						kvm_mips_count_enable_cause(vcpu);
+				}
 			} else {
 			} else {
 				cop0->reg[rd][sel] = vcpu->arch.gprs[rt];
 				cop0->reg[rd][sel] = vcpu->arch.gprs[rt];
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -887,7 +1393,7 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
 
 
 	printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa));
 	printk("%s: va: %#lx, unmapped: %#x\n", __func__, va, CKSEG0ADDR(pa));
 
 
-	mips32_SyncICache(CKSEG0ADDR(pa), 32);
+	local_flush_icache_range(CKSEG0ADDR(pa), 32);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -1325,8 +1831,12 @@ kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc,
		       struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	enum emulation_result er = EMULATE_DONE;
-
#ifdef DEBUG
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
+				(kvm_read_c0_guest_entryhi(cop0) & ASID_MASK);
+	int index;
+
	/*
	 * If address not in the guest TLB, then we are in trouble
	 */
@@ -1553,8 +2063,7 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
					     current_cpu_data.icache.linesz);
			break;
		case 2:	/* Read count register */
-			printk("RDHWR: Cont register\n");
-			arch->gprs[rt] = kvm_read_c0_guest_count(cop0);
+			arch->gprs[rt] = kvm_mips_read_count(vcpu);
			break;
		case 3:	/* Count register resolution */
			switch (current_cpu_data.cputype) {
@@ -1810,11 +2319,9 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc,
				er = EMULATE_FAIL;
			}
		} else {
-#ifdef DEBUG
			kvm_debug
			    ("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n",
			     tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1);
-#endif
			/* OK we have a Guest TLB entry, now inject it into the shadow host TLB */
			kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL,
							     NULL);

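The hunks above fold the old "run Count at 1/4 host rate" hack into the new kvm_mips_read_count()/kvm_mips_write_count() helpers and wire CP0_Cause.DC up to kvm_mips_count_disable_cause()/kvm_mips_count_enable_cause(). A minimal guest-side sketch (not part of this patch) of the sequence that exercises that MTC0-to-CP0_Cause path, using the standard MIPS CP0 accessors:

	/* Illustrative only: guest code that stops and restarts its CP0_Count.
	 * The host traps the MTC0 to CP0_Cause and, because CAUSEF_DC toggles,
	 * pauses or resumes the virtualised timer as emulated above. */
	static inline void guest_pause_count(void)
	{
		set_c0_cause(CAUSEF_DC);	/* host side: kvm_mips_count_disable_cause() */
	}

	static inline void guest_resume_count(void)
	{
		clear_c0_cause(CAUSEF_DC);	/* host side: kvm_mips_count_enable_cause() */
	}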
+ 40 - 37
arch/mips/kvm/kvm_tlb.c

@@ -222,26 +222,19 @@ kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
		return -1;
	}

-	if (idx < 0) {
-		idx = read_c0_random() % current_cpu_data.tlbsize;
-		write_c0_index(idx);
-		mtc0_tlbw_hazard();
-	}
	write_c0_entrylo0(entrylo0);
	write_c0_entrylo1(entrylo1);
	mtc0_tlbw_hazard();

-	tlb_write_indexed();
+	if (idx < 0)
+		tlb_write_random();
+	else
+		tlb_write_indexed();
	tlbw_use_hazard();

-#ifdef DEBUG
-	if (debug) {
-		kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] "
-			  "entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
-			  vcpu->arch.pc, idx, read_c0_entryhi(),
-			  read_c0_entrylo0(), read_c0_entrylo1());
-	}
-#endif
+	kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
+		  vcpu->arch.pc, idx, read_c0_entryhi(),
+		  read_c0_entrylo0(), read_c0_entrylo1());

	/* Flush D-cache */
	if (flush_dcache_mask) {
@@ -348,11 +341,9 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
	mtc0_tlbw_hazard();
	tlbw_use_hazard();

-#ifdef DEBUG
	kvm_debug ("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n",
	     vcpu->arch.pc, read_c0_index(), read_c0_entryhi(),
	     read_c0_entrylo0(), read_c0_entrylo1());
-#endif

	/* Restore old ASID */
	write_c0_entryhi(old_entryhi);
@@ -400,10 +391,8 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
			(tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V);

-#ifdef DEBUG
	kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
		  tlb->tlb_lo0, tlb->tlb_lo1);
-#endif

	return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
				       tlb->tlb_mask);
@@ -424,10 +413,8 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
		}
	}

-#ifdef DEBUG
	kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n",
		  __func__, entryhi, index, tlb[i].tlb_lo0, tlb[i].tlb_lo1);
-#endif

	return index;
}
@@ -461,9 +448,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)

	local_irq_restore(flags);

-#ifdef DEBUG
	kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx);
-#endif

	return idx;
}
@@ -508,12 +493,9 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)

	local_irq_restore(flags);

-#ifdef DEBUG
-	if (idx > 0) {
+	if (idx > 0)
		kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__,
-			  (va & VPN2_MASK) | (vcpu->arch.asid_map[va & ASID_MASK] & ASID_MASK), idx);
-	}
-#endif
+			  (va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx);

	return 0;
}
@@ -658,15 +640,30 @@ void kvm_local_flush_tlb_all(void)
	local_irq_restore(flags);
}

+/**
+ * kvm_mips_migrate_count() - Migrate timer.
+ * @vcpu:	Virtual CPU.
+ *
+ * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
+ * if it was running prior to being cancelled.
+ *
+ * Must be called when the VCPU is migrated to a different CPU to ensure that
+ * timer expiry during guest execution interrupts the guest and causes the
+ * interrupt to be delivered in a timely manner.
+ */
+static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
+{
+	if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
+		hrtimer_restart(&vcpu->arch.comparecount_timer);
+}
+
/* Restore ASID once we are scheduled back after preemption */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	unsigned long flags;
	int newasid = 0;

-#ifdef DEBUG
	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
-#endif

	/* Alocate new kernel and user ASIDs if needed */

@@ -682,17 +679,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
		    vcpu->arch.guest_user_mm.context.asid[cpu];
		newasid++;

-		kvm_info("[%d]: cpu_context: %#lx\n", cpu,
-			 cpu_context(cpu, current->mm));
-		kvm_info("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
-			 cpu, vcpu->arch.guest_kernel_asid[cpu]);
-		kvm_info("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
-			 vcpu->arch.guest_user_asid[cpu]);
+		kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
+			  cpu_context(cpu, current->mm));
+		kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
+			  cpu, vcpu->arch.guest_kernel_asid[cpu]);
+		kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
+			  vcpu->arch.guest_user_asid[cpu]);
	}

	if (vcpu->arch.last_sched_cpu != cpu) {
-		kvm_info("[%d->%d]KVM VCPU[%d] switch\n",
-			 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
+		kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
+			  vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
+		/*
+		 * Migrate the timer interrupt to the current CPU so that it
+		 * always interrupts the guest and synchronously triggers a
+		 * guest timer interrupt.
+		 */
+		kvm_mips_migrate_count(vcpu);
	}

	if (!newasid) {

+ 74 - 12
arch/mips/kvm/kvm_trap_emul.c

@@ -32,9 +32,7 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
		gpa = KVM_INVALID_ADDR;
	}

-#ifdef DEBUG
	kvm_debug("%s: gva %#lx, gpa: %#llx\n", __func__, gva, gpa);
-#endif

	return gpa;
}
@@ -85,11 +83,9 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)

	if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
	    || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-#ifdef DEBUG
		kvm_debug
		    ("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
		     cause, opc, badvaddr);
-#endif
		er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu);

		if (er == EMULATE_DONE)
@@ -138,11 +134,9 @@ static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
		}
	} else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
		   || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-#ifdef DEBUG
		kvm_debug
		    ("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
		     cause, opc, badvaddr);
-#endif
		er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu);
		if (er == EMULATE_DONE)
			ret = RESUME_GUEST;
@@ -188,10 +182,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
		}
	} else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
		   || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-#ifdef DEBUG
		kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n",
			  vcpu->arch.pc, badvaddr);
-#endif

		/* User Address (UA) fault, this could happen if
		 * (1) TLB entry not present/valid in both Guest and shadow host TLBs, in this
@@ -236,9 +228,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)

	if (KVM_GUEST_KERNEL_MODE(vcpu)
	    && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
-#ifdef DEBUG
		kvm_debug("Emulate Store to MMIO space\n");
-#endif
		er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
		if (er == EMULATE_FAIL) {
			printk("Emulate Store to MMIO space failed\n");
@@ -268,9 +258,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
	int ret = RESUME_GUEST;

	if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) {
-#ifdef DEBUG
		kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr);
-#endif
		er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
		if (er == EMULATE_FAIL) {
			printk("Emulate Load from MMIO space failed\n");
@@ -401,6 +389,78 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
	return 0;
}

+static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
+				     const struct kvm_one_reg *reg,
+				     s64 *v)
+{
+	switch (reg->id) {
+	case KVM_REG_MIPS_CP0_COUNT:
+		*v = kvm_mips_read_count(vcpu);
+		break;
+	case KVM_REG_MIPS_COUNT_CTL:
+		*v = vcpu->arch.count_ctl;
+		break;
+	case KVM_REG_MIPS_COUNT_RESUME:
+		*v = ktime_to_ns(vcpu->arch.count_resume);
+		break;
+	case KVM_REG_MIPS_COUNT_HZ:
+		*v = vcpu->arch.count_hz;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
+				     const struct kvm_one_reg *reg,
+				     s64 v)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	int ret = 0;
+
+	switch (reg->id) {
+	case KVM_REG_MIPS_CP0_COUNT:
+		kvm_mips_write_count(vcpu, v);
+		break;
+	case KVM_REG_MIPS_CP0_COMPARE:
+		kvm_mips_write_compare(vcpu, v);
+		break;
+	case KVM_REG_MIPS_CP0_CAUSE:
+		/*
+		 * If the timer is stopped or started (DC bit) it must look
+		 * atomic with changes to the interrupt pending bits (TI, IRQ5).
+		 * A timer interrupt should not happen in between.
+		 */
+		if ((kvm_read_c0_guest_cause(cop0) ^ v) & CAUSEF_DC) {
+			if (v & CAUSEF_DC) {
+				/* disable timer first */
+				kvm_mips_count_disable_cause(vcpu);
+				kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+			} else {
+				/* enable timer last */
+				kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+				kvm_mips_count_enable_cause(vcpu);
+			}
+		} else {
+			kvm_write_c0_guest_cause(cop0, v);
+		}
+		break;
+	case KVM_REG_MIPS_COUNT_CTL:
+		ret = kvm_mips_set_count_ctl(vcpu, v);
+		break;
+	case KVM_REG_MIPS_COUNT_RESUME:
+		ret = kvm_mips_set_count_resume(vcpu, v);
+		break;
+	case KVM_REG_MIPS_COUNT_HZ:
+		ret = kvm_mips_set_count_hz(vcpu, v);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return ret;
+}
+
static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
	/* exit handlers */
	.handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -423,6 +483,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
	.dequeue_io_int = kvm_mips_dequeue_io_int_cb,
	.irq_deliver = kvm_mips_irq_deliver_cb,
	.irq_clear = kvm_mips_irq_clear_cb,
+	.get_one_reg = kvm_trap_emul_get_one_reg,
+	.set_one_reg = kvm_trap_emul_set_one_reg,
};

int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)

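The new get_one_reg/set_one_reg callbacks expose the virtualised timer state (CP0_Count, COUNT_CTL, COUNT_RESUME, COUNT_HZ) through the generic ONE_REG ioctls, which is what lets userspace save and restore the timer across migration. A hedged userspace sketch, assuming the matching KVM_REG_MIPS_* ID definitions from the uapi headers added elsewhere in this series:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Set the guest timer frequency; the ioctl is routed to
	 * kvm_trap_emul_set_one_reg() above via the kvm_mips_callbacks table. */
	static int set_count_hz(int vcpu_fd, uint64_t hz)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_MIPS_COUNT_HZ,	/* assumed ID from this series */
			.addr = (uintptr_t)&hz,
		};
		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}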
+ 1 - 0
arch/mips/mm/cache.c

@@ -31,6 +31,7 @@ void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
void (*flush_icache_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(flush_icache_range);
void (*local_flush_icache_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(local_flush_icache_range);

void (*__flush_cache_vmap)(void);
void (*__flush_cache_vunmap)(void);

+ 2 - 12
arch/mips/mti-malta/malta-time.c

@@ -74,18 +74,8 @@ static void __init estimate_frequencies(void)
	unsigned int giccount = 0, gicstart = 0;
#endif

-#if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ)
-	unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK);
-
-	/*
-	 * XXXKYMA: hardwire the CPU frequency to Host Freq/4
-	 */
-	count = (CONFIG_KVM_HOST_FREQ * 1000000) >> 3;
-	if ((prid != (PRID_COMP_MIPS | PRID_IMP_20KC)) &&
-	    (prid != (PRID_COMP_MIPS | PRID_IMP_25KF)))
-		count *= 2;
-
-	mips_hpt_frequency = count;
+#if defined(CONFIG_KVM_GUEST) && CONFIG_KVM_GUEST_TIMER_FREQ
+	mips_hpt_frequency = CONFIG_KVM_GUEST_TIMER_FREQ * 1000000;
	return;
#endif


+ 34 - 0
arch/powerpc/include/asm/disassemble.h

@@ -81,4 +81,38 @@ static inline unsigned int get_oc(u32 inst)
{
	return (inst >> 11) & 0x7fff;
}
+
+#define IS_XFORM(inst)	(get_op(inst)  == 31)
+#define IS_DSFORM(inst)	(get_op(inst) >= 56)
+
+/*
+ * Create a DSISR value from the instruction
+ */
+static inline unsigned make_dsisr(unsigned instr)
+{
+	unsigned dsisr;
+
+
+	/* bits  6:15 --> 22:31 */
+	dsisr = (instr & 0x03ff0000) >> 16;
+
+	if (IS_XFORM(instr)) {
+		/* bits 29:30 --> 15:16 */
+		dsisr |= (instr & 0x00000006) << 14;
+		/* bit     25 -->    17 */
+		dsisr |= (instr & 0x00000040) << 8;
+		/* bits 21:24 --> 18:21 */
+		dsisr |= (instr & 0x00000780) << 3;
+	} else {
+		/* bit      5 -->    17 */
+		dsisr |= (instr & 0x04000000) >> 12;
+		/* bits  1: 4 --> 18:21 */
+		dsisr |= (instr & 0x78000000) >> 17;
+		/* bits 30:31 --> 12:13 */
+		if (IS_DSFORM(instr))
+			dsisr |= (instr & 0x00000003) << 18;
+	}
+
+	return dsisr;
+}
#endif /* __ASM_PPC_DISASSEMBLE_H__ */

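make_dsisr() now lives in a shared header so that the KVM alignment path and arch/powerpc/kernel/align.c (see the hunk further down, which drops its private copy) use the same instruction-to-DSISR bit shuffle. A tiny usage sketch under that assumption:

	#include <asm/disassemble.h>

	/* Build the DSISR an alignment interrupt would report for 'inst';
	 * X-form and DS-form instructions take the two branches shown above. */
	static unsigned int dsisr_for(u32 inst)
	{
		return make_dsisr(inst);
	}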
+ 10 - 8
arch/powerpc/include/asm/kvm_asm.h

@@ -102,6 +102,7 @@
#define BOOK3S_INTERRUPT_PERFMON	0xf00
#define BOOK3S_INTERRUPT_ALTIVEC	0xf20
#define BOOK3S_INTERRUPT_VSX		0xf40
+#define BOOK3S_INTERRUPT_FAC_UNAVAIL	0xf60
#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL	0xf80

#define BOOK3S_IRQPRIO_SYSTEM_RESET		0
@@ -114,14 +115,15 @@
#define BOOK3S_IRQPRIO_FP_UNAVAIL		7
#define BOOK3S_IRQPRIO_ALTIVEC			8
#define BOOK3S_IRQPRIO_VSX			9
-#define BOOK3S_IRQPRIO_SYSCALL			10
-#define BOOK3S_IRQPRIO_MACHINE_CHECK		11
-#define BOOK3S_IRQPRIO_DEBUG			12
-#define BOOK3S_IRQPRIO_EXTERNAL			13
-#define BOOK3S_IRQPRIO_DECREMENTER		14
-#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR	15
-#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL		16
-#define BOOK3S_IRQPRIO_MAX			17
+#define BOOK3S_IRQPRIO_FAC_UNAVAIL		10
+#define BOOK3S_IRQPRIO_SYSCALL			11
+#define BOOK3S_IRQPRIO_MACHINE_CHECK		12
+#define BOOK3S_IRQPRIO_DEBUG			13
+#define BOOK3S_IRQPRIO_EXTERNAL			14
+#define BOOK3S_IRQPRIO_DECREMENTER		15
+#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR	16
+#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL		17
+#define BOOK3S_IRQPRIO_MAX			18

#define BOOK3S_HFLAG_DCBZ32			0x1
#define BOOK3S_HFLAG_SLB			0x2

+ 2 - 1
arch/powerpc/include/asm/kvm_book3s.h

@@ -268,9 +268,10 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
	return vcpu->arch.pc;
}

+static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu);
static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
{
-	return (vcpu->arch.shared->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+	return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE);
}

static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc)

+ 123 - 23
arch/powerpc/include/asm/kvm_book3s_64.h

@@ -77,34 +77,122 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
	return old == 0;
}

+static inline int __hpte_actual_psize(unsigned int lp, int psize)
+{
+	int i, shift;
+	unsigned int mask;
+
+	/* start from 1 ignoring MMU_PAGE_4K */
+	for (i = 1; i < MMU_PAGE_COUNT; i++) {
+
+		/* invalid penc */
+		if (mmu_psize_defs[psize].penc[i] == -1)
+			continue;
+		/*
+		 * encoding bits per actual page size
+		 *        PTE LP     actual page size
+		 *    rrrr rrrz		>=8KB
+		 *    rrrr rrzz		>=16KB
+		 *    rrrr rzzz		>=32KB
+		 *    rrrr zzzz		>=64KB
+		 * .......
+		 */
+		shift = mmu_psize_defs[i].shift - LP_SHIFT;
+		if (shift > LP_BITS)
+			shift = LP_BITS;
+		mask = (1 << shift) - 1;
+		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
+			return i;
+	}
+	return -1;
+}
+
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
					     unsigned long pte_index)
{
-	unsigned long rb, va_low;
+	int b_psize, a_psize;
+	unsigned int penc;
+	unsigned long rb = 0, va_low, sllp;
+	unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+
+	if (!(v & HPTE_V_LARGE)) {
+		/* both base and actual psize is 4k */
+		b_psize = MMU_PAGE_4K;
+		a_psize = MMU_PAGE_4K;
+	} else {
+		for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
+
+			/* valid entries have a shift value */
+			if (!mmu_psize_defs[b_psize].shift)
+				continue;
+			a_psize = __hpte_actual_psize(lp, b_psize);
+			if (a_psize != -1)
+				break;
+		}
+	}
+	/*
+	 * Ignore the top 14 bits of va
+	 * v have top two bits covering segment size, hence move
+	 * by 16 bits, Also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
+	 * AVA field in v also have the lower 23 bits ignored.
+	 * For base page size 4K we need 14 .. 65 bits (so need to
+	 * collect extra 11 bits)
+	 * For others we need 14..14+i
+	 */
+	/* This covers 14..54 bits of va*/
	rb = (v & ~0x7fUL) << 16;		/* AVA field */
+	/*
+	 * AVA in v had cleared lower 23 bits. We need to derive
+	 * that from pteg index
+	 */
	va_low = pte_index >> 3;
	if (v & HPTE_V_SECONDARY)
		va_low = ~va_low;
-	/* xor vsid from AVA */
+	/*
+	 * get the vpn bits from va_low using reverse of hashing.
+	 * In v we have va with 23 bits dropped and then left shifted
+	 * HPTE_V_AVPN_SHIFT (7) bits. Now to find vsid we need
+	 * right shift it with (SID_SHIFT - (23 - 7))
+	 */
	if (!(v & HPTE_V_1TB_SEG))
-		va_low ^= v >> 12;
+		va_low ^= v >> (SID_SHIFT - 16);
	else
-		va_low ^= v >> 24;
+		va_low ^= v >> (SID_SHIFT_1T - 16);
	va_low &= 0x7ff;
-	if (v & HPTE_V_LARGE) {
-		rb |= 1;			/* L field */
-		if (cpu_has_feature(CPU_FTR_ARCH_206) &&
-		    (r & 0xff000)) {
-			/* non-16MB large page, must be 64k */
-			/* (masks depend on page size) */
-			rb |= 0x1000;		/* page encoding in LP field */
-			rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
-			rb |= ((va_low << 4) & 0xf0);	/* AVAL field (P7 doesn't seem to care) */
-		}
-	} else {
-		/* 4kB page */
-		rb |= (va_low & 0x7ff) << 12;	/* remaining 11b of VA */
+
+	switch (b_psize) {
+	case MMU_PAGE_4K:
+		sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
+			((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
+		rb |= sllp << 5;	/*  AP field */
+		rb |= (va_low & 0x7ff) << 12;	/* remaining 11 bits of AVA */
+		break;
+	default:
+	{
+		int aval_shift;
+		/*
+		 * remaining 7bits of AVA/LP fields
+		 * Also contain the rr bits of LP
+		 */
+		rb |= (va_low & 0x7f) << 16;
+		/*
+		 * Now clear not needed LP bits based on actual psize
+		 */
+		rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
+		/*
+		 * AVAL field 58..77 - base_page_shift bits of va
+		 * we have space for 58..64 bits, Missing bits should
+		 * be zero filled. +1 is to take care of L bit shift
+		 */
+		aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
+		rb |= ((va_low << aval_shift) & 0xfe);
+
+		rb |= 1;		/* L field */
+		penc = mmu_psize_defs[b_psize].penc[a_psize];
+		rb |= penc << 12;	/* LP field */
+		break;
+	}
	}
	rb |= (v >> 54) & 0x300;		/* B field */
	return rb;
@@ -112,14 +200,26 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,

static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
+	int size, a_psize;
+	/* Look at the 8 bit LP value */
+	unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+
	/* only handle 4k, 64k and 16M pages for now */
	if (!(h & HPTE_V_LARGE))
-		return 1ul << 12;		/* 4k page */
-	if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
-		return 1ul << 16;		/* 64k page */
-	if ((l & 0xff000) == 0)
-		return 1ul << 24;		/* 16M page */
-	return 0;				/* error */
+		return 1ul << 12;
+	else {
+		for (size = 0; size < MMU_PAGE_COUNT; size++) {
+			/* valid entries have a shift value */
+			if (!mmu_psize_defs[size].shift)
+				continue;
+
+			a_psize = __hpte_actual_psize(lp, size);
+			if (a_psize != -1)
+				return 1ul << mmu_psize_defs[a_psize].shift;
+		}
+
+	}
+	return 0;
}

static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)

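compute_tlbie_rb() and hpte_page_size() now decode the LP bits against mmu_psize_defs[] instead of hard-coding 4K/64K/16M, which is what adds mixed page-size support for guests. A short sketch of the caller-visible contract, assuming hpte_v/hpte_r are the two doublewords of a hashed PTE:

	/* Illustrative only: a return of 0 now means "the LP encoding matches
	 * no configured page size", not merely "not one of 4K/64K/16M". */
	static unsigned long hpte_backing_bytes(unsigned long hpte_v,
						unsigned long hpte_r)
	{
		return hpte_page_size(hpte_v, hpte_r);
	}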
+ 2 - 0
arch/powerpc/include/asm/kvm_book3s_asm.h

@@ -104,6 +104,7 @@ struct kvmppc_host_state {
#ifdef CONFIG_PPC_BOOK3S_64
	u64 cfar;
	u64 ppr;
+	u64 host_fscr;
#endif
};

@@ -133,6 +134,7 @@ struct kvmppc_book3s_shadow_vcpu {
		u64     esid;
		u64     vsid;
	} slb[64];			/* guest SLB */
+	u64 shadow_fscr;
#endif
};


+ 0 - 5
arch/powerpc/include/asm/kvm_booke.h

@@ -108,9 +108,4 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.fault_dear;
}
-
-static inline ulong kvmppc_get_msr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.shared->msr;
-}
#endif /* __ASM_KVM_BOOKE_H__ */

+ 8 - 1
arch/powerpc/include/asm/kvm_host.h

@@ -449,7 +449,9 @@ struct kvm_vcpu_arch {
	ulong pc;
	ulong ctr;
	ulong lr;
+#ifdef CONFIG_PPC_BOOK3S
	ulong tar;
+#endif

	ulong xer;
	u32 cr;
@@ -475,6 +477,7 @@ struct kvm_vcpu_arch {
	ulong ppr;
	ulong pspb;
	ulong fscr;
+	ulong shadow_fscr;
	ulong ebbhr;
	ulong ebbrr;
	ulong bescr;
@@ -562,6 +565,7 @@ struct kvm_vcpu_arch {
#ifdef CONFIG_PPC_BOOK3S
	ulong fault_dar;
	u32 fault_dsisr;
+	unsigned long intr_msr;
#endif

#ifdef CONFIG_BOOKE
@@ -622,8 +626,12 @@ struct kvm_vcpu_arch {
	wait_queue_head_t cpu_run;

	struct kvm_vcpu_arch_shared *shared;
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+	bool shared_big_endian;
+#endif
	unsigned long magic_page_pa; /* phys addr to map the magic page to */
	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
+	bool disable_kernel_nx;

	int irq_type;		/* one of KVM_IRQ_* */
	int irq_cpu_id;
@@ -654,7 +662,6 @@ struct kvm_vcpu_arch {
	spinlock_t tbacct_lock;
	u64 busy_stolen;
	u64 busy_preempt;
-	unsigned long intr_msr;
#endif
};


+ 79 - 1
arch/powerpc/include/asm/kvm_ppc.h

@@ -448,6 +448,84 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
	}
}

+/*
+ * Shared struct helpers. The shared struct can be little or big endian,
+ * depending on the guest endianness. So expose helpers to all of them.
+ */
+static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+	/* Only Book3S_64 PR supports bi-endian for now */
+	return vcpu->arch.shared_big_endian;
+#elif defined(CONFIG_PPC_BOOK3S_64) && defined(__LITTLE_ENDIAN__)
+	/* Book3s_64 HV on little endian is always little endian */
+	return false;
+#else
+	return true;
+#endif
+}
+
+#define SHARED_WRAPPER_GET(reg, size)					\
+static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu)	\
+{									\
+	if (kvmppc_shared_big_endian(vcpu))				\
+	       return be##size##_to_cpu(vcpu->arch.shared->reg);	\
+	else								\
+	       return le##size##_to_cpu(vcpu->arch.shared->reg);	\
+}									\
+
+#define SHARED_WRAPPER_SET(reg, size)					\
+static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val)	\
+{									\
+	if (kvmppc_shared_big_endian(vcpu))				\
+	       vcpu->arch.shared->reg = cpu_to_be##size(val);		\
+	else								\
+	       vcpu->arch.shared->reg = cpu_to_le##size(val);		\
+}									\
+
+#define SHARED_WRAPPER(reg, size)					\
+	SHARED_WRAPPER_GET(reg, size)					\
+	SHARED_WRAPPER_SET(reg, size)					\
+
+SHARED_WRAPPER(critical, 64)
+SHARED_WRAPPER(sprg0, 64)
+SHARED_WRAPPER(sprg1, 64)
+SHARED_WRAPPER(sprg2, 64)
+SHARED_WRAPPER(sprg3, 64)
+SHARED_WRAPPER(srr0, 64)
+SHARED_WRAPPER(srr1, 64)
+SHARED_WRAPPER(dar, 64)
+SHARED_WRAPPER_GET(msr, 64)
+static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val)
+{
+	if (kvmppc_shared_big_endian(vcpu))
+	       vcpu->arch.shared->msr = cpu_to_be64(val);
+	else
+	       vcpu->arch.shared->msr = cpu_to_le64(val);
+}
+SHARED_WRAPPER(dsisr, 32)
+SHARED_WRAPPER(int_pending, 32)
+SHARED_WRAPPER(sprg4, 64)
+SHARED_WRAPPER(sprg5, 64)
+SHARED_WRAPPER(sprg6, 64)
+SHARED_WRAPPER(sprg7, 64)
+
+static inline u32 kvmppc_get_sr(struct kvm_vcpu *vcpu, int nr)
+{
+	if (kvmppc_shared_big_endian(vcpu))
+	       return be32_to_cpu(vcpu->arch.shared->sr[nr]);
+	else
+	       return le32_to_cpu(vcpu->arch.shared->sr[nr]);
+}
+
+static inline void kvmppc_set_sr(struct kvm_vcpu *vcpu, int nr, u32 val)
+{
+	if (kvmppc_shared_big_endian(vcpu))
+	       vcpu->arch.shared->sr[nr] = cpu_to_be32(val);
+	else
+	       vcpu->arch.shared->sr[nr] = cpu_to_le32(val);
+}
+
/*
 * Please call after prepare_to_enter. This function puts the lazy ee and irq
 * disabled tracking state back to normal mode, without actually enabling
@@ -485,7 +563,7 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
	msr_64bit = MSR_SF;
#endif

-	if (!(vcpu->arch.shared->msr & msr_64bit))
+	if (!(kvmppc_get_msr(vcpu) & msr_64bit))
		ea = (uint32_t)ea;

	return ea;

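SHARED_WRAPPER() is what lets the shared (magic page) struct be kept in either endianness: every accessor byteswaps according to kvmppc_shared_big_endian(). A hedged illustration of what SHARED_WRAPPER(srr0, 64) above expands to, so the later book3s.c hunks that switch to kvmppc_get_srr0()/kvmppc_set_srr0() read naturally:

	static inline u64 kvmppc_get_srr0(struct kvm_vcpu *vcpu)
	{
		if (kvmppc_shared_big_endian(vcpu))
			return be64_to_cpu(vcpu->arch.shared->srr0);
		else
			return le64_to_cpu(vcpu->arch.shared->srr0);
	}

	static inline void kvmppc_set_srr0(struct kvm_vcpu *vcpu, u64 val)
	{
		if (kvmppc_shared_big_endian(vcpu))
			vcpu->arch.shared->srr0 = cpu_to_be64(val);
		else
			vcpu->arch.shared->srr0 = cpu_to_le64(val);
	}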
+ 7 - 5
arch/powerpc/include/asm/reg.h

@@ -670,18 +670,20 @@
#define   MMCR0_PROBLEM_DISABLE MMCR0_FCP
#define   MMCR0_FCM1	0x10000000UL /* freeze counters while MSR mark = 1 */
#define   MMCR0_FCM0	0x08000000UL /* freeze counters while MSR mark = 0 */
-#define   MMCR0_PMXE	0x04000000UL /* performance monitor exception enable */
-#define   MMCR0_FCECE	0x02000000UL /* freeze ctrs on enabled cond or event */
+#define   MMCR0_PMXE	ASM_CONST(0x04000000) /* perf mon exception enable */
+#define   MMCR0_FCECE	ASM_CONST(0x02000000) /* freeze ctrs on enabled cond or event */
#define   MMCR0_TBEE	0x00400000UL /* time base exception enable */
#define   MMCR0_BHRBA	0x00200000UL /* BHRB Access allowed in userspace */
#define   MMCR0_EBE	0x00100000UL /* Event based branch enable */
#define   MMCR0_PMCC	0x000c0000UL /* PMC control */
#define   MMCR0_PMCC_U6	0x00080000UL /* PMC1-6 are R/W by user (PR) */
#define   MMCR0_PMC1CE	0x00008000UL /* PMC1 count enable*/
-#define   MMCR0_PMCjCE	0x00004000UL /* PMCj count enable*/
+#define   MMCR0_PMCjCE	ASM_CONST(0x00004000) /* PMCj count enable*/
#define   MMCR0_TRIGGER	0x00002000UL /* TRIGGER enable */
-#define   MMCR0_PMAO_SYNC 0x00000800UL /* PMU interrupt is synchronous */
-#define   MMCR0_PMAO	0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */
+#define   MMCR0_PMAO_SYNC ASM_CONST(0x00000800) /* PMU intr is synchronous */
+#define   MMCR0_C56RUN	ASM_CONST(0x00000100) /* PMC5/6 count when RUN=0 */
+/* performance monitor alert has occurred, set to 0 after handling exception */
+#define   MMCR0_PMAO	ASM_CONST(0x00000080)
#define   MMCR0_SHRFC	0x00000040UL /* SHRre freeze conditions between threads */
#define   MMCR0_FC56	0x00000010UL /* freeze counters 5 and 6 */
#define   MMCR0_FCTI	0x00000008UL /* freeze counters in tags inactive mode */

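The MMCR0 bits that the HV entry/exit assembly now has to touch (PMXE, FCECE, PMCjCE, PMAO_SYNC, PMAO, C56RUN) are switched from UL literals to ASM_CONST() because a 'UL' suffix is not valid in assembler source. Roughly, the helper behaves like this (illustrative sketch; the real definition sits in the powerpc headers):

	#ifdef __ASSEMBLY__
	#define ASM_CONST(x)	x
	#else
	#define __ASM_CONST(x)	x##UL
	#define ASM_CONST(x)	__ASM_CONST(x)
	#endif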
+ 1 - 0
arch/powerpc/include/asm/reg_booke.h

@@ -583,6 +583,7 @@

/* Bit definitions for L1CSR0. */
#define L1CSR0_CPE	0x00010000	/* Data Cache Parity Enable */
+#define L1CSR0_CUL	0x00000400	/* Data Cache Unable to Lock */
#define L1CSR0_CLFC	0x00000100	/* Cache Lock Bits Flash Clear */
#define L1CSR0_DCFI	0x00000002	/* Data Cache Flash Invalidate */
#define L1CSR0_CFI	0x00000002	/* Cache Flash Invalidate */

+ 1 - 1
arch/powerpc/include/uapi/asm/kvm.h

@@ -545,7 +545,6 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TCSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
#define KVM_REG_PPC_PID		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
#define KVM_REG_PPC_ACOP	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
-#define KVM_REG_PPC_WORT	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb4)

#define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
#define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
@@ -555,6 +554,7 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_ARCH_COMPAT	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)

#define KVM_REG_PPC_DABRX	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
+#define KVM_REG_PPC_WORT	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)

/* Transactional Memory checkpointed state:
 * This is all GPRs, all VSX regs and a subset of SPRs

+ 6 - 0
arch/powerpc/include/uapi/asm/kvm_para.h

@@ -82,10 +82,16 @@ struct kvm_vcpu_arch_shared {

#define KVM_FEATURE_MAGIC_PAGE	1

+/* Magic page flags from host to guest */
+
#define KVM_MAGIC_FEAT_SR		(1 << 0)

/* MASn, ESR, PIR, and high SPRGs */
#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7	(1 << 1)

+/* Magic page flags from guest to host */
+
+#define MAGIC_PAGE_FLAG_NOT_MAPPED_NX	(1 << 0)
+

#endif /* _UAPI__POWERPC_KVM_PARA_H__ */

+ 1 - 33
arch/powerpc/kernel/align.c

@@ -25,14 +25,13 @@
#include <asm/cputable.h>
#include <asm/emulated_ops.h>
#include <asm/switch_to.h>
+#include <asm/disassemble.h>

struct aligninfo {
	unsigned char len;
	unsigned char flags;
};

-#define IS_XFORM(inst)	(((inst) >> 26) == 31)
-#define IS_DSFORM(inst)	(((inst) >> 26) >= 56)

#define INVALID	{ 0, 0 }

@@ -191,37 +190,6 @@ static struct aligninfo aligninfo[128] = {
	INVALID,		/* 11 1 1111 */
};

-/*
- * Create a DSISR value from the instruction
- */
-static inline unsigned make_dsisr(unsigned instr)
-{
-	unsigned dsisr;
-
-
-	/* bits  6:15 --> 22:31 */
-	dsisr = (instr & 0x03ff0000) >> 16;
-
-	if (IS_XFORM(instr)) {
-		/* bits 29:30 --> 15:16 */
-		dsisr |= (instr & 0x00000006) << 14;
-		/* bit     25 -->    17 */
-		dsisr |= (instr & 0x00000040) << 8;
-		/* bits 21:24 --> 18:21 */
-		dsisr |= (instr & 0x00000780) << 3;
-	} else {
-		/* bit      5 -->    17 */
-		dsisr |= (instr & 0x04000000) >> 12;
-		/* bits  1: 4 --> 18:21 */
-		dsisr |= (instr & 0x78000000) >> 17;
-		/* bits 30:31 --> 12:13 */
-		if (IS_DSFORM(instr))
-			dsisr |= (instr & 0x00000003) << 18;
-	}
-
-	return dsisr;
-}
-
/*
 * The dcbz (data cache block zero) instruction
 * gives an alignment fault if used on non-cacheable

+ 10 - 1
arch/powerpc/kernel/asm-offsets.c

@@ -54,6 +54,7 @@
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
#endif

#ifdef CONFIG_PPC32
@@ -445,7 +446,9 @@ int main(void)
	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+#ifdef CONFIG_PPC_BOOK3S
	DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
+#endif
	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -467,6 +470,9 @@ int main(void)
	DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
	DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+	DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian));
+#endif

	DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
	DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
@@ -493,7 +499,6 @@ int main(void)
	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
-	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
#endif
#ifdef CONFIG_PPC_BOOK3S
	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -528,11 +533,13 @@ int main(void)
	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
+	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
	DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
	DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
	DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
+	DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr));
	DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
	DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
	DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
@@ -614,6 +621,7 @@ int main(void)
#ifdef CONFIG_PPC64
	SVCPU_FIELD(SVCPU_SLB, slb);
	SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
+	SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr);
#endif

	HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
@@ -649,6 +657,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S_64
	HSTATE_FIELD(HSTATE_CFAR, cfar);
	HSTATE_FIELD(HSTATE_PPR, ppr);
+	HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
#endif /* CONFIG_PPC_BOOK3S_64 */

#else /* CONFIG_PPC_BOOK3S */

+ 3 - 2
arch/powerpc/kernel/epapr_paravirt.c

@@ -47,9 +47,10 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
		return -1;

	for (i = 0; i < (len / 4); i++) {
-		patch_instruction(epapr_hypercall_start + i, insts[i]);
+		u32 inst = be32_to_cpu(insts[i]);
+		patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
-		patch_instruction(epapr_ev_idle_start + i, insts[i]);
+		patch_instruction(epapr_ev_idle_start + i, inst);
#endif
	}


+ 1 - 1
arch/powerpc/kernel/kvm.c

@@ -417,7 +417,7 @@ static void kvm_map_magic_page(void *data)
	ulong out[8];

	in[0] = KVM_MAGIC_PAGE;
-	in[1] = KVM_MAGIC_PAGE;
+	in[1] = KVM_MAGIC_PAGE | MAGIC_PAGE_FLAG_NOT_MAPPED_NX;

	epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));


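With the extra flag in the second hypercall argument, a new guest tells the host that it can cope with the magic page not being mapped executable. A hypothetical host-side sketch of the idea (not the code from this pull; the real handling sits in the KVM_HC_PPC_MAP_MAGIC_PAGE hcall path and uses the disable_kernel_nx field added to kvm_host.h above):

	static void map_magic_page_hcall(struct kvm_vcpu *vcpu, ulong param2)
	{
		/* Old guests never set the flag, so keep NX off for them. */
		if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX))
			vcpu->arch.disable_kernel_nx = true;
	}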
+ 3 - 0
arch/powerpc/kernel/paca.c

@@ -98,6 +98,9 @@ static inline void free_lppacas(void) { }
/*
 * 3 persistent SLBs are registered here.  The buffer will be zero
 * initially, hence will all be invaild until we actually write them.
+ *
+ * If you make the number of persistent SLB entries dynamic, please also
+ * update PR KVM to flush and restore them accordingly.
 */
static struct slb_shadow *slb_shadow;


+ 1 - 1
arch/powerpc/kvm/Kconfig

@@ -6,7 +6,6 @@ source "virt/kvm/Kconfig"

menuconfig VIRTUALIZATION
	bool "Virtualization"
-	depends on !CPU_LITTLE_ENDIAN
	---help---
	  Say Y here to get to see options for using your Linux host to run
	  other operating systems inside virtual machines (guests).
@@ -76,6 +75,7 @@ config KVM_BOOK3S_64
config KVM_BOOK3S_64_HV
	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
	depends on KVM_BOOK3S_64
+	depends on !CPU_LITTLE_ENDIAN
	select KVM_BOOK3S_HV_POSSIBLE
	select MMU_NOTIFIER
	select CMA

+ 70 - 36
arch/powerpc/kvm/book3s.c

@@ -85,9 +85,9 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
	if (is_kvmppc_hv_enabled(vcpu->kvm))
		return;
	if (pending_now)
-		vcpu->arch.shared->int_pending = 1;
+		kvmppc_set_int_pending(vcpu, 1);
	else if (old_pending)
-		vcpu->arch.shared->int_pending = 0;
+		kvmppc_set_int_pending(vcpu, 0);
}

static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
@@ -99,11 +99,11 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
	if (is_kvmppc_hv_enabled(vcpu->kvm))
		return false;

-	crit_raw = vcpu->arch.shared->critical;
+	crit_raw = kvmppc_get_critical(vcpu);
	crit_r1 = kvmppc_get_gpr(vcpu, 1);

	/* Truncate crit indicators in 32 bit mode */
-	if (!(vcpu->arch.shared->msr & MSR_SF)) {
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
		crit_raw &= 0xffffffff;
		crit_r1 &= 0xffffffff;
	}
@@ -111,15 +111,15 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
	/* Critical section when crit == r1 */
	crit = (crit_raw == crit_r1);
	/* ... and we're in supervisor mode */
-	crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+	crit = crit && !(kvmppc_get_msr(vcpu) & MSR_PR);

	return crit;
}

void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
-	vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
-	vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
+	kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
+	kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);
	kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
	vcpu->arch.mmu.reset_msr(vcpu);
}
@@ -145,6 +145,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
	case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG;		break;
	case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC;		break;
	case 0xf40: prio = BOOK3S_IRQPRIO_VSX;			break;
+	case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL;		break;
	default:    prio = BOOK3S_IRQPRIO_MAX;			break;
	}

@@ -225,12 +226,12 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)

	switch (priority) {
	case BOOK3S_IRQPRIO_DECREMENTER:
-		deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
+		deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
		vec = BOOK3S_INTERRUPT_DECREMENTER;
		break;
	case BOOK3S_IRQPRIO_EXTERNAL:
	case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
-		deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
+		deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
		vec = BOOK3S_INTERRUPT_EXTERNAL;
		break;
	case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -275,6 +276,9 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
	case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
		vec = BOOK3S_INTERRUPT_PERFMON;
		break;
+	case BOOK3S_IRQPRIO_FAC_UNAVAIL:
+		vec = BOOK3S_INTERRUPT_FAC_UNAVAIL;
+		break;
	default:
		deliver = 0;
		printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
@@ -343,7 +347,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
{
	ulong mp_pa = vcpu->arch.magic_page_pa;

-	if (!(vcpu->arch.shared->msr & MSR_SF))
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
		mp_pa = (uint32_t)mp_pa;

	/* Magic page override */
@@ -367,7 +371,7 @@ EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
			bool iswrite, struct kvmppc_pte *pte)
{
-	int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
+	int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));
	int r;

	if (relocated) {
@@ -498,18 +502,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	regs->ctr = kvmppc_get_ctr(vcpu);
	regs->lr = kvmppc_get_lr(vcpu);
	regs->xer = kvmppc_get_xer(vcpu);
-	regs->msr = vcpu->arch.shared->msr;
-	regs->srr0 = vcpu->arch.shared->srr0;
-	regs->srr1 = vcpu->arch.shared->srr1;
+	regs->msr = kvmppc_get_msr(vcpu);
+	regs->srr0 = kvmppc_get_srr0(vcpu);
+	regs->srr1 = kvmppc_get_srr1(vcpu);
	regs->pid = vcpu->arch.pid;
-	regs->sprg0 = vcpu->arch.shared->sprg0;
-	regs->sprg1 = vcpu->arch.shared->sprg1;
-	regs->sprg2 = vcpu->arch.shared->sprg2;
-	regs->sprg3 = vcpu->arch.shared->sprg3;
-	regs->sprg4 = vcpu->arch.shared->sprg4;
-	regs->sprg5 = vcpu->arch.shared->sprg5;
-	regs->sprg6 = vcpu->arch.shared->sprg6;
-	regs->sprg7 = vcpu->arch.shared->sprg7;
+	regs->sprg0 = kvmppc_get_sprg0(vcpu);
+	regs->sprg1 = kvmppc_get_sprg1(vcpu);
+	regs->sprg2 = kvmppc_get_sprg2(vcpu);
+	regs->sprg3 = kvmppc_get_sprg3(vcpu);
+	regs->sprg4 = kvmppc_get_sprg4(vcpu);
+	regs->sprg5 = kvmppc_get_sprg5(vcpu);
+	regs->sprg6 = kvmppc_get_sprg6(vcpu);
+	regs->sprg7 = kvmppc_get_sprg7(vcpu);

	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -527,16 +531,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	kvmppc_set_lr(vcpu, regs->lr);
	kvmppc_set_xer(vcpu, regs->xer);
	kvmppc_set_msr(vcpu, regs->msr);
-	vcpu->arch.shared->srr0 = regs->srr0;
-	vcpu->arch.shared->srr1 = regs->srr1;
-	vcpu->arch.shared->sprg0 = regs->sprg0;
-	vcpu->arch.shared->sprg1 = regs->sprg1;
-	vcpu->arch.shared->sprg2 = regs->sprg2;
-	vcpu->arch.shared->sprg3 = regs->sprg3;
-	vcpu->arch.shared->sprg4 = regs->sprg4;
-	vcpu->arch.shared->sprg5 = regs->sprg5;
-	vcpu->arch.shared->sprg6 = regs->sprg6;
-	vcpu->arch.shared->sprg7 = regs->sprg7;
+	kvmppc_set_srr0(vcpu, regs->srr0);
+	kvmppc_set_srr1(vcpu, regs->srr1);
+	kvmppc_set_sprg0(vcpu, regs->sprg0);
+	kvmppc_set_sprg1(vcpu, regs->sprg1);
+	kvmppc_set_sprg2(vcpu, regs->sprg2);
+	kvmppc_set_sprg3(vcpu, regs->sprg3);
+	kvmppc_set_sprg4(vcpu, regs->sprg4);
+	kvmppc_set_sprg5(vcpu, regs->sprg5);
+	kvmppc_set_sprg6(vcpu, regs->sprg6);
+	kvmppc_set_sprg7(vcpu, regs->sprg7);

	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -570,10 +574,10 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
		r = 0;
		switch (reg->id) {
		case KVM_REG_PPC_DAR:
-			val = get_reg_val(reg->id, vcpu->arch.shared->dar);
+			val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
			break;
		case KVM_REG_PPC_DSISR:
-			val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
+			val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
			break;
		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
			i = reg->id - KVM_REG_PPC_FPR0;
@@ -627,6 +631,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
			val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
			break;
#endif /* CONFIG_KVM_XICS */
+		case KVM_REG_PPC_FSCR:
+			val = get_reg_val(reg->id, vcpu->arch.fscr);
+			break;
+		case KVM_REG_PPC_TAR:
+			val = get_reg_val(reg->id, vcpu->arch.tar);
+			break;
+		case KVM_REG_PPC_EBBHR:
+			val = get_reg_val(reg->id, vcpu->arch.ebbhr);
+			break;
+		case KVM_REG_PPC_EBBRR:
+			val = get_reg_val(reg->id, vcpu->arch.ebbrr);
+			break;
+		case KVM_REG_PPC_BESCR:
+			val = get_reg_val(reg->id, vcpu->arch.bescr);
+			break;
		default:
			r = -EINVAL;
			break;
@@ -660,10 +679,10 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
		r = 0;
		switch (reg->id) {
		case KVM_REG_PPC_DAR:
-			vcpu->arch.shared->dar = set_reg_val(reg->id, val);
+			kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));
			break;
		case KVM_REG_PPC_DSISR:
-			vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
+			kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));
			break;
		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
			i = reg->id - KVM_REG_PPC_FPR0;
@@ -716,6 +735,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
						set_reg_val(reg->id, val));
			break;
#endif /* CONFIG_KVM_XICS */
+		case KVM_REG_PPC_FSCR:
+			vcpu->arch.fscr = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_TAR:
+			vcpu->arch.tar = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_EBBHR:
+			vcpu->arch.ebbhr = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_EBBRR:
+			vcpu->arch.ebbrr = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_BESCR:
+			vcpu->arch.bescr = set_reg_val(reg->id, val);
+			break;
 		default:
 		default:
 			r = -EINVAL;
 			r = -EINVAL;
 			break;
 			break;
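
Editor's note: the hunks above expose FSCR, TAR, EBBHR, EBBRR and BESCR through the generic ONE_REG get/set paths. A minimal userspace sketch of driving that interface follows; it assumes a PowerPC host whose headers export KVM_REG_PPC_TAR and a vcpu_fd obtained from KVM_CREATE_VCPU, and is illustrative rather than a QEMU excerpt.

/* Read and then write KVM_REG_PPC_TAR through the ONE_REG ioctls. */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_set_tar(int vcpu_fd)
{
	uint64_t tar = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_TAR,
		.addr = (uintptr_t)&tar,
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		return -1;		/* older kernel: register not exposed */

	printf("TAR = 0x%llx\n", (unsigned long long)tar);

	tar = 0x1234;			/* arbitrary demo value */
	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}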

+ 23 - 18
arch/powerpc/kvm/book3s_32_mmu.c

@@ -91,7 +91,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 
 
 static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
 static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)
 {
 {
-	return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf];
+	return kvmppc_get_sr(vcpu, (eaddr >> 28) & 0xf);
 }
 }
 
 
 static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
@@ -131,7 +131,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
 	pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
 	pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
 
 
 	dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
 	dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
-		kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
+		kvmppc_get_pc(vcpu), eaddr, vcpu_book3s->sdr1, pteg,
 		sr_vsid(sre));
 		sr_vsid(sre));
 
 
 	r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
 	r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
@@ -160,7 +160,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
 		else
 		else
 			bat = &vcpu_book3s->ibat[i];
 			bat = &vcpu_book3s->ibat[i];
 
 
-		if (vcpu->arch.shared->msr & MSR_PR) {
+		if (kvmppc_get_msr(vcpu) & MSR_PR) {
 			if (!bat->vp)
 			if (!bat->vp)
 				continue;
 				continue;
 		} else {
 		} else {
@@ -208,6 +208,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 	u32 sre;
 	u32 sre;
 	hva_t ptegp;
 	hva_t ptegp;
 	u32 pteg[16];
 	u32 pteg[16];
+	u32 pte0, pte1;
 	u32 ptem = 0;
 	u32 ptem = 0;
 	int i;
 	int i;
 	int found = 0;
 	int found = 0;
@@ -233,14 +234,16 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 	}
 	}
 
 
 	for (i=0; i<16; i+=2) {
 	for (i=0; i<16; i+=2) {
-		if (ptem == pteg[i]) {
+		pte0 = be32_to_cpu(pteg[i]);
+		pte1 = be32_to_cpu(pteg[i + 1]);
+		if (ptem == pte0) {
 			u8 pp;
 			u8 pp;
 
 
-			pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
-			pp = pteg[i+1] & 3;
+			pte->raddr = (pte1 & ~(0xFFFULL)) | (eaddr & 0xFFF);
+			pp = pte1 & 3;
 
 
-			if ((sr_kp(sre) &&  (vcpu->arch.shared->msr & MSR_PR)) ||
-			    (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR)))
+			if ((sr_kp(sre) &&  (kvmppc_get_msr(vcpu) & MSR_PR)) ||
+			    (sr_ks(sre) && !(kvmppc_get_msr(vcpu) & MSR_PR)))
 				pp |= 4;
 				pp |= 4;
 
 
 			pte->may_write = false;
 			pte->may_write = false;
@@ -260,7 +263,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 			}
 			}
 
 
 			dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
 			dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
-				    pteg[i], pteg[i+1], pp);
+				    pte0, pte1, pp);
 			found = 1;
 			found = 1;
 			break;
 			break;
 		}
 		}
@@ -269,8 +272,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 	/* Update PTE C and A bits, so the guest's swapper knows we used the
 	/* Update PTE C and A bits, so the guest's swapper knows we used the
 	   page */
 	   page */
 	if (found) {
 	if (found) {
-		u32 pte_r = pteg[i+1];
-		char __user *addr = (char __user *) &pteg[i+1];
+		u32 pte_r = pte1;
+		char __user *addr = (char __user *) (ptegp + (i+1) * sizeof(u32));
 
 
 		/*
 		/*
 		 * Use single-byte writes to update the HPTE, to
 		 * Use single-byte writes to update the HPTE, to
@@ -296,7 +299,8 @@ no_page_found:
 			    to_book3s(vcpu)->sdr1, ptegp);
 			    to_book3s(vcpu)->sdr1, ptegp);
 		for (i=0; i<16; i+=2) {
 		for (i=0; i<16; i+=2) {
 			dprintk_pte("   %02d: 0x%x - 0x%x (0x%x)\n",
 			dprintk_pte("   %02d: 0x%x - 0x%x (0x%x)\n",
-				    i, pteg[i], pteg[i+1], ptem);
+				    i, be32_to_cpu(pteg[i]),
+				    be32_to_cpu(pteg[i+1]), ptem);
 		}
 		}
 	}
 	}
 
 
@@ -316,7 +320,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	/* Magic page override */
 	/* Magic page override */
 	if (unlikely(mp_ea) &&
 	if (unlikely(mp_ea) &&
 	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
 	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
-	    !(vcpu->arch.shared->msr & MSR_PR)) {
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
 		pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
 		pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
 		pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
 		pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff);
 		pte->raddr &= KVM_PAM;
 		pte->raddr &= KVM_PAM;
@@ -341,13 +345,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 
 static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
 static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
 {
 {
-	return vcpu->arch.shared->sr[srnum];
+	return kvmppc_get_sr(vcpu, srnum);
 }
 }
 
 
 static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
 static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
 					ulong value)
 					ulong value)
 {
 {
-	vcpu->arch.shared->sr[srnum] = value;
+	kvmppc_set_sr(vcpu, srnum, value);
 	kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
 	kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
 }
 }
 
 
@@ -367,8 +371,9 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 	ulong ea = esid << SID_SHIFT;
 	ulong ea = esid << SID_SHIFT;
 	u32 sr;
 	u32 sr;
 	u64 gvsid = esid;
 	u64 gvsid = esid;
+	u64 msr = kvmppc_get_msr(vcpu);
 
 
-	if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+	if (msr & (MSR_DR|MSR_IR)) {
 		sr = find_sr(vcpu, ea);
 		sr = find_sr(vcpu, ea);
 		if (sr_valid(sr))
 		if (sr_valid(sr))
 			gvsid = sr_vsid(sr);
 			gvsid = sr_vsid(sr);
@@ -377,7 +382,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 	/* In case we only have one of MSR_IR or MSR_DR set, let's put
 	/* In case we only have one of MSR_IR or MSR_DR set, let's put
 	   that in the real-mode context (and hope RM doesn't access
 	   that in the real-mode context (and hope RM doesn't access
 	   high memory) */
 	   high memory) */
-	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+	switch (msr & (MSR_DR|MSR_IR)) {
 	case 0:
 	case 0:
 		*vsid = VSID_REAL | esid;
 		*vsid = VSID_REAL | esid;
 		break;
 		break;
@@ -397,7 +402,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 		BUG();
 		BUG();
 	}
 	}
 
 
-	if (vcpu->arch.shared->msr & MSR_PR)
+	if (msr & MSR_PR)
 		*vsid |= VSID_PR;
 		*vsid |= VSID_PR;
 
 
 	return 0;
 	return 0;
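
Editor's note: the conversions above replace direct vcpu->arch.shared->... reads with kvmppc_get_sr()/kvmppc_get_msr() style accessors, so the byte order of the shared (magic) page is decided in one place. The sketch below shows the accessor pattern only, not the kernel's exact macro-generated helpers; kvmppc_shared_big_endian() stands in for the shared_big_endian flag that the book3s_hv.c hunk further down initialises.

/* Sketch, assuming a vcpu->arch.shared_big_endian flag as set later in
 * this series: pick the byte order the guest expects for the shared page. */
static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.shared_big_endian;
}

static inline u32 kvmppc_get_sr(struct kvm_vcpu *vcpu, int nr)
{
	if (kvmppc_shared_big_endian(vcpu))
		return be32_to_cpu(vcpu->arch.shared->sr[nr]);
	else
		return le32_to_cpu(vcpu->arch.shared->sr[nr]);
}

static inline void kvmppc_set_sr(struct kvm_vcpu *vcpu, int nr, u32 val)
{
	if (kvmppc_shared_big_endian(vcpu))
		vcpu->arch.shared->sr[nr] = cpu_to_be32(val);
	else
		vcpu->arch.shared->sr[nr] = cpu_to_le32(val);
}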

+ 2 - 2
arch/powerpc/kvm/book3s_32_mmu_host.c

@@ -92,7 +92,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
 	struct kvmppc_sid_map *map;
 	struct kvmppc_sid_map *map;
 	u16 sid_map_mask;
 	u16 sid_map_mask;
 
 
-	if (vcpu->arch.shared->msr & MSR_PR)
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
 		gvsid |= VSID_PR;
 		gvsid |= VSID_PR;
 
 
 	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
 	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -279,7 +279,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 	u16 sid_map_mask;
 	u16 sid_map_mask;
 	static int backwards_map = 0;
 	static int backwards_map = 0;
 
 
-	if (vcpu->arch.shared->msr & MSR_PR)
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
 		gvsid |= VSID_PR;
 		gvsid |= VSID_PR;
 
 
 	/* We might get collisions that trap in preceding order, so let's
 	/* We might get collisions that trap in preceding order, so let's

+ 23 - 16
arch/powerpc/kvm/book3s_64_mmu.c

@@ -38,7 +38,7 @@
 
 
 static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
 static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
 {
 {
-	kvmppc_set_msr(vcpu, MSR_SF);
+	kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
 }
 }
 
 
 static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
 static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
@@ -226,7 +226,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	/* Magic page override */
 	/* Magic page override */
 	if (unlikely(mp_ea) &&
 	if (unlikely(mp_ea) &&
 	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
 	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
-	    !(vcpu->arch.shared->msr & MSR_PR)) {
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
 		gpte->eaddr = eaddr;
 		gpte->eaddr = eaddr;
 		gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
 		gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
 		gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
 		gpte->raddr = vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff);
@@ -269,18 +269,21 @@ do_second:
 		goto no_page_found;
 		goto no_page_found;
 	}
 	}
 
 
-	if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp)
+	if ((kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Kp)
 		key = 4;
 		key = 4;
-	else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks)
+	else if (!(kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Ks)
 		key = 4;
 		key = 4;
 
 
 	for (i=0; i<16; i+=2) {
 	for (i=0; i<16; i+=2) {
+		u64 pte0 = be64_to_cpu(pteg[i]);
+		u64 pte1 = be64_to_cpu(pteg[i + 1]);
+
 		/* Check all relevant fields of 1st dword */
 		/* Check all relevant fields of 1st dword */
-		if ((pteg[i] & v_mask) == v_val) {
+		if ((pte0 & v_mask) == v_val) {
 			/* If large page bit is set, check pgsize encoding */
 			/* If large page bit is set, check pgsize encoding */
 			if (slbe->large &&
 			if (slbe->large &&
 			    (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
 			    (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
-				pgsize = decode_pagesize(slbe, pteg[i+1]);
+				pgsize = decode_pagesize(slbe, pte1);
 				if (pgsize < 0)
 				if (pgsize < 0)
 					continue;
 					continue;
 			}
 			}
@@ -297,8 +300,8 @@ do_second:
 		goto do_second;
 		goto do_second;
 	}
 	}
 
 
-	v = pteg[i];
-	r = pteg[i+1];
+	v = be64_to_cpu(pteg[i]);
+	r = be64_to_cpu(pteg[i+1]);
 	pp = (r & HPTE_R_PP) | key;
 	pp = (r & HPTE_R_PP) | key;
 	if (r & HPTE_R_PP0)
 	if (r & HPTE_R_PP0)
 		pp |= 8;
 		pp |= 8;
@@ -310,6 +313,9 @@ do_second:
 	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
 	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
 	gpte->page_size = pgsize;
 	gpte->page_size = pgsize;
 	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
 	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+	if (unlikely(vcpu->arch.disable_kernel_nx) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR))
+		gpte->may_execute = true;
 	gpte->may_read = false;
 	gpte->may_read = false;
 	gpte->may_write = false;
 	gpte->may_write = false;
 
 
@@ -342,14 +348,14 @@ do_second:
 		 * non-PAPR platforms such as mac99, and this is
 		 * non-PAPR platforms such as mac99, and this is
 		 * what real hardware does.
 		 * what real hardware does.
 		 */
 		 */
-		char __user *addr = (char __user *) &pteg[i+1];
+                char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
 		r |= HPTE_R_R;
 		r |= HPTE_R_R;
 		put_user(r >> 8, addr + 6);
 		put_user(r >> 8, addr + 6);
 	}
 	}
 	if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
 	if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
 		/* Set the dirty flag */
 		/* Set the dirty flag */
 		/* Use a single byte write */
 		/* Use a single byte write */
-		char __user *addr = (char __user *) &pteg[i+1];
+                char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
 		r |= HPTE_R_C;
 		r |= HPTE_R_C;
 		put_user(r, addr + 7);
 		put_user(r, addr + 7);
 	}
 	}
@@ -479,7 +485,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
 		vcpu->arch.slb[i].origv = 0;
 		vcpu->arch.slb[i].origv = 0;
 	}
 	}
 
 
-	if (vcpu->arch.shared->msr & MSR_IR) {
+	if (kvmppc_get_msr(vcpu) & MSR_IR) {
 		kvmppc_mmu_flush_segments(vcpu);
 		kvmppc_mmu_flush_segments(vcpu);
 		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
 		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
 	}
 	}
@@ -563,7 +569,7 @@ static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
 {
 {
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
 
-	return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) &&
+	return mp_ea && !(kvmppc_get_msr(vcpu) & MSR_PR) &&
 		(mp_ea >> SID_SHIFT) == esid;
 		(mp_ea >> SID_SHIFT) == esid;
 }
 }
 #endif
 #endif
@@ -576,8 +582,9 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 	u64 gvsid = esid;
 	u64 gvsid = esid;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 	int pagesize = MMU_PAGE_64K;
 	int pagesize = MMU_PAGE_64K;
+	u64 msr = kvmppc_get_msr(vcpu);
 
 
-	if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+	if (msr & (MSR_DR|MSR_IR)) {
 		slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
 		slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
 		if (slb) {
 		if (slb) {
 			gvsid = slb->vsid;
 			gvsid = slb->vsid;
@@ -590,7 +597,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 		}
 		}
 	}
 	}
 
 
-	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+	switch (msr & (MSR_DR|MSR_IR)) {
 	case 0:
 	case 0:
 		gvsid = VSID_REAL | esid;
 		gvsid = VSID_REAL | esid;
 		break;
 		break;
@@ -623,7 +630,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 		gvsid |= VSID_64K;
 		gvsid |= VSID_64K;
 #endif
 #endif
 
 
-	if (vcpu->arch.shared->msr & MSR_PR)
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
 		gvsid |= VSID_PR;
 		gvsid |= VSID_PR;
 
 
 	*vsid = gvsid;
 	*vsid = gvsid;
@@ -633,7 +640,7 @@ no_slb:
 	/* Catch magic page case */
 	/* Catch magic page case */
 	if (unlikely(mp_ea) &&
 	if (unlikely(mp_ea) &&
 	    unlikely(esid == (mp_ea >> SID_SHIFT)) &&
 	    unlikely(esid == (mp_ea >> SID_SHIFT)) &&
-	    !(vcpu->arch.shared->msr & MSR_PR)) {
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
 		*vsid = VSID_REAL | esid;
 		*vsid = VSID_REAL | esid;
 		return 0;
 		return 0;
 	}
 	}
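
Editor's note: the hunks above read the guest PTEG through be64_to_cpu() and recompute the put_user() target from ptegp instead of taking the address of the (now byte-swapped) local copy. A short sketch of why the single-byte offsets work; variable names follow the code above, and HPTE_R_R (0x100) / HPTE_R_C (0x80) are the usual hash-PTE definitions.

/* In a big-endian u64, byte 7 is the least-significant byte.  HPTE_R_C
 * (0x80) therefore lives entirely in byte 7 and HPTE_R_R (0x100) in
 * byte 6, so a one-byte store updates R or C without touching the rest
 * of the doubleword in guest memory. */
static void hpte_set_r_and_c(unsigned long ptegp, int i, u64 r)
{
	char __user *addr = (char __user *)(ptegp + (i + 1) * sizeof(u64));

	put_user((r | HPTE_R_R) >> 8, addr + 6);	/* byte holding R */
	put_user(r | HPTE_R_C, addr + 7);		/* byte holding C */
}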

+ 6 - 9
arch/powerpc/kvm/book3s_64_mmu_host.c

@@ -58,7 +58,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
 	struct kvmppc_sid_map *map;
 	struct kvmppc_sid_map *map;
 	u16 sid_map_mask;
 	u16 sid_map_mask;
 
 
-	if (vcpu->arch.shared->msr & MSR_PR)
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
 		gvsid |= VSID_PR;
 		gvsid |= VSID_PR;
 
 
 	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
 	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
@@ -230,7 +230,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 	u16 sid_map_mask;
 	u16 sid_map_mask;
 	static int backwards_map = 0;
 	static int backwards_map = 0;
 
 
-	if (vcpu->arch.shared->msr & MSR_PR)
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
 		gvsid |= VSID_PR;
 		gvsid |= VSID_PR;
 
 
 	/* We might get collisions that trap in preceding order, so let's
 	/* We might get collisions that trap in preceding order, so let's
@@ -271,11 +271,8 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
 	int found_inval = -1;
 	int found_inval = -1;
 	int r;
 	int r;
 
 
-	if (!svcpu->slb_max)
-		svcpu->slb_max = 1;
-
 	/* Are we overwriting? */
 	/* Are we overwriting? */
-	for (i = 1; i < svcpu->slb_max; i++) {
+	for (i = 0; i < svcpu->slb_max; i++) {
 		if (!(svcpu->slb[i].esid & SLB_ESID_V))
 		if (!(svcpu->slb[i].esid & SLB_ESID_V))
 			found_inval = i;
 			found_inval = i;
 		else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
 		else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
@@ -285,7 +282,7 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
 	}
 	}
 
 
 	/* Found a spare entry that was invalidated before */
 	/* Found a spare entry that was invalidated before */
-	if (found_inval > 0) {
+	if (found_inval >= 0) {
 		r = found_inval;
 		r = found_inval;
 		goto out;
 		goto out;
 	}
 	}
@@ -359,7 +356,7 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
 	ulong seg_mask = -seg_size;
 	ulong seg_mask = -seg_size;
 	int i;
 	int i;
 
 
-	for (i = 1; i < svcpu->slb_max; i++) {
+	for (i = 0; i < svcpu->slb_max; i++) {
 		if ((svcpu->slb[i].esid & SLB_ESID_V) &&
 		if ((svcpu->slb[i].esid & SLB_ESID_V) &&
 		    (svcpu->slb[i].esid & seg_mask) == ea) {
 		    (svcpu->slb[i].esid & seg_mask) == ea) {
 			/* Invalidate this entry */
 			/* Invalidate this entry */
@@ -373,7 +370,7 @@ void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
 void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
 void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
 {
 {
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->slb_max = 1;
+	svcpu->slb_max = 0;
 	svcpu->slb[0].esid = 0;
 	svcpu->slb[0].esid = 0;
 	svcpu_put(svcpu);
 	svcpu_put(svcpu);
 }
 }

+ 79 - 37
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -52,7 +52,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm);
 
 
 long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 {
-	unsigned long hpt;
+	unsigned long hpt = 0;
 	struct revmap_entry *rev;
 	struct revmap_entry *rev;
 	struct page *page = NULL;
 	struct page *page = NULL;
 	long order = KVM_DEFAULT_HPT_ORDER;
 	long order = KVM_DEFAULT_HPT_ORDER;
@@ -64,22 +64,11 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	}
 	}
 
 
 	kvm->arch.hpt_cma_alloc = 0;
 	kvm->arch.hpt_cma_alloc = 0;
-	/*
-	 * try first to allocate it from the kernel page allocator.
-	 * We keep the CMA reserved for failed allocation.
-	 */
-	hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
-			       __GFP_NOWARN, order - PAGE_SHIFT);
-
-	/* Next try to allocate from the preallocated pool */
-	if (!hpt) {
-		VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
-		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
-		if (page) {
-			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-			kvm->arch.hpt_cma_alloc = 1;
-		} else
-			--order;
+	VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
+	page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+	if (page) {
+		hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+		kvm->arch.hpt_cma_alloc = 1;
 	}
 	}
 
 
 	/* Lastly try successively smaller sizes from the page allocator */
 	/* Lastly try successively smaller sizes from the page allocator */
@@ -596,6 +585,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *hptep, hpte[3], r;
 	unsigned long *hptep, hpte[3], r;
 	unsigned long mmu_seq, psize, pte_size;
 	unsigned long mmu_seq, psize, pte_size;
+	unsigned long gpa_base, gfn_base;
 	unsigned long gpa, gfn, hva, pfn;
 	unsigned long gpa, gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot *memslot;
 	unsigned long *rmap;
 	unsigned long *rmap;
@@ -634,7 +624,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 
 	/* Translate the logical address and get the page */
 	/* Translate the logical address and get the page */
 	psize = hpte_page_size(hpte[0], r);
 	psize = hpte_page_size(hpte[0], r);
-	gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
+	gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+	gfn_base = gpa_base >> PAGE_SHIFT;
+	gpa = gpa_base | (ea & (psize - 1));
 	gfn = gpa >> PAGE_SHIFT;
 	gfn = gpa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(kvm, gfn);
 	memslot = gfn_to_memslot(kvm, gfn);
 
 
@@ -646,6 +638,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	if (!kvm->arch.using_mmu_notifiers)
 	if (!kvm->arch.using_mmu_notifiers)
 		return -EFAULT;		/* should never get here */
 		return -EFAULT;		/* should never get here */
 
 
+	/*
+	 * This should never happen, because of the slot_is_aligned()
+	 * check in kvmppc_do_h_enter().
+	 */
+	if (gfn_base < memslot->base_gfn)
+		return -EFAULT;
+
 	/* used to check for invalidations in progress */
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_notifier_seq;
 	mmu_seq = kvm->mmu_notifier_seq;
 	smp_rmb();
 	smp_rmb();
@@ -738,7 +737,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_unlock;
 		goto out_unlock;
 	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 
 
-	rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+	/* Always put the HPTE in the rmap chain for the page base address */
+	rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];
 	lock_rmap(rmap);
 	lock_rmap(rmap);
 
 
 	/* Check if we might have been invalidated; let the guest retry if so */
 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -1060,22 +1060,33 @@ void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 }
 }
 
 
-static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
+static int vcpus_running(struct kvm *kvm)
+{
+	return atomic_read(&kvm->arch.vcpus_running) != 0;
+}
+
+/*
+ * Returns the number of system pages that are dirty.
+ * This can be more than 1 if we find a huge-page HPTE.
+ */
+static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 {
 {
 	struct revmap_entry *rev = kvm->arch.revmap;
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long head, i, j;
 	unsigned long head, i, j;
+	unsigned long n;
+	unsigned long v, r;
 	unsigned long *hptep;
 	unsigned long *hptep;
-	int ret = 0;
+	int npages_dirty = 0;
 
 
  retry:
  retry:
 	lock_rmap(rmapp);
 	lock_rmap(rmapp);
 	if (*rmapp & KVMPPC_RMAP_CHANGED) {
 	if (*rmapp & KVMPPC_RMAP_CHANGED) {
 		*rmapp &= ~KVMPPC_RMAP_CHANGED;
 		*rmapp &= ~KVMPPC_RMAP_CHANGED;
-		ret = 1;
+		npages_dirty = 1;
 	}
 	}
 	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
 	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
 		unlock_rmap(rmapp);
 		unlock_rmap(rmapp);
-		return ret;
+		return npages_dirty;
 	}
 	}
 
 
 	i = head = *rmapp & KVMPPC_RMAP_INDEX;
 	i = head = *rmapp & KVMPPC_RMAP_INDEX;
@@ -1083,7 +1094,22 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
 		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
 		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
 		j = rev[i].forw;
 		j = rev[i].forw;
 
 
-		if (!(hptep[1] & HPTE_R_C))
+		/*
+		 * Checking the C (changed) bit here is racy since there
+		 * is no guarantee about when the hardware writes it back.
+		 * If the HPTE is not writable then it is stable since the
+		 * page can't be written to, and we would have done a tlbie
+		 * (which forces the hardware to complete any writeback)
+		 * when making the HPTE read-only.
+		 * If vcpus are running then this call is racy anyway
+		 * since the page could get dirtied subsequently, so we
+		 * expect there to be a further call which would pick up
+		 * any delayed C bit writeback.
+		 * Otherwise we need to do the tlbie even if C==0 in
+		 * order to pick up any delayed writeback of C.
+		 */
+		if (!(hptep[1] & HPTE_R_C) &&
+		    (!hpte_is_writable(hptep[1]) || vcpus_running(kvm)))
 			continue;
 			continue;
 
 
 		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
 		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
@@ -1095,24 +1121,33 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
 		}
 		}
 
 
 		/* Now check and modify the HPTE */
 		/* Now check and modify the HPTE */
-		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
-			/* need to make it temporarily absent to clear C */
-			hptep[0] |= HPTE_V_ABSENT;
-			kvmppc_invalidate_hpte(kvm, hptep, i);
-			hptep[1] &= ~HPTE_R_C;
-			eieio();
-			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+		if (!(hptep[0] & HPTE_V_VALID))
+			continue;
+
+		/* need to make it temporarily absent so C is stable */
+		hptep[0] |= HPTE_V_ABSENT;
+		kvmppc_invalidate_hpte(kvm, hptep, i);
+		v = hptep[0];
+		r = hptep[1];
+		if (r & HPTE_R_C) {
+			hptep[1] = r & ~HPTE_R_C;
 			if (!(rev[i].guest_rpte & HPTE_R_C)) {
 			if (!(rev[i].guest_rpte & HPTE_R_C)) {
 				rev[i].guest_rpte |= HPTE_R_C;
 				rev[i].guest_rpte |= HPTE_R_C;
 				note_hpte_modification(kvm, &rev[i]);
 				note_hpte_modification(kvm, &rev[i]);
 			}
 			}
-			ret = 1;
+			n = hpte_page_size(v, r);
+			n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
+			if (n > npages_dirty)
+				npages_dirty = n;
+			eieio();
 		}
 		}
-		hptep[0] &= ~HPTE_V_HVLOCK;
+		v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
+		v |= HPTE_V_VALID;
+		hptep[0] = v;
 	} while ((i = j) != head);
 	} while ((i = j) != head);
 
 
 	unlock_rmap(rmapp);
 	unlock_rmap(rmapp);
-	return ret;
+	return npages_dirty;
 }
 }
 
 
 static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
 static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
@@ -1136,15 +1171,22 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
 long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			     unsigned long *map)
 			     unsigned long *map)
 {
 {
-	unsigned long i;
+	unsigned long i, j;
 	unsigned long *rmapp;
 	unsigned long *rmapp;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vcpu *vcpu;
 
 
 	preempt_disable();
 	preempt_disable();
 	rmapp = memslot->arch.rmap;
 	rmapp = memslot->arch.rmap;
 	for (i = 0; i < memslot->npages; ++i) {
 	for (i = 0; i < memslot->npages; ++i) {
-		if (kvm_test_clear_dirty(kvm, rmapp) && map)
-			__set_bit_le(i, map);
+		int npages = kvm_test_clear_dirty_npages(kvm, rmapp);
+		/*
+		 * Note that if npages > 0 then i must be a multiple of npages,
+		 * since we always put huge-page HPTEs in the rmap chain
+		 * corresponding to their page base address.
+		 */
+		if (npages && map)
+			for (j = i; npages; ++j, --npages)
+				__set_bit_le(j, map);
 		++rmapp;
 		++rmapp;
 	}
 	}
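
Editor's note: because huge-page HPTEs are now kept on the rmap chain of their base address, one HPTE can dirty many system pages, which is why kvm_test_clear_dirty_npages() returns a count and the loop above sets that many consecutive bits. A worked example with hypothetical numbers (16MB guest page, 4k host PAGE_SIZE):

/* Number of system pages covered by one huge-page HPTE:
 * 16MB / 4k = 4096 dirty-map bits for a single HPTE. */
static unsigned long hpte_npages_dirty(unsigned long psize)
{
	return (psize + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

As the comment in the hunk notes, the starting index i is always a multiple of that count precisely because the HPTE sits on the rmap chain of the huge page's base gfn.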
 
 

+ 40 - 47
arch/powerpc/kvm/book3s_64_slb.S

@@ -17,30 +17,9 @@
  * Authors: Alexander Graf <agraf@suse.de>
  * Authors: Alexander Graf <agraf@suse.de>
  */
  */
 
 
-#ifdef __LITTLE_ENDIAN__
-#error Need to fix SLB shadow accesses in little endian mode
-#endif
-
-#define SHADOW_SLB_ESID(num)	(SLBSHADOW_SAVEAREA + (num * 0x10))
-#define SHADOW_SLB_VSID(num)	(SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8)
-#define UNBOLT_SLB_ENTRY(num) \
-	ld	r9, SHADOW_SLB_ESID(num)(r12); \
-	/* Invalid? Skip. */; \
-	rldicl. r0, r9, 37, 63; \
-	beq	slb_entry_skip_ ## num; \
-	xoris	r9, r9, SLB_ESID_V@h; \
-	std	r9, SHADOW_SLB_ESID(num)(r12); \
-  slb_entry_skip_ ## num:
-
-#define REBOLT_SLB_ENTRY(num) \
-	ld	r10, SHADOW_SLB_ESID(num)(r11); \
-	cmpdi	r10, 0; \
-	beq	slb_exit_skip_ ## num; \
-	oris	r10, r10, SLB_ESID_V@h; \
-	ld	r9, SHADOW_SLB_VSID(num)(r11); \
-	slbmte	r9, r10; \
-	std	r10, SHADOW_SLB_ESID(num)(r11); \
-slb_exit_skip_ ## num:
+#define SHADOW_SLB_ENTRY_LEN	0x10
+#define OFFSET_ESID(x)		(SHADOW_SLB_ENTRY_LEN * x)
+#define OFFSET_VSID(x)		((SHADOW_SLB_ENTRY_LEN * x) + 8)
 
 
 /******************************************************************************
 /******************************************************************************
  *                                                                            *
  *                                                                            *
@@ -64,20 +43,15 @@ slb_exit_skip_ ## num:
 	 * SVCPU[LR]  = guest LR
 	 * SVCPU[LR]  = guest LR
 	 */
 	 */
 
 
-	/* Remove LPAR shadow entries */
+BEGIN_FW_FTR_SECTION
 
 
-#if SLB_NUM_BOLTED == 3
+	/* Declare SLB shadow as 0 entries big */
 
 
-	ld	r12, PACA_SLBSHADOWPTR(r13)
+	ld	r11, PACA_SLBSHADOWPTR(r13)
+	li	r8, 0
+	stb	r8, 3(r11)
 
 
-	/* Remove bolted entries */
-	UNBOLT_SLB_ENTRY(0)
-	UNBOLT_SLB_ENTRY(1)
-	UNBOLT_SLB_ENTRY(2)
-	
-#else
-#error unknown number of bolted entries
-#endif
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
 
 
 	/* Flush SLB */
 	/* Flush SLB */
 
 
@@ -100,7 +74,7 @@ slb_loop_enter:
 
 
 	ld	r10, 0(r11)
 	ld	r10, 0(r11)
 
 
-	rldicl. r0, r10, 37, 63
+	andis.	r9, r10, SLB_ESID_V@h
 	beq	slb_loop_enter_skip
 	beq	slb_loop_enter_skip
 
 
 	ld	r9, 8(r11)
 	ld	r9, 8(r11)
@@ -137,23 +111,42 @@ slb_do_enter:
 	 *
 	 *
 	 */
 	 */
 
 
-	/* Restore bolted entries from the shadow and fix it along the way */
+	/* Remove all SLB entries that are in use. */
 
 
-	/* We don't store anything in entry 0, so we don't need to take care of it */
+	li	r0, r0
+	slbmte	r0, r0
 	slbia
 	slbia
-	isync
 
 
-#if SLB_NUM_BOLTED == 3
+	/* Restore bolted entries from the shadow */
 
 
 	ld	r11, PACA_SLBSHADOWPTR(r13)
 	ld	r11, PACA_SLBSHADOWPTR(r13)
 
 
-	REBOLT_SLB_ENTRY(0)
-	REBOLT_SLB_ENTRY(1)
-	REBOLT_SLB_ENTRY(2)
-	
-#else
-#error unknown number of bolted entries
-#endif
+BEGIN_FW_FTR_SECTION
+
+	/* Declare SLB shadow as SLB_NUM_BOLTED entries big */
+
+	li	r8, SLB_NUM_BOLTED
+	stb	r8, 3(r11)
+
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
+
+	/* Manually load all entries from shadow SLB */
+
+	li	r8, SLBSHADOW_SAVEAREA
+	li	r7, SLBSHADOW_SAVEAREA + 8
+
+	.rept	SLB_NUM_BOLTED
+	LDX_BE	r10, r11, r8
+	cmpdi	r10, 0
+	beq	1f
+	LDX_BE	r9, r11, r7
+	slbmte	r9, r10
+1:	addi	r7, r7, SHADOW_SLB_ENTRY_LEN
+	addi	r8, r8, SHADOW_SLB_ENTRY_LEN
+	.endr
+
+	isync
+	sync
 
 
 slb_do_exit:
 slb_do_exit:
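
Editor's note: the rewritten exit path walks the shadow SLB with LDX_BE at the OFFSET_ESID()/OFFSET_VSID() offsets defined at the top of the file, which is what makes it safe on little-endian hosts (the shadow buffer shared with the hypervisor stays big-endian). A C rendering of the layout those offsets imply; the struct name here is invented for illustration, the real definition lives in the lppaca/slb_shadow structures.

/* Hypothetical C view of one 0x10-byte shadow-SLB slot.  Fields are
 * big-endian in memory, hence the byte-reversing LDX_BE loads above. */
struct shadow_slb_entry {
	__be64 esid;	/* + 0x0, SLB_ESID_V marks a valid entry */
	__be64 vsid;	/* + 0x8 */
};

#define SHADOW_SLB_ENTRY_LEN	sizeof(struct shadow_slb_entry)		/* 0x10 */
#define OFFSET_ESID(n)		((n) * SHADOW_SLB_ENTRY_LEN)
#define OFFSET_VSID(n)		(OFFSET_ESID(n) + offsetof(struct shadow_slb_entry, vsid))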
 
 

+ 104 - 52
arch/powerpc/kvm/book3s_emulate.c

@@ -80,7 +80,7 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
 		return false;
 		return false;
 
 
 	/* Limit user space to its own small SPR set */
 	/* Limit user space to its own small SPR set */
-	if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM)
+	if ((kvmppc_get_msr(vcpu) & MSR_PR) && level > PRIV_PROBLEM)
 		return false;
 		return false;
 
 
 	return true;
 	return true;
@@ -94,14 +94,31 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int rs = get_rs(inst);
 	int rs = get_rs(inst);
 	int ra = get_ra(inst);
 	int ra = get_ra(inst);
 	int rb = get_rb(inst);
 	int rb = get_rb(inst);
+	u32 inst_sc = 0x44000002;
 
 
 	switch (get_op(inst)) {
 	switch (get_op(inst)) {
+	case 0:
+		emulated = EMULATE_FAIL;
+		if ((kvmppc_get_msr(vcpu) & MSR_LE) &&
+		    (inst == swab32(inst_sc))) {
+			/*
+			 * This is the byte reversed syscall instruction of our
+			 * hypercall handler. Early versions of LE Linux didn't
+			 * swap the instructions correctly and ended up in
+			 * illegal instructions.
+			 * Just always fail hypercalls on these broken systems.
+			 */
+			kvmppc_set_gpr(vcpu, 3, EV_UNIMPLEMENTED);
+			kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+			emulated = EMULATE_DONE;
+		}
+		break;
 	case 19:
 	case 19:
 		switch (get_xop(inst)) {
 		switch (get_xop(inst)) {
 		case OP_19_XOP_RFID:
 		case OP_19_XOP_RFID:
 		case OP_19_XOP_RFI:
 		case OP_19_XOP_RFI:
-			kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0);
-			kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
+			kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
+			kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));
 			*advance = 0;
 			*advance = 0;
 			break;
 			break;
 
 
@@ -113,16 +130,16 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case 31:
 	case 31:
 		switch (get_xop(inst)) {
 		switch (get_xop(inst)) {
 		case OP_31_XOP_MFMSR:
 		case OP_31_XOP_MFMSR:
-			kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
+			kvmppc_set_gpr(vcpu, rt, kvmppc_get_msr(vcpu));
 			break;
 			break;
 		case OP_31_XOP_MTMSRD:
 		case OP_31_XOP_MTMSRD:
 		{
 		{
 			ulong rs_val = kvmppc_get_gpr(vcpu, rs);
 			ulong rs_val = kvmppc_get_gpr(vcpu, rs);
 			if (inst & 0x10000) {
 			if (inst & 0x10000) {
-				ulong new_msr = vcpu->arch.shared->msr;
+				ulong new_msr = kvmppc_get_msr(vcpu);
 				new_msr &= ~(MSR_RI | MSR_EE);
 				new_msr &= ~(MSR_RI | MSR_EE);
 				new_msr |= rs_val & (MSR_RI | MSR_EE);
 				new_msr |= rs_val & (MSR_RI | MSR_EE);
-				vcpu->arch.shared->msr = new_msr;
+				kvmppc_set_msr_fast(vcpu, new_msr);
 			} else
 			} else
 				kvmppc_set_msr(vcpu, rs_val);
 				kvmppc_set_msr(vcpu, rs_val);
 			break;
 			break;
@@ -179,7 +196,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
 			int i;
 			int i;
 
 
-		        if ((vcpu->arch.shared->msr & MSR_PR) ||
+		        if ((kvmppc_get_msr(vcpu) & MSR_PR) ||
 			    !vcpu->arch.papr_enabled) {
 			    !vcpu->arch.papr_enabled) {
 				emulated = EMULATE_FAIL;
 				emulated = EMULATE_FAIL;
 				break;
 				break;
@@ -261,14 +278,14 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				ra_val = kvmppc_get_gpr(vcpu, ra);
 				ra_val = kvmppc_get_gpr(vcpu, ra);
 
 
 			addr = (ra_val + rb_val) & ~31ULL;
 			addr = (ra_val + rb_val) & ~31ULL;
-			if (!(vcpu->arch.shared->msr & MSR_SF))
+			if (!(kvmppc_get_msr(vcpu) & MSR_SF))
 				addr &= 0xffffffff;
 				addr &= 0xffffffff;
 			vaddr = addr;
 			vaddr = addr;
 
 
 			r = kvmppc_st(vcpu, &addr, 32, zeros, true);
 			r = kvmppc_st(vcpu, &addr, 32, zeros, true);
 			if ((r == -ENOENT) || (r == -EPERM)) {
 			if ((r == -ENOENT) || (r == -EPERM)) {
 				*advance = 0;
 				*advance = 0;
-				vcpu->arch.shared->dar = vaddr;
+				kvmppc_set_dar(vcpu, vaddr);
 				vcpu->arch.fault_dar = vaddr;
 				vcpu->arch.fault_dar = vaddr;
 
 
 				dsisr = DSISR_ISSTORE;
 				dsisr = DSISR_ISSTORE;
@@ -277,7 +294,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				else if (r == -EPERM)
 				else if (r == -EPERM)
 					dsisr |= DSISR_PROTFAULT;
 					dsisr |= DSISR_PROTFAULT;
 
 
-				vcpu->arch.shared->dsisr = dsisr;
+				kvmppc_set_dsisr(vcpu, dsisr);
 				vcpu->arch.fault_dsisr = dsisr;
 				vcpu->arch.fault_dsisr = dsisr;
 
 
 				kvmppc_book3s_queue_irqprio(vcpu,
 				kvmppc_book3s_queue_irqprio(vcpu,
@@ -356,10 +373,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		to_book3s(vcpu)->sdr1 = spr_val;
 		to_book3s(vcpu)->sdr1 = spr_val;
 		break;
 		break;
 	case SPRN_DSISR:
 	case SPRN_DSISR:
-		vcpu->arch.shared->dsisr = spr_val;
+		kvmppc_set_dsisr(vcpu, spr_val);
 		break;
 		break;
 	case SPRN_DAR:
 	case SPRN_DAR:
-		vcpu->arch.shared->dar = spr_val;
+		kvmppc_set_dar(vcpu, spr_val);
 		break;
 		break;
 	case SPRN_HIOR:
 	case SPRN_HIOR:
 		to_book3s(vcpu)->hior = spr_val;
 		to_book3s(vcpu)->hior = spr_val;
@@ -438,6 +455,31 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_GQR7:
 	case SPRN_GQR7:
 		to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
 		to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
 		break;
 		break;
+	case SPRN_FSCR:
+		vcpu->arch.fscr = spr_val;
+		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_BESCR:
+		vcpu->arch.bescr = spr_val;
+		break;
+	case SPRN_EBBHR:
+		vcpu->arch.ebbhr = spr_val;
+		break;
+	case SPRN_EBBRR:
+		vcpu->arch.ebbrr = spr_val;
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case SPRN_TFHAR:
+		vcpu->arch.tfhar = spr_val;
+		break;
+	case SPRN_TEXASR:
+		vcpu->arch.texasr = spr_val;
+		break;
+	case SPRN_TFIAR:
+		vcpu->arch.tfiar = spr_val;
+		break;
+#endif
+#endif
 	case SPRN_ICTC:
 	case SPRN_ICTC:
 	case SPRN_THRM1:
 	case SPRN_THRM1:
 	case SPRN_THRM2:
 	case SPRN_THRM2:
@@ -455,6 +497,13 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_WPAR_GEKKO:
 	case SPRN_WPAR_GEKKO:
 	case SPRN_MSSSR0:
 	case SPRN_MSSSR0:
 	case SPRN_DABR:
 	case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_MMCRS:
+	case SPRN_MMCRA:
+	case SPRN_MMCR0:
+	case SPRN_MMCR1:
+	case SPRN_MMCR2:
+#endif
 		break;
 		break;
 unprivileged:
 unprivileged:
 	default:
 	default:
@@ -493,10 +542,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
 		*spr_val = to_book3s(vcpu)->sdr1;
 		*spr_val = to_book3s(vcpu)->sdr1;
 		break;
 		break;
 	case SPRN_DSISR:
 	case SPRN_DSISR:
-		*spr_val = vcpu->arch.shared->dsisr;
+		*spr_val = kvmppc_get_dsisr(vcpu);
 		break;
 		break;
 	case SPRN_DAR:
 	case SPRN_DAR:
-		*spr_val = vcpu->arch.shared->dar;
+		*spr_val = kvmppc_get_dar(vcpu);
 		break;
 		break;
 	case SPRN_HIOR:
 	case SPRN_HIOR:
 		*spr_val = to_book3s(vcpu)->hior;
 		*spr_val = to_book3s(vcpu)->hior;
@@ -538,6 +587,31 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
 	case SPRN_GQR7:
 	case SPRN_GQR7:
 		*spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
 		*spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
 		break;
 		break;
+	case SPRN_FSCR:
+		*spr_val = vcpu->arch.fscr;
+		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_BESCR:
+		*spr_val = vcpu->arch.bescr;
+		break;
+	case SPRN_EBBHR:
+		*spr_val = vcpu->arch.ebbhr;
+		break;
+	case SPRN_EBBRR:
+		*spr_val = vcpu->arch.ebbrr;
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case SPRN_TFHAR:
+		*spr_val = vcpu->arch.tfhar;
+		break;
+	case SPRN_TEXASR:
+		*spr_val = vcpu->arch.texasr;
+		break;
+	case SPRN_TFIAR:
+		*spr_val = vcpu->arch.tfiar;
+		break;
+#endif
+#endif
 	case SPRN_THRM1:
 	case SPRN_THRM1:
 	case SPRN_THRM2:
 	case SPRN_THRM2:
 	case SPRN_THRM3:
 	case SPRN_THRM3:
@@ -553,6 +627,14 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
 	case SPRN_WPAR_GEKKO:
 	case SPRN_WPAR_GEKKO:
 	case SPRN_MSSSR0:
 	case SPRN_MSSSR0:
 	case SPRN_DABR:
 	case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_MMCRS:
+	case SPRN_MMCRA:
+	case SPRN_MMCR0:
+	case SPRN_MMCR1:
+	case SPRN_MMCR2:
+	case SPRN_TIR:
+#endif
 		*spr_val = 0;
 		*spr_val = 0;
 		break;
 		break;
 	default:
 	default:
@@ -569,48 +651,17 @@ unprivileged:
 
 
 u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
 u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
 {
 {
-	u32 dsisr = 0;
-
-	/*
-	 * This is what the spec says about DSISR bits (not mentioned = 0):
-	 *
-	 * 12:13		[DS]	Set to bits 30:31
-	 * 15:16		[X]	Set to bits 29:30
-	 * 17			[X]	Set to bit 25
-	 *			[D/DS]	Set to bit 5
-	 * 18:21		[X]	Set to bits 21:24
-	 *			[D/DS]	Set to bits 1:4
-	 * 22:26			Set to bits 6:10 (RT/RS/FRT/FRS)
-	 * 27:31			Set to bits 11:15 (RA)
-	 */
-
-	switch (get_op(inst)) {
-	/* D-form */
-	case OP_LFS:
-	case OP_LFD:
-	case OP_STFD:
-	case OP_STFS:
-		dsisr |= (inst >> 12) & 0x4000;	/* bit 17 */
-		dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */
-		break;
-	/* X-form */
-	case 31:
-		dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */
-		dsisr |= (inst << 8)  & 0x04000; /* bit 17 */
-		dsisr |= (inst << 3)  & 0x03c00; /* bits 18:21 */
-		break;
-	default:
-		printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
-		break;
-	}
-
-	dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */
-
-	return dsisr;
+	return make_dsisr(inst);
 }
 }
 
 
 ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
 ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
 {
 {
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Linux's fix_alignment() assumes that DAR is valid, so can we
+	 */
+	return vcpu->arch.fault_dar;
+#else
 	ulong dar = 0;
 	ulong dar = 0;
 	ulong ra = get_ra(inst);
 	ulong ra = get_ra(inst);
 	ulong rb = get_rb(inst);
 	ulong rb = get_rb(inst);
@@ -635,4 +686,5 @@ ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
 	}
 	}
 
 
 	return dar;
 	return dar;
+#endif
 }
 }
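
Editor's note: the new opcode-0 case above catches hypercalls from early little-endian guests that emitted a byte-swapped sc image. A small sketch of the check itself; 0x44000002 is the canonical sc encoding, swab32() the ordinary 32-bit byte swap.

/* Sketch of the "graciously fail broken LE hypercalls" test. */
static bool is_byteswapped_sc(u32 inst)
{
	const u32 inst_sc = 0x44000002;		/* sc */

	/* swab32(0x44000002) == 0x02000044, which is not a valid
	 * instruction: trapping on it while the guest runs in LE mode
	 * means an old guest forgot to swap its hypercall, so the
	 * handler above returns EV_UNIMPLEMENTED instead of faulting. */
	return inst == swab32(inst_sc);
}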

+ 1 - 0
arch/powerpc/kvm/book3s_exports.c

@@ -18,6 +18,7 @@
  */
  */
 
 
 #include <linux/export.h>
 #include <linux/export.h>
+#include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_book3s.h>
 
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE

+ 18 - 30
arch/powerpc/kvm/book3s_hv.c

@@ -879,24 +879,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_IAMR:
 	case KVM_REG_PPC_IAMR:
 		*val = get_reg_val(id, vcpu->arch.iamr);
 		*val = get_reg_val(id, vcpu->arch.iamr);
 		break;
 		break;
-	case KVM_REG_PPC_FSCR:
-		*val = get_reg_val(id, vcpu->arch.fscr);
-		break;
 	case KVM_REG_PPC_PSPB:
 	case KVM_REG_PPC_PSPB:
 		*val = get_reg_val(id, vcpu->arch.pspb);
 		*val = get_reg_val(id, vcpu->arch.pspb);
 		break;
 		break;
-	case KVM_REG_PPC_EBBHR:
-		*val = get_reg_val(id, vcpu->arch.ebbhr);
-		break;
-	case KVM_REG_PPC_EBBRR:
-		*val = get_reg_val(id, vcpu->arch.ebbrr);
-		break;
-	case KVM_REG_PPC_BESCR:
-		*val = get_reg_val(id, vcpu->arch.bescr);
-		break;
-	case KVM_REG_PPC_TAR:
-		*val = get_reg_val(id, vcpu->arch.tar);
-		break;
 	case KVM_REG_PPC_DPDES:
 	case KVM_REG_PPC_DPDES:
 		*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
 		*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
 		break;
 		break;
@@ -1091,24 +1076,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_IAMR:
 	case KVM_REG_PPC_IAMR:
 		vcpu->arch.iamr = set_reg_val(id, *val);
 		vcpu->arch.iamr = set_reg_val(id, *val);
 		break;
 		break;
-	case KVM_REG_PPC_FSCR:
-		vcpu->arch.fscr = set_reg_val(id, *val);
-		break;
 	case KVM_REG_PPC_PSPB:
 	case KVM_REG_PPC_PSPB:
 		vcpu->arch.pspb = set_reg_val(id, *val);
 		vcpu->arch.pspb = set_reg_val(id, *val);
 		break;
 		break;
-	case KVM_REG_PPC_EBBHR:
-		vcpu->arch.ebbhr = set_reg_val(id, *val);
-		break;
-	case KVM_REG_PPC_EBBRR:
-		vcpu->arch.ebbrr = set_reg_val(id, *val);
-		break;
-	case KVM_REG_PPC_BESCR:
-		vcpu->arch.bescr = set_reg_val(id, *val);
-		break;
-	case KVM_REG_PPC_TAR:
-		vcpu->arch.tar = set_reg_val(id, *val);
-		break;
 	case KVM_REG_PPC_DPDES:
 	case KVM_REG_PPC_DPDES:
 		vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
 		vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
 		break;
 		break;
@@ -1280,6 +1250,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 		goto free_vcpu;
 		goto free_vcpu;
 
 
 	vcpu->arch.shared = &vcpu->arch.shregs;
 	vcpu->arch.shared = &vcpu->arch.shregs;
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	/*
+	 * The shared struct is never shared on HV,
+	 * so we can always use host endianness
+	 */
+#ifdef __BIG_ENDIAN__
+	vcpu->arch.shared_big_endian = true;
+#else
+	vcpu->arch.shared_big_endian = false;
+#endif
+#endif
 	vcpu->arch.mmcr[0] = MMCR0_FC;
 	vcpu->arch.mmcr[0] = MMCR0_FC;
 	vcpu->arch.ctrl = CTRL_RUNLATCH;
 	vcpu->arch.ctrl = CTRL_RUNLATCH;
 	/* default to host PVR, since we can't spoof it */
 	/* default to host PVR, since we can't spoof it */
@@ -1949,6 +1930,13 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 	 * support pte_enc here
 	 * support pte_enc here
 	 */
 	 */
 	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
 	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
+	/*
+	 * Add 16MB MPSS support if host supports it
+	 */
+	if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
+		(*sps)->enc[1].page_shift = 24;
+		(*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
+	}
 	(*sps)++;
 	(*sps)++;
 }
 }
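
Editor's note: the kvmppc_add_seg_page_size() change advertises a second actual-page-size encoding (page_shift 24, i.e. 16MB MPSS) under each base segment size when the host MMU supports it. A hedged sketch of how userspace would observe this via KVM_PPC_GET_SMMU_INFO; the struct and field names follow the uapi as I recall them and should be checked against the headers.

/* Illustrative only: dump base/actual page-size pairs for a VM fd. */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void dump_page_sizes(int vm_fd)
{
	struct kvm_ppc_smmu_info info;
	int i, j;

	if (ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info) < 0)
		return;

	for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
		if (!info.sps[i].page_shift)
			continue;		/* unused slot */
		for (j = 0; j < KVM_PPC_PAGE_SIZES_MAX_SZ; j++)
			if (info.sps[i].enc[j].page_shift)
				printf("base 2^%u: actual 2^%u (pte_enc 0x%x)\n",
				       info.sps[i].page_shift,
				       info.sps[i].enc[j].page_shift,
				       info.sps[i].enc[j].pte_enc);
	}
	/* With the hunk above, an MPSS-capable host reports an extra
	 * "actual 2^24" encoding under its base segment sizes. */
}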
 
 

+ 2 - 1
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -42,13 +42,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
 
 
 	/*
 	/*
 	 * If there is only one vcore, and it's currently running,
 	 * If there is only one vcore, and it's currently running,
+	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
 	 * we can use tlbiel as long as we mark all other physical
 	 * we can use tlbiel as long as we mark all other physical
 	 * cores as potentially having stale TLB entries for this lpid.
 	 * cores as potentially having stale TLB entries for this lpid.
 	 * If we're not using MMU notifiers, we never take pages away
 	 * If we're not using MMU notifiers, we never take pages away
 	 * from the guest, so we can use tlbiel if requested.
 	 * from the guest, so we can use tlbiel if requested.
 	 * Otherwise, don't use tlbiel.
 	 * Otherwise, don't use tlbiel.
 	 */
 	 */
-	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
+	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
 		global = 0;
 		global = 0;
 	else if (kvm->arch.using_mmu_notifiers)
 	else if (kvm->arch.using_mmu_notifiers)
 		global = 1;
 		global = 1;

+ 58 - 2
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -86,6 +86,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	lbz	r4, LPPACA_PMCINUSE(r3)
 	lbz	r4, LPPACA_PMCINUSE(r3)
 	cmpwi	r4, 0
 	cmpwi	r4, 0
 	beq	23f			/* skip if not */
 	beq	23f			/* skip if not */
+BEGIN_FTR_SECTION
+	ld	r3, HSTATE_MMCR(r13)
+	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+	cmpwi	r4, MMCR0_PMAO
+	beql	kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	lwz	r3, HSTATE_PMC(r13)
 	lwz	r3, HSTATE_PMC(r13)
 	lwz	r4, HSTATE_PMC + 4(r13)
 	lwz	r4, HSTATE_PMC + 4(r13)
 	lwz	r5, HSTATE_PMC + 8(r13)
 	lwz	r5, HSTATE_PMC + 8(r13)
@@ -737,6 +743,12 @@ skip_tm:
 	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
 	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
 	isync
 	isync
+BEGIN_FTR_SECTION
+	ld	r3, VCPU_MMCR(r4)
+	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+	cmpwi	r5, MMCR0_PMAO
+	beql	kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
 	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
 	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
 	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
 	lwz	r6, VCPU_PMC + 8(r4)
 	lwz	r6, VCPU_PMC + 8(r4)
@@ -1439,6 +1451,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 25:
 25:
 	/* Save PMU registers if requested */
 	/* Save PMU registers if requested */
 	/* r8 and cr0.eq are live here */
 	/* r8 and cr0.eq are live here */
+BEGIN_FTR_SECTION
+	/*
+	 * POWER8 seems to have a hardware bug where setting
+	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
+	 * when some counters are already negative doesn't seem
+	 * to cause a performance monitor alert (and hence interrupt).
+	 * The effect of this is that when saving the PMU state,
+	 * if there is no PMU alert pending when we read MMCR0
+	 * before freezing the counters, but one becomes pending
+	 * before we read the counters, we lose it.
+	 * To work around this, we need a way to freeze the counters
+	 * before reading MMCR0.  Normally, freezing the counters
+	 * is done by writing MMCR0 (to set MMCR0[FC]) which
+	 * unavoidably writes MMCR0[PMA0] as well.  On POWER8,
+	 * we can also freeze the counters using MMCR2, by writing
+	 * 1s to all the counter freeze condition bits (there are
+	 * 9 bits each for 6 counters).
+	 */
+	li	r3, -1			/* set all freeze bits */
+	clrrdi	r3, r3, 10
+	mfspr	r10, SPRN_MMCR2
+	mtspr	SPRN_MMCR2, r3
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	li	r3, 1
 	li	r3, 1
 	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
 	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
 	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
 	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
@@ -1462,6 +1498,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r4, VCPU_MMCR(r9)
 	std	r4, VCPU_MMCR(r9)
 	std	r5, VCPU_MMCR + 8(r9)
 	std	r5, VCPU_MMCR + 8(r9)
 	std	r6, VCPU_MMCR + 16(r9)
 	std	r6, VCPU_MMCR + 16(r9)
+BEGIN_FTR_SECTION
+	std	r10, VCPU_MMCR + 24(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	std	r7, VCPU_SIAR(r9)
 	std	r7, VCPU_SIAR(r9)
 	std	r8, VCPU_SDAR(r9)
 	std	r8, VCPU_SDAR(r9)
 	mfspr	r3, SPRN_PMC1
 	mfspr	r3, SPRN_PMC1
@@ -1485,12 +1524,10 @@ BEGIN_FTR_SECTION
 	stw	r11, VCPU_PMC + 28(r9)
 	stw	r11, VCPU_PMC + 28(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 BEGIN_FTR_SECTION
 BEGIN_FTR_SECTION
-	mfspr	r4, SPRN_MMCR2
 	mfspr	r5, SPRN_SIER
 	mfspr	r5, SPRN_SIER
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r7, SPRN_SPMC2
 	mfspr	r7, SPRN_SPMC2
 	mfspr	r8, SPRN_MMCRS
 	mfspr	r8, SPRN_MMCRS
-	std	r4, VCPU_MMCR + 24(r9)
 	std	r5, VCPU_SIER(r9)
 	std	r5, VCPU_SIER(r9)
 	stw	r6, VCPU_PMC + 24(r9)
 	stw	r6, VCPU_PMC + 24(r9)
 	stw	r7, VCPU_PMC + 28(r9)
 	stw	r7, VCPU_PMC + 28(r9)
@@ -2227,6 +2264,7 @@ machine_check_realmode:
 	beq	mc_cont
 	beq	mc_cont
 	/* If not, deliver a machine check.  SRR0/1 are already set */
 	/* If not, deliver a machine check.  SRR0/1 are already set */
 	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
 	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
+	ld	r11, VCPU_MSR(r9)
 	bl	kvmppc_msr_interrupt
 	bl	kvmppc_msr_interrupt
 	b	fast_interrupt_c_return
 	b	fast_interrupt_c_return
 
 
@@ -2431,3 +2469,21 @@ kvmppc_msr_interrupt:
 	li	r0, 1
 	li	r0, 1
 1:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
 1:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
 	blr
 	blr
+
+/*
+ * This works around a hardware bug on POWER8E processors, where
+ * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
+ * performance monitor interrupt.  Instead, when we need to have
+ * an interrupt pending, we have to arrange for a counter to overflow.
+ */
+kvmppc_fix_pmao:
+	li	r3, 0
+	mtspr	SPRN_MMCR2, r3
+	lis	r3, (MMCR0_PMXE | MMCR0_FCECE)@h
+	ori	r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
+	mtspr	SPRN_MMCR0, r3
+	lis	r3, 0x7fff
+	ori	r3, r3, 0xffff
+	mtspr	SPRN_PMC6, r3
+	isync
+	blr
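
Editor's note: two constants in the workaround above are easier to read in C. The li/clrrdi pair builds the MMCR2 "freeze everything" mask described in the long comment (9 freeze-condition bits for each of 6 counters), and kvmppc_fix_pmao parks PMC6 one event short of overflow so the next counted event raises the interrupt the broken PMAO write failed to deliver. These restate the assembly; nothing new is derived here.

/* MMCR2 value written before saving MMCR0: all bits set except the low
 * ten, i.e. 54 one-bits = 9 freeze-condition bits x 6 counters. */
u64 mmcr2_freeze_all = ~((1ULL << 10) - 1);	/* 0xfffffffffffffc00 */

/* PMC6 value loaded by kvmppc_fix_pmao: one below the overflow point,
 * so the next event flips bit 31 and generates the pending PMI. */
u32 pmc6_almost_overflow = 0x7fffffff;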

+ 21 - 2
arch/powerpc/kvm/book3s_interrupts.S

@@ -104,8 +104,27 @@ kvm_start_lightweight:
 	stb	r3, HSTATE_RESTORE_HID5(r13)
 	stb	r3, HSTATE_RESTORE_HID5(r13)
 
 
 	/* Load up guest SPRG3 value, since it's user readable */
 	/* Load up guest SPRG3 value, since it's user readable */
-	ld	r3, VCPU_SHARED(r4)
-	ld	r3, VCPU_SHARED_SPRG3(r3)
+	lwz	r3, VCPU_SHAREDBE(r4)
+	cmpwi	r3, 0
+	ld	r5, VCPU_SHARED(r4)
+	beq	sprg3_little_endian
+sprg3_big_endian:
+#ifdef __BIG_ENDIAN__
+	ld	r3, VCPU_SHARED_SPRG3(r5)
+#else
+	addi	r5, r5, VCPU_SHARED_SPRG3
+	ldbrx	r3, 0, r5
+#endif
+	b	after_sprg3_load
+sprg3_little_endian:
+#ifdef __LITTLE_ENDIAN__
+	ld	r3, VCPU_SHARED_SPRG3(r5)
+#else
+	addi	r5, r5, VCPU_SHARED_SPRG3
+	ldbrx	r3, 0, r5
+#endif
+
+after_sprg3_load:
 	mtspr	SPRN_SPRG3, r3
 	mtspr	SPRN_SPRG3, r3
 #endif /* CONFIG_PPC_BOOK3S_64 */
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 

+ 9 - 7
arch/powerpc/kvm/book3s_paired_singles.c

@@ -165,16 +165,18 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
 
 
 static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
 static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
 {
 {
-	u64 dsisr;
-	struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
+	u32 dsisr;
+	u64 msr = kvmppc_get_msr(vcpu);
 
 
-	shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0);
-	shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0);
-	shared->dar = eaddr;
+	msr = kvmppc_set_field(msr, 33, 36, 0);
+	msr = kvmppc_set_field(msr, 42, 47, 0);
+	kvmppc_set_msr(vcpu, msr);
+	kvmppc_set_dar(vcpu, eaddr);
 	/* Page Fault */
 	/* Page Fault */
 	dsisr = kvmppc_set_field(0, 33, 33, 1);
 	dsisr = kvmppc_set_field(0, 33, 33, 1);
 	if (is_store)
 	if (is_store)
-		shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+		dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
+	kvmppc_set_dsisr(vcpu, dsisr);
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
 }
 }
 
 
@@ -660,7 +662,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	if (!kvmppc_inst_is_paired_single(vcpu, inst))
 	if (!kvmppc_inst_is_paired_single(vcpu, inst))
 		return EMULATE_FAIL;
 		return EMULATE_FAIL;
 
 
-	if (!(vcpu->arch.shared->msr & MSR_FP)) {
+	if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
 		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
 		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
 		return EMULATE_AGAIN;
 		return EMULATE_AGAIN;
 	}
 	}
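
Editor's note: kvmppc_inject_pf() above assembles MSR and DSISR with kvmppc_set_field(), which counts bits IBM-style (bit 0 is the MSB of a 64-bit doubleword), so "bit 33" of DSISR is 0x40000000 (no-HPTE page fault) and "bit 38" is 0x02000000 (store access). The helper below is a hedged sketch of such a setter, not the kernel's exact implementation.

/* IBM-numbered bit-field setter: msb/lsb count from the left of a
 * 64-bit word, so set_field_ibm(0, 33, 33, 1) yields 1ULL << (63 - 33). */
static inline u64 set_field_ibm(u64 word, int msb, int lsb, u64 value)
{
	int nbits = lsb - msb + 1;
	u64 mask = (nbits == 64) ? ~0ULL : ((1ULL << nbits) - 1);
	int shift = 63 - lsb;

	return (word & ~(mask << shift)) | ((value & mask) << shift);
}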

+ 195 - 43
arch/powerpc/kvm/book3s_pr.c

@@ -53,6 +53,7 @@
 
 
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 			     ulong msr);
 			     ulong msr);
+static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 
 
 /* Some compatibility defines */
 /* Some compatibility defines */
 #ifdef CONFIG_PPC_BOOK3S_32
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -89,6 +90,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 #endif
 #endif
 
 
 	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
 	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
 	vcpu->cpu = -1;
 	vcpu->cpu = -1;
 }
 }
 
 
@@ -115,6 +117,9 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 	svcpu->ctr = vcpu->arch.ctr;
 	svcpu->ctr = vcpu->arch.ctr;
 	svcpu->lr  = vcpu->arch.lr;
 	svcpu->lr  = vcpu->arch.lr;
 	svcpu->pc  = vcpu->arch.pc;
 	svcpu->pc  = vcpu->arch.pc;
+#ifdef CONFIG_PPC_BOOK3S_64
+	svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
+#endif
 	svcpu->in_use = true;
 	svcpu->in_use = true;
 }
 }
 
 
@@ -158,6 +163,9 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
 	vcpu->arch.fault_dar   = svcpu->fault_dar;
 	vcpu->arch.fault_dar   = svcpu->fault_dar;
 	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
 	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
 	vcpu->arch.last_inst   = svcpu->last_inst;
 	vcpu->arch.last_inst   = svcpu->last_inst;
+#ifdef CONFIG_PPC_BOOK3S_64
+	vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
+#endif
 	svcpu->in_use = false;
 	svcpu->in_use = false;
 
 
 out:
 out:
@@ -246,14 +254,15 @@ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
 
 
 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 {
 {
-	ulong smsr = vcpu->arch.shared->msr;
+	ulong guest_msr = kvmppc_get_msr(vcpu);
+	ulong smsr = guest_msr;
 
 
 	/* Guest MSR values */
 	/* Guest MSR values */
-	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
+	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
 	/* Process MSR values */
 	/* Process MSR values */
 	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
 	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
 	/* External providers the guest reserved */
 	/* External providers the guest reserved */
-	smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
+	smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
 	/* 64-bit Process MSR values */
 	/* 64-bit Process MSR values */
 #ifdef CONFIG_PPC_BOOK3S_64
 #ifdef CONFIG_PPC_BOOK3S_64
 	smsr |= MSR_ISF | MSR_HV;
 	smsr |= MSR_ISF | MSR_HV;
@@ -263,14 +272,14 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 
 
 static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
 {
-	ulong old_msr = vcpu->arch.shared->msr;
+	ulong old_msr = kvmppc_get_msr(vcpu);
 
 
 #ifdef EXIT_DEBUG
 #ifdef EXIT_DEBUG
 	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
 #endif
 
 
 	msr &= to_book3s(vcpu)->msr_mask;
 	msr &= to_book3s(vcpu)->msr_mask;
-	vcpu->arch.shared->msr = msr;
+	kvmppc_set_msr_fast(vcpu, msr);
 	kvmppc_recalc_shadow_msr(vcpu);
 	kvmppc_recalc_shadow_msr(vcpu);
 
 
 	if (msr & MSR_POW) {
 	if (msr & MSR_POW) {
@@ -281,11 +290,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 
 
 			/* Unset POW bit after we woke up */
 			/* Unset POW bit after we woke up */
 			msr &= ~MSR_POW;
 			msr &= ~MSR_POW;
-			vcpu->arch.shared->msr = msr;
+			kvmppc_set_msr_fast(vcpu, msr);
 		}
 		}
 	}
 	}
 
 
-	if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+	if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
 		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
 		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
 		kvmppc_mmu_flush_segments(vcpu);
 		kvmppc_mmu_flush_segments(vcpu);
 		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
 		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
@@ -317,7 +326,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 	}
 	}
 
 
 	/* Preload FPU if it's enabled */
 	/* Preload FPU if it's enabled */
-	if (vcpu->arch.shared->msr & MSR_FP)
+	if (kvmppc_get_msr(vcpu) & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 }
 }
 
 
@@ -427,8 +436,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)

 	/* patch dcbz into reserved instruction, so we trap */
 	for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
-		if ((page[i] & 0xff0007ff) == INS_DCBZ)
-			page[i] &= 0xfffffff7;
+		if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
+			page[i] &= cpu_to_be32(0xfffffff7);

 	kunmap_atomic(page);
 	put_page(hpage);
@@ -438,7 +447,7 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	ulong mp_pa = vcpu->arch.magic_page_pa;

-	if (!(vcpu->arch.shared->msr & MSR_SF))
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
 		mp_pa = (uint32_t)mp_pa;

 	if (unlikely(mp_pa) &&
@@ -459,8 +468,8 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int page_found = 0;
 	struct kvmppc_pte pte;
 	bool is_mmio = false;
-	bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
-	bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
+	bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
+	bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
 	u64 vsid;

 	relocated = data ? dr : ir;
@@ -480,7 +489,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		pte.page_size = MMU_PAGE_64K;
 	}

-	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
 	case 0:
 		pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
 		break;
@@ -488,7 +497,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case MSR_IR:
 		vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);

-		if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
+		if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
 			pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
 		else
 			pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
@@ -511,22 +520,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,

 	if (page_found == -ENOENT) {
 		/* Page not found in guest PTE entries */
-		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
-		vcpu->arch.shared->msr |=
-			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
+		u64 ssrr1 = vcpu->arch.shadow_srr1;
+		u64 msr = kvmppc_get_msr(vcpu);
+		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+		kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr);
+		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EPERM) {
 		/* Storage protection */
-		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
-		vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
-		vcpu->arch.shared->msr |=
-			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
+		u32 dsisr = vcpu->arch.fault_dsisr;
+		u64 ssrr1 = vcpu->arch.shadow_srr1;
+		u64 msr = kvmppc_get_msr(vcpu);
+		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+		dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
+		kvmppc_set_dsisr(vcpu, dsisr);
+		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EINVAL) {
 		/* Page not found in guest SLB */
-		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
 		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
 	} else if (!is_mmio &&
 		   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
@@ -606,6 +618,25 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 	kvmppc_recalc_shadow_msr(vcpu);
 }

+/* Give up facility (TAR / EBB / DSCR) */
+static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
+		/* Facility not available to the guest, ignore giveup request*/
+		return;
+	}
+
+	switch (fac) {
+	case FSCR_TAR_LG:
+		vcpu->arch.tar = mfspr(SPRN_TAR);
+		mtspr(SPRN_TAR, current->thread.tar);
+		vcpu->arch.shadow_fscr &= ~FSCR_TAR;
+		break;
+	}
+#endif
+}
+
 static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
 {
 	ulong srr0 = kvmppc_get_pc(vcpu);
@@ -614,11 +645,12 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
 
 
 	ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
 	ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
 	if (ret == -ENOENT) {
 	if (ret == -ENOENT) {
-		ulong msr = vcpu->arch.shared->msr;
+		ulong msr = kvmppc_get_msr(vcpu);
 
 
 		msr = kvmppc_set_field(msr, 33, 33, 1);
 		msr = kvmppc_set_field(msr, 33, 33, 1);
 		msr = kvmppc_set_field(msr, 34, 36, 0);
 		msr = kvmppc_set_field(msr, 34, 36, 0);
-		vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
+		msr = kvmppc_set_field(msr, 42, 47, 0);
+		kvmppc_set_msr_fast(vcpu, msr);
 		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
 		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
 		return EMULATE_AGAIN;
 		return EMULATE_AGAIN;
 	}
 	}
@@ -651,7 +683,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
 		return RESUME_GUEST;
 		return RESUME_GUEST;
 
 
-	if (!(vcpu->arch.shared->msr & msr)) {
+	if (!(kvmppc_get_msr(vcpu) & msr)) {
 		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 		return RESUME_GUEST;
 		return RESUME_GUEST;
 	}
 	}
@@ -683,16 +715,20 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #endif
 #endif
 
 
 	if (msr & MSR_FP) {
 	if (msr & MSR_FP) {
+		preempt_disable();
 		enable_kernel_fp();
 		enable_kernel_fp();
 		load_fp_state(&vcpu->arch.fp);
 		load_fp_state(&vcpu->arch.fp);
 		t->fp_save_area = &vcpu->arch.fp;
 		t->fp_save_area = &vcpu->arch.fp;
+		preempt_enable();
 	}
 	}
 
 
 	if (msr & MSR_VEC) {
 	if (msr & MSR_VEC) {
 #ifdef CONFIG_ALTIVEC
 #ifdef CONFIG_ALTIVEC
+		preempt_disable();
 		enable_kernel_altivec();
 		enable_kernel_altivec();
 		load_vr_state(&vcpu->arch.vr);
 		load_vr_state(&vcpu->arch.vr);
 		t->vr_save_area = &vcpu->arch.vr;
 		t->vr_save_area = &vcpu->arch.vr;
+		preempt_enable();
 #endif
 #endif
 	}
 	}
 
 
@@ -716,18 +752,90 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 		return;
 		return;
 
 
 	if (lost_ext & MSR_FP) {
 	if (lost_ext & MSR_FP) {
+		preempt_disable();
 		enable_kernel_fp();
 		enable_kernel_fp();
 		load_fp_state(&vcpu->arch.fp);
 		load_fp_state(&vcpu->arch.fp);
+		preempt_enable();
 	}
 	}
 #ifdef CONFIG_ALTIVEC
 #ifdef CONFIG_ALTIVEC
 	if (lost_ext & MSR_VEC) {
 	if (lost_ext & MSR_VEC) {
+		preempt_disable();
 		enable_kernel_altivec();
 		enable_kernel_altivec();
 		load_vr_state(&vcpu->arch.vr);
 		load_vr_state(&vcpu->arch.vr);
+		preempt_enable();
 	}
 	}
 #endif
 #endif
 	current->thread.regs->msr |= lost_ext;
 	current->thread.regs->msr |= lost_ext;
 }
 }
 
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
+{
+	/* Inject the Interrupt Cause field and trigger a guest interrupt */
+	vcpu->arch.fscr &= ~(0xffULL << 56);
+	vcpu->arch.fscr |= (fac << 56);
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+}
+
+static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+	enum emulation_result er = EMULATE_FAIL;
+
+	if (!(kvmppc_get_msr(vcpu) & MSR_PR))
+		er = kvmppc_emulate_instruction(vcpu->run, vcpu);
+
+	if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
+		/* Couldn't emulate, trigger interrupt in guest */
+		kvmppc_trigger_fac_interrupt(vcpu, fac);
+	}
+}
+
+/* Enable facilities (TAR, EBB, DSCR) for the guest */
+static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+	bool guest_fac_enabled;
+	BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));
+
+	/*
+	 * Not every facility is enabled by FSCR bits, check whether the
+	 * guest has this facility enabled at all.
+	 */
+	switch (fac) {
+	case FSCR_TAR_LG:
+	case FSCR_EBB_LG:
+		guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
+		break;
+	case FSCR_TM_LG:
+		guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
+		break;
+	default:
+		guest_fac_enabled = false;
+		break;
+	}
+
+	if (!guest_fac_enabled) {
+		/* Facility not enabled by the guest */
+		kvmppc_trigger_fac_interrupt(vcpu, fac);
+		return RESUME_GUEST;
+	}
+
+	switch (fac) {
+	case FSCR_TAR_LG:
+		/* TAR switching isn't lazy in Linux yet */
+		current->thread.tar = mfspr(SPRN_TAR);
+		mtspr(SPRN_TAR, vcpu->arch.tar);
+		vcpu->arch.shadow_fscr |= FSCR_TAR;
+		break;
+	default:
+		kvmppc_emulate_fac(vcpu, fac);
+		break;
+	}
+
+	return RESUME_GUEST;
+}
+#endif
+
 int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			  unsigned int exit_nr)
 			  unsigned int exit_nr)
 {
 {
@@ -784,7 +892,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
 			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
 			r = RESUME_GUEST;
 			r = RESUME_GUEST;
 		} else {
 		} else {
-			vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
+			u64 msr = kvmppc_get_msr(vcpu);
+			msr |= shadow_srr1 & 0x58000000;
+			kvmppc_set_msr_fast(vcpu, msr);
 			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 			r = RESUME_GUEST;
 			r = RESUME_GUEST;
 		}
 		}
@@ -824,8 +934,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
 			r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
 			srcu_read_unlock(&vcpu->kvm->srcu, idx);
 			srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		} else {
 		} else {
-			vcpu->arch.shared->dar = dar;
-			vcpu->arch.shared->dsisr = fault_dsisr;
+			kvmppc_set_dar(vcpu, dar);
+			kvmppc_set_dsisr(vcpu, fault_dsisr);
 			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 			r = RESUME_GUEST;
 			r = RESUME_GUEST;
 		}
 		}
@@ -833,7 +943,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 	}
 	case BOOK3S_INTERRUPT_DATA_SEGMENT:
 	case BOOK3S_INTERRUPT_DATA_SEGMENT:
 		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
 		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
-			vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+			kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
 			kvmppc_book3s_queue_irqprio(vcpu,
 			kvmppc_book3s_queue_irqprio(vcpu,
 				BOOK3S_INTERRUPT_DATA_SEGMENT);
 				BOOK3S_INTERRUPT_DATA_SEGMENT);
 		}
 		}
@@ -871,7 +981,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 program_interrupt:
 program_interrupt:
 		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
 
-		if (vcpu->arch.shared->msr & MSR_PR) {
+		if (kvmppc_get_msr(vcpu) & MSR_PR) {
 #ifdef EXIT_DEBUG
 #ifdef EXIT_DEBUG
 			printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
 			printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
 #endif
 #endif
@@ -913,7 +1023,7 @@ program_interrupt:
 	case BOOK3S_INTERRUPT_SYSCALL:
 	case BOOK3S_INTERRUPT_SYSCALL:
 		if (vcpu->arch.papr_enabled &&
 		if (vcpu->arch.papr_enabled &&
 		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
 		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
-		    !(vcpu->arch.shared->msr & MSR_PR)) {
+		    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
 			/* SC 1 papr hypercalls */
 			/* SC 1 papr hypercalls */
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
 			int i;
 			int i;
@@ -945,7 +1055,7 @@ program_interrupt:
 				gprs[i] = kvmppc_get_gpr(vcpu, i);
 				gprs[i] = kvmppc_get_gpr(vcpu, i);
 			vcpu->arch.osi_needed = 1;
 			vcpu->arch.osi_needed = 1;
 			r = RESUME_HOST_NV;
 			r = RESUME_HOST_NV;
-		} else if (!(vcpu->arch.shared->msr & MSR_PR) &&
+		} else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
 		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
 		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
 			/* KVM PV hypercalls */
 			/* KVM PV hypercalls */
 			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
 			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
@@ -986,14 +1096,26 @@ program_interrupt:
 	}
 	}
 	case BOOK3S_INTERRUPT_ALIGNMENT:
 	case BOOK3S_INTERRUPT_ALIGNMENT:
 		if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
 		if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
-			vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
-				kvmppc_get_last_inst(vcpu));
-			vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
-				kvmppc_get_last_inst(vcpu));
+			u32 last_inst = kvmppc_get_last_inst(vcpu);
+			u32 dsisr;
+			u64 dar;
+
+			dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
+			dar = kvmppc_alignment_dar(vcpu, last_inst);
+
+			kvmppc_set_dsisr(vcpu, dsisr);
+			kvmppc_set_dar(vcpu, dar);
+
 			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 		}
 		}
 		r = RESUME_GUEST;
 		r = RESUME_GUEST;
 		break;
 		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case BOOK3S_INTERRUPT_FAC_UNAVAIL:
+		kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
+		r = RESUME_GUEST;
+		break;
+#endif
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
 	case BOOK3S_INTERRUPT_TRACE:
 	case BOOK3S_INTERRUPT_TRACE:
 		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
@@ -1054,7 +1176,7 @@ static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
 		}
 		}
 	} else {
 	} else {
 		for (i = 0; i < 16; i++)
 		for (i = 0; i < 16; i++)
-			sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
+			sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);
 
 
 		for (i = 0; i < 8; i++) {
 		for (i = 0; i < 8; i++) {
 			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
 			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
@@ -1110,6 +1232,15 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_HIOR:
 	case KVM_REG_PPC_HIOR:
 		*val = get_reg_val(id, to_book3s(vcpu)->hior);
 		*val = get_reg_val(id, to_book3s(vcpu)->hior);
 		break;
 		break;
+	case KVM_REG_PPC_LPCR:
+		/*
+		 * We are only interested in the LPCR_ILE bit
+		 */
+		if (vcpu->arch.intr_msr & MSR_LE)
+			*val = get_reg_val(id, LPCR_ILE);
+		else
+			*val = get_reg_val(id, 0);
+		break;
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 		break;
 		break;
@@ -1118,6 +1249,14 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 	return r;
 	return r;
 }
 }
 
 
+static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+	if (new_lpcr & LPCR_ILE)
+		vcpu->arch.intr_msr |= MSR_LE;
+	else
+		vcpu->arch.intr_msr &= ~MSR_LE;
+}
+
 static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 				 union kvmppc_one_reg *val)
 				 union kvmppc_one_reg *val)
 {
 {
@@ -1128,6 +1267,9 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 		to_book3s(vcpu)->hior = set_reg_val(id, *val);
 		to_book3s(vcpu)->hior = set_reg_val(id, *val);
 		to_book3s(vcpu)->hior_explicit = true;
 		to_book3s(vcpu)->hior_explicit = true;
 		break;
 		break;
+	case KVM_REG_PPC_LPCR:
+		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
+		break;
 	default:
 	default:
 		r = -EINVAL;
 		r = -EINVAL;
 		break;
 		break;
@@ -1170,8 +1312,14 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
 		goto uninit_vcpu;
 		goto uninit_vcpu;
 	/* the real shared page fills the last 4k of our page */
 	/* the real shared page fills the last 4k of our page */
 	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
-
 #ifdef CONFIG_PPC_BOOK3S_64
 #ifdef CONFIG_PPC_BOOK3S_64
+	/* Always start the shared struct in native endian mode */
+#ifdef __BIG_ENDIAN__
+        vcpu->arch.shared_big_endian = true;
+#else
+        vcpu->arch.shared_big_endian = false;
+#endif
+
 	/*
 	/*
 	 * Default to the same as the host if we're on sufficiently
 	 * Default to the same as the host if we're on sufficiently
 	 * recent machine that we have 1TB segments;
 	 * recent machine that we have 1TB segments;
@@ -1180,6 +1328,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
 	vcpu->arch.pvr = 0x3C0301;
 	vcpu->arch.pvr = 0x3C0301;
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		vcpu->arch.pvr = mfspr(SPRN_PVR);
 		vcpu->arch.pvr = mfspr(SPRN_PVR);
+	vcpu->arch.intr_msr = MSR_SF;
 #else
 #else
 	/* default to book3s_32 (750) */
 	/* default to book3s_32 (750) */
 	vcpu->arch.pvr = 0x84202;
 	vcpu->arch.pvr = 0x84202;
@@ -1187,7 +1336,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
 	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
 	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
 	vcpu->arch.slb_nr = 64;
 	vcpu->arch.slb_nr = 64;
 
 
-	vcpu->arch.shadow_msr = MSR_USER64;
+	vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
 
 
 	err = kvmppc_mmu_init(vcpu);
 	err = kvmppc_mmu_init(vcpu);
 	if (err < 0)
 	if (err < 0)
@@ -1264,7 +1413,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #endif
 #endif
 
 
 	/* Preload FPU if it's enabled */
 	/* Preload FPU if it's enabled */
-	if (vcpu->arch.shared->msr & MSR_FP)
+	if (kvmppc_get_msr(vcpu) & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 
 
 	kvmppc_fix_ee_before_entry();
 	kvmppc_fix_ee_before_entry();
@@ -1277,6 +1426,9 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* Make sure we save the guest FPU/Altivec/VSX state */
 	/* Make sure we save the guest FPU/Altivec/VSX state */
 	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
 	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
 
 
+	/* Make sure we save the guest TAR/EBB/DSCR state */
+	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+
 out:
 out:
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	return ret;
 	return ret;
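
Most of the book3s_pr.c hunks above replace direct reads and writes of vcpu->arch.shared (msr, srr0/srr1, sprg0-3, dar, dsisr) with kvmppc_get_*/kvmppc_set_* accessors, so the shared magic page can be kept in the guest's byte order and swapped only when guest and host disagree. A rough sketch of that accessor idea, written as a standalone user-space illustration with invented names (shared_page_sketch, vcpu_sketch) rather than the kernel's actual types:

#include <stdbool.h>
#include <stdint.h>

/* Invented stand-in for the guest-visible shared (magic) page. */
struct shared_page_sketch {
	uint64_t msr;
	uint64_t srr0;
};

struct vcpu_sketch {
	struct shared_page_sketch *shared;
	bool shared_big_endian;		/* current byte order of *shared */
	bool host_big_endian;		/* byte order of the host */
};

static uint64_t maybe_swap64(const struct vcpu_sketch *v, uint64_t val)
{
	/* Swap only when guest and host byte order disagree. */
	return (v->shared_big_endian != v->host_big_endian)
		? __builtin_bswap64(val) : val;
}

uint64_t sketch_get_msr(const struct vcpu_sketch *v)
{
	return maybe_swap64(v, v->shared->msr);
}

void sketch_set_msr(struct vcpu_sketch *v, uint64_t msr)
{
	v->shared->msr = maybe_swap64(v, msr);
}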

+ 12 - 4
arch/powerpc/kvm/book3s_pr_papr.c

@@ -57,7 +57,7 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 		for (i = 0; ; ++i) {
 		for (i = 0; ; ++i) {
 			if (i == 8)
 			if (i == 8)
 				goto done;
 				goto done;
-			if ((*hpte & HPTE_V_VALID) == 0)
+			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0)
 				break;
 				break;
 			hpte += 2;
 			hpte += 2;
 		}
 		}
@@ -67,8 +67,8 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 			goto done;
 			goto done;
 	}
 	}
 
 
-	hpte[0] = kvmppc_get_gpr(vcpu, 6);
-	hpte[1] = kvmppc_get_gpr(vcpu, 7);
+	hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
+	hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
 	pteg_addr += i * HPTE_SIZE;
 	pteg_addr += i * HPTE_SIZE;
 	copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
 	copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
 	kvmppc_set_gpr(vcpu, 4, pte_index | i);
 	kvmppc_set_gpr(vcpu, 4, pte_index | i);
@@ -93,6 +93,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 	pteg = get_pteg_addr(vcpu, pte_index);
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+	pte[0] = be64_to_cpu(pte[0]);
+	pte[1] = be64_to_cpu(pte[1]);
 
 
 	ret = H_NOT_FOUND;
 	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
@@ -169,6 +171,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 
 
 		pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
 		pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
 		copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 		copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+		pte[0] = be64_to_cpu(pte[0]);
+		pte[1] = be64_to_cpu(pte[1]);
 
 
 		/* tsl = AVPN */
 		/* tsl = AVPN */
 		flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
 		flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
@@ -207,6 +211,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	pteg = get_pteg_addr(vcpu, pte_index);
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+	pte[0] = be64_to_cpu(pte[0]);
+	pte[1] = be64_to_cpu(pte[1]);
 
 
 	ret = H_NOT_FOUND;
 	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
@@ -225,6 +231,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 
 
 	rb = compute_tlbie_rb(v, r, pte_index);
 	rb = compute_tlbie_rb(v, r, pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+	pte[0] = cpu_to_be64(pte[0]);
+	pte[1] = cpu_to_be64(pte[1]);
 	copy_to_user((void __user *)pteg, pte, sizeof(pte));
 	copy_to_user((void __user *)pteg, pte, sizeof(pte));
 	ret = H_SUCCESS;
 	ret = H_SUCCESS;
 
 
@@ -270,7 +278,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 	case H_PUT_TCE:
 	case H_PUT_TCE:
 		return kvmppc_h_pr_put_tce(vcpu);
 		return kvmppc_h_pr_put_tce(vcpu);
 	case H_CEDE:
 	case H_CEDE:
-		vcpu->arch.shared->msr |= MSR_EE;
+		kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
 		kvm_vcpu_block(vcpu);
 		kvm_vcpu_block(vcpu);
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		vcpu->stat.halt_wakeup++;

+ 29 - 0
arch/powerpc/kvm/book3s_rtas.c

@@ -205,6 +205,32 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
 	return rc;
 	return rc;
 }
 }
 
 
+static void kvmppc_rtas_swap_endian_in(struct rtas_args *args)
+{
+#ifdef __LITTLE_ENDIAN__
+	int i;
+
+	args->token = be32_to_cpu(args->token);
+	args->nargs = be32_to_cpu(args->nargs);
+	args->nret = be32_to_cpu(args->nret);
+	for (i = 0; i < args->nargs; i++)
+		args->args[i] = be32_to_cpu(args->args[i]);
+#endif
+}
+
+static void kvmppc_rtas_swap_endian_out(struct rtas_args *args)
+{
+#ifdef __LITTLE_ENDIAN__
+	int i;
+
+	for (i = 0; i < args->nret; i++)
+		args->args[i] = cpu_to_be32(args->args[i]);
+	args->token = cpu_to_be32(args->token);
+	args->nargs = cpu_to_be32(args->nargs);
+	args->nret = cpu_to_be32(args->nret);
+#endif
+}
+
 int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
 int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
 {
 {
 	struct rtas_token_definition *d;
 	struct rtas_token_definition *d;
@@ -223,6 +249,8 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
 	if (rc)
 	if (rc)
 		goto fail;
 		goto fail;
 
 
+	kvmppc_rtas_swap_endian_in(&args);
+
 	/*
 	/*
 	 * args->rets is a pointer into args->args. Now that we've
 	 * args->rets is a pointer into args->args. Now that we've
 	 * copied args we need to fix it up to point into our copy,
 	 * copied args we need to fix it up to point into our copy,
@@ -247,6 +275,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
 
 
 	if (rc == 0) {
 	if (rc == 0) {
 		args.rets = orig_rets;
 		args.rets = orig_rets;
+		kvmppc_rtas_swap_endian_out(&args);
 		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
 		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
 		if (rc)
 		if (rc)
 			goto fail;
 			goto fail;
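
The book3s_rtas.c change above byte-swaps the guest's RTAS argument block (always big-endian) into host order before the tokens are dispatched, and swaps the results back before they are copied out. A minimal standalone sketch of that in-place swap, using an invented rtas_args_sketch layout rather than the kernel's struct rtas_args:

#include <stdint.h>

#define SKETCH_MAX_ARGS 16

struct rtas_args_sketch {
	uint32_t token;
	uint32_t nargs;
	uint32_t nret;
	uint32_t args[SKETCH_MAX_ARGS];
};

static uint32_t be32_to_host(uint32_t v)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return __builtin_bswap32(v);
#else
	return v;
#endif
}

/* Swap the header first so the now-native nargs tells us how many
 * argument words still need converting. */
void rtas_args_sketch_to_host(struct rtas_args_sketch *a)
{
	uint32_t i;

	a->token = be32_to_host(a->token);
	a->nargs = be32_to_host(a->nargs);
	a->nret  = be32_to_host(a->nret);
	for (i = 0; i < a->nargs && i < SKETCH_MAX_ARGS; i++)
		a->args[i] = be32_to_host(a->args[i]);
}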

+ 25 - 0
arch/powerpc/kvm/book3s_segment.S

@@ -90,6 +90,15 @@ kvmppc_handler_trampoline_enter:
 	LOAD_GUEST_SEGMENTS
 	LOAD_GUEST_SEGMENTS
 
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+	/* Save host FSCR */
+	mfspr	r8, SPRN_FSCR
+	std	r8, HSTATE_HOST_FSCR(r13)
+	/* Set FSCR during guest execution */
+	ld	r9, SVCPU_SHADOW_FSCR(r13)
+	mtspr	SPRN_FSCR, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
 	/* Some guests may need to have dcbz set to 32 byte length.
 	/* Some guests may need to have dcbz set to 32 byte length.
 	 *
 	 *
 	 * Usually we ensure that by patching the guest's instructions
 	 * Usually we ensure that by patching the guest's instructions
@@ -255,6 +264,10 @@ BEGIN_FTR_SECTION
 	cmpwi	r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
 	cmpwi	r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
 	beq-	ld_last_inst
 	beq-	ld_last_inst
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+BEGIN_FTR_SECTION
+	cmpwi	r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+	beq-	ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 #endif
 #endif
 
 
 	b	no_ld_last_inst
 	b	no_ld_last_inst
@@ -311,6 +324,18 @@ no_ld_last_inst:
 
 
 no_dcbz32_off:
 no_dcbz32_off:
 
 
+BEGIN_FTR_SECTION
+	/* Save guest FSCR on a FAC_UNAVAIL interrupt */
+	cmpwi	r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+	bne+	no_fscr_save
+	mfspr	r7, SPRN_FSCR
+	std	r7, SVCPU_SHADOW_FSCR(r13)
+no_fscr_save:
+	/* Restore host FSCR */
+	ld	r8, HSTATE_HOST_FSCR(r13)
+	mtspr	SPRN_FSCR, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
 #endif /* CONFIG_PPC_BOOK3S_64 */
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 
 	/*
 	/*

+ 15 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -19,6 +19,7 @@
 #include "booke.h"
 #include "booke.h"
 #include "e500.h"
 #include "e500.h"
 
 
+#define XOP_DCBTLS  166
 #define XOP_MSGSND  206
 #define XOP_MSGSND  206
 #define XOP_MSGCLR  238
 #define XOP_MSGCLR  238
 #define XOP_TLBIVAX 786
 #define XOP_TLBIVAX 786
@@ -103,6 +104,15 @@ static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return emulated;
 	return emulated;
 }
 }
 
 
+static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	/* Always fail to lock the cache */
+	vcpu_e500->l1csr0 |= L1CSR0_CUL;
+	return EMULATE_DONE;
+}
+
 int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
 int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				unsigned int inst, int *advance)
 				unsigned int inst, int *advance)
 {
 {
@@ -116,6 +126,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case 31:
 	case 31:
 		switch (get_xop(inst)) {
 		switch (get_xop(inst)) {
 
 
+		case XOP_DCBTLS:
+			emulated = kvmppc_e500_emul_dcbtls(vcpu);
+			break;
+
 #ifdef CONFIG_KVM_E500MC
 #ifdef CONFIG_KVM_E500MC
 		case XOP_MSGSND:
 		case XOP_MSGSND:
 			emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
 			emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
@@ -222,6 +236,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
 		break;
 		break;
 	case SPRN_L1CSR1:
 	case SPRN_L1CSR1:
 		vcpu_e500->l1csr1 = spr_val;
 		vcpu_e500->l1csr1 = spr_val;
+		vcpu_e500->l1csr1 &= ~(L1CSR1_ICFI | L1CSR1_ICLFR);
 		break;
 		break;
 	case SPRN_HID0:
 	case SPRN_HID0:
 		vcpu_e500->hid0 = spr_val;
 		vcpu_e500->hid0 = spr_val;

+ 12 - 12
arch/powerpc/kvm/emulate.c

@@ -97,10 +97,10 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 
 
 	switch (sprn) {
 	switch (sprn) {
 	case SPRN_SRR0:
 	case SPRN_SRR0:
-		vcpu->arch.shared->srr0 = spr_val;
+		kvmppc_set_srr0(vcpu, spr_val);
 		break;
 		break;
 	case SPRN_SRR1:
 	case SPRN_SRR1:
-		vcpu->arch.shared->srr1 = spr_val;
+		kvmppc_set_srr1(vcpu, spr_val);
 		break;
 		break;
 
 
 	/* XXX We need to context-switch the timebase for
 	/* XXX We need to context-switch the timebase for
@@ -114,16 +114,16 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 		break;
 		break;
 
 
 	case SPRN_SPRG0:
 	case SPRN_SPRG0:
-		vcpu->arch.shared->sprg0 = spr_val;
+		kvmppc_set_sprg0(vcpu, spr_val);
 		break;
 		break;
 	case SPRN_SPRG1:
 	case SPRN_SPRG1:
-		vcpu->arch.shared->sprg1 = spr_val;
+		kvmppc_set_sprg1(vcpu, spr_val);
 		break;
 		break;
 	case SPRN_SPRG2:
 	case SPRN_SPRG2:
-		vcpu->arch.shared->sprg2 = spr_val;
+		kvmppc_set_sprg2(vcpu, spr_val);
 		break;
 		break;
 	case SPRN_SPRG3:
 	case SPRN_SPRG3:
-		vcpu->arch.shared->sprg3 = spr_val;
+		kvmppc_set_sprg3(vcpu, spr_val);
 		break;
 		break;
 
 
 	/* PIR can legally be written, but we ignore it */
 	/* PIR can legally be written, but we ignore it */
@@ -150,10 +150,10 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 
 
 	switch (sprn) {
 	switch (sprn) {
 	case SPRN_SRR0:
 	case SPRN_SRR0:
-		spr_val = vcpu->arch.shared->srr0;
+		spr_val = kvmppc_get_srr0(vcpu);
 		break;
 		break;
 	case SPRN_SRR1:
 	case SPRN_SRR1:
-		spr_val = vcpu->arch.shared->srr1;
+		spr_val = kvmppc_get_srr1(vcpu);
 		break;
 		break;
 	case SPRN_PVR:
 	case SPRN_PVR:
 		spr_val = vcpu->arch.pvr;
 		spr_val = vcpu->arch.pvr;
@@ -173,16 +173,16 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		break;
 		break;
 
 
 	case SPRN_SPRG0:
 	case SPRN_SPRG0:
-		spr_val = vcpu->arch.shared->sprg0;
+		spr_val = kvmppc_get_sprg0(vcpu);
 		break;
 		break;
 	case SPRN_SPRG1:
 	case SPRN_SPRG1:
-		spr_val = vcpu->arch.shared->sprg1;
+		spr_val = kvmppc_get_sprg1(vcpu);
 		break;
 		break;
 	case SPRN_SPRG2:
 	case SPRN_SPRG2:
-		spr_val = vcpu->arch.shared->sprg2;
+		spr_val = kvmppc_get_sprg2(vcpu);
 		break;
 		break;
 	case SPRN_SPRG3:
 	case SPRN_SPRG3:
-		spr_val = vcpu->arch.shared->sprg3;
+		spr_val = kvmppc_get_sprg3(vcpu);
 		break;
 		break;
 	/* Note: SPRG4-7 are user-readable, so we don't get
 	/* Note: SPRG4-7 are user-readable, so we don't get
 	 * a trap. */
 	 * a trap. */

+ 4 - 1
arch/powerpc/kvm/mpic.c

@@ -126,6 +126,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
 				      u32 val, int idx);
 				      u32 val, int idx);
 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
 				     u32 *ptr, int idx);
 				     u32 *ptr, int idx);
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+				    uint32_t val);
 
 
 enum irq_type {
 enum irq_type {
 	IRQ_TYPE_NORMAL = 0,
 	IRQ_TYPE_NORMAL = 0,
@@ -528,7 +530,6 @@ static void openpic_reset(struct openpic *opp)
 	/* Initialise IRQ sources */
 	/* Initialise IRQ sources */
 	for (i = 0; i < opp->max_irq; i++) {
 	for (i = 0; i < opp->max_irq; i++) {
 		opp->src[i].ivpr = opp->ivpr_reset;
 		opp->src[i].ivpr = opp->ivpr_reset;
-		opp->src[i].idr = opp->idr_reset;
 
 
 		switch (opp->src[i].type) {
 		switch (opp->src[i].type) {
 		case IRQ_TYPE_NORMAL:
 		case IRQ_TYPE_NORMAL:
@@ -543,6 +544,8 @@ static void openpic_reset(struct openpic *opp)
 		case IRQ_TYPE_FSLSPECIAL:
 		case IRQ_TYPE_FSLSPECIAL:
 			break;
 			break;
 		}
 		}
+
+		write_IRQreg_idr(opp, i, opp->idr_reset);
 	}
 	}
 	/* Initialise IRQ destinations */
 	/* Initialise IRQ destinations */
 	for (i = 0; i < MAX_CPU; i++) {
 	for (i = 0; i < MAX_CPU; i++) {

+ 53 - 11
arch/powerpc/kvm/powerpc.c

@@ -125,6 +125,27 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 }
 }
 EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
 EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
 
 
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+static void kvmppc_swab_shared(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
+	int i;
+
+	shared->sprg0 = swab64(shared->sprg0);
+	shared->sprg1 = swab64(shared->sprg1);
+	shared->sprg2 = swab64(shared->sprg2);
+	shared->sprg3 = swab64(shared->sprg3);
+	shared->srr0 = swab64(shared->srr0);
+	shared->srr1 = swab64(shared->srr1);
+	shared->dar = swab64(shared->dar);
+	shared->msr = swab64(shared->msr);
+	shared->dsisr = swab32(shared->dsisr);
+	shared->int_pending = swab32(shared->int_pending);
+	for (i = 0; i < ARRAY_SIZE(shared->sr); i++)
+		shared->sr[i] = swab32(shared->sr[i]);
+}
+#endif
+
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 {
 {
 	int nr = kvmppc_get_gpr(vcpu, 11);
 	int nr = kvmppc_get_gpr(vcpu, 11);
@@ -135,7 +156,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 	unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
 	unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
 	unsigned long r2 = 0;
 	unsigned long r2 = 0;
 
 
-	if (!(vcpu->arch.shared->msr & MSR_SF)) {
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
 		/* 32 bit mode */
 		/* 32 bit mode */
 		param1 &= 0xffffffff;
 		param1 &= 0xffffffff;
 		param2 &= 0xffffffff;
 		param2 &= 0xffffffff;
@@ -146,8 +167,28 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 	switch (nr) {
 	switch (nr) {
 	case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
 	case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
 	{
 	{
-		vcpu->arch.magic_page_pa = param1;
-		vcpu->arch.magic_page_ea = param2;
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+		/* Book3S can be little endian, find it out here */
+		int shared_big_endian = true;
+		if (vcpu->arch.intr_msr & MSR_LE)
+			shared_big_endian = false;
+		if (shared_big_endian != vcpu->arch.shared_big_endian)
+			kvmppc_swab_shared(vcpu);
+		vcpu->arch.shared_big_endian = shared_big_endian;
+#endif
+
+		if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) {
+			/*
+			 * Older versions of the Linux magic page code had
+			 * a bug where they would map their trampoline code
+			 * NX. If that's the case, remove !PR NX capability.
+			 */
+			vcpu->arch.disable_kernel_nx = true;
+			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+		}
+
+		vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
+		vcpu->arch.magic_page_ea = param2 & ~0xfffULL;
 
 
 		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
 		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
 
 
@@ -375,6 +416,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
 	case KVM_CAP_PPC_ALLOC_HTAB:
 	case KVM_CAP_PPC_RTAS:
 	case KVM_CAP_PPC_RTAS:
+	case KVM_CAP_PPC_FIXUP_HCALL:
 #ifdef CONFIG_KVM_XICS
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS:
 	case KVM_CAP_IRQ_XICS:
 #endif
 #endif
@@ -1015,10 +1057,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	u32 inst_nop = 0x60000000;
 	u32 inst_nop = 0x60000000;
 #ifdef CONFIG_KVM_BOOKE_HV
 #ifdef CONFIG_KVM_BOOKE_HV
 	u32 inst_sc1 = 0x44000022;
 	u32 inst_sc1 = 0x44000022;
-	pvinfo->hcall[0] = inst_sc1;
-	pvinfo->hcall[1] = inst_nop;
-	pvinfo->hcall[2] = inst_nop;
-	pvinfo->hcall[3] = inst_nop;
+	pvinfo->hcall[0] = cpu_to_be32(inst_sc1);
+	pvinfo->hcall[1] = cpu_to_be32(inst_nop);
+	pvinfo->hcall[2] = cpu_to_be32(inst_nop);
+	pvinfo->hcall[3] = cpu_to_be32(inst_nop);
 #else
 #else
 	u32 inst_lis = 0x3c000000;
 	u32 inst_lis = 0x3c000000;
 	u32 inst_ori = 0x60000000;
 	u32 inst_ori = 0x60000000;
@@ -1034,10 +1076,10 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	 *    sc
 	 *    sc
 	 *    nop
 	 *    nop
 	 */
 	 */
-	pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
-	pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
-	pvinfo->hcall[2] = inst_sc;
-	pvinfo->hcall[3] = inst_nop;
+	pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask));
+	pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask));
+	pvinfo->hcall[2] = cpu_to_be32(inst_sc);
+	pvinfo->hcall[3] = cpu_to_be32(inst_nop);
 #endif
 #endif
 
 
 	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
 	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;

+ 1 - 1
arch/powerpc/kvm/trace_pr.h

@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_exit,
 		__entry->exit_nr	= exit_nr;
 		__entry->exit_nr	= exit_nr;
 		__entry->pc		= kvmppc_get_pc(vcpu);
 		__entry->pc		= kvmppc_get_pc(vcpu);
 		__entry->dar		= kvmppc_get_fault_dar(vcpu);
 		__entry->dar		= kvmppc_get_fault_dar(vcpu);
-		__entry->msr		= vcpu->arch.shared->msr;
+		__entry->msr		= kvmppc_get_msr(vcpu);
 		__entry->srr1		= vcpu->arch.shadow_srr1;
 		__entry->srr1		= vcpu->arch.shadow_srr1;
 		__entry->last_inst	= vcpu->arch.last_inst;
 		__entry->last_inst	= vcpu->arch.last_inst;
 	),
 	),

+ 1 - 1
arch/powerpc/mm/slb.c

@@ -97,7 +97,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
 static void __slb_flush_and_rebolt(void)
 static void __slb_flush_and_rebolt(void)
 {
 {
 	/* If you change this make sure you change SLB_NUM_BOLTED
 	/* If you change this make sure you change SLB_NUM_BOLTED
-	 * appropriately too. */
+	 * and PR KVM appropriately too. */
 	unsigned long linear_llp, vmalloc_llp, lflags, vflags;
 	unsigned long linear_llp, vmalloc_llp, lflags, vflags;
 	unsigned long ksp_esid_data, ksp_vsid_data;
 	unsigned long ksp_esid_data, ksp_vsid_data;
 
 

+ 14 - 0
arch/s390/include/asm/ctl_reg.h

@@ -57,6 +57,20 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
 
+union ctlreg0 {
+	unsigned long val;
+	struct {
+#ifdef CONFIG_64BIT
+		unsigned long	   : 32;
+#endif
+		unsigned long	   : 3;
+		unsigned long lap  : 1; /* Low-address-protection control */
+		unsigned long	   : 4;
+		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+		unsigned long	   : 23;
+	};
+};
+
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 # define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
 # define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
 # define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
 # define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)

+ 149 - 14
arch/s390/include/asm/kvm_host.h

@@ -32,16 +32,26 @@
 #define KVM_NR_IRQCHIPS 1
 #define KVM_NR_IRQCHIPS 1
 #define KVM_IRQCHIP_NUM_PINS 4096
 #define KVM_IRQCHIP_NUM_PINS 4096
 
 
+#define SIGP_CTRL_C	0x00800000
+
 struct sca_entry {
 struct sca_entry {
-	atomic_t scn;
+	atomic_t ctrl;
 	__u32	reserved;
 	__u32	reserved;
 	__u64	sda;
 	__u64	sda;
 	__u64	reserved2[2];
 	__u64	reserved2[2];
 } __attribute__((packed));
 } __attribute__((packed));
 
 
+union ipte_control {
+	unsigned long val;
+	struct {
+		unsigned long k  : 1;
+		unsigned long kh : 31;
+		unsigned long kg : 32;
+	};
+};
 
 
 struct sca_block {
 struct sca_block {
-	__u64	ipte_control;
+	union ipte_control ipte_control;
 	__u64	reserved[5];
 	__u64	reserved[5];
 	__u64	mcn;
 	__u64	mcn;
 	__u64	reserved2;
 	__u64	reserved2;
@@ -64,6 +74,7 @@ struct sca_block {
 #define CPUSTAT_ZARCH      0x00000800
 #define CPUSTAT_ZARCH      0x00000800
 #define CPUSTAT_MCDS       0x00000100
 #define CPUSTAT_MCDS       0x00000100
 #define CPUSTAT_SM         0x00000080
 #define CPUSTAT_SM         0x00000080
+#define CPUSTAT_IBS        0x00000040
 #define CPUSTAT_G          0x00000008
 #define CPUSTAT_G          0x00000008
 #define CPUSTAT_GED        0x00000004
 #define CPUSTAT_GED        0x00000004
 #define CPUSTAT_J          0x00000002
 #define CPUSTAT_J          0x00000002
@@ -71,7 +82,9 @@ struct sca_block {
 
 
 struct kvm_s390_sie_block {
 struct kvm_s390_sie_block {
 	atomic_t cpuflags;		/* 0x0000 */
 	atomic_t cpuflags;		/* 0x0000 */
-	__u32	prefix;			/* 0x0004 */
+	__u32 : 1;			/* 0x0004 */
+	__u32 prefix : 18;
+	__u32 : 13;
 	__u8	reserved08[4];		/* 0x0008 */
 	__u8	reserved08[4];		/* 0x0008 */
 #define PROG_IN_SIE (1<<0)
 #define PROG_IN_SIE (1<<0)
 	__u32	prog0c;			/* 0x000c */
 	__u32	prog0c;			/* 0x000c */
@@ -85,12 +98,27 @@ struct kvm_s390_sie_block {
 	__u8	reserved40[4];		/* 0x0040 */
 	__u8	reserved40[4];		/* 0x0040 */
 #define LCTL_CR0	0x8000
 #define LCTL_CR0	0x8000
 #define LCTL_CR6	0x0200
 #define LCTL_CR6	0x0200
+#define LCTL_CR9	0x0040
+#define LCTL_CR10	0x0020
+#define LCTL_CR11	0x0010
 #define LCTL_CR14	0x0002
 #define LCTL_CR14	0x0002
 	__u16   lctl;			/* 0x0044 */
 	__u16   lctl;			/* 0x0044 */
 	__s16	icpua;			/* 0x0046 */
 	__s16	icpua;			/* 0x0046 */
-#define ICTL_LPSW 0x00400000
+#define ICTL_PINT	0x20000000
+#define ICTL_LPSW	0x00400000
+#define ICTL_STCTL	0x00040000
+#define ICTL_ISKE	0x00004000
+#define ICTL_SSKE	0x00002000
+#define ICTL_RRBE	0x00001000
+#define ICTL_TPROT	0x00000200
 	__u32	ictl;			/* 0x0048 */
 	__u32	ictl;			/* 0x0048 */
 	__u32	eca;			/* 0x004c */
 	__u32	eca;			/* 0x004c */
+#define ICPT_INST	0x04
+#define ICPT_PROGI	0x08
+#define ICPT_INSTPROGI	0x0C
+#define ICPT_OPEREXC	0x2C
+#define ICPT_PARTEXEC	0x38
+#define ICPT_IOINST	0x40
 	__u8	icptcode;		/* 0x0050 */
 	__u8	icptcode;		/* 0x0050 */
 	__u8	reserved51;		/* 0x0051 */
 	__u8	reserved51;		/* 0x0051 */
 	__u16	ihcpu;			/* 0x0052 */
 	__u16	ihcpu;			/* 0x0052 */
@@ -109,9 +137,24 @@ struct kvm_s390_sie_block {
 	psw_t	gpsw;			/* 0x0090 */
 	psw_t	gpsw;			/* 0x0090 */
 	__u64	gg14;			/* 0x00a0 */
 	__u64	gg14;			/* 0x00a0 */
 	__u64	gg15;			/* 0x00a8 */
 	__u64	gg15;			/* 0x00a8 */
-	__u8	reservedb0[30];		/* 0x00b0 */
-	__u16   iprcc;			/* 0x00ce */
-	__u8	reservedd0[48];		/* 0x00d0 */
+	__u8	reservedb0[20];		/* 0x00b0 */
+	__u16	extcpuaddr;		/* 0x00c4 */
+	__u16	eic;			/* 0x00c6 */
+	__u32	reservedc8;		/* 0x00c8 */
+	__u16	pgmilc;			/* 0x00cc */
+	__u16	iprcc;			/* 0x00ce */
+	__u32	dxc;			/* 0x00d0 */
+	__u16	mcn;			/* 0x00d4 */
+	__u8	perc;			/* 0x00d6 */
+	__u8	peratmid;		/* 0x00d7 */
+	__u64	peraddr;		/* 0x00d8 */
+	__u8	eai;			/* 0x00e0 */
+	__u8	peraid;			/* 0x00e1 */
+	__u8	oai;			/* 0x00e2 */
+	__u8	armid;			/* 0x00e3 */
+	__u8	reservede4[4];		/* 0x00e4 */
+	__u64	tecmc;			/* 0x00e8 */
+	__u8	reservedf0[16];		/* 0x00f0 */
 	__u64	gcr[16];		/* 0x0100 */
 	__u64	gcr[16];		/* 0x0100 */
 	__u64	gbea;			/* 0x0180 */
 	__u64	gbea;			/* 0x0180 */
 	__u8	reserved188[24];	/* 0x0188 */
 	__u8	reserved188[24];	/* 0x0188 */
@@ -146,6 +189,8 @@ struct kvm_vcpu_stat {
 	u32 exit_instruction;
 	u32 exit_instruction;
 	u32 instruction_lctl;
 	u32 instruction_lctl;
 	u32 instruction_lctlg;
 	u32 instruction_lctlg;
+	u32 instruction_stctl;
+	u32 instruction_stctg;
 	u32 exit_program_interruption;
 	u32 exit_program_interruption;
 	u32 exit_instr_and_program;
 	u32 exit_instr_and_program;
 	u32 deliver_external_call;
 	u32 deliver_external_call;
@@ -164,6 +209,7 @@ struct kvm_vcpu_stat {
 	u32 instruction_stpx;
 	u32 instruction_stpx;
 	u32 instruction_stap;
 	u32 instruction_stap;
 	u32 instruction_storage_key;
 	u32 instruction_storage_key;
+	u32 instruction_ipte_interlock;
 	u32 instruction_stsch;
 	u32 instruction_stsch;
 	u32 instruction_chsc;
 	u32 instruction_chsc;
 	u32 instruction_stsi;
 	u32 instruction_stsi;
@@ -183,13 +229,58 @@ struct kvm_vcpu_stat {
 	u32 diagnose_9c;
 	u32 diagnose_9c;
 };
 };
 
 
-#define PGM_OPERATION            0x01
-#define PGM_PRIVILEGED_OP	 0x02
-#define PGM_EXECUTE              0x03
-#define PGM_PROTECTION           0x04
-#define PGM_ADDRESSING           0x05
-#define PGM_SPECIFICATION        0x06
-#define PGM_DATA                 0x07
+#define PGM_OPERATION			0x01
+#define PGM_PRIVILEGED_OP		0x02
+#define PGM_EXECUTE			0x03
+#define PGM_PROTECTION			0x04
+#define PGM_ADDRESSING			0x05
+#define PGM_SPECIFICATION		0x06
+#define PGM_DATA			0x07
+#define PGM_FIXED_POINT_OVERFLOW	0x08
+#define PGM_FIXED_POINT_DIVIDE		0x09
+#define PGM_DECIMAL_OVERFLOW		0x0a
+#define PGM_DECIMAL_DIVIDE		0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW	0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW	0x0d
+#define PGM_HFP_SIGNIFICANCE		0x0e
+#define PGM_HFP_DIVIDE			0x0f
+#define PGM_SEGMENT_TRANSLATION		0x10
+#define PGM_PAGE_TRANSLATION		0x11
+#define PGM_TRANSLATION_SPEC		0x12
+#define PGM_SPECIAL_OPERATION		0x13
+#define PGM_OPERAND			0x15
+#define PGM_TRACE_TABEL			0x16
+#define PGM_SPACE_SWITCH		0x1c
+#define PGM_HFP_SQUARE_ROOT		0x1d
+#define PGM_PC_TRANSLATION_SPEC		0x1f
+#define PGM_AFX_TRANSLATION		0x20
+#define PGM_ASX_TRANSLATION		0x21
+#define PGM_LX_TRANSLATION		0x22
+#define PGM_EX_TRANSLATION		0x23
+#define PGM_PRIMARY_AUTHORITY		0x24
+#define PGM_SECONDARY_AUTHORITY		0x25
+#define PGM_LFX_TRANSLATION		0x26
+#define PGM_LSX_TRANSLATION		0x27
+#define PGM_ALET_SPECIFICATION		0x28
+#define PGM_ALEN_TRANSLATION		0x29
+#define PGM_ALE_SEQUENCE		0x2a
+#define PGM_ASTE_VALIDITY		0x2b
+#define PGM_ASTE_SEQUENCE		0x2c
+#define PGM_EXTENDED_AUTHORITY		0x2d
+#define PGM_LSTE_SEQUENCE		0x2e
+#define PGM_ASTE_INSTANCE		0x2f
+#define PGM_STACK_FULL			0x30
+#define PGM_STACK_EMPTY			0x31
+#define PGM_STACK_SPECIFICATION		0x32
+#define PGM_STACK_TYPE			0x33
+#define PGM_STACK_OPERATION		0x34
+#define PGM_ASCE_TYPE			0x38
+#define PGM_REGION_FIRST_TRANS		0x39
+#define PGM_REGION_SECOND_TRANS		0x3a
+#define PGM_REGION_THIRD_TRANS		0x3b
+#define PGM_MONITOR			0x40
+#define PGM_PER				0x80
+#define PGM_CRYPTO_OPERATION		0x119
 
 
 struct kvm_s390_interrupt_info {
 struct kvm_s390_interrupt_info {
 	struct list_head list;
 	struct list_head list;
@@ -229,6 +320,45 @@ struct kvm_s390_float_interrupt {
 	unsigned int irq_count;
 	unsigned int irq_count;
 };
 };
 
 
+struct kvm_hw_wp_info_arch {
+	unsigned long addr;
+	unsigned long phys_addr;
+	int len;
+	char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+	unsigned long addr;
+	int len;
+};
+
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which can be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+
+#define guestdbg_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+		(vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+struct kvm_guestdbg_info_arch {
+	unsigned long cr0;
+	unsigned long cr9;
+	unsigned long cr10;
+	unsigned long cr11;
+	struct kvm_hw_bp_info_arch *hw_bp_info;
+	struct kvm_hw_wp_info_arch *hw_wp_info;
+	int nr_hw_bp;
+	int nr_hw_wp;
+	unsigned long last_bp;
+};
 
 
 struct kvm_vcpu_arch {
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
 	struct kvm_s390_sie_block *sie_block;
@@ -238,11 +368,13 @@ struct kvm_vcpu_arch {
 	struct kvm_s390_local_interrupt local_int;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct hrtimer    ckc_timer;
 	struct tasklet_struct tasklet;
 	struct tasklet_struct tasklet;
+	struct kvm_s390_pgm_info pgm;
 	union  {
 	union  {
 		struct cpuid	cpu_id;
 		struct cpuid	cpu_id;
 		u64		stidp_data;
 		u64		stidp_data;
 	};
 	};
 	struct gmap *gmap;
 	struct gmap *gmap;
+	struct kvm_guestdbg_info_arch guestdbg;
 #define KVM_S390_PFAULT_TOKEN_INVALID	(-1UL)
 #define KVM_S390_PFAULT_TOKEN_INVALID	(-1UL)
 	unsigned long pfault_token;
 	unsigned long pfault_token;
 	unsigned long pfault_select;
 	unsigned long pfault_select;
@@ -285,7 +417,10 @@ struct kvm_arch{
 	struct gmap *gmap;
 	struct gmap *gmap;
 	int css_support;
 	int css_support;
 	int use_irqchip;
 	int use_irqchip;
+	int use_cmma;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+	wait_queue_head_t ipte_wq;
+	spinlock_t start_stop_lock;
 };
 };
 
 
 #define KVM_HVA_ERR_BAD		(-1UL)
 #define KVM_HVA_ERR_BAD		(-1UL)
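
Several of the s390 header changes above overlay named bitfields on raw control words (union ipte_control, union ctlreg0, the prefix field in the SIE block) so individual bits can be addressed by name while the whole register stays reachable through val. A small standalone illustration of the pattern; the layout below is invented, and bitfield placement is ABI-dependent, which is exactly why the raw val member is kept alongside:

#include <assert.h>
#include <stdint.h>

/* Invented control-register layout, loosely modelled on the idea above. */
union ctrl_word_sketch {
	uint64_t val;			/* whole-register access */
	struct {
		uint64_t	: 35;
		uint64_t lap	: 1;	/* low-address protection */
		uint64_t	: 4;
		uint64_t edat	: 1;	/* enhanced-DAT enable */
		uint64_t	: 23;
	};
};

int main(void)
{
	union ctrl_word_sketch cw = { .val = 0 };

	cw.edat = 1;			/* set one named bit ... */
	assert(cw.val != 0);		/* ... and it shows up in the raw word */
	return 0;
}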

+ 6 - 4
arch/s390/include/asm/lowcore.h

@@ -56,13 +56,14 @@ struct _lowcore {
 	__u16	pgm_code;			/* 0x008e */
 	__u16	pgm_code;			/* 0x008e */
 	__u32	trans_exc_code;			/* 0x0090 */
 	__u32	trans_exc_code;			/* 0x0090 */
 	__u16	mon_class_num;			/* 0x0094 */
 	__u16	mon_class_num;			/* 0x0094 */
-	__u16	per_perc_atmid;			/* 0x0096 */
+	__u8	per_code;			/* 0x0096 */
+	__u8	per_atmid;			/* 0x0097 */
 	__u32	per_address;			/* 0x0098 */
 	__u32	per_address;			/* 0x0098 */
 	__u32	monitor_code;			/* 0x009c */
 	__u32	monitor_code;			/* 0x009c */
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	per_access_id;			/* 0x00a1 */
 	__u8	per_access_id;			/* 0x00a1 */
 	__u8	op_access_id;			/* 0x00a2 */
 	__u8	op_access_id;			/* 0x00a2 */
-	__u8	ar_access_id;			/* 0x00a3 */
+	__u8	ar_mode_id;			/* 0x00a3 */
 	__u8	pad_0x00a4[0x00b8-0x00a4];	/* 0x00a4 */
 	__u8	pad_0x00a4[0x00b8-0x00a4];	/* 0x00a4 */
 	__u16	subchannel_id;			/* 0x00b8 */
 	__u16	subchannel_id;			/* 0x00b8 */
 	__u16	subchannel_nr;			/* 0x00ba */
 	__u16	subchannel_nr;			/* 0x00ba */
@@ -195,12 +196,13 @@ struct _lowcore {
 	__u16	pgm_code;			/* 0x008e */
 	__u16	pgm_code;			/* 0x008e */
 	__u32	data_exc_code;			/* 0x0090 */
 	__u32	data_exc_code;			/* 0x0090 */
 	__u16	mon_class_num;			/* 0x0094 */
 	__u16	mon_class_num;			/* 0x0094 */
-	__u16	per_perc_atmid;			/* 0x0096 */
+	__u8	per_code;			/* 0x0096 */
+	__u8	per_atmid;			/* 0x0097 */
 	__u64	per_address;			/* 0x0098 */
 	__u64	per_address;			/* 0x0098 */
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	per_access_id;			/* 0x00a1 */
 	__u8	per_access_id;			/* 0x00a1 */
 	__u8	op_access_id;			/* 0x00a2 */
 	__u8	op_access_id;			/* 0x00a2 */
-	__u8	ar_access_id;			/* 0x00a3 */
+	__u8	ar_mode_id;			/* 0x00a3 */
 	__u8	pad_0x00a4[0x00a8-0x00a4];	/* 0x00a4 */
 	__u8	pad_0x00a4[0x00a8-0x00a4];	/* 0x00a4 */
 	__u64	trans_exc_code;			/* 0x00a8 */
 	__u64	trans_exc_code;			/* 0x00a8 */
 	__u64	monitor_code;			/* 0x00b0 */
 	__u64	monitor_code;			/* 0x00b0 */

+ 2 - 0
arch/s390/include/asm/mmu.h

@@ -16,6 +16,8 @@ typedef struct {
 	unsigned long vdso_base;
 	unsigned long vdso_base;
 	/* The mmu context has extended page tables. */
 	/* The mmu context has extended page tables. */
 	unsigned int has_pgste:1;
 	unsigned int has_pgste:1;
+	/* The mmu context uses storage keys. */
+	unsigned int use_skey:1;
 } mm_context_t;
 } mm_context_t;
 
 
 #define INIT_MM_CONTEXT(name)						      \
 #define INIT_MM_CONTEXT(name)						      \

+ 1 - 0
arch/s390/include/asm/mmu_context.h

@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
 #endif
 	mm->context.has_pgste = 0;
 	mm->context.has_pgste = 0;
+	mm->context.use_skey = 0;
 	mm->context.asce_limit = STACK_TOP_MAX;
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
 	return 0;

+ 2 - 1
arch/s390/include/asm/pgalloc.h

@@ -22,7 +22,8 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *);
 
 
-void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long);
+void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
+			    bool init_skey);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq);
 			  unsigned long key, bool nq);
 
 

+ 81 - 88
arch/s390/include/asm/pgtable.h

@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT	0x00200000UL
 #define PGSTE_HC_BIT	0x00200000UL
 #define PGSTE_GR_BIT	0x00040000UL
 #define PGSTE_GR_BIT	0x00040000UL
 #define PGSTE_GC_BIT	0x00020000UL
 #define PGSTE_GC_BIT	0x00020000UL
-#define PGSTE_IN_BIT	0x00008000UL	/* IPTE notify bit */
+#define PGSTE_UC_BIT	0x00008000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x00004000UL	/* IPTE notify bit */
 
 
 #else /* CONFIG_64BIT */
 #else /* CONFIG_64BIT */
 
 
@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
 #define PGSTE_HC_BIT	0x0020000000000000UL
 #define PGSTE_HC_BIT	0x0020000000000000UL
 #define PGSTE_GR_BIT	0x0004000000000000UL
 #define PGSTE_GR_BIT	0x0004000000000000UL
 #define PGSTE_GC_BIT	0x0002000000000000UL
 #define PGSTE_GC_BIT	0x0002000000000000UL
-#define PGSTE_IN_BIT	0x0000800000000000UL	/* IPTE notify bit */
+#define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */
 
 
 #endif /* CONFIG_64BIT */
 #endif /* CONFIG_64BIT */
 
 
@@ -466,6 +468,16 @@ static inline int mm_has_pgste(struct mm_struct *mm)
 #endif
 #endif
 	return 0;
 	return 0;
 }
 }
+
+static inline int mm_use_skey(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (mm->context.use_skey)
+		return 1;
+#endif
+	return 0;
+}
+
 /*
 /*
  * pgd/pmd/pte query functions
  * pgd/pmd/pte query functions
  */
  */
@@ -699,26 +711,17 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
 #endif
 #endif
 }
 }
 
 
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
+				       struct mm_struct *mm)
 {
 {
 #ifdef CONFIG_PGSTE
 #ifdef CONFIG_PGSTE
 	unsigned long address, bits, skey;
 	unsigned long address, bits, skey;
 
 
-	if (pte_val(*ptep) & _PAGE_INVALID)
+	if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
 		return pgste;
 		return pgste;
 	address = pte_val(*ptep) & PAGE_MASK;
 	address = pte_val(*ptep) & PAGE_MASK;
 	skey = (unsigned long) page_get_storage_key(address);
 	skey = (unsigned long) page_get_storage_key(address);
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
-	if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
-		/* Transfer dirty + referenced bit to host bits in pgste */
-		pgste_val(pgste) |= bits << 52;
-		page_set_storage_key(address, skey ^ bits, 0);
-	} else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
-		   (bits & _PAGE_REFERENCED)) {
-		/* Transfer referenced bit to host bit in pgste */
-		pgste_val(pgste) |= PGSTE_HR_BIT;
-		page_reset_referenced(address);
-	}
 	/* Transfer page changed & referenced bit to guest bits in pgste */
 	/* Transfer page changed & referenced bit to guest bits in pgste */
 	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
 	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
 	/* Copy page access key and fetch protection bit to pgste */
 	/* Copy page access key and fetch protection bit to pgste */
@@ -729,25 +732,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 
 
 }
 }
 
 
-static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
-	if (pte_val(*ptep) & _PAGE_INVALID)
-		return pgste;
-	/* Get referenced bit from storage key */
-	if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
-		pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
-#endif
-	return pgste;
-}
-
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+				 struct mm_struct *mm)
 {
 {
 #ifdef CONFIG_PGSTE
 #ifdef CONFIG_PGSTE
 	unsigned long address;
 	unsigned long address;
 	unsigned long nkey;
 	unsigned long nkey;
 
 
-	if (pte_val(entry) & _PAGE_INVALID)
+	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
 		return;
 		return;
 	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
 	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
 	address = pte_val(entry) & PAGE_MASK;
 	address = pte_val(entry) & PAGE_MASK;
@@ -757,23 +749,30 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
 	 * key C/R to 0.
 	 * key C/R to 0.
 	 */
 	 */
 	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
 	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
 	page_set_storage_key(address, nkey, 0);
 	page_set_storage_key(address, nkey, 0);
 #endif
 #endif
 }
 }
 
 
-static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 {
 {
-	if (!MACHINE_HAS_ESOP &&
-	    (pte_val(entry) & _PAGE_PRESENT) &&
-	    (pte_val(entry) & _PAGE_WRITE)) {
-		/*
-		 * Without enhanced suppression-on-protection force
-		 * the dirty bit on for all writable ptes.
-		 */
-		pte_val(entry) |= _PAGE_DIRTY;
-		pte_val(entry) &= ~_PAGE_PROTECT;
+	if ((pte_val(entry) & _PAGE_PRESENT) &&
+	    (pte_val(entry) & _PAGE_WRITE) &&
+	    !(pte_val(entry) & _PAGE_INVALID)) {
+		if (!MACHINE_HAS_ESOP) {
+			/*
+			 * Without enhanced suppression-on-protection force
+			 * the dirty bit on for all writable ptes.
+			 */
+			pte_val(entry) |= _PAGE_DIRTY;
+			pte_val(entry) &= ~_PAGE_PROTECT;
+		}
+		if (!(pte_val(entry) & _PAGE_PROTECT))
+			/* This pte allows write access, set user-dirty */
+			pgste_val(pgste) |= PGSTE_UC_BIT;
 	}
 	*ptep = entry;
+	return pgste;
 }

 /**
@@ -839,6 +838,8 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *);
 unsigned long gmap_fault(unsigned long address, struct gmap *);
 void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 void __gmap_zap(unsigned long address, struct gmap *);
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
+
 
 
 void gmap_register_ipte_notifier(struct gmap_notifier *);
 void gmap_unregister_ipte_notifier(struct gmap_notifier *);
@@ -870,8 +871,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
-		pgste_set_key(ptep, pgste, entry);
-		pgste_set_pte(ptep, entry);
+		pgste_set_key(ptep, pgste, entry, mm);
+		pgste = pgste_set_pte(ptep, pgste, entry);
 		pgste_set_unlock(ptep, pgste);
 	} else {
 		if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
@@ -1017,45 +1018,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
 }
 #endif

-/*
- * Get (and clear) the user dirty bit for a pte.
- */
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
-						 pte_t *ptep)
-{
-	pgste_t pgste;
-	int dirty = 0;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_update_all(ptep, pgste);
-		dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
-		pgste_val(pgste) &= ~PGSTE_HC_BIT;
-		pgste_set_unlock(ptep, pgste);
-		return dirty;
-	}
-	return dirty;
-}
-
-/*
- * Get (and clear) the user referenced bit for a pte.
- */
-static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
-						 pte_t *ptep)
-{
-	pgste_t pgste;
-	int young = 0;
-
-	if (mm_has_pgste(mm)) {
-		pgste = pgste_get_lock(ptep);
-		pgste = pgste_update_young(ptep, pgste);
-		young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
-		pgste_val(pgste) &= ~PGSTE_HR_BIT;
-		pgste_set_unlock(ptep, pgste);
-	}
-	return young;
-}
-
 static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
 {
 	unsigned long pto = (unsigned long) ptep;
@@ -1118,6 +1080,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
 	atomic_sub(0x10000, &mm->context.attach_count);
 }

+/*
+ * Get (and clear) the user dirty bit for a pte.
+ */
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
+						 unsigned long addr,
+						 pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+	int dirty;
+
+	if (!mm_has_pgste(mm))
+		return 0;
+	pgste = pgste_get_lock(ptep);
+	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+	pgste_val(pgste) &= ~PGSTE_UC_BIT;
+	pte = *ptep;
+	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+		pgste = pgste_ipte_notify(mm, ptep, pgste);
+		__ptep_ipte(addr, ptep);
+		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+			pte_val(pte) |= _PAGE_PROTECT;
+		else
+			pte_val(pte) |= _PAGE_INVALID;
+		*ptep = pte;
+	}
+	pgste_set_unlock(ptep, pgste);
+	return dirty;
+}
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
@@ -1137,7 +1129,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	pte = pte_mkold(pte);

 	if (mm_has_pgste(vma->vm_mm)) {
-		pgste_set_pte(ptep, pte);
+		pgste = pgste_set_pte(ptep, pgste, pte);
 		pgste_set_unlock(ptep, pgste);
 	} else
 		*ptep = pte;
@@ -1182,7 +1174,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	pte_val(*ptep) = _PAGE_INVALID;

 	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, mm);
 		pgste_set_unlock(ptep, pgste);
 	}
 	return pte;
@@ -1205,7 +1197,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
 	ptep_flush_lazy(mm, address, ptep);

 	if (mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, mm);
 		pgste_set(ptep, pgste);
 	}
 	return pte;
@@ -1219,8 +1211,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get(ptep);
-		pgste_set_key(ptep, pgste, pte);
-		pgste_set_pte(ptep, pte);
+		pgste_set_key(ptep, pgste, pte, mm);
+		pgste = pgste_set_pte(ptep, pgste, pte);
 		pgste_set_unlock(ptep, pgste);
 	} else
 		*ptep = pte;
@@ -1246,7 +1238,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 		if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
 		    _PGSTE_GPS_USAGE_UNUSED)
 			pte_val(pte) |= _PAGE_UNUSED;
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
 		pgste_set_unlock(ptep, pgste);
 	}
 	return pte;
@@ -1278,7 +1270,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	pte_val(*ptep) = _PAGE_INVALID;

 	if (!full && mm_has_pgste(mm)) {
-		pgste = pgste_update_all(&pte, pgste);
+		pgste = pgste_update_all(&pte, pgste, mm);
 		pgste_set_unlock(ptep, pgste);
 	}
 	return pte;
@@ -1301,7 +1293,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
 		pte = pte_wrprotect(pte);

 		if (mm_has_pgste(mm)) {
-			pgste_set_pte(ptep, pte);
+			pgste = pgste_set_pte(ptep, pgste, pte);
 			pgste_set_unlock(ptep, pgste);
 		} else
 			*ptep = pte;
@@ -1326,7 +1318,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 	ptep_flush_direct(vma->vm_mm, address, ptep);

 	if (mm_has_pgste(vma->vm_mm)) {
-		pgste_set_pte(ptep, entry);
+		pgste = pgste_set_pte(ptep, pgste, entry);
 		pgste_set_unlock(ptep, pgste);
 	} else
 		*ptep = entry;
@@ -1734,6 +1726,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
+extern void s390_enable_skey(void);
 
 
 /*
  * No page table caches to initialise
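The pgtable.h changes above add software dirty tracking for guest pages: pgste_set_pte() now marks writable ptes with PGSTE_UC_BIT, ptep_test_and_clear_user_dirty() harvests and clears that bit, and a new gmap_test_and_clear_dirty() helper is declared for the guest mapping. A minimal, hypothetical caller sketch (the function name, fixed 4K stride and bitmap handling are assumptions for illustration, not part of this commit) of how such a helper could feed a per-page dirty bitmap for migration:

	/* Sketch only: scan npages guest pages starting at gaddr and record
	 * which ones were written since the last scan, using the
	 * gmap_test_and_clear_dirty() helper declared above. */
	static void sketch_collect_dirty(struct gmap *gmap, unsigned long gaddr,
					 unsigned long npages, unsigned long *bitmap)
	{
		unsigned long i;

		for (i = 0; i < npages; i++)
			if (gmap_test_and_clear_dirty(gaddr + i * PAGE_SIZE, gmap))
				set_bit(i, bitmap);	/* guest wrote this page */
	}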

+ 44 - 0
arch/s390/include/asm/ptrace.h

@@ -22,6 +22,50 @@
 			 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
 			 PSW_MASK_PSTATE | PSW_ASC_PRIMARY)

+struct psw_bits {
+	unsigned long long	: 1;
+	unsigned long long r	: 1; /* PER-Mask */
+	unsigned long long	: 3;
+	unsigned long long t	: 1; /* DAT Mode */
+	unsigned long long i	: 1; /* Input/Output Mask */
+	unsigned long long e	: 1; /* External Mask */
+	unsigned long long key	: 4; /* PSW Key */
+	unsigned long long	: 1;
+	unsigned long long m	: 1; /* Machine-Check Mask */
+	unsigned long long w	: 1; /* Wait State */
+	unsigned long long p	: 1; /* Problem State */
+	unsigned long long as	: 2; /* Address Space Control */
+	unsigned long long cc	: 2; /* Condition Code */
+	unsigned long long pm	: 4; /* Program Mask */
+	unsigned long long ri	: 1; /* Runtime Instrumentation */
+	unsigned long long	: 6;
+	unsigned long long eaba : 2; /* Addressing Mode */
+#ifdef CONFIG_64BIT
+	unsigned long long	: 31;
+	unsigned long long ia	: 64;/* Instruction Address */
+#else
+	unsigned long long ia	: 31;/* Instruction Address */
+#endif
+};
+
+enum {
+	PSW_AMODE_24BIT = 0,
+	PSW_AMODE_31BIT = 1,
+	PSW_AMODE_64BIT = 3
+};
+
+enum {
+	PSW_AS_PRIMARY	 = 0,
+	PSW_AS_ACCREG	 = 1,
+	PSW_AS_SECONDARY = 2,
+	PSW_AS_HOME	 = 3
+};
+
+#define psw_bits(__psw) (*({			\
+	typecheck(psw_t, __psw);		\
+	&(*(struct psw_bits *)(&(__psw)));	\
+}))
+
 /*
  * The pt_regs struct defines the way the registers are stored on
  * the stack during a system call.
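The new psw_bits() macro gives a typed bit-field view of a psw_t; the guest access code added later in this commit uses it as psw_bits(*psw).t and psw_bits(vcpu->arch.sie_block->gpsw).as. A small illustrative helper (the function name is invented for this sketch):

	/* Sketch only: true if DAT is on and the PSW selects the home space. */
	static int sketch_psw_uses_home_space(psw_t psw)
	{
		/* t = DAT mode, as = address-space control (see struct psw_bits) */
		return psw_bits(psw).t && psw_bits(psw).as == PSW_AS_HOME;
	}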

+ 7 - 1
arch/s390/include/asm/sclp.h

@@ -28,7 +28,11 @@ struct sclp_ipl_info {
 
 
 struct sclp_cpu_entry {
 	u8 address;
-	u8 reserved0[13];
+	u8 reserved0[2];
+	u8 : 3;
+	u8 siif : 1;
+	u8 : 4;
+	u8 reserved2[10];
 	u8 type;
 	u8 reserved1;
 } __attribute__((packed));
@@ -61,5 +65,7 @@ int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 unsigned long sclp_get_hsa_size(void);
 void sclp_early_detect(void);
+int sclp_has_siif(void);
+unsigned int sclp_get_ibc(void);
 
 
 #endif /* _ASM_S390_SCLP_H */

+ 28 - 0
arch/s390/include/uapi/asm/kvm.h

@@ -15,6 +15,7 @@
 #include <linux/types.h>

 #define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
 
 
 /* Device control API: s390-specific devices */
 #define KVM_DEV_FLIC_GET_ALL_IRQS	1
@@ -54,6 +55,13 @@ struct kvm_s390_io_adapter_req {
 	__u64 addr;
 };

+/* kvm attr_group  on vm fd */
+#define KVM_S390_VM_MEM_CTRL		0
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA	0
+#define KVM_S390_VM_MEM_CLR_CMMA	1
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */
@@ -72,11 +80,31 @@ struct kvm_fpu {
 	__u64 fprs[16];
 };

+#define KVM_GUESTDBG_USE_HW_BP		0x00010000
+
+#define KVM_HW_BP			1
+#define KVM_HW_WP_WRITE			2
+#define KVM_SINGLESTEP			4
+
 struct kvm_debug_exit_arch {
+	__u64 addr;
+	__u8 type;
+	__u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+	__u64 addr;
+	__u64 phys_addr;
+	__u64 len;
+	__u8 type;
+	__u8 pad[7]; /* Should be set to 0 */
 };

 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
+	__u32 nr_hw_bp;
+	__u32 pad; /* Should be set to 0 */
+	struct kvm_hw_breakpoint __user *hw_bp;
 };

 #define KVM_SYNC_PREFIX (1UL << 0)
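Userspace drives the new hardware breakpoint support through the generic KVM_SET_GUEST_DEBUG ioctl. A hedged userspace sketch (the vcpu fd and breakpoint address are placeholders; KVM_GUESTDBG_ENABLE and struct kvm_guest_debug come from the generic KVM API, only the arch-specific pieces are defined above):

	/* Sketch only: arm one s390 hardware breakpoint on a vcpu fd. */
	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int sketch_set_hw_breakpoint(int vcpu_fd, __u64 addr)
	{
		struct kvm_hw_breakpoint bp;
		struct kvm_guest_debug dbg;

		memset(&bp, 0, sizeof(bp));
		memset(&dbg, 0, sizeof(dbg));
		bp.addr = addr;
		bp.type = KVM_HW_BP;
		dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
		dbg.arch.nr_hw_bp = 1;
		dbg.arch.hw_bp = &bp;
		return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
	}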

+ 245 - 0
arch/s390/include/uapi/asm/sie.h

@@ -0,0 +1,245 @@
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#include <asm/sigp.h>
+
+#define diagnose_codes						\
+	{ 0x10, "DIAG (0x10) release pages" },			\
+	{ 0x44, "DIAG (0x44) time slice end" },			\
+	{ 0x9c, "DIAG (0x9c) time slice end directed" },	\
+	{ 0x204, "DIAG (0x204) logical-cpu utilization" },	\
+	{ 0x258, "DIAG (0x258) page-reference services" },	\
+	{ 0x308, "DIAG (0x308) ipl functions" },		\
+	{ 0x500, "DIAG (0x500) KVM virtio functions" },		\
+	{ 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes						\
+	{ SIGP_SENSE, "SIGP sense" },					\
+	{ SIGP_EXTERNAL_CALL, "SIGP external call" },			\
+	{ SIGP_EMERGENCY_SIGNAL, "SIGP emergency signal" },		\
+	{ SIGP_STOP, "SIGP stop" },					\
+	{ SIGP_STOP_AND_STORE_STATUS, "SIGP stop and store status" },	\
+	{ SIGP_SET_ARCHITECTURE, "SIGP set architecture" },		\
+	{ SIGP_SET_PREFIX, "SIGP set prefix" },				\
+	{ SIGP_SENSE_RUNNING, "SIGP sense running" },			\
+	{ SIGP_RESTART, "SIGP restart" },				\
+	{ SIGP_INITIAL_CPU_RESET, "SIGP initial cpu reset" },		\
+	{ SIGP_STORE_STATUS_AT_ADDRESS, "SIGP store status at address" }
+
+#define icpt_prog_codes						\
+	{ 0x0001, "Prog Operation" },				\
+	{ 0x0002, "Prog Privileged Operation" },		\
+	{ 0x0003, "Prog Execute" },				\
+	{ 0x0004, "Prog Protection" },				\
+	{ 0x0005, "Prog Addressing" },				\
+	{ 0x0006, "Prog Specification" },			\
+	{ 0x0007, "Prog Data" },				\
+	{ 0x0008, "Prog Fixedpoint overflow" },			\
+	{ 0x0009, "Prog Fixedpoint divide" },			\
+	{ 0x000A, "Prog Decimal overflow" },			\
+	{ 0x000B, "Prog Decimal divide" },			\
+	{ 0x000C, "Prog HFP exponent overflow" },		\
+	{ 0x000D, "Prog HFP exponent underflow" },		\
+	{ 0x000E, "Prog HFP significance" },			\
+	{ 0x000F, "Prog HFP divide" },				\
+	{ 0x0010, "Prog Segment translation" },			\
+	{ 0x0011, "Prog Page translation" },			\
+	{ 0x0012, "Prog Translation specification" },		\
+	{ 0x0013, "Prog Special operation" },			\
+	{ 0x0015, "Prog Operand" },				\
+	{ 0x0016, "Prog Trace table" },				\
+	{ 0x0017, "Prog ASNtranslation specification" },	\
+	{ 0x001C, "Prog Spaceswitch event" },			\
+	{ 0x001D, "Prog HFP square root" },			\
+	{ 0x001F, "Prog PCtranslation specification" },		\
+	{ 0x0020, "Prog AFX translation" },			\
+	{ 0x0021, "Prog ASX translation" },			\
+	{ 0x0022, "Prog LX translation" },			\
+	{ 0x0023, "Prog EX translation" },			\
+	{ 0x0024, "Prog Primary authority" },			\
+	{ 0x0025, "Prog Secondary authority" },			\
+	{ 0x0026, "Prog LFXtranslation exception" },		\
+	{ 0x0027, "Prog LSXtranslation exception" },		\
+	{ 0x0028, "Prog ALET specification" },			\
+	{ 0x0029, "Prog ALEN translation" },			\
+	{ 0x002A, "Prog ALE sequence" },			\
+	{ 0x002B, "Prog ASTE validity" },			\
+	{ 0x002C, "Prog ASTE sequence" },			\
+	{ 0x002D, "Prog Extended authority" },			\
+	{ 0x002E, "Prog LSTE sequence" },			\
+	{ 0x002F, "Prog ASTE instance" },			\
+	{ 0x0030, "Prog Stack full" },				\
+	{ 0x0031, "Prog Stack empty" },				\
+	{ 0x0032, "Prog Stack specification" },			\
+	{ 0x0033, "Prog Stack type" },				\
+	{ 0x0034, "Prog Stack operation" },			\
+	{ 0x0039, "Prog Region first translation" },		\
+	{ 0x003A, "Prog Region second translation" },		\
+	{ 0x003B, "Prog Region third translation" },		\
+	{ 0x0040, "Prog Monitor event" },			\
+	{ 0x0080, "Prog PER event" },				\
+	{ 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic)		\
+	{ (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic)			\
+	{ opcode, mnemonic }
+
+#define icpt_insn_codes				\
+	exit_code_ipa0(0x01, 0x01, "PR"),	\
+	exit_code_ipa0(0x01, 0x04, "PTFF"),	\
+	exit_code_ipa0(0x01, 0x07, "SCKPF"),	\
+	exit_code_ipa0(0xAA, 0x00, "RINEXT"),	\
+	exit_code_ipa0(0xAA, 0x01, "RION"),	\
+	exit_code_ipa0(0xAA, 0x02, "TRIC"),	\
+	exit_code_ipa0(0xAA, 0x03, "RIOFF"),	\
+	exit_code_ipa0(0xAA, 0x04, "RIEMIT"),	\
+	exit_code_ipa0(0xB2, 0x02, "STIDP"),	\
+	exit_code_ipa0(0xB2, 0x04, "SCK"),	\
+	exit_code_ipa0(0xB2, 0x05, "STCK"),	\
+	exit_code_ipa0(0xB2, 0x06, "SCKC"),	\
+	exit_code_ipa0(0xB2, 0x07, "STCKC"),	\
+	exit_code_ipa0(0xB2, 0x08, "SPT"),	\
+	exit_code_ipa0(0xB2, 0x09, "STPT"),	\
+	exit_code_ipa0(0xB2, 0x0d, "PTLB"),	\
+	exit_code_ipa0(0xB2, 0x10, "SPX"),	\
+	exit_code_ipa0(0xB2, 0x11, "STPX"),	\
+	exit_code_ipa0(0xB2, 0x12, "STAP"),	\
+	exit_code_ipa0(0xB2, 0x14, "SIE"),	\
+	exit_code_ipa0(0xB2, 0x16, "SETR"),	\
+	exit_code_ipa0(0xB2, 0x17, "STETR"),	\
+	exit_code_ipa0(0xB2, 0x18, "PC"),	\
+	exit_code_ipa0(0xB2, 0x20, "SERVC"),	\
+	exit_code_ipa0(0xB2, 0x28, "PT"),	\
+	exit_code_ipa0(0xB2, 0x29, "ISKE"),	\
+	exit_code_ipa0(0xB2, 0x2a, "RRBE"),	\
+	exit_code_ipa0(0xB2, 0x2b, "SSKE"),	\
+	exit_code_ipa0(0xB2, 0x2c, "TB"),	\
+	exit_code_ipa0(0xB2, 0x2e, "PGIN"),	\
+	exit_code_ipa0(0xB2, 0x2f, "PGOUT"),	\
+	exit_code_ipa0(0xB2, 0x30, "CSCH"),	\
+	exit_code_ipa0(0xB2, 0x31, "HSCH"),	\
+	exit_code_ipa0(0xB2, 0x32, "MSCH"),	\
+	exit_code_ipa0(0xB2, 0x33, "SSCH"),	\
+	exit_code_ipa0(0xB2, 0x34, "STSCH"),	\
+	exit_code_ipa0(0xB2, 0x35, "TSCH"),	\
+	exit_code_ipa0(0xB2, 0x36, "TPI"),	\
+	exit_code_ipa0(0xB2, 0x37, "SAL"),	\
+	exit_code_ipa0(0xB2, 0x38, "RSCH"),	\
+	exit_code_ipa0(0xB2, 0x39, "STCRW"),	\
+	exit_code_ipa0(0xB2, 0x3a, "STCPS"),	\
+	exit_code_ipa0(0xB2, 0x3b, "RCHP"),	\
+	exit_code_ipa0(0xB2, 0x3c, "SCHM"),	\
+	exit_code_ipa0(0xB2, 0x40, "BAKR"),	\
+	exit_code_ipa0(0xB2, 0x48, "PALB"),	\
+	exit_code_ipa0(0xB2, 0x4c, "TAR"),	\
+	exit_code_ipa0(0xB2, 0x50, "CSP"),	\
+	exit_code_ipa0(0xB2, 0x54, "MVPG"),	\
+	exit_code_ipa0(0xB2, 0x58, "BSG"),	\
+	exit_code_ipa0(0xB2, 0x5a, "BSA"),	\
+	exit_code_ipa0(0xB2, 0x5f, "CHSC"),	\
+	exit_code_ipa0(0xB2, 0x74, "SIGA"),	\
+	exit_code_ipa0(0xB2, 0x76, "XSCH"),	\
+	exit_code_ipa0(0xB2, 0x78, "STCKE"),	\
+	exit_code_ipa0(0xB2, 0x7c, "STCKF"),	\
+	exit_code_ipa0(0xB2, 0x7d, "STSI"),	\
+	exit_code_ipa0(0xB2, 0xb0, "STFLE"),	\
+	exit_code_ipa0(0xB2, 0xb1, "STFL"),	\
+	exit_code_ipa0(0xB2, 0xb2, "LPSWE"),	\
+	exit_code_ipa0(0xB2, 0xf8, "TEND"),	\
+	exit_code_ipa0(0xB2, 0xfc, "TABORT"),	\
+	exit_code_ipa0(0xB9, 0x1e, "KMAC"),	\
+	exit_code_ipa0(0xB9, 0x28, "PCKMO"),	\
+	exit_code_ipa0(0xB9, 0x2a, "KMF"),	\
+	exit_code_ipa0(0xB9, 0x2b, "KMO"),	\
+	exit_code_ipa0(0xB9, 0x2d, "KMCTR"),	\
+	exit_code_ipa0(0xB9, 0x2e, "KM"),	\
+	exit_code_ipa0(0xB9, 0x2f, "KMC"),	\
+	exit_code_ipa0(0xB9, 0x3e, "KIMD"),	\
+	exit_code_ipa0(0xB9, 0x3f, "KLMD"),	\
+	exit_code_ipa0(0xB9, 0x8a, "CSPG"),	\
+	exit_code_ipa0(0xB9, 0x8d, "EPSW"),	\
+	exit_code_ipa0(0xB9, 0x8e, "IDTE"),	\
+	exit_code_ipa0(0xB9, 0x8f, "CRDTE"),	\
+	exit_code_ipa0(0xB9, 0x9c, "EQBS"),	\
+	exit_code_ipa0(0xB9, 0xa2, "PTF"),	\
+	exit_code_ipa0(0xB9, 0xab, "ESSA"),	\
+	exit_code_ipa0(0xB9, 0xae, "RRBM"),	\
+	exit_code_ipa0(0xB9, 0xaf, "PFMF"),	\
+	exit_code_ipa0(0xE3, 0x03, "LRAG"),	\
+	exit_code_ipa0(0xE3, 0x13, "LRAY"),	\
+	exit_code_ipa0(0xE3, 0x25, "NTSTG"),	\
+	exit_code_ipa0(0xE5, 0x00, "LASP"),	\
+	exit_code_ipa0(0xE5, 0x01, "TPROT"),	\
+	exit_code_ipa0(0xE5, 0x60, "TBEGIN"),	\
+	exit_code_ipa0(0xE5, 0x61, "TBEGINC"),	\
+	exit_code_ipa0(0xEB, 0x25, "STCTG"),	\
+	exit_code_ipa0(0xEB, 0x2f, "LCTLG"),	\
+	exit_code_ipa0(0xEB, 0x60, "LRIC"),	\
+	exit_code_ipa0(0xEB, 0x61, "STRIC"),	\
+	exit_code_ipa0(0xEB, 0x62, "MRIC"),	\
+	exit_code_ipa0(0xEB, 0x8a, "SQBS"),	\
+	exit_code_ipa0(0xC8, 0x01, "ECTG"),	\
+	exit_code(0x0a, "SVC"),			\
+	exit_code(0x80, "SSM"),			\
+	exit_code(0x82, "LPSW"),		\
+	exit_code(0x83, "DIAG"),		\
+	exit_code(0xae, "SIGP"),		\
+	exit_code(0xac, "STNSM"),		\
+	exit_code(0xad, "STOSM"),		\
+	exit_code(0xb1, "LRA"),			\
+	exit_code(0xb6, "STCTL"),		\
+	exit_code(0xb7, "LCTL"),		\
+	exit_code(0xee, "PLO")
+
+#define sie_intercept_code					\
+	{ 0x00, "Host interruption" },				\
+	{ 0x04, "Instruction" },				\
+	{ 0x08, "Program interruption" },			\
+	{ 0x0c, "Instruction and program interruption" },	\
+	{ 0x10, "External request" },				\
+	{ 0x14, "External interruption" },			\
+	{ 0x18, "I/O request" },				\
+	{ 0x1c, "Wait state" },					\
+	{ 0x20, "Validity" },					\
+	{ 0x28, "Stop request" },				\
+	{ 0x2c, "Operation exception" },			\
+	{ 0x38, "Partial-execution" },				\
+	{ 0x3c, "I/O interruption" },				\
+	{ 0x40, "I/O instruction" },				\
+	{ 0x48, "Timing subset" }
+
+/*
+ * This is the simple interceptable instructions decoder.
+ *
+ * It will be used as userspace interface and it can be used in places
+ * that does not allow to use general decoder functions,
+ * such as trace events declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask)		\
+	(insn >> 56) == (ipa0) ?				\
+		((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn)			\
+	INSN_DECODE_IPA0(0x01, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)	\
+	INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xb9, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe3, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)	\
+	INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)	\
+	INSN_DECODE(insn)
+
+#endif /* _UAPI_ASM_S390_SIE_H */
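As the header comment above notes, icpt_insn_decoder() turns an intercepted instruction word into a key that identifies an entry in the icpt_insn_codes mnemonic table, for use in places such as trace event declarations. A hedged lookup sketch (the table and function names and the linear search are inventions for illustration):

	/* Sketch only: map an intercepted instruction word to its mnemonic. */
	static const struct { unsigned long long key; const char *name; }
	sketch_icpt_table[] = { icpt_insn_codes };

	static const char *sketch_icpt_name(unsigned long long insn)
	{
		unsigned long long key = icpt_insn_decoder(insn);
		unsigned int i;

		for (i = 0; i < sizeof(sketch_icpt_table) / sizeof(sketch_icpt_table[0]); i++)
			if (sketch_icpt_table[i].key == key)
				return sketch_icpt_table[i].name;
		return "unknown";
	}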

+ 11 - 3
arch/s390/kernel/asm-offsets.c

@@ -90,16 +90,22 @@ int main(void)
 	DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
 	DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
 	DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
-	DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
+	DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
+	DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
+	DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
 	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
-	DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
-	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
+	DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
+	DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+	DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
+	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
+	DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
 	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
 	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
 	DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
 	DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
 	DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
 	DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
+	DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
 	DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
 	DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
 	DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
@@ -157,6 +163,8 @@ int main(void)
 #ifdef CONFIG_32BIT
 	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
 #else /* CONFIG_32BIT */
+	DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
+	DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
 	DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
 	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
 	DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));

+ 2 - 2
arch/s390/kernel/entry.S

@@ -389,8 +389,8 @@ ENTRY(pgm_check_handler)
 	jz	pgm_kprobe
 	oi	__PT_FLAGS+3(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(4,%r1),__LC_PER_ADDRESS
-	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CAUSE
-	mvc	__THREAD_per_paid(1,%r1),__LC_PER_PAID
+	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CODE
+	mvc	__THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
 0:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
 	l	%r1,BASED(.Ljump_table)

+ 2 - 2
arch/s390/kernel/entry64.S

@@ -420,8 +420,8 @@ ENTRY(pgm_check_handler)
 	jz	pgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
-	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CAUSE
-	mvc	__THREAD_per_paid(1,%r14),__LC_PER_PAID
+	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
+	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
 0:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table

+ 3 - 1
arch/s390/kvm/Makefile

@@ -11,5 +11,7 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqch
 
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm

-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o guestdbg.o
+
 obj-$(CONFIG_KVM) += kvm.o

+ 6 - 13
arch/s390/kvm/diag.c

@@ -23,7 +23,7 @@
 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
 	unsigned long start, end;
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
+	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
 
 
 	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
@@ -64,12 +64,12 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	int rc;
 	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
 	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
-	unsigned long hva_token = KVM_HVA_ERR_BAD;
 
 
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

@@ -89,8 +89,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 		    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

-		hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
-		if (kvm_is_error_hva(hva_token))
+		if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);

 		vcpu->arch.pfault_token = parm.token_addr;
@@ -167,23 +166,17 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 
 
 	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
 	switch (subcode) {
-	case 0:
-	case 1:
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE);
-		return -EOPNOTSUPP;
 	case 3:
 		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE);
 		break;
 	case 4:
 		vcpu->run->s390_reset_flags = 0;
-		page_table_reset_pgste(current->mm, 0, TASK_SIZE);
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}

-	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	kvm_s390_vcpu_stop(vcpu);
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;

+ 726 - 0
arch/s390/kvm/gaccess.c

@@ -0,0 +1,726 @@
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/pgtable.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+union asce {
+	unsigned long val;
+	struct {
+		unsigned long origin : 52; /* Region- or Segment-Table Origin */
+		unsigned long	 : 2;
+		unsigned long g  : 1; /* Subspace Group Control */
+		unsigned long p  : 1; /* Private Space Control */
+		unsigned long s  : 1; /* Storage-Alteration-Event Control */
+		unsigned long x  : 1; /* Space-Switch-Event Control */
+		unsigned long r  : 1; /* Real-Space Control */
+		unsigned long	 : 1;
+		unsigned long dt : 2; /* Designation-Type Control */
+		unsigned long tl : 2; /* Region- or Segment-Table Length */
+	};
+};
+
+enum {
+	ASCE_TYPE_SEGMENT = 0,
+	ASCE_TYPE_REGION3 = 1,
+	ASCE_TYPE_REGION2 = 2,
+	ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Second-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Second-Table Length */
+	};
+};
+
+union region2_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Third-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Third-Table Length */
+	};
+};
+
+struct region3_table_entry_fc0 {
+	unsigned long sto: 52;/* Segment-Table Origin */
+	unsigned long	 : 1;
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 1;
+	unsigned long tf : 2; /* Segment-Table Offset */
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long	 : 14;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long co : 1; /* Change-Recording Override */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union region3_table_entry {
+	unsigned long val;
+	struct region3_table_entry_fc0 fc0;
+	struct region3_table_entry_fc1 fc1;
+	struct {
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long cr : 1; /* Common-Region Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+struct segment_entry_fc0 {
+	unsigned long pto: 53;/* Page-Table Origin */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 3;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+struct segment_entry_fc1 {
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long	 : 3;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long co : 1; /* Change-Recording Override */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union segment_table_entry {
+	unsigned long val;
+	struct segment_entry_fc0 fc0;
+	struct segment_entry_fc1 fc1;
+	struct {
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Segment-Invalid Bit */
+		unsigned long cs : 1; /* Common-Segment Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+enum {
+	TABLE_TYPE_SEGMENT = 0,
+	TABLE_TYPE_REGION3 = 1,
+	TABLE_TYPE_REGION2 = 2,
+	TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long pfra : 52; /* Page-Frame Real Address */
+		unsigned long z  : 1; /* Zero Bit */
+		unsigned long i  : 1; /* Page-Invalid Bit */
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long co : 1; /* Change-Recording Override */
+		unsigned long	 : 8;
+	};
+};
+
+/*
+ * vaddress union in order to easily decode a virtual address into its
+ * region first index, region second index etc. parts.
+ */
+union vaddress {
+	unsigned long addr;
+	struct {
+		unsigned long rfx : 11;
+		unsigned long rsx : 11;
+		unsigned long rtx : 11;
+		unsigned long sx  : 11;
+		unsigned long px  : 8;
+		unsigned long bx  : 12;
+	};
+	struct {
+		unsigned long rfx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rsx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rtx01 : 2;
+		unsigned long	    : 9;
+		unsigned long sx01  : 2;
+		unsigned long	    : 29;
+	};
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members are used to
+ * simply assign them the value of a region, segment or page table entry.
+ */
+union raddress {
+	unsigned long addr;
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+static int ipte_lock_count;
+static DEFINE_MUTEX(ipte_mutex);
+
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+	union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
+
+	if (vcpu->arch.sie_block->eca & 1)
+		return ic->kh != 0;
+	return ipte_lock_count != 0;
+}
+
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&ipte_mutex);
+	ipte_lock_count++;
+	if (ipte_lock_count > 1)
+		goto out;
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		old = ACCESS_ONCE(*ic);
+		while (old.k) {
+			cond_resched();
+			old = ACCESS_ONCE(*ic);
+		}
+		new = old;
+		new.k = 1;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+out:
+	mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&ipte_mutex);
+	ipte_lock_count--;
+	if (ipte_lock_count)
+		goto out;
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		new = old = ACCESS_ONCE(*ic);
+		new.k = 0;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	if (!ipte_lock_count)
+		wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+	mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		old = ACCESS_ONCE(*ic);
+		while (old.kg) {
+			cond_resched();
+			old = ACCESS_ONCE(*ic);
+		}
+		new = old;
+		new.k = 1;
+		new.kh++;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+}
+
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		new = old = ACCESS_ONCE(*ic);
+		new.kh--;
+		if (!new.kh)
+			new.k = 0;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	if (!new.kh)
+		wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+void ipte_lock(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->eca & 1)
+		ipte_lock_siif(vcpu);
+	else
+		ipte_lock_simple(vcpu);
+}
+
+void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->eca & 1)
+		ipte_unlock_siif(vcpu);
+	else
+		ipte_unlock_simple(vcpu);
+}
+
+static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+{
+	switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+	case PSW_AS_PRIMARY:
+		return vcpu->arch.sie_block->gcr[1];
+	case PSW_AS_SECONDARY:
+		return vcpu->arch.sie_block->gcr[7];
+	case PSW_AS_HOME:
+		return vcpu->arch.sie_block->gcr[13];
+	}
+	return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @write: indicates if access is a write access
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ *	    - a negative value if guest access failed due to e.g. broken
+ *	      guest mapping
+ *	    - a positive value if an access exception happened. In this case
+ *	      the returned value is the program interruption code as defined
+ *	      by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+				     unsigned long *gpa, int write)
+{
+	union vaddress vaddr = {.addr = gva};
+	union raddress raddr = {.addr = gva};
+	union page_table_entry pte;
+	int dat_protection = 0;
+	union ctlreg0 ctlreg0;
+	unsigned long ptr;
+	int edat1, edat2;
+	union asce asce;
+
+	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+	edat1 = ctlreg0.edat && test_vfacility(8);
+	edat2 = edat1 && test_vfacility(78);
+	asce.val = get_vcpu_asce(vcpu);
+	if (asce.r)
+		goto real_address;
+	ptr = asce.origin * 4096;
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:
+		if (vaddr.rfx01 > asce.tl)
+			return PGM_REGION_FIRST_TRANS;
+		ptr += vaddr.rfx * 8;
+		break;
+	case ASCE_TYPE_REGION2:
+		if (vaddr.rfx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rsx01 > asce.tl)
+			return PGM_REGION_SECOND_TRANS;
+		ptr += vaddr.rsx * 8;
+		break;
+	case ASCE_TYPE_REGION3:
+		if (vaddr.rfx || vaddr.rsx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rtx01 > asce.tl)
+			return PGM_REGION_THIRD_TRANS;
+		ptr += vaddr.rtx * 8;
+		break;
+	case ASCE_TYPE_SEGMENT:
+		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.sx01 > asce.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		ptr += vaddr.sx * 8;
+		break;
+	}
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:	{
+		union region1_table_entry rfte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rfte.val))
+			return -EFAULT;
+		if (rfte.i)
+			return PGM_REGION_FIRST_TRANS;
+		if (rfte.tt != TABLE_TYPE_REGION1)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+			return PGM_REGION_SECOND_TRANS;
+		if (edat1)
+			dat_protection |= rfte.p;
+		ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_REGION2: {
+		union region2_table_entry rste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rste.val))
+			return -EFAULT;
+		if (rste.i)
+			return PGM_REGION_SECOND_TRANS;
+		if (rste.tt != TABLE_TYPE_REGION2)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+			return PGM_REGION_THIRD_TRANS;
+		if (edat1)
+			dat_protection |= rste.p;
+		ptr = rste.rto * 4096 + vaddr.rtx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_REGION3: {
+		union region3_table_entry rtte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rtte.val))
+			return -EFAULT;
+		if (rtte.i)
+			return PGM_REGION_THIRD_TRANS;
+		if (rtte.tt != TABLE_TYPE_REGION3)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.cr && asce.p && edat2)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.fc && edat2) {
+			dat_protection |= rtte.fc1.p;
+			raddr.rfaa = rtte.fc1.rfaa;
+			goto absolute_address;
+		}
+		if (vaddr.sx01 < rtte.fc0.tf)
+			return PGM_SEGMENT_TRANSLATION;
+		if (vaddr.sx01 > rtte.fc0.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		if (edat1)
+			dat_protection |= rtte.fc0.p;
+		ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_SEGMENT: {
+		union segment_table_entry ste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &ste.val))
+			return -EFAULT;
+		if (ste.i)
+			return PGM_SEGMENT_TRANSLATION;
+		if (ste.tt != TABLE_TYPE_SEGMENT)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.cs && asce.p)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.fc && edat1) {
+			dat_protection |= ste.fc1.p;
+			raddr.sfaa = ste.fc1.sfaa;
+			goto absolute_address;
+		}
+		dat_protection |= ste.fc0.p;
+		ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+	}
+	}
+	if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		return PGM_ADDRESSING;
+	if (deref_table(vcpu->kvm, ptr, &pte.val))
+		return -EFAULT;
+	if (pte.i)
+		return PGM_PAGE_TRANSLATION;
+	if (pte.z)
+		return PGM_TRANSLATION_SPEC;
+	if (pte.co && !edat1)
+		return PGM_TRANSLATION_SPEC;
+	dat_protection |= pte.p;
+	raddr.pfra = pte.pfra;
+real_address:
+	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:
+	if (write && dat_protection)
+		return PGM_PROTECTION;
+	if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+		return PGM_ADDRESSING;
+	*gpa = raddr.addr;
+	return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+	/* Check for address ranges 0..511 and 4096..4607 */
+	return (ga & ~0x11fful) == 0;
+}
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+{
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	union asce asce;
+
+	if (!ctlreg0.lap)
+		return 0;
+	asce.val = get_vcpu_asce(vcpu);
+	if (psw_bits(*psw).t && asce.p)
+		return 0;
+	return 1;
+}
+
+struct trans_exc_code_bits {
+	unsigned long addr : 52; /* Translation-exception Address */
+	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+	unsigned long	   : 7;
+	unsigned long b61  : 1;
+	unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+	FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+	FSI_STORE   = 1, /* Exception was due to store operation */
+	FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+			    unsigned long *pages, unsigned long nr_pages,
+			    int write)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec_bits;
+	int lap_enabled, rc;
+
+	memset(pgm, 0, sizeof(*pgm));
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+	tec_bits->as = psw_bits(*psw).as;
+	lap_enabled = low_address_protection_enabled(vcpu);
+	while (nr_pages) {
+		ga = kvm_s390_logical_to_effective(vcpu, ga);
+		tec_bits->addr = ga >> PAGE_SHIFT;
+		if (write && lap_enabled && is_low_address(ga)) {
+			pgm->code = PGM_PROTECTION;
+			return pgm->code;
+		}
+		ga &= PAGE_MASK;
+		if (psw_bits(*psw).t) {
+			rc = guest_translate(vcpu, ga, pages, write);
+			if (rc < 0)
+				return rc;
+			if (rc == PGM_PROTECTION)
+				tec_bits->b61 = 1;
+			if (rc)
+				pgm->code = rc;
+		} else {
+			*pages = kvm_s390_real_to_abs(vcpu, ga);
+			if (kvm_is_error_gpa(vcpu->kvm, *pages))
+				pgm->code = PGM_ADDRESSING;
+		}
+		if (pgm->code)
+			return pgm->code;
+		ga += PAGE_SIZE;
+		pages++;
+		nr_pages--;
+	}
+	return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+		 unsigned long len, int write)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long _len, nr_pages, gpa, idx;
+	unsigned long pages_array[2];
+	unsigned long *pages;
+	int need_ipte_lock;
+	union asce asce;
+	int rc;
+
+	if (!len)
+		return 0;
+	/* Access register mode is not supported yet. */
+	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+		return -EOPNOTSUPP;
+	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+	pages = pages_array;
+	if (nr_pages > ARRAY_SIZE(pages_array))
+		pages = vmalloc(nr_pages * sizeof(unsigned long));
+	if (!pages)
+		return -ENOMEM;
+	asce.val = get_vcpu_asce(vcpu);
+	need_ipte_lock = psw_bits(*psw).t && !asce.r;
+	if (need_ipte_lock)
+		ipte_lock(vcpu);
+	rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+	for (idx = 0; idx < nr_pages && !rc; idx++) {
+		gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+		if (write)
+			rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+		else
+			rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+		len -= _len;
+		ga += _len;
+		data += _len;
+	}
+	if (need_ipte_lock)
+		ipte_unlock(vcpu);
+	if (nr_pages > ARRAY_SIZE(pages_array))
+		vfree(pages);
+	return rc;
+}
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, int write)
+{
+	unsigned long _len, gpa;
+	int rc = 0;
+
+	while (len && !rc) {
+		gpa = kvm_s390_real_to_abs(vcpu, gra);
+		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+		if (write)
+			rc = write_guest_abs(vcpu, gpa, data, _len);
+		else
+			rc = read_guest_abs(vcpu, gpa, data, _len);
+		len -= _len;
+		gra += _len;
+		data += _len;
+	}
+	return rc;
+}
+
+/**
+ * guest_translate_address - translate guest logical into guest absolute address
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+			    unsigned long *gpa, int write)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec;
+	union asce asce;
+	int rc;
+
+	/* Access register mode is not supported yet. */
+	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+		return -EOPNOTSUPP;
+
+	gva = kvm_s390_logical_to_effective(vcpu, gva);
+	memset(pgm, 0, sizeof(*pgm));
+	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec->as = psw_bits(*psw).as;
+	tec->fsi = write ? FSI_STORE : FSI_FETCH;
+	tec->addr = gva >> PAGE_SHIFT;
+	if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+		if (write) {
+			rc = pgm->code = PGM_PROTECTION;
+			return rc;
+		}
+	}
+
+	asce.val = get_vcpu_asce(vcpu);
+	if (psw_bits(*psw).t && !asce.r) {	/* Use DAT? */
+		rc = guest_translate(vcpu, gva, gpa, write);
+		if (rc > 0) {
+			if (rc == PGM_PROTECTION)
+				tec->b61 = 1;
+			pgm->code = rc;
+		}
+	} else {
+		rc = 0;
+		*gpa = kvm_s390_real_to_abs(vcpu, gva);
+		if (kvm_is_error_gpa(vcpu->kvm, *gpa))
+			rc = pgm->code = PGM_ADDRESSING;
+	}
+
+	return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_protection - check for low-address protection
+ * @ga: Guest address
+ *
+ * Checks whether an address is subject to low-address protection and set
+ * up vcpu->arch.pgm accordingly if necessary.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec_bits;
+
+	if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+		return 0;
+
+	memset(pgm, 0, sizeof(*pgm));
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = FSI_STORE;
+	tec_bits->as = psw_bits(*psw).as;
+	tec_bits->addr = ga >> PAGE_SHIFT;
+	pgm->code = PGM_PROTECTION;
+
+	return pgm->code;
+}
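For orientation, a hedged sketch of a typical caller of the new access_guest() helper: a negative return signals a broken host mapping, a positive one is an access-exception program code that can be forwarded to the guest, mirroring the read_guest()/kvm_s390_inject_prog_cond() pattern in the diag.c hunk earlier in this commit (the wrapper name below is invented):

	/* Sketch only: read len bytes from guest logical address ga into buf,
	 * injecting a program interrupt if the guest access faulted. */
	static int sketch_read_from_guest(struct kvm_vcpu *vcpu, unsigned long ga,
					  void *buf, unsigned long len)
	{
		int rc = access_guest(vcpu, ga, buf, len, 0);	/* 0 == read */

		if (rc)		/* < 0: host fault, > 0: guest access exception */
			return kvm_s390_inject_prog_cond(vcpu, rc);
		return 0;
	}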

+ 300 - 79
arch/s390/kvm/gaccess.h

@@ -1,7 +1,7 @@
 /*
  * access guest memory
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -15,100 +15,321 @@
 
 
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
 #include "kvm-s390.h"

-/* Convert real to absolute address by applying the prefix of the CPU */
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu - guest virtual cpu
+ * @gra - guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
 static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
-						 unsigned long gaddr)
+						 unsigned long gra)
 {
-	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	if (gaddr < 2 * PAGE_SIZE)
-		gaddr += prefix;
-	else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
-		gaddr -= prefix;
-	return gaddr;
+	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
+
+	if (gra < 2 * PAGE_SIZE)
+		gra += prefix;
+	else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+		gra -= prefix;
+	return gra;
 }

-static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
-					  void __user *gptr,
-					  int prefixing)
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+							  unsigned long ga)
 {
-	unsigned long gaddr = (unsigned long) gptr;
-	unsigned long uaddr;
-
-	if (prefixing)
-		gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
-	uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
-	if (IS_ERR_VALUE(uaddr))
-		uaddr = -EFAULT;
-	return (void __user *)uaddr;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+	if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+		return ga;
+	if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+		return ga & ((1UL << 31) - 1);
+	return ga & ((1UL << 24) - 1);
 }

-#define get_guest(vcpu, x, gptr)				\
-({								\
-	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
-	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
-	int __ret;						\
-								\
-	if (IS_ERR((void __force *)__uptr)) {			\
-		__ret = PTR_ERR((void __force *)__uptr);	\
-	} else {						\
-		BUG_ON((unsigned long)__uptr & __mask);		\
-		__ret = get_user(x, __uptr);			\
-	}							\
-	__ret;							\
-})
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
 
 
-#define put_guest(vcpu, x, gptr)				\
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra)				\
 ({								\
-	__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
-	int __mask = sizeof(__typeof__(*(gptr))) - 1;		\
-	int __ret;						\
+	struct kvm_vcpu *__vcpu = (vcpu);			\
+	__typeof__(*(gra)) __x = (x);				\
+	unsigned long __gpa;					\
								\
-	if (IS_ERR((void __force *)__uptr)) {			\
-		__ret = PTR_ERR((void __force *)__uptr);	\
-	} else {						\
-		BUG_ON((unsigned long)__uptr & __mask);		\
-		__ret = put_user(x, __uptr);			\
-	}							\
-	__ret;							\
+	__gpa = (unsigned long)(gra);				\
+	__gpa += kvm_s390_get_prefix(__vcpu);			\
+	kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x));	\
 })
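Usage sketch (editorial, not part of the patch; assumes a vcpu and rc in scope): this mirrors how the reworked interrupt delivery later in this series stores an external-interrupt code into the guest lowcore.

	/* store ext irq code 0x1004 (clock comparator) into the guest lowcore */
	rc = put_guest_lc(vcpu, 0x1004, (u16 *)__LC_EXT_INT_CODE);
	if (rc)
		return rc;	/* lowcore not mapped; see the note above */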
 
 
-static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
-			       unsigned long from, unsigned long len,
-			       int to_guest, int prefixing)
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		   unsigned long len)
+{
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
+
+	return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
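For example (sketch only, not part of the patch; rc assumed declared), the PSW swap performed by the interrupt delivery code below follows exactly this pattern:

	/* save the current PSW as the external old PSW, load the new one */
	rc  = write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));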
+
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		  unsigned long len)
+{
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
+
+	return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+			    unsigned long *gpa, int write);
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+		 unsigned long len, int write);
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, int write);
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondary or home space (access register mode is currently not
+ * implemented).
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the data to be copied crosses page boundaries in guest address space.
+ * In addition also low address and DAT protection are inspected before
+ * copying any data (key protection is currently not implemented).
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns:  - zero on success
+ *	     - a negative value if e.g. the guest mapping is broken or in
+ *	       case of out-of-memory. In this case the contents of pgm are
+ *	       undefined. Also parts of @data may have been copied to guest
+ *	       space.
+ *	     - a positive value if an access exception happened. In this case
+ *	       the returned value is the program interruption code and the
+ *	       contents of pgm may be used to inject an exception into the
+ *	       guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ *	 guest space (this is also true if the data to be copied would cross
+ *	 one or more page boundaries in guest space).
+ *	 Therefore this function may be used for nullifying and suppressing
+ *	 instruction emulation.
+ *	 It may also be used for terminating instructions, if it is undefined
+ *	 whether data has been changed in guest space in case of an exception.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+		unsigned long len)
+{
+	return access_guest(vcpu, ga, data, len, 1);
+}
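A hedged sketch of the calling convention described above (editorial; the wrapper function and payload are hypothetical, only write_guest itself is from the patch):

static int example_store_to_guest(struct kvm_vcpu *vcpu, unsigned long ga)
{
	u32 val = 0;	/* hypothetical payload */
	int rc;

	rc = write_guest(vcpu, ga, &val, sizeof(val));
	if (rc < 0)
		return rc;	/* guest mapping broken or out of memory */
	if (rc > 0)
		return rc;	/* access exception; caller may inject a pgm irq, see above */
	return 0;		/* data copied to guest space */
}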
+
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to write_guest, except that
+ * data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+	       unsigned long len)
+{
+	return access_guest(vcpu, ga, data, len, 0);
+}
+
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		    unsigned long len)
+{
+	return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		   unsigned long len)
+{
+	return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		     unsigned long len)
+{
+	return access_guest_real(vcpu, gra, data, len, 1);
+}
+
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		    unsigned long len)
 {
-	unsigned long _len, rc;
-	void __user *uptr;
-
-	while (len) {
-		uptr = to_guest ? (void __user *)to : (void __user *)from;
-		uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
-		if (IS_ERR((void __force *)uptr))
-			return -EFAULT;
-		_len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
-		_len = min(_len, len);
-		if (to_guest)
-			rc = copy_to_user((void __user *) uptr, (void *)from, _len);
-		else
-			rc = copy_from_user((void *)to, (void __user *)uptr, _len);
-		if (rc)
-			return -EFAULT;
-		len -= _len;
-		from += _len;
-		to += _len;
-	}
-	return 0;
+	return access_guest_real(vcpu, gra, data, len, 0);
 }

-#define copy_to_guest(vcpu, to, from, size) \
-	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 1)
-#define copy_from_guest(vcpu, to, from, size) \
-	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 1)
-#define copy_to_guest_absolute(vcpu, to, from, size) \
-	__copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
-#define copy_from_guest_absolute(vcpu, to, from, size) \
-	__copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
+void ipte_lock(struct kvm_vcpu *vcpu);
+void ipte_unlock(struct kvm_vcpu *vcpu);
+int ipte_lock_held(struct kvm_vcpu *vcpu);
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
 
 
 #endif /* __KVM_S390_GACCESS_H */

+ 482 - 0
arch/s390/kvm/guestdbg.c

@@ -0,0 +1,482 @@
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop to include the address
+ * range starting at estart with length len. Takes care of overflowing
+ * intervals and tries to minimize the overall interval size.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+	u64 estop;
+
+	if (len > 0)
+		len--;
+	else
+		len = 0;
+
+	estop = estart + len;
+
+	/* 0-0 range represents "not set" */
+	if ((*start == 0) && (*stop == 0)) {
+		*start = estart;
+		*stop = estop;
+	} else if (*start <= *stop) {
+		/* increase the existing range */
+		if (estart < *start)
+			*start = estart;
+		if (estop > *stop)
+			*stop = estop;
+	} else {
+		/* "overflowing" interval, whereby *start > *stop */
+		if (estart <= *stop) {
+			if (estop > *stop)
+				*stop = estop;
+		} else if (estop > *start) {
+			if (estart < *start)
+				*start = estart;
+		}
+		/* minimize the range */
+		else if ((estop - *stop) < (*start - estart))
+			*stop = estop;
+		else
+			*start = estart;
+	}
+}
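To make the merging behaviour concrete, a hypothetical call sequence (editorial sketch, values invented):

	u64 start = 0, stop = 0;

	extend_address_range(&start, &stop, 0x1000, 0x100);	/* -> [0x1000, 0x10ff] */
	extend_address_range(&start, &stop, 0x2000, 0x10);	/* -> [0x1000, 0x200f] */
	/* wrapping PER ranges (stop < start) are merged by the else branch above */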
+
+#define MAX_INST_SIZE 6
+
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+	    vcpu->arch.guestdbg.hw_bp_info == NULL)
+		return;
+
+	/*
+	 * If the guest is not interested in branching events, we can safely
+	 * limit them to the PER address range.
+	 */
+	if (!(*cr9 & PER_EVENT_BRANCH))
+		*cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+	*cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+		start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+		len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+		/*
+		 * The instruction in front of the desired bp has to
+		 * report instruction-fetching events
+		 */
+		if (start < MAX_INST_SIZE) {
+			len += start;
+			start = 0;
+		} else {
+			start -= MAX_INST_SIZE;
+			len += MAX_INST_SIZE;
+		}
+
+		extend_address_range(cr10, cr11, start, len);
+	}
+}
+
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+	    vcpu->arch.guestdbg.hw_wp_info == NULL)
+		return;
+
+	/* if host uses storage alteration for special address
+	 * spaces, enable all events and give all to the guest */
+	if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr10 = 0;
+		*cr11 = PSW_ADDR_INSN;
+	} else {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr9 |= PER_EVENT_STORE;
+
+		for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+			start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+			len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+			extend_address_range(cr10, cr11, start, len);
+		}
+	}
+}
+
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+	vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+	vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+	vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+	vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+	vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+	vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * TODO: if guest psw has per enabled, otherwise 0s!
+	 * This reduces the amount of reported events.
+	 * Need to intercept all psw changes!
+	 */
+
+	if (guestdbg_sstep_enabled(vcpu)) {
+		/* disable timer (clock-comparator) interrupts */
+		vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
+		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+		vcpu->arch.sie_block->gcr[10] = 0;
+		vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+	}
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		enable_all_hw_bp(vcpu);
+		enable_all_hw_wp(vcpu);
+	}
+
+	/* TODO: Instruction-fetching-nullification not allowed for now */
+	if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+		vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+			    struct kvm_hw_breakpoint *bp_data,
+			    struct kvm_hw_wp_info_arch *wp_info)
+{
+	int ret = 0;
+	wp_info->len = bp_data->len;
+	wp_info->addr = bp_data->addr;
+	wp_info->phys_addr = bp_data->phys_addr;
+	wp_info->old_data = NULL;
+
+	if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+		return -EINVAL;
+
+	wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+	if (!wp_info->old_data)
+		return -ENOMEM;
+	/* try to backup the original value */
+	ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
+			 wp_info->len);
+	if (ret) {
+		kfree(wp_info->old_data);
+		wp_info->old_data = NULL;
+	}
+
+	return ret;
+}
+
+#define MAX_BP_COUNT 50
+
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg)
+{
+	int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
+	struct kvm_hw_breakpoint *bp_data = NULL;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+	if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+		return 0;
+	else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+		return -EINVAL;
+
+	size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
+	bp_data = kmalloc(size, GFP_KERNEL);
+	if (!bp_data) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			nr_bp++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
+	if (size > 0) {
+		wp_info = kmalloc(size, GFP_KERNEL);
+		if (!wp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+	size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
+	if (size > 0) {
+		bp_info = kmalloc(size, GFP_KERNEL);
+		if (!bp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+
+	for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			ret = __import_wp_info(vcpu, &bp_data[i],
+					       &wp_info[nr_wp]);
+			if (ret)
+				goto error;
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			bp_info[nr_bp].len = bp_data[i].len;
+			bp_info[nr_bp].addr = bp_data[i].addr;
+			nr_bp++;
+			break;
+		}
+	}
+
+	vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+	vcpu->arch.guestdbg.hw_bp_info = bp_info;
+	vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+	vcpu->arch.guestdbg.hw_wp_info = wp_info;
+	return 0;
+error:
+	kfree(bp_data);
+	kfree(wp_info);
+	kfree(bp_info);
+	return ret;
+}
+
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+		hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+		kfree(hw_wp_info->old_data);
+		hw_wp_info->old_data = NULL;
+	}
+	kfree(vcpu->arch.guestdbg.hw_wp_info);
+	vcpu->arch.guestdbg.hw_wp_info = NULL;
+
+	kfree(vcpu->arch.guestdbg.hw_bp_info);
+	vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+	vcpu->arch.guestdbg.nr_hw_wp = 0;
+	vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+	if (a <= b)
+		return (addr >= a) && (addr <= b);
+	else
+		/* "overflowing" interval */
+		return (addr <= a) && (addr >= b);
+}
+
+#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
+
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+					      unsigned long addr)
+{
+	struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+		return NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+		/* addr is directly the start or in the range of a bp */
+		if (addr == bp_info->addr)
+			goto found;
+		if (bp_info->len > 0 &&
+		    in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
+			goto found;
+
+		bp_info++;
+	}
+
+	return NULL;
+found:
+	return bp_info;
+}
+
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	void *temp = NULL;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+		return NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+		wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+		if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
+			continue;
+
+		temp = kmalloc(wp_info->len, GFP_KERNEL);
+		if (!temp)
+			continue;
+
+		/* refetch the wp data and compare it to the old value */
+		if (!read_guest(vcpu, wp_info->phys_addr, temp,
+				wp_info->len)) {
+			if (memcmp(temp, wp_info->old_data, wp_info->len)) {
+				kfree(temp);
+				return wp_info;
+			}
+		}
+		kfree(temp);
+		temp = NULL;
+	}
+
+	return NULL;
+}
+
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+	vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+	vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+}
+
+#define per_bp_event(code) \
+			(code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
+#define per_write_wp_event(code) \
+			(code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
+
+static int debug_exit_required(struct kvm_vcpu *vcpu)
+{
+	u32 perc = (vcpu->arch.sie_block->perc << 24);
+	struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+	unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+	unsigned long peraddr = vcpu->arch.sie_block->peraddr;
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		if (per_write_wp_event(perc) &&
+		    vcpu->arch.guestdbg.nr_hw_wp > 0) {
+			wp_info = any_wp_changed(vcpu);
+			if (wp_info) {
+				debug_exit->addr = wp_info->addr;
+				debug_exit->type = KVM_HW_WP_WRITE;
+				goto exit_required;
+			}
+		}
+		if (per_bp_event(perc) &&
+			 vcpu->arch.guestdbg.nr_hw_bp > 0) {
+			bp_info = find_hw_bp(vcpu, addr);
+			/* remove duplicate events if PC==PER address */
+			if (bp_info && (addr != peraddr)) {
+				debug_exit->addr = addr;
+				debug_exit->type = KVM_HW_BP;
+				vcpu->arch.guestdbg.last_bp = addr;
+				goto exit_required;
+			}
+			/* breakpoint missed */
+			bp_info = find_hw_bp(vcpu, peraddr);
+			if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+				debug_exit->addr = peraddr;
+				debug_exit->type = KVM_HW_BP;
+				goto exit_required;
+			}
+		}
+	}
+	if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+		debug_exit->addr = addr;
+		debug_exit->type = KVM_SINGLESTEP;
+		goto exit_required;
+	}
+
+	return 0;
+exit_required:
+	return 1;
+}
+
+#define guest_per_enabled(vcpu) \
+			     (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+
+static void filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+	u32 perc = vcpu->arch.sie_block->perc << 24;
+	u64 peraddr = vcpu->arch.sie_block->peraddr;
+	u64 addr = vcpu->arch.sie_block->gpsw.addr;
+	u64 cr9 = vcpu->arch.sie_block->gcr[9];
+	u64 cr10 = vcpu->arch.sie_block->gcr[10];
+	u64 cr11 = vcpu->arch.sie_block->gcr[11];
+	/* filter all events, demanded by the guest */
+	u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
+
+	if (!guest_per_enabled(vcpu))
+		guest_perc = 0;
+
+	/* filter "successful-branching" events */
+	if (guest_perc & PER_EVENT_BRANCH &&
+	    cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+	    !in_addr_range(addr, cr10, cr11))
+		guest_perc &= ~PER_EVENT_BRANCH;
+
+	/* filter "instruction-fetching" events */
+	if (guest_perc & PER_EVENT_IFETCH &&
+	    !in_addr_range(peraddr, cr10, cr11))
+		guest_perc &= ~PER_EVENT_IFETCH;
+
+	/* All other PER events will be given to the guest */
+	/* TODO: Check altered address/address space */
+
+	vcpu->arch.sie_block->perc = guest_perc >> 24;
+
+	if (!guest_perc)
+		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+}
+
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+	if (debug_exit_required(vcpu))
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+	filter_guest_per_event(vcpu);
+}

+ 206 - 16
arch/s390/kvm/intercept.c

@@ -1,7 +1,7 @@
 /*
  * in-kernel handling for sie intercepts
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -16,6 +16,8 @@
 #include <linux/pagemap.h>

 #include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/irq.h>
 
 
 #include "kvm-s390.h"
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "gaccess.h"
@@ -29,6 +31,7 @@ static const intercept_handler_t instruction_handlers[256] = {
 	[0x83] = kvm_s390_handle_diag,
 	[0xae] = kvm_s390_handle_sigp,
 	[0xb2] = kvm_s390_handle_b2,
+	[0xb6] = kvm_s390_handle_stctl,
 	[0xb7] = kvm_s390_handle_lctl,
 	[0xb9] = kvm_s390_handle_b9,
 	[0xe5] = kvm_s390_handle_e5,
@@ -44,9 +47,6 @@ static int handle_noop(struct kvm_vcpu *vcpu)
 	case 0x10:
 		vcpu->stat.exit_external_request++;
 		break;
-	case 0x14:
-		vcpu->stat.exit_external_interrupt++;
-		break;
 	default:
 		break; /* nothing */
 	}
@@ -63,8 +63,7 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);

 	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
-		atomic_set_mask(CPUSTAT_STOPPED,
-				&vcpu->arch.sie_block->cpuflags);
+		kvm_s390_vcpu_stop(vcpu);
 		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
 		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
 		rc = -EOPNOTSUPP;
@@ -109,22 +108,120 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
 	return -EOPNOTSUPP;
 }

+static void __extract_prog_irq(struct kvm_vcpu *vcpu,
+			       struct kvm_s390_pgm_info *pgm_info)
+{
+	memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
+	pgm_info->code = vcpu->arch.sie_block->iprcc;
+
+	switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+	case PGM_SPACE_SWITCH:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+		pgm_info->op_access_id  = vcpu->arch.sie_block->oai;
+		break;
+	case PGM_MONITOR:
+		pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
+		pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_DATA:
+		pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+		break;
+	case PGM_PROTECTION:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+		break;
+	default:
+		break;
+	}
+
+	if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+		pgm_info->per_code = vcpu->arch.sie_block->perc;
+		pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
+		pgm_info->per_address = vcpu->arch.sie_block->peraddr;
+		pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+	}
+}
+
+/*
+ * restore ITDB to program-interruption TDB in guest lowcore
+ * and set TX abort indication if required
+ */
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_itdb *itdb;
+	int rc;
+
+	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+		return 0;
+	if (current->thread.per_flags & PER_FLAG_NO_TE)
+		return 0;
+	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+	if (rc)
+		return rc;
+	memset(itdb, 0, sizeof(*itdb));
+
+	return 0;
+}
+
+#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_pgm_info pgm_info;
+	psw_t psw;
+	int rc;
+
 	vcpu->stat.exit_program_interruption++;

-	/* Restore ITDB to Program-Interruption TDB in guest memory */
-	if (IS_TE_ENABLED(vcpu) &&
-	    !(current->thread.per_flags & PER_FLAG_NO_TE) &&
-	    IS_ITDB_VALID(vcpu)) {
-		copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba,
-			      sizeof(struct kvm_s390_itdb));
-		memset((void *) vcpu->arch.sie_block->itdba, 0,
-		       sizeof(struct kvm_s390_itdb));
+	if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+		kvm_s390_handle_per_event(vcpu);
+		/* the interrupt might have been filtered out completely */
+		if (vcpu->arch.sie_block->iprcc == 0)
+			return 0;
 	}

 	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
-	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+	if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
+		rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
+		if (rc)
+			return rc;
+		/* Avoid endless loops of specification exceptions */
+		if (!is_valid_psw(&psw))
+			return -EOPNOTSUPP;
+	}
+	rc = handle_itdb(vcpu);
+	if (rc)
+		return rc;
+
+	__extract_prog_irq(vcpu, &pgm_info);
+	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
 }

 static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
@@ -142,17 +239,110 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
 	return rc2;
 }

+/**
+ * handle_external_interrupt - used for external interruption interceptions
+ *
+ * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
+ * the new PSW does not have external interrupts disabled. In the first case,
+ * we've got to deliver the interrupt manually, and in the second case, we
+ * drop to userspace to handle the situation there.
+ */
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+{
+	u16 eic = vcpu->arch.sie_block->eic;
+	struct kvm_s390_interrupt irq;
+	psw_t newpsw;
+	int rc;
+
+	vcpu->stat.exit_external_interrupt++;
+
+	rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+	if (rc)
+		return rc;
+	/* We can not handle clock comparator or timer interrupt with bad PSW */
+	if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+	    (newpsw.mask & PSW_MASK_EXT))
+		return -EOPNOTSUPP;
+
+	switch (eic) {
+	case EXT_IRQ_CLK_COMP:
+		irq.type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case EXT_IRQ_CPU_TIMER:
+		irq.type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case EXT_IRQ_EXTERNAL_CALL:
+		if (kvm_s390_si_ext_call_pending(vcpu))
+			return 0;
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.parm = vcpu->arch.sie_block->extcpuaddr;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+
+/**
+ * Handle MOVE PAGE partial execution interception.
+ *
+ * This interception can only happen for guests with DAT disabled and
+ * addresses that are currently not mapped in the host. Thus we try to
+ * set up the mappings for the corresponding user pages here (or throw
+ * addressing exceptions in case of illegal guest addresses).
+ */
+static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long srcaddr, dstaddr;
+	int reg1, reg2, rc;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	/* Make sure that the source is paged-in */
+	srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]);
+	if (kvm_is_error_gpa(vcpu->kvm, srcaddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+	if (rc != 0)
+		return rc;
+
+	/* Make sure that the destination is paged-in */
+	dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]);
+	if (kvm_is_error_gpa(vcpu->kvm, dstaddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+	if (rc != 0)
+		return rc;
+
+	psw->addr = __rewind_psw(*psw, 4);
+
+	return 0;
+}
+
+static int handle_partial_execution(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->ipa == 0xb254)	/* MVPG */
+		return handle_mvpg_pei(vcpu);
+	if (vcpu->arch.sie_block->ipa >> 8 == 0xae)	/* SIGP */
+		return kvm_s390_handle_sigp_pei(vcpu);
+
+	return -EOPNOTSUPP;
+}
+
 static const intercept_handler_t intercept_funcs[] = {
 	[0x00 >> 2] = handle_noop,
 	[0x04 >> 2] = handle_instruction,
 	[0x08 >> 2] = handle_prog,
 	[0x0C >> 2] = handle_instruction_and_prog,
 	[0x10 >> 2] = handle_noop,
-	[0x14 >> 2] = handle_noop,
+	[0x14 >> 2] = handle_external_interrupt,
 	[0x18 >> 2] = handle_noop,
 	[0x1C >> 2] = kvm_s390_handle_wait,
 	[0x20 >> 2] = handle_validity,
 	[0x28 >> 2] = handle_stop,
+	[0x38 >> 2] = handle_partial_execution,
 };

 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)

+ 299 - 101
arch/s390/kvm/interrupt.c

@@ -27,6 +27,8 @@
 #define IOINT_CSSID_MASK 0x03fc0000
 #define IOINT_AI_MASK 0x04000000

+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+
 static int is_ioint(u64 type)
 {
 	return ((type & 0xfffe0000u) != 0xfffe0000u);
@@ -56,6 +58,17 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 	return 1;
 }

+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	if (psw_extint_disabled(vcpu) ||
+	    !(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		return 0;
+	if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
+		/* No timer interrupts when single stepping */
+		return 0;
+	return 1;
+}
+
 static u64 int_word_to_isc_bits(u32 int_word)
 {
 	u8 isc = (int_word & 0x38000000) >> 27;
@@ -78,6 +91,14 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 		if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
 			return 1;
 		return 0;
+	case KVM_S390_INT_CLOCK_COMP:
+		return ckc_interrupts_enabled(vcpu);
+	case KVM_S390_INT_CPU_TIMER:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
+			return 1;
+		return 0;
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
@@ -127,11 +148,16 @@ static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_ECALL_PEND |
-		CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
-		&vcpu->arch.sie_block->cpuflags);
+	atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+			  &vcpu->arch.sie_block->cpuflags);
 	vcpu->arch.sie_block->lctl = 0x0000;
-	vcpu->arch.sie_block->ictl &= ~ICTL_LPSW;
+	vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+	if (guestdbg_enabled(vcpu)) {
+		vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+					       LCTL_CR10 | LCTL_CR11);
+		vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+	}
 }

 static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -149,6 +175,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_CLOCK_COMP:
+	case KVM_S390_INT_CPU_TIMER:
 		if (psw_extint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
 		else
@@ -174,6 +202,106 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 	}
 }

+static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
+			      struct kvm_s390_pgm_info *pgm_info)
+{
+	const unsigned short table[] = { 2, 4, 4, 6 };
+	int rc = 0;
+
+	switch (pgm_info->code & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+	case PGM_SPACE_SWITCH:
+		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+				  (u8 *)__LC_EXC_ACCESS_ID);
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+				   (u8 *)__LC_OP_ACCESS_ID);
+		break;
+	case PGM_MONITOR:
+		rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
+				  (u64 *)__LC_MON_CLASS_NR);
+		rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+				   (u64 *)__LC_MON_CODE);
+		break;
+	case PGM_DATA:
+		rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+				  (u32 *)__LC_DATA_EXC_CODE);
+		break;
+	case PGM_PROTECTION:
+		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		break;
+	}
+
+	if (pgm_info->code & PGM_PER) {
+		rc |= put_guest_lc(vcpu, pgm_info->per_code,
+				   (u8 *) __LC_PER_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+				   (u8 *)__LC_PER_ATMID);
+		rc |= put_guest_lc(vcpu, pgm_info->per_address,
+				   (u64 *) __LC_PER_ADDRESS);
+		rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+				   (u8 *) __LC_PER_ACCESS_ID);
+	}
+
+	switch (vcpu->arch.sie_block->icptcode) {
+	case ICPT_INST:
+	case ICPT_INSTPROGI:
+	case ICPT_OPEREXC:
+	case ICPT_PARTEXEC:
+	case ICPT_IOINST:
+		/* last instruction only stored for these icptcodes */
+		rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+				   (u16 *) __LC_PGM_ILC);
+		break;
+	case ICPT_PROGI:
+		rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
+				   (u16 *) __LC_PGM_ILC);
+		break;
+	default:
+		rc |= put_guest_lc(vcpu, 0,
+				   (u16 *) __LC_PGM_ILC);
+	}
+
+	rc |= put_guest_lc(vcpu, pgm_info->code,
+			   (u16 *)__LC_PGM_INT_CODE);
+	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+
+	return rc;
+}
+
 static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 				   struct kvm_s390_interrupt_info *inti)
 {
@@ -186,26 +314,46 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_emergency_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->emerg.code, 0);
-		rc  = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, inti->emerg.code,
-				(u16 __user *)__LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, inti->emerg.code,
+				   (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
 		break;
 	case KVM_S390_INT_EXTERNAL_CALL:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
 		vcpu->stat.deliver_external_call++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->extcall.code, 0);
-		rc  = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, inti->extcall.code,
-				(u16 __user *)__LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, inti->extcall.code,
+				   (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
+		break;
+	case KVM_S390_INT_CLOCK_COMP:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+						 inti->ext.ext_params, 0);
+		deliver_ckc_interrupt(vcpu);
+		break;
+	case KVM_S390_INT_CPU_TIMER:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+						 inti->ext.ext_params, 0);
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+				   (u16 *)__LC_EXT_INT_CODE);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				   (u32 *)__LC_EXT_PARAMS);
 		break;
 	case KVM_S390_INT_SERVICE:
 		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
@@ -213,37 +361,39 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_service_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params, 0);
-		rc  = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params,
-				(u32 __user *)__LC_EXT_PARAMS);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				   (u32 *)__LC_EXT_PARAMS);
 		break;
 	case KVM_S390_INT_PFAULT_INIT:
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
 						 inti->ext.ext_params2);
-		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params2,
-				(u64 __user *) __LC_EXT_PARAMS2);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				   (u64 *) __LC_EXT_PARAMS2);
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
 						 inti->ext.ext_params2);
-		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params2,
-				(u64 __user *) __LC_EXT_PARAMS2);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				   (u64 *)__LC_EXT_PARAMS2);
 		break;
 	case KVM_S390_INT_VIRTIO:
 		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
@@ -252,16 +402,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->ext.ext_params,
 						 inti->ext.ext_params2);
-		rc  = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE);
-		rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR);
-		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_EXT_NEW_PSW, sizeof(psw_t));
-		rc |= put_guest(vcpu, inti->ext.ext_params,
-				(u32 __user *)__LC_EXT_PARAMS);
-		rc |= put_guest(vcpu, inti->ext.ext_params2,
-				(u64 __user *)__LC_EXT_PARAMS2);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				   (u32 *)__LC_EXT_PARAMS);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				   (u64 *)__LC_EXT_PARAMS2);
 		break;
 	case KVM_S390_SIGP_STOP:
 		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
@@ -285,13 +436,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_restart_signal++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 0, 0);
-		rc  = copy_to_guest(vcpu,
-				    offsetof(struct _lowcore, restart_old_psw),
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      offsetof(struct _lowcore, restart_psw),
-				      sizeof(psw_t));
-		atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+		rc  = write_guest_lc(vcpu,
+				     offsetof(struct _lowcore, restart_old_psw),
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
 		break;
 	case KVM_S390_PROGRAM_INT:
 		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
@@ -300,13 +450,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_program_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 inti->pgm.code, 0);
-		rc  = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE);
-		rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
-				(u16 __user *)__LC_PGM_ILC);
-		rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_PGM_NEW_PSW, sizeof(psw_t));
+		rc = __deliver_prog_irq(vcpu, &inti->pgm);
 		break;

 	case KVM_S390_MCHK:
@@ -317,11 +461,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 						 inti->mchk.mcic);
 		rc  = kvm_s390_vcpu_store_status(vcpu,
 						 KVM_S390_STORE_STATUS_PREFIXED);
-		rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE);
-		rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
+		rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
+		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
 				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_MCK_NEW_PSW, sizeof(psw_t));
 		break;

 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -334,18 +479,20 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
 						 param0, param1);
-		rc  = put_guest(vcpu, inti->io.subchannel_id,
-				(u16 __user *) __LC_SUBCHANNEL_ID);
-		rc |= put_guest(vcpu, inti->io.subchannel_nr,
-				(u16 __user *) __LC_SUBCHANNEL_NR);
-		rc |= put_guest(vcpu, inti->io.io_int_parm,
-				(u32 __user *) __LC_IO_INT_PARM);
-		rc |= put_guest(vcpu, inti->io.io_int_word,
-				(u32 __user *) __LC_IO_INT_WORD);
-		rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-				      __LC_IO_NEW_PSW, sizeof(psw_t));
+		rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+				   (u16 *)__LC_SUBCHANNEL_ID);
+		rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+				   (u16 *)__LC_SUBCHANNEL_NR);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+				   (u32 *)__LC_IO_INT_PARM);
+		rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+				   (u32 *)__LC_IO_INT_WORD);
+		rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
 		break;
 	}
 	default:
@@ -358,25 +505,35 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 	}
 }

-static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 {
 	int rc;

-	if (psw_extint_disabled(vcpu))
-		return 0;
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
-		return 0;
-	rc  = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-	rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
-			      __LC_EXT_NEW_PSW, sizeof(psw_t));
+	rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw,
+			    sizeof(psw_t));
 	if (rc) {
 		printk("kvm: The guest lowcore is not mapped during interrupt "
 			"delivery, killing userspace\n");
 		do_exit(SIGKILL);
 	}
-	return 1;
+}
+
+/* Check whether SIGP interpretation facility has an external call pending */
+int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
+{
+	atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl;
+
+	if (!psw_extint_disabled(vcpu) &&
+	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul) &&
+	    (atomic_read(sigp_ctrl) & SIGP_CTRL_C) &&
+	    (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+		return 1;
+
+	return 0;
 }

 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
@@ -406,19 +563,23 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 		spin_unlock(&fi->lock);
 	}

-	if ((!rc) && (vcpu->arch.sie_block->ckc <
-		get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
-		if ((!psw_extint_disabled(vcpu)) &&
-			(vcpu->arch.sie_block->gcr[0] & 0x800ul))
-			rc = 1;
-	}
+	if (!rc && kvm_cpu_has_pending_timer(vcpu))
+		rc = 1;
+
+	if (!rc && kvm_s390_si_ext_call_pending(vcpu))
+		rc = 1;
 
 
 	return rc;
 	return rc;
 }
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
+	if (!(vcpu->arch.sie_block->ckc <
+	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+		return 0;
+	if (!ckc_interrupts_enabled(vcpu))
+		return 0;
+	return 1;
 }

 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -441,8 +602,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP; /* disabled wait */
 	}

-	if (psw_extint_disabled(vcpu) ||
-	    (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+	if (!ckc_interrupts_enabled(vcpu)) {
 		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
 		goto no_timer;
 	}
@@ -465,7 +625,8 @@ no_timer:
 	while (list_empty(&vcpu->arch.local_int.list) &&
 		list_empty(&vcpu->arch.local_int.float_int->list) &&
 		(!vcpu->arch.local_int.timer_due) &&
-		!signal_pending(current)) {
+		!signal_pending(current) &&
+		!kvm_s390_si_ext_call_pending(vcpu)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		spin_unlock_bh(&vcpu->arch.local_int.lock);
 		spin_unlock(&vcpu->arch.local_int.float_int->lock);
@@ -522,6 +683,11 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 	}
 	atomic_set(&li->active, 0);
 	spin_unlock_bh(&li->lock);
+
+	/* clear pending external calls set by sigp interpretation facility */
+	atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	atomic_clear_mask(SIGP_CTRL_C,
+			  &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
 }

 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
@@ -554,9 +720,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 		} while (deliver);
 	}

-	if ((vcpu->arch.sie_block->ckc <
-		get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
-		__try_deliver_ckc_interrupt(vcpu);
+	if (kvm_cpu_has_pending_timer(vcpu))
+		deliver_ckc_interrupt(vcpu);

 	if (atomic_read(&fi->active)) {
 		do {
@@ -660,6 +825,31 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	return 0;
 }

+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+			     struct kvm_s390_pgm_info *pgm_info)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
+		   pgm_info->code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+				   pgm_info->code, 0, 1);
+
+	inti->type = KVM_S390_PROGRAM_INT;
+	memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
+	spin_lock_bh(&li->lock);
+	list_add(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	BUG_ON(waitqueue_active(li->wq));
+	spin_unlock_bh(&li->lock);
+	return 0;
+}
+
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid)
 {
@@ -810,6 +1000,12 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	return __inject_vm(kvm, inti);
 }

+void kvm_s390_reinject_io_int(struct kvm *kvm,
+			      struct kvm_s390_interrupt_info *inti)
+{
+	__inject_vm(kvm, inti);
+}
+
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 			 struct kvm_s390_interrupt *s390int)
 {
@@ -839,6 +1035,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_S390_SIGP_STOP:
 	case KVM_S390_RESTART:
+	case KVM_S390_INT_CLOCK_COMP:
+	case KVM_S390_INT_CPU_TIMER:
 		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
 		inti->type = s390int->type;
 		break;
@@ -900,7 +1098,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 	return 0;
 }

-static void clear_floating_interrupts(struct kvm *kvm)
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
 {
 	struct kvm_s390_float_interrupt *fi;
 	struct kvm_s390_interrupt_info	*n, *inti = NULL;
@@ -1246,7 +1444,7 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		break;
 	case KVM_DEV_FLIC_CLEAR_IRQS:
 		r = 0;
-		clear_floating_interrupts(dev->kvm);
+		kvm_s390_clear_float_irqs(dev->kvm);
 		break;
 	case KVM_DEV_FLIC_APF_ENABLE:
 		dev->kvm->arch.gmap->pfault_enabled = 1;

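The interrupt.c hunks above end with flic_set_attr() routing the FLIC's KVM_DEV_FLIC_CLEAR_IRQS attribute to the renamed kvm_s390_clear_float_irqs(). The snippet below is not part of the commit; it is a rough userspace sketch of how that attribute is driven on s390, assuming a vm_fd obtained from KVM_CREATE_VM and error handling stripped down to the bare minimum.

/* Illustrative sketch only (not from this commit): create the FLIC device
 * for a VM and ask it to drop all pending floating interrupts, which lands
 * in kvm_s390_clear_float_irqs() on the kernel side. vm_fd is assumed to
 * come from KVM_CREATE_VM; the KVM_DEV_FLIC_* constants are s390-specific.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int clear_floating_irqs(int vm_fd)
{
	struct kvm_create_device cd;
	struct kvm_device_attr attr;

	memset(&cd, 0, sizeof(cd));
	cd.type = KVM_DEV_TYPE_FLIC;
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;	/* FLIC not available or already created */

	memset(&attr, 0, sizeof(attr));
	attr.group = KVM_DEV_FLIC_CLEAR_IRQS;
	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}

In a real VMM the FLIC is created once at VM setup and the device fd is kept around; the sketch just shows the two ioctls involved.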
arch/s390/kvm/kvm-s390.c  (+447, -107)

@@ -11,6 +11,7 @@
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ *               Jason J. Herne <jjherne@us.ibm.com>
  */

 #include <linux/compiler.h>
@@ -51,6 +52,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
+	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
@@ -66,6 +69,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
@@ -90,7 +94,7 @@ unsigned long *vfacilities;
 static struct gmap_notifier gmap_notifier;

 /* test availability of vfacility */
-static inline int test_vfacility(unsigned long nr)
+int test_vfacility(unsigned long nr)
 {
 	return __test_facility(nr, (void *) vfacilities);
 }
@@ -162,6 +166,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_ENABLE_CAP_VM:
+	case KVM_CAP_VM_ATTRIBUTES:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -180,6 +185,25 @@ int kvm_dev_ioctl_check_extension(long ext)
 	return r;
 }

+static void kvm_s390_sync_dirty_log(struct kvm *kvm,
+					struct kvm_memory_slot *memslot)
+{
+	gfn_t cur_gfn, last_gfn;
+	unsigned long address;
+	struct gmap *gmap = kvm->arch.gmap;
+
+	down_read(&gmap->mm->mmap_sem);
+	/* Loop over all guest pages */
+	last_gfn = memslot->base_gfn + memslot->npages;
+	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
+		address = gfn_to_hva_memslot(memslot, cur_gfn);
+
+		if (gmap_test_and_clear_dirty(address, gmap))
+			mark_page_dirty(kvm, cur_gfn);
+	}
+	up_read(&gmap->mm->mmap_sem);
+}
+
 /* Section: vm related */
 /*
  * Get (and clear) the dirty memory log for a memory slot.
@@ -187,7 +211,36 @@ int kvm_dev_ioctl_check_extension(long ext)
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 			       struct kvm_dirty_log *log)
 {
-	return 0;
+	int r;
+	unsigned long n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty = 0;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	kvm_s390_sync_dirty_log(kvm, memslot);
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* Clear the dirty log */
+	if (is_dirty) {
+		n = kvm_dirty_bitmap_bytes(memslot);
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
 }

 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
@@ -209,11 +262,86 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 	return r;
 }

+static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+	unsigned int idx;
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		ret = -EBUSY;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) == 0) {
+			kvm->arch.use_cmma = 1;
+			ret = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	case KVM_S390_VM_MEM_CLR_CMMA:
+		mutex_lock(&kvm->lock);
+		idx = srcu_read_lock(&kvm->srcu);
+		page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
+		srcu_read_unlock(&kvm->srcu, idx);
+		mutex_unlock(&kvm->lock);
+		ret = 0;
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		ret = kvm_s390_mem_control(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		switch (attr->attr) {
+		case KVM_S390_VM_MEM_ENABLE_CMMA:
+		case KVM_S390_VM_MEM_CLR_CMMA:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
 	struct kvm *kvm = filp->private_data;
 	void __user *argp = (void __user *)arg;
+	struct kvm_device_attr attr;
 	int r;

 	switch (ioctl) {
@@ -246,6 +374,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 		break;
 	}
+	case KVM_SET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_set_attr(kvm, &attr);
+		break;
+	}
+	case KVM_GET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_get_attr(kvm, &attr);
+		break;
+	}
+	case KVM_HAS_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_has_attr(kvm, &attr);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
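For orientation (not part of the diff): the new KVM_SET_DEVICE_ATTR handling on the VM file descriptor is what lets userspace opt a guest in to CMMA. A minimal sketch follows, assuming kvm_fd comes from opening /dev/kvm and vm_fd from KVM_CREATE_VM; per the kvm_s390_mem_control() handler above, the call has to happen before the first VCPU is created, otherwise the kernel returns -EBUSY.

/* Illustrative sketch only (not from this commit): enable CMMA for an s390
 * guest through the new VM attribute interface. KVM_S390_VM_MEM_CTRL and
 * KVM_S390_VM_MEM_ENABLE_CMMA are the s390 constants introduced by this
 * series; KVM_CAP_VM_ATTRIBUTES advertises that the ioctls exist.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_cmma(int kvm_fd, int vm_fd)
{
	struct kvm_device_attr attr;

	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_ATTRIBUTES) <= 0)
		return -1;	/* kernel too old for VM attributes */

	memset(&attr, 0, sizeof(attr));
	attr.group = KVM_S390_VM_MEM_CTRL;
	attr.attr  = KVM_S390_VM_MEM_ENABLE_CMMA;
	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	/* -EBUSY once VCPUs exist */
}

KVM_HAS_DEVICE_ATTR with the same group/attr pair can be used first to probe whether the attribute is supported, mirroring kvm_s390_vm_has_attr() above.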
@@ -292,6 +441,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

 	spin_lock_init(&kvm->arch.float_int.lock);
 	INIT_LIST_HEAD(&kvm->arch.float_int.list);
+	init_waitqueue_head(&kvm->arch.ipte_wq);

 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
@@ -309,6 +459,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.css_support = 0;
 	kvm->arch.use_irqchip = 0;

+	spin_lock_init(&kvm->arch.start_stop_lock);
+
 	return 0;
 out_nogmap:
 	debug_unregister(kvm->arch.dbf);
@@ -322,6 +474,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+	kvm_s390_clear_local_irqs(vcpu);
 	kvm_clear_async_pf_completion_queue(vcpu);
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		clear_bit(63 - vcpu->vcpu_id,
@@ -335,9 +488,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	if (kvm_is_ucontrol(vcpu->kvm))
 		gmap_free(vcpu->arch.gmap);

-	if (vcpu->arch.sie_block->cbrlo)
-		__free_page(__pfn_to_page(
-				vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
+	if (kvm_s390_cmma_enabled(vcpu->kvm))
+		kvm_s390_vcpu_unsetup_cmma(vcpu);
 	free_page((unsigned long)(vcpu->arch.sie_block));

 	kvm_vcpu_uninit(vcpu);
@@ -372,6 +524,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	if (!kvm_is_ucontrol(kvm))
 		gmap_free(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
+	kvm_s390_clear_float_irqs(kvm);
 }

 /* Section: vcpu related */
@@ -442,7 +595,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->pp = 0;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
 	kvm_clear_async_pf_completion_queue(vcpu);
-	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	kvm_s390_vcpu_stop(vcpu);
 	kvm_s390_clear_local_irqs(vcpu);
 }

@@ -451,9 +604,26 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 	return 0;
 }

+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+	free_page(vcpu->arch.sie_block->cbrlo);
+	vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+	if (!vcpu->arch.sie_block->cbrlo)
+		return -ENOMEM;
+
+	vcpu->arch.sie_block->ecb2 |= 0x80;
+	vcpu->arch.sie_block->ecb2 &= ~0x08;
+	return 0;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	struct page *cbrl;
+	int rc = 0;

 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
 						    CPUSTAT_SM |
@@ -464,15 +634,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->ecb |= 0x10;

 	vcpu->arch.sie_block->ecb2  = 8;
-	vcpu->arch.sie_block->eca   = 0xC1002001U;
+	vcpu->arch.sie_block->eca   = 0xD1002000U;
+	if (sclp_has_siif())
+		vcpu->arch.sie_block->eca |= 1;
 	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
-	if (kvm_enabled_cmma()) {
-		cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
-		if (cbrl) {
-			vcpu->arch.sie_block->ecb2 |= 0x80;
-			vcpu->arch.sie_block->ecb2 &= ~0x08;
-			vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
-		}
+	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
+				      ICTL_TPROT;
+
+	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+		rc = kvm_s390_vcpu_setup_cmma(vcpu);
+		if (rc)
+			return rc;
 	}
 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
@@ -480,7 +652,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 	get_cpu_id(&vcpu->arch.cpu_id);
 	vcpu->arch.cpu_id.version = 0xff;
-	return 0;
+	return rc;
 }

 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -584,7 +756,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)

 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		/* match against both prefix pages */
-		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
+		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 			exit_sie_sync(vcpu);
@@ -769,10 +941,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }

+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+			      KVM_GUESTDBG_USE_HW_BP | \
+			      KVM_GUESTDBG_ENABLE)
+
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
-	return -EINVAL; /* not implemented yet */
+	int rc = 0;
+
+	vcpu->guest_debug = 0;
+	kvm_s390_clear_bp_data(vcpu);
+
+	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
+		return -EINVAL;
+
+	if (dbg->control & KVM_GUESTDBG_ENABLE) {
+		vcpu->guest_debug = dbg->control;
+		/* enforce guest PER */
+		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+
+		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+			rc = kvm_s390_import_bp_data(vcpu, dbg);
+	} else {
+		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		vcpu->arch.guestdbg.last_bp = 0;
+	}
+
+	if (rc) {
+		vcpu->guest_debug = 0;
+		kvm_s390_clear_bp_data(vcpu);
+		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+	}
+
+	return rc;
 }

 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
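For orientation (not part of the diff): with kvm_arch_vcpu_ioctl_set_guest_debug() now wired up, userspace can request debug exits on s390. A minimal sketch follows, assuming vcpu_fd and the mmap'ed struct kvm_run already exist; it only uses the generic KVM_GUESTDBG_* flags accepted by VALID_GUESTDBG_FLAGS above and checks for the KVM_EXIT_DEBUG exit added further down.

/* Illustrative sketch only (not from this commit): arm single-stepping on a
 * VCPU and run it until the next debug exit. Hardware-breakpoint setup via
 * the arch-specific part of struct kvm_guest_debug is omitted here.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int single_step(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
		return -1;

	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return -1;

	/* the guestdbg exit surfaces as KVM_EXIT_DEBUG in kvm_run */
	return run->exit_reason == KVM_EXIT_DEBUG ? 0 : -1;
}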
@@ -787,8 +989,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }

+bool kvm_s390_cmma_enabled(struct kvm *kvm)
+{
+	if (!MACHINE_IS_LPAR)
+		return false;
+	/* only enable for z10 and later */
+	if (!MACHINE_HAS_EDAT1)
+		return false;
+	if (!kvm->arch.use_cmma)
+		return false;
+	return true;
+}
+
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+}
+
 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
+retry:
+	s390_vcpu_unblock(vcpu);
 	/*
 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -796,27 +1017,61 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 	 * already finished. We might race against a second unmapper that
 	 * wants to set the blocking bit. Lets just retry the request loop.
 	 */
-	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
 		int rc;
 		rc = gmap_ipte_notify(vcpu->arch.gmap,
-				      vcpu->arch.sie_block->prefix,
+				      kvm_s390_get_prefix(vcpu),
 				      PAGE_SIZE * 2);
 		if (rc)
 			return rc;
-		s390_vcpu_unblock(vcpu);
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+		if (!ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+			atomic_set_mask(CPUSTAT_IBS,
+					&vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
 	}
+
+	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+		if (ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+			atomic_clear_mask(CPUSTAT_IBS,
+					  &vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
+	}
+
 	return 0;
 }
-static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+/**
+ * kvm_arch_fault_in_page - fault-in guest page if necessary
+ * @vcpu: The corresponding virtual cpu
+ * @gpa: Guest physical address
+ * @writable: Whether the page should be writable or not
+ *
+ * Make sure that a guest page has been faulted-in on the host.
+ *
+ * Return: Zero on success, negative error code otherwise.
+ */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
 {
-	long rc;
-	hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
 	struct mm_struct *mm = current->mm;
+	hva_t hva;
+	long rc;
+
+	hva = gmap_fault(gpa, vcpu->arch.gmap);
+	if (IS_ERR_VALUE(hva))
+		return (long)hva;
 	down_read(&mm->mmap_sem);
-	rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+	rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
-	return rc;
+
+	return rc < 0 ? rc : 0;
 }

 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -883,8 +1138,9 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.gmap->pfault_enabled)
 		return 0;

-	hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
-	if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+	hva += current->thread.gmap_addr & ~PAGE_MASK;
+	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
 		return 0;

 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
@@ -917,6 +1173,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	if (rc)
 		return rc;

+	if (guestdbg_enabled(vcpu)) {
+		kvm_s390_backup_guest_per_regs(vcpu);
+		kvm_s390_patch_guest_per_regs(vcpu);
+	}
+
 	vcpu->arch.sie_block->icptcode = 0;
 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
@@ -933,6 +1194,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 		   vcpu->arch.sie_block->icptcode);
 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

+	if (guestdbg_enabled(vcpu))
+		kvm_s390_restore_guest_per_regs(vcpu);
+
 	if (exit_reason >= 0) {
 		rc = 0;
 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
@@ -945,9 +1209,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 	} else if (current->thread.gmap_pfault) {
 		trace_kvm_s390_major_guest_pfault(vcpu);
 		current->thread.gmap_pfault = 0;
-		if (kvm_arch_setup_async_pf(vcpu) ||
-		    (kvm_arch_fault_in_sync(vcpu) >= 0))
+		if (kvm_arch_setup_async_pf(vcpu)) {
 			rc = 0;
+		} else {
+			gpa_t gpa = current->thread.gmap_addr;
+			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
+		}
 	}

 	if (rc == -1) {
@@ -969,16 +1236,6 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 	return rc;
 }

-bool kvm_enabled_cmma(void)
-{
-	if (!MACHINE_IS_LPAR)
-		return false;
-	/* only enable for z10 and later */
-	if (!MACHINE_HAS_EDAT1)
-		return false;
-	return true;
-}
-
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int rc, exit_reason;
@@ -1008,7 +1265,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

 		rc = vcpu_post_run(vcpu, exit_reason);
-	} while (!signal_pending(current) && !rc);
+	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	return rc;
@@ -1019,10 +1276,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int rc;
 	sigset_t sigsaved;

+	if (guestdbg_exit_pending(vcpu)) {
+		kvm_s390_prepare_debug_exit(vcpu);
+		return 0;
+	}
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

-	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	kvm_s390_vcpu_start(vcpu);

 	switch (kvm_run->exit_reason) {
 	case KVM_EXIT_S390_SIEIC:
@@ -1031,6 +1293,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	case KVM_EXIT_S390_RESET:
 	case KVM_EXIT_S390_UCONTROL:
 	case KVM_EXIT_S390_TSCH:
+	case KVM_EXIT_DEBUG:
 		break;
 	default:
 		BUG();
@@ -1056,6 +1319,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		rc = -EINTR;
 	}

+	if (guestdbg_exit_pending(vcpu) && !rc)  {
+		kvm_s390_prepare_debug_exit(vcpu);
+		rc = 0;
+	}
+
 	if (rc == -EOPNOTSUPP) {
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
@@ -1073,7 +1341,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

 	kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
 	kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
-	kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
+	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

 	if (vcpu->sigset_active)
@@ -1083,83 +1351,52 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return rc;
 }

-static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
-		       unsigned long n, int prefix)
-{
-	if (prefix)
-		return copy_to_guest(vcpu, guestdest, from, n);
-	else
-		return copy_to_guest_absolute(vcpu, guestdest, from, n);
-}
-
 /*
  * store status at address
  * we use have two special cases:
  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
  */
-int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
 	unsigned char archmode = 1;
-	int prefix;
+	unsigned int px;
 	u64 clkcomp;
+	int rc;
-	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
-		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+		if (write_guest_abs(vcpu, 163, &archmode, 1))
 			return -EFAULT;
-		addr = SAVE_AREA_BASE;
-		prefix = 0;
-	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
-		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+		gpa = SAVE_AREA_BASE;
+	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+		if (write_guest_real(vcpu, 163, &archmode, 1))
 			return -EFAULT;
-		addr = SAVE_AREA_BASE;
-		prefix = 1;
-	} else
-		prefix = 0;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
-			vcpu->arch.guest_fpregs.fprs, 128, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
-			vcpu->run->s.regs.gprs, 128, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
-			&vcpu->arch.sie_block->gpsw, 16, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
-			&vcpu->arch.sie_block->prefix, 4, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu,
-			addr + offsetof(struct save_area, fp_ctrl_reg),
-			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
-			&vcpu->arch.sie_block->todpr, 4, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
-			&vcpu->arch.sie_block->cputm, 8, prefix))
-		return -EFAULT;
-
+		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+	}
+	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
+			     vcpu->arch.guest_fpregs.fprs, 128);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+			      vcpu->run->s.regs.gprs, 128);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+			      &vcpu->arch.sie_block->gpsw, 16);
+	px = kvm_s390_get_prefix(vcpu);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+			      &px, 4);
+	rc |= write_guest_abs(vcpu,
+			      gpa + offsetof(struct save_area, fp_ctrl_reg),
+			      &vcpu->arch.guest_fpregs.fpc, 4);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+			      &vcpu->arch.sie_block->todpr, 4);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
+			      &vcpu->arch.sie_block->cputm, 8);
 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
-			&clkcomp, 8, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
-			&vcpu->run->s.regs.acrs, 64, prefix))
-		return -EFAULT;
-
-	if (__guestcopy(vcpu,
-			addr + offsetof(struct save_area, ctrl_regs),
-			&vcpu->arch.sie_block->gcr, 128, prefix))
-		return -EFAULT;
-	return 0;
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+			      &clkcomp, 8);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+			      &vcpu->run->s.regs.acrs, 64);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+			      &vcpu->arch.sie_block->gcr, 128);
+	return rc ? -EFAULT : 0;
 }

 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
@@ -1176,6 +1413,109 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	return kvm_s390_store_status_unloaded(vcpu, addr);
 }

+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		__disable_ibs_on_vcpu(vcpu);
+	}
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, started_vcpus = 0;
+
+	if (!is_vcpu_stopped(vcpu))
+		return;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+			started_vcpus++;
+	}
+
+	if (started_vcpus == 0) {
+		/* we're the only active VCPU -> speed it up */
+		__enable_ibs_on_vcpu(vcpu);
+	} else if (started_vcpus == 1) {
+		/*
+		 * As we are starting a second VCPU, we have to disable
+		 * the IBS facility on all VCPUs to remove potentially
+		 * oustanding ENABLE requests.
+		 */
+		__disable_ibs_on_all_vcpus(vcpu->kvm);
+	}
+
+	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	/*
+	 * Another VCPU might have used IBS while we were offline.
+	 * Let's play safe and flush the VCPU at startup.
+	 */
+	vcpu->arch.sie_block->ihcpu  = 0xffff;
+	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+	return;
+}
+
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, started_vcpus = 0;
+	struct kvm_vcpu *started_vcpu = NULL;
+
+	if (is_vcpu_stopped(vcpu))
+		return;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	__disable_ibs_on_vcpu(vcpu);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+			started_vcpus++;
+			started_vcpu = vcpu->kvm->vcpus[i];
+		}
+	}
+
+	if (started_vcpus == 1) {
+		/*
+		 * As we only have one VCPU left, we want to enable the
+		 * IBS facility for that VCPU to speed it up.
+		 */
+		__enable_ibs_on_vcpu(started_vcpu);
+	}
+
+	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+	return;
+}
+
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				     struct kvm_enable_cap *cap)
 {

Some files were not shown because too many files changed in this diff