
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull second round of KVM changes from Paolo Bonzini:
 "Here are the PPC and ARM changes for KVM, which I separated because
  they had small conflicts (respectively within KVM documentation, and
  with 3.16-rc changes).  Since they were all within the subsystem, I
  took care of them.

  Stephen Rothwell reported some snags in PPC builds, but they are all
  fixed now; the latest linux-next report was clean.

  New features for ARM include:
   - KVM VGIC v2 emulation on GICv3 hardware
   - Big-Endian support for arm/arm64 (guest and host)
   - Debug Architecture support for arm64 (arm32 is on Christoffer's todo list)

  And for PPC:
   - Book3S: Good number of LE host fixes, enable HV on LE
   - Book3S HV: Add in-guest debug support

  This release drops support for KVM on the PPC440.  As a result, the
  PPC merge removes more lines than it adds.  :)

  I also included an x86 change, since Davidlohr tied it to an
  independent bug report and the reporter quickly provided a Tested-by;
  there was no reason to wait for -rc2"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (122 commits)
  KVM: Move more code under CONFIG_HAVE_KVM_IRQFD
  KVM: nVMX: fix "acknowledge interrupt on exit" when APICv is in use
  KVM: nVMX: Fix nested vmexit ack intr before load vmcs01
  KVM: PPC: Enable IRQFD support for the XICS interrupt controller
  KVM: Give IRQFD its own separate enabling Kconfig option
  KVM: Move irq notifier implementation into eventfd.c
  KVM: Move all accesses to kvm::irq_routing into irqchip.c
  KVM: irqchip: Provide and use accessors for irq routing table
  KVM: Don't keep reference to irq routing table in irqfd struct
  KVM: PPC: drop duplicate tracepoint
  arm64: KVM: fix 64bit CP15 VM access for 32bit guests
  KVM: arm64: GICv3: mandate page-aligned GICV region
  arm64: KVM: GICv3: move system register access to msr_s/mrs_s
  KVM: PPC: PR: Handle FSCR feature deselects
  KVM: PPC: HV: Remove generic instruction emulation
  KVM: PPC: BOOKEHV: rename e500hv_spr to bookehv_spr
  KVM: PPC: Remove DCR handling
  KVM: PPC: Expose helper functions for data/inst faults
  KVM: PPC: Separate loadstore emulation from priv emulation
  KVM: PPC: Handle magic page in kvmppc_ld/st
  ...
Linus Torvalds, 11 years ago (parent commit 66bb0aa077)
100 changed files with 3915 additions and 2450 deletions
  1. Documentation/arm64/booting.txt (+8 -0)
  2. Documentation/devicetree/bindings/arm/gic-v3.txt (+79 -0)
  3. Documentation/powerpc/00-INDEX (+0 -2)
  4. Documentation/powerpc/kvm_440.txt (+0 -41)
  5. Documentation/virtual/kvm/api.txt (+52 -8)
  6. arch/arm/include/asm/kvm_asm.h (+18 -0)
  7. arch/arm/include/asm/kvm_emulate.h (+18 -4)
  8. arch/arm/include/asm/kvm_host.h (+5 -3)
  9. arch/arm/include/asm/kvm_mmu.h (+12 -0)
  10. arch/arm/kernel/asm-offsets.c (+7 -7)
  11. arch/arm/kernel/hyp-stub.S (+1 -3)
  12. arch/arm/kvm/Kconfig (+1 -1)
  13. arch/arm/kvm/Makefile (+1 -0)
  14. arch/arm/kvm/arm.c (+1 -38)
  15. arch/arm/kvm/coproc.c (+79 -9)
  16. arch/arm/kvm/guest.c (+0 -10)
  17. arch/arm/kvm/init.S (+2 -2)
  18. arch/arm/kvm/interrupts.S (+7 -2)
  19. arch/arm/kvm/interrupts_head.S (+31 -17)
  20. arch/arm/kvm/mmu.c (+138 -76)
  21. arch/arm64/include/asm/debug-monitors.h (+14 -5)
  22. arch/arm64/include/asm/kvm_arm.h (+3 -2)
  23. arch/arm64/include/asm/kvm_asm.h (+43 -10)
  24. arch/arm64/include/asm/kvm_coproc.h (+2 -1)
  25. arch/arm64/include/asm/kvm_emulate.h (+22 -0)
  26. arch/arm64/include/asm/kvm_host.h (+46 -2)
  27. arch/arm64/include/asm/kvm_mmu.h (+15 -0)
  28. arch/arm64/include/asm/virt.h (+4 -0)
  29. arch/arm64/kernel/asm-offsets.c (+19 -7)
  30. arch/arm64/kernel/debug-monitors.c (+0 -9)
  31. arch/arm64/kvm/Makefile (+4 -0)
  32. arch/arm64/kvm/guest.c (+67 -1)
  33. arch/arm64/kvm/handle_exit.c (+2 -2)
  34. arch/arm64/kvm/hyp.S (+494 -106)
  35. arch/arm64/kvm/sys_regs.c (+473 -73)
  36. arch/arm64/kvm/vgic-v2-switch.S (+133 -0)
  37. arch/arm64/kvm/vgic-v3-switch.S (+267 -0)
  38. arch/ia64/kvm/Kconfig (+1 -0)
  39. arch/ia64/kvm/kvm-ia64.c (+1 -1)
  40. arch/mips/kvm/mips.c (+1 -1)
  41. arch/powerpc/Kconfig.debug (+1 -3)
  42. arch/powerpc/configs/ppc44x_defconfig (+0 -1)
  43. arch/powerpc/include/asm/asm-compat.h (+4 -0)
  44. arch/powerpc/include/asm/cache.h (+7 -0)
  45. arch/powerpc/include/asm/hvcall.h (+6 -0)
  46. arch/powerpc/include/asm/kvm_44x.h (+0 -67)
  47. arch/powerpc/include/asm/kvm_asm.h (+1 -1)
  48. arch/powerpc/include/asm/kvm_book3s.h (+19 -32)
  49. arch/powerpc/include/asm/kvm_book3s_64.h (+18 -11)
  50. arch/powerpc/include/asm/kvm_booke.h (+10 -5)
  51. arch/powerpc/include/asm/kvm_host.h (+16 -12)
  52. arch/powerpc/include/asm/kvm_ppc.h (+106 -10)
  53. arch/powerpc/include/asm/mmu-book3e.h (+7 -1)
  54. arch/powerpc/include/asm/ppc-opcode.h (+17 -0)
  55. arch/powerpc/include/asm/reg.h (+10 -3)
  56. arch/powerpc/include/asm/time.h (+9 -0)
  57. arch/powerpc/include/uapi/asm/kvm.h (+2 -0)
  58. arch/powerpc/kernel/asm-offsets.c (+2 -0)
  59. arch/powerpc/kvm/44x.c (+0 -237)
  60. arch/powerpc/kvm/44x_emulate.c (+0 -194)
  61. arch/powerpc/kvm/44x_tlb.c (+0 -528)
  62. arch/powerpc/kvm/44x_tlb.h (+0 -86)
  63. arch/powerpc/kvm/Kconfig (+4 -16)
  64. arch/powerpc/kvm/Makefile (+4 -14)
  65. arch/powerpc/kvm/book3s.c (+75 -81)
  66. arch/powerpc/kvm/book3s_32_mmu.c (+1 -1)
  67. arch/powerpc/kvm/book3s_32_mmu_host.c (+3 -4)
  68. arch/powerpc/kvm/book3s_64_mmu_host.c (+3 -2)
  69. arch/powerpc/kvm/book3s_64_mmu_hv.c (+72 -73)
  70. arch/powerpc/kvm/book3s_emulate.c (+17 -11)
  71. arch/powerpc/kvm/book3s_hv.c (+228 -43)
  72. arch/powerpc/kvm/book3s_hv_builtin.c (+13 -0)
  73. arch/powerpc/kvm/book3s_hv_ras.c (+3 -3)
  74. arch/powerpc/kvm/book3s_hv_rm_mmu.c (+83 -63)
  75. arch/powerpc/kvm/book3s_hv_rm_xics.c (+5 -0)
  76. arch/powerpc/kvm/book3s_hv_rmhandlers.S (+46 -24)
  77. arch/powerpc/kvm/book3s_paired_singles.c (+24 -14)
  78. arch/powerpc/kvm/book3s_pr.c (+161 -62)
  79. arch/powerpc/kvm/book3s_pr_papr.c (+80 -12)
  80. arch/powerpc/kvm/book3s_xics.c (+46 -9)
  81. arch/powerpc/kvm/book3s_xics.h (+2 -0)
  82. arch/powerpc/kvm/booke.c (+136 -89)
  83. arch/powerpc/kvm/booke.h (+0 -7)
  84. arch/powerpc/kvm/booke_emulate.c (+4 -4)
  85. arch/powerpc/kvm/booke_interrupts.S (+0 -5)
  86. arch/powerpc/kvm/bookehv_interrupts.S (+10 -50)
  87. arch/powerpc/kvm/e500_emulate.c (+12 -0)
  88. arch/powerpc/kvm/e500_mmu_host.c (+102 -0)
  89. arch/powerpc/kvm/e500mc.c (+23 -5)
  90. arch/powerpc/kvm/emulate.c (+11 -195)
  91. arch/powerpc/kvm/emulate_loadstore.c (+272 -0)
  92. arch/powerpc/kvm/mpic.c (+1 -3)
  93. arch/powerpc/kvm/powerpc.c (+163 -18)
  94. arch/powerpc/kvm/timing.c (+0 -1)
  95. arch/powerpc/kvm/timing.h (+0 -3)
  96. arch/s390/kvm/Kconfig (+1 -0)
  97. arch/s390/kvm/interrupt.c (+1 -2)
  98. arch/s390/kvm/kvm-s390.c (+1 -1)
  99. arch/x86/kvm/Kconfig (+1 -0)
  100. arch/x86/kvm/irq.c (+1 -1)

+ 8 - 0
Documentation/arm64/booting.txt

@@ -168,6 +168,14 @@ Before jumping into the kernel, the following conditions must be met:
   the kernel image will be entered must be initialised by software at a
   higher exception level to prevent execution in an UNKNOWN state.
 
+  For systems with a GICv3 interrupt controller:
+  - If EL3 is present:
+    ICC_SRE_EL3.Enable (bit 3) must be initialised to 0b1.
+    ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
+  - If the kernel is entered at EL1:
+    ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1.
+    ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.

+ 79 - 0
Documentation/devicetree/bindings/arm/gic-v3.txt

@@ -0,0 +1,79 @@
+* ARM Generic Interrupt Controller, version 3
+
+AArch64 SMP cores are often associated with a GICv3, providing Private
+Peripheral Interrupts (PPI), Shared Peripheral Interrupts (SPI),
+Software Generated Interrupts (SGI), and Locality-specific Peripheral
+Interrupts (LPI).
+
+Main node required properties:
+
+- compatible : should at least contain "arm,gic-v3".
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. Must be a single cell with a value of at least 3.
+
+  The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
+  interrupts. Other values are reserved for future use.
+
+  The 2nd cell contains the interrupt number for the interrupt type.
+  SPI interrupts are in the range [0-987]. PPI interrupts are in the
+  range [0-15].
+
+  The 3rd cell is the flags, encoded as follows:
+	bits[3:0] trigger type and level flags.
+		1 = edge triggered
+		4 = level triggered
+
+  Cells 4 and beyond are reserved for future use. When the 1st cell
+  has a value of 0 or 1, cells 4 and beyond act as padding, and may be
+  ignored. It is recommended that padding cells have a value of 0.
+
+- reg : Specifies the base physical address(es) and size of the GIC
+  registers, in the following order:
+  - GIC Distributor interface (GICD)
+  - GIC Redistributors (GICR), one range per redistributor region
+  - GIC CPU interface (GICC)
+  - GIC Hypervisor interface (GICH)
+  - GIC Virtual CPU interface (GICV)
+
+  GICC, GICH and GICV are optional.
+
+- interrupts : Interrupt source of the VGIC maintenance interrupt.
+
+Optional properties:
+
+- redistributor-stride : If using padding pages, specifies the stride
+  of consecutive redistributors. Must be a multiple of 64kB.
+
+- #redistributor-regions: The number of independent contiguous regions
+  occupied by the redistributors. Required if more than one such
+  region is present.
+
+Examples:
+
+	gic: interrupt-controller@2cf00000 {
+		compatible = "arm,gic-v3";
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		reg = <0x0 0x2f000000 0 0x10000>,	// GICD
+		      <0x0 0x2f100000 0 0x200000>,	// GICR
+		      <0x0 0x2c000000 0 0x2000>,	// GICC
+		      <0x0 0x2c010000 0 0x2000>,	// GICH
+		      <0x0 0x2c020000 0 0x2000>;	// GICV
+		interrupts = <1 9 4>;
+	};
+
+	gic: interrupt-controller@2c010000 {
+		compatible = "arm,gic-v3";
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		redistributor-stride = <0x0 0x40000>;	// 256kB stride
+		#redistributor-regions = <2>;
+		reg = <0x0 0x2c010000 0 0x10000>,	// GICD
+		      <0x0 0x2d000000 0 0x800000>,	// GICR 1: CPUs 0-31
+		      <0x0 0x2e000000 0 0x800000>,	// GICR 2: CPUs 32-63
+		      <0x0 0x2c040000 0 0x2000>,	// GICC
+		      <0x0 0x2c060000 0 0x2000>,	// GICH
+		      <0x0 0x2c080000 0 0x2000>;	// GICV
+		interrupts = <1 9 4>;
+	};

+ 0 - 2
Documentation/powerpc/00-INDEX

@@ -17,8 +17,6 @@ firmware-assisted-dump.txt
 	- Documentation on the firmware assisted dump mechanism "fadump".
 hvcs.txt
 	- IBM "Hypervisor Virtual Console Server" Installation Guide
-kvm_440.txt
-	- Various notes on the implementation of KVM for PowerPC 440.
 mpc52xx.txt
 	- Linux 2.6.x on MPC52xx family
 pmu-ebb.txt

+ 0 - 41
Documentation/powerpc/kvm_440.txt

@@ -1,41 +0,0 @@
-Hollis Blanchard <hollisb@us.ibm.com>
-15 Apr 2008
-
-Various notes on the implementation of KVM for PowerPC 440:
-
-To enforce isolation, host userspace, guest kernel, and guest userspace all
-run at user privilege level. Only the host kernel runs in supervisor mode.
-Executing privileged instructions in the guest traps into KVM (in the host
-kernel), where we decode and emulate them. Through this technique, unmodified
-440 Linux kernels can be run (slowly) as guests. Future performance work will
-focus on reducing the overhead and frequency of these traps.
-
-The usual code flow is started from userspace invoking an "run" ioctl, which
-causes KVM to switch into guest context. We use IVPR to hijack the host
-interrupt vectors while running the guest, which allows us to direct all
-interrupts to kvmppc_handle_interrupt(). At this point, we could either
-- handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or
-- let the host interrupt handler run (e.g. when the decrementer fires), or
-- return to host userspace (e.g. when the guest performs device MMIO)
-
-Address spaces: We take advantage of the fact that Linux doesn't use the AS=1
-address space (in host or guest), which gives us virtual address space to use
-for guest mappings. While the guest is running, the host kernel remains mapped
-in AS=0, but the guest can only use AS=1 mappings.
-
-TLB entries: The TLB entries covering the host linear mapping remain
-present while running the guest. This reduces the overhead of lightweight
-exits, which are handled by KVM running in the host kernel. We keep three
-copies of the TLB:
- - guest TLB: contents of the TLB as the guest sees it
- - shadow TLB: the TLB that is actually in hardware while guest is running
- - host TLB: to restore TLB state when context switching guest -> host
-When a TLB miss occurs because a mapping was not present in the shadow TLB,
-but was present in the guest TLB, KVM handles the fault without invoking the
-guest. Large guest pages are backed by multiple 4KB shadow pages through this
-mechanism.
-
-IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network
-and block IO, so those drivers must be enabled in the guest. It's possible
-that some qemu device emulation (e.g. e1000 or rtl8139) may also work with
-little effort.

+ 52 - 8
Documentation/virtual/kvm/api.txt

@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
 
 4.4 KVM_CHECK_EXTENSION
 
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
 Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
 Parameters: extension identifier (KVM_CAP_*)
 Returns: 0 if unsupported; 1 (or some other positive integer) if supported
 
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
 Generally 0 means no and 1 means yes, but some extensions may report
 additional information in the integer return value.
 
+Based on their initialization, different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd).
 
 4.5 KVM_GET_VCPU_MMAP_SIZE
 
@@ -1892,7 +1895,8 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_PID               | 64
   PPC   | KVM_REG_PPC_ACOP              | 64
   PPC   | KVM_REG_PPC_VRSAVE            | 32
-  PPC   | KVM_REG_PPC_LPCR              | 64
+  PPC   | KVM_REG_PPC_LPCR              | 32
+  PPC   | KVM_REG_PPC_LPCR_64           | 64
   PPC   | KVM_REG_PPC_PPR               | 64
   PPC   | KVM_REG_PPC_ARCH_COMPAT       | 32
   PPC   | KVM_REG_PPC_DABRX             | 32
@@ -2677,8 +2681,8 @@ The 'data' member contains, in its first 'len' bytes, the value as it would
 appear if the VCPU performed a load or store of the appropriate width directly
 to the byte array.
 
-NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR,
-      KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
+      KVM_EXIT_EPR the corresponding
 operations are complete (and guest state is consistent) only after userspace
 has re-entered the kernel with KVM_RUN.  The kernel side will first finish
 incomplete operations and then check for pending signals.  Userspace
@@ -2749,7 +2753,7 @@ Principles of Operation Book in the Chapter for Dynamic Address Translation
 			__u8  is_write;
 		} dcr;
 
-powerpc specific.
+Deprecated - was used for 440 KVM.
 
 		/* KVM_EXIT_OSI */
 		struct {
@@ -2931,8 +2935,8 @@ The fields in each entry are defined as follows:
          this function/index combination
 
 
-6. Capabilities that can be enabled
------------------------------------
+6. Capabilities that can be enabled on vCPUs
+--------------------------------------------
 
 There are certain capabilities that change the behavior of the virtual CPU or
 the virtual machine when enabled. To enable them, please see section 4.37.
@@ -3091,3 +3095,43 @@ Parameters: none
 
 This capability enables the in-kernel irqchip for s390. Please refer to
 "4.24 KVM_CREATE_IRQCHIP" for details.
+
+7. Capabilities that can be enabled on VMs
+------------------------------------------
+
+There are certain capabilities that change the behavior of the virtual
+machine when enabled. To enable them, please see section 4.37. Below
+you can find a list of capabilities and what their effect on the VM
+is when enabling them.
+
+The following information is provided along with the description:
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Parameters: what parameters are accepted by the capability.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+
+7.1 KVM_CAP_PPC_ENABLE_HCALL
+
+Architectures: ppc
+Parameters: args[0] is the sPAPR hcall number
+	    args[1] is 0 to disable, 1 to enable in-kernel handling
+
+This capability controls whether individual sPAPR hypercalls (hcalls)
+get handled by the kernel or not.  Enabling or disabling in-kernel
+handling of an hcall is effective across the VM.  On creation, an
+initial set of hcalls is enabled for in-kernel handling; it consists
+of those hcalls for which in-kernel handlers were implemented before
+this capability itself was added.  If disabled, the kernel will not
+attempt to handle the hcall, but will always exit to userspace to
+handle it.  Note that it may not make sense to enable some and
+disable others of a group of related hcalls, but KVM does not prevent
+userspace from doing that.
+
+If the hcall number specified is not one that has an in-kernel
+implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL
+error.
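
For context, here is a minimal userspace sketch of the two additions above:
querying a capability on the VM file descriptor (new with
KVM_CAP_CHECK_EXTENSION_VM) and then enabling in-kernel handling of one sPAPR
hcall via KVM_ENABLE_CAP (section 4.37). This is an illustration, not part of
the patch; error handling is minimal and the hcall number is a placeholder.

    /*
     * Sketch only: assumes a PPC host whose kernel carries these patches.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
    	int kvm = open("/dev/kvm", O_RDWR);
    	int vm = ioctl(kvm, KVM_CREATE_VM, 0);

    	/* Prefer the vm ioctl when the kernel advertises it. */
    	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_CHECK_EXTENSION_VM) > 0 &&
    	    ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_ENABLE_HCALL) > 0) {
    		struct kvm_enable_cap cap;

    		memset(&cap, 0, sizeof(cap));
    		cap.cap = KVM_CAP_PPC_ENABLE_HCALL;
    		cap.args[0] = 0x280;	/* placeholder hcall number */
    		cap.args[1] = 1;	/* 1 = enable in-kernel handling */
    		if (ioctl(vm, KVM_ENABLE_CAP, &cap) < 0)
    			perror("KVM_ENABLE_CAP");
    	}
    	return 0;
    }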

+ 18 - 0
arch/arm/include/asm/kvm_asm.h

@@ -61,6 +61,24 @@
 #define ARM_EXCEPTION_FIQ	  6
 #define ARM_EXCEPTION_HVC	  7
 
+/*
+ * The rr_lo_hi macro swaps a pair of registers depending on
+ * current endianness. It is used in conjunction with ldrd and strd
+ * instructions that load/store a 64-bit value from/to memory to/from
+ * a pair of registers which are used with the mrrc and mcrr instructions.
+ * If used with the ldrd/strd instructions, the a1 parameter is the first
+ * source/destination register and the a2 parameter is the second
+ * source/destination register. Note that the ldrd/strd instructions
+ * already swap the bytes within the words correctly according to the
+ * endianness setting, but the order of the registers need to be effectively
+ * swapped when used with the mrrc/mcrr instructions.
+ */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define rr_lo_hi(a1, a2) a2, a1
+#else
+#define rr_lo_hi(a1, a2) a1, a2
+#endif
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
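
The comment above is easier to follow with concrete bytes. The standalone C
sketch below (an illustration, not from the patch) mimics what ldrd does,
filling two consecutive 32-bit "registers" from memory, and shows that on a
big-endian CPU the first register receives the high word, which is why
mcrr/mrrc (whose operands are architecturally low-then-high) need the swapped
order that rr_lo_hi() provides.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	uint64_t vttbr = 0x1122334455667788ULL;
    	uint32_t r2, r3;	/* the register pair ldrd would fill */

    	memcpy(&r2, (char *)&vttbr, 4);		/* first reg: lower address */
    	memcpy(&r3, (char *)&vttbr + 4, 4);	/* second reg: higher address */

    	/*
    	 * Little-endian: r2 = 0x55667788 (low word),  r3 = 0x11223344.
    	 * Big-endian:    r2 = 0x11223344 (high word), r3 = 0x55667788.
    	 * mcrr always takes (low, high), hence rr_lo_hi(r2, r3).
    	 */
    	printf("r2=%08x r3=%08x\n", r2, r3);
    	return 0;
    }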

+ 18 - 4
arch/arm/include/asm/kvm_emulate.h

@@ -185,9 +185,16 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
 		default:
 			return be32_to_cpu(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		default:
+			return le32_to_cpu(data);
+		}
 	}
-
-	return data;		/* Leave LE untouched */
 }
 
 static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
@@ -203,9 +210,16 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 		default:
 			return cpu_to_be32(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		default:
+			return cpu_to_le32(data);
+		}
 	}
-
-	return data;		/* Leave LE untouched */
 }
 
 #endif /* __ARM_KVM_EMULATE_H__ */
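
The effect of the new else branches is easiest to see with concrete values.
The userspace sketch below is an illustration only; glibc's be16toh/le16toh
from <endian.h> stand in for the kernel's be16_to_cpu/le16_to_cpu. It applies
the same conversion to a 16-bit MMIO value for a big-endian and a
little-endian guest:

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint32_t data = 0x1234;	/* value as found in the guest register */

    	/* Big-endian guest: interpret the bytes as BE. */
    	uint16_t from_be_guest = be16toh((uint16_t)(data & 0xffff));
    	/* Little-endian guest: interpret them as LE (the new else branch). */
    	uint16_t from_le_guest = le16toh((uint16_t)(data & 0xffff));

    	/* On a little-endian host this prints 0x3412 and 0x1234. */
    	printf("BE guest: 0x%04x, LE guest: 0x%04x\n",
    	       from_be_guest, from_le_guest);
    	return 0;
    }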

+ 5 - 3
arch/arm/include/asm/kvm_host.h

@@ -225,10 +225,12 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 	return 0;
 }
 
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	BUG_ON(vgic->type != VGIC_V2);
+}
+
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
-int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
-
 #endif /* __ARM_KVM_HOST_H__ */

+ 12 - 0
arch/arm/include/asm/kvm_mmu.h

@@ -127,6 +127,18 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
 })
 
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
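
kvm_page_empty() relies on a refcounting convention used by the stage-2 code:
an extra reference appears to be taken on a table page for each entry
installed in it and dropped for each entry cleared, so a page_count() of
exactly 1 means only the allocation reference remains and the table is empty.
A runnable userspace model of that convention (hypothetical names, not kernel
code):

    #include <assert.h>

    struct table_page {
    	int refcount;			/* stands in for page_count() */
    	unsigned long entry[512];
    };

    static void install_entry(struct table_page *p, int idx, unsigned long val)
    {
    	p->entry[idx] = val;
    	p->refcount++;			/* get_page() per live entry */
    }

    static void clear_entry(struct table_page *p, int idx)
    {
    	p->entry[idx] = 0;
    	p->refcount--;			/* put_page() per cleared entry */
    }

    int main(void)
    {
    	struct table_page pt = { .refcount = 1 };	/* allocation ref */

    	install_entry(&pt, 0, 0xdeadbeef);
    	clear_entry(&pt, 0);
    	assert(pt.refcount == 1);	/* kvm_page_empty() would say empty */
    	return 0;
    }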

+ 7 - 7
arch/arm/kernel/asm-offsets.c

@@ -182,13 +182,13 @@ int main(void)
   DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
 #ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
 #ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));

+ 1 - 3
arch/arm/kernel/hyp-stub.S

@@ -134,9 +134,7 @@ ENTRY(__hyp_stub_install_secondary)
 	mcr	p15, 4, r7, c1, c1, 3	@ HSTR
 
 THUMB(	orr	r7, #(1 << 30)	)	@ HSCTLR.TE
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	orr	r7, #(1 << 9)		@ HSCTLR.EE
-#endif
+ARM_BE8(orr	r7, r7, #(1 << 25))     @ HSCTLR.EE
 	mcr	p15, 4, r7, c1, c0, 0	@ HSCTLR
 
 	mrc	p15, 4, r7, c1, c1, 1	@ HDCR

+ 1 - 1
arch/arm/kvm/Kconfig

@@ -23,7 +23,7 @@ config KVM
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
-	depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN
+	depends on ARM_VIRT_EXT && ARM_LPAE
 	---help---
 	  Support hosting virtualized guest machines. You will also
 	  need to select one or more of the processor modules below.

+ 1 - 0
arch/arm/kvm/Makefile

@@ -21,4 +21,5 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o

+ 1 - 38
arch/arm/kvm/arm.c

@@ -155,16 +155,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
 
 /**
  * kvm_arch_destroy_vm - destroy the VM data structure
@@ -184,7 +174,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	}
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 	switch (ext) {
@@ -225,33 +215,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				   struct kvm_memory_slot *memslot,
-				   struct kvm_userspace_memory_region *mem,
-				   enum kvm_mr_change change)
-{
-	return 0;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem,
-				   const struct kvm_memory_slot *old,
-				   enum kvm_mr_change change)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-}
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {

+ 79 - 9
arch/arm/kvm/coproc.c

@@ -44,6 +44,31 @@ static u32 cache_levels;
 /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
 #define CSSELR_MAX 12
 
+/*
+ * kvm_vcpu_arch.cp15 holds the cp15 registers as an array of u32, but
+ * some cp15 registers can be viewed either as a pair of two u32
+ * registers or as one u64 register. The current u64 encoding stores the
+ * least significant u32 word first, followed by the most significant.
+ */
+static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
+				       const struct coproc_reg *r,
+				       u64 val)
+{
+	vcpu->arch.cp15[r->reg] = val & 0xffffffff;
+	vcpu->arch.cp15[r->reg + 1] = val >> 32;
+}
+
+static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
+				      const struct coproc_reg *r)
+{
+	u64 val;
+
+	val = vcpu->arch.cp15[r->reg + 1];
+	val = val << 32;
+	val = val | vcpu->arch.cp15[r->reg];
+	return val;
+}
+
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	kvm_inject_undefined(vcpu);
@@ -682,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = {
 	{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
 };
 
+/*
+ * Reads a register value from a userspace address into a kernel
+ * variable. The caller must ensure that KVM_REG_SIZE(id) matches
+ * sizeof(*val).
+ */
 static int reg_from_user(void *val, const void __user *uaddr, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
 }
 
+/*
+ * Writes a register value from a kernel variable to a userspace
+ * address. The caller must ensure that KVM_REG_SIZE(id) matches
+ * sizeof(*val).
+ */
 static int reg_to_user(void __user *uaddr, const void *val, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
@@ -702,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 {
 	struct coproc_params params;
 	const struct coproc_reg *r;
+	int ret;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -710,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	return reg_to_user(uaddr, &r->val, id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val = r->val;
+
+		ret = reg_to_user(uaddr, &val, id);
+	} else if (KVM_REG_SIZE(id) == 8) {
+		ret = reg_to_user(uaddr, &r->val, id);
+	}
+	return ret;
 }
 
 static int set_invariant_cp15(u64 id, void __user *uaddr)
@@ -718,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	struct coproc_params params;
 	const struct coproc_reg *r;
 	int err;
-	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+	u64 val;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -726,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	err = reg_from_user(&val, uaddr, id);
+	err = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val32;
+
+		err = reg_from_user(&val32, uaddr, id);
+		if (!err)
+			val = val32;
+	} else if (KVM_REG_SIZE(id) == 8) {
+		err = reg_from_user(&val, uaddr, id);
+	}
 	if (err)
 		return err;
 
@@ -1004,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_get(reg->id, uaddr);
@@ -1015,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return get_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit. */
-	return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		val = vcpu_cp15_reg64_get(vcpu, r);
+		ret = reg_to_user(uaddr, &val, reg->id);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	}
+
+	return ret;
 }
 
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_set(reg->id, uaddr);
@@ -1034,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return set_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit */
-	return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		ret = reg_from_user(&val, uaddr, reg->id);
+		if (!ret)
+			vcpu_cp15_reg64_set(vcpu, r, val);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	}
+
+	return ret;
 }
 
 static unsigned int num_demux_regs(void)
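
The deleted "This Just Works because we are little endian" comments point at
the real bug these hunks fix: copying a 4-byte userspace value into an 8-byte
kernel variable lands in the numerically low word only on little-endian
hosts. A standalone illustration (not kernel code) of the old behaviour:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	uint32_t user_val = 0x12345678;	/* a 32-bit register from userspace */
    	uint64_t kernel_val = 0;

    	/* What the old reg_from_user() amounted to for a 32-bit register. */
    	memcpy(&kernel_val, &user_val, sizeof(user_val));

    	/*
    	 * LE host: 0x0000000012345678 (fine).
    	 * BE host: 0x1234567800000000 (wrong), hence the explicit
    	 * KVM_REG_SIZE() checks added above.
    	 */
    	printf("kernel_val = 0x%016llx\n", (unsigned long long)kernel_val);
    	return 0;
    }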

+ 0 - 10
arch/arm/kvm/guest.c

@@ -124,16 +124,6 @@ static bool is_timer_reg(u64 index)
 	return false;
 }
 
-int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
-	return 0;
-}
-
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
-	return 0;
-}
-
 #else
 
 #define NUM_TIMER_REGS 3

+ 2 - 2
arch/arm/kvm/init.S

@@ -72,7 +72,7 @@ __do_hyp_init:
 	bne	phase2			@ Yes, second stage init
 
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 
 	@ Set the HTCR and VTCR to the same shareability and cacheability
 	@ settings as the non-secure TTBCR and with T0SZ == 0.
@@ -138,7 +138,7 @@ phase2:
 	ret	r0
 
 target:	@ We're now in the trampoline code, switch page tables
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 	isb
 
 	@ Invalidate the old TLBs

+ 7 - 2
arch/arm/kvm/interrupts.S

@@ -52,7 +52,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	dsb	ishst
 	add	r0, r0, #KVM_VTTBR
 	ldrd	r2, r3, [r0]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 	isb
 	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
 	dsb	ish
@@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run)
 	ldr	r1, [vcpu, #VCPU_KVM]
 	add	r1, r1, #KVM_VTTBR
 	ldrd	r2, r3, [r1]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 
 	@ We're all done, just restore the GPRs and go to the guest
 	restore_guest_regs
@@ -199,8 +199,13 @@ after_vfp_restore:
 
 	restore_host_regs
 	clrex				@ Clear exclusive monitor
+#ifndef CONFIG_CPU_ENDIAN_BE8
 	mov	r0, r1			@ Return the return code
 	mov	r1, #0			@ Clear upper bits in return value
+#else
+	@ r1 already has return code
+	mov	r0, #0			@ Clear upper bits in return value
+#endif /* CONFIG_CPU_ENDIAN_BE8 */
 	bx	lr			@ return to IOCTL
 
 /********************************************************************

+ 31 - 17
arch/arm/kvm/interrupts_head.S

@@ -1,4 +1,5 @@
 #include <linux/irqchip/arm-gic.h>
+#include <asm/assembler.h>
 
 #define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP		(VCPU_USR_REG(13))
@@ -420,15 +421,23 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r8, [r2, #GICH_ELRSR0]
 	ldr	r9, [r2, #GICH_ELRSR1]
 	ldr	r10, [r2, #GICH_APR]
-
-	str	r3, [r11, #VGIC_CPU_HCR]
-	str	r4, [r11, #VGIC_CPU_VMCR]
-	str	r5, [r11, #VGIC_CPU_MISR]
-	str	r6, [r11, #VGIC_CPU_EISR]
-	str	r7, [r11, #(VGIC_CPU_EISR + 4)]
-	str	r8, [r11, #VGIC_CPU_ELRSR]
-	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
-	str	r10, [r11, #VGIC_CPU_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r5, r5	)
+ARM_BE8(rev	r6, r6	)
+ARM_BE8(rev	r7, r7	)
+ARM_BE8(rev	r8, r8	)
+ARM_BE8(rev	r9, r9	)
+ARM_BE8(rev	r10, r10	)
+
+	str	r3, [r11, #VGIC_V2_CPU_HCR]
+	str	r4, [r11, #VGIC_V2_CPU_VMCR]
+	str	r5, [r11, #VGIC_V2_CPU_MISR]
+	str	r6, [r11, #VGIC_V2_CPU_EISR]
+	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
+	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	r10, [r11, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
 	mov	r5, #0
@@ -436,9 +445,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Save list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r2], #4
+ARM_BE8(rev	r6, r6	)
 	str	r6, [r3], #4
 	subs	r4, r4, #1
 	bne	1b
@@ -463,9 +473,12 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r11, vcpu, #VCPU_VGIC_CPU
 
 	/* We only restore a minimal set of registers */
-	ldr	r3, [r11, #VGIC_CPU_HCR]
-	ldr	r4, [r11, #VGIC_CPU_VMCR]
-	ldr	r8, [r11, #VGIC_CPU_APR]
+	ldr	r3, [r11, #VGIC_V2_CPU_HCR]
+	ldr	r4, [r11, #VGIC_V2_CPU_VMCR]
+	ldr	r8, [r11, #VGIC_V2_CPU_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r8, r8	)
 
 	str	r3, [r2, #GICH_HCR]
 	str	r4, [r2, #GICH_VMCR]
@@ -473,9 +486,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Restore list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r3], #4
+ARM_BE8(rev	r6, r6  )
 	str	r6, [r2], #4
 	subs	r4, r4, #1
 	bne	1b
@@ -506,7 +520,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
 	isb
 
-	mrrc	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mrrc	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
@@ -546,12 +560,12 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	ldr	r2, [r4, #KVM_TIMER_CNTVOFF]
 	ldr	r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
-	mcrr	p15, 4, r2, r3, c14	@ CNTVOFF
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c14	@ CNTVOFF
 
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	ldrd	r2, r3, [r5]
-	mcrr	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mcrr	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	isb
 
 	ldr	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]

+ 138 - 76
arch/arm/kvm/mmu.c

@@ -90,104 +90,115 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static bool page_empty(void *ptr)
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-	struct page *ptr_page = virt_to_page(ptr);
-	return page_count(ptr_page) == 1;
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+	pgd_clear(pgd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
 }
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	if (pud_huge(*pud)) {
-		pud_clear(pud);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		pmd_t *pmd_table = pmd_offset(pud, 0);
-		pud_clear(pud);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-		pmd_free(NULL, pmd_table);
-	}
+	pmd_t *pmd_table = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
+	pud_clear(pud);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pmd_free(NULL, pmd_table);
 	put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-	if (kvm_pmd_huge(*pmd)) {
-		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		pte_t *pte_table = pte_offset_kernel(pmd, 0);
-		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-		pte_free_kernel(NULL, pte_table);
-	}
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(kvm_pmd_huge(*pmd));
+	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pte_free_kernel(NULL, pte_table);
 	put_page(virt_to_page(pmd));
 }
 
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	if (pte_present(*pte)) {
-		kvm_set_pte(pte, __pte(0));
-		put_page(virt_to_page(pte));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	}
+	phys_addr_t start_addr = addr;
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+			kvm_tlb_flush_vmid_ipa(kvm, addr);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (kvm_pte_table_empty(start_pte))
+		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-			unsigned long long start, u64 size)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long long addr = start, end = start + size;
-	u64 next;
-
-	while (addr < end) {
-		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-		pte = NULL;
-		if (pud_none(*pud)) {
-			addr = kvm_pud_addr_end(addr, end);
-			continue;
-		}
+	phys_addr_t next, start_addr = addr;
+	pmd_t *pmd, *start_pmd;
 
-		if (pud_huge(*pud)) {
-			/*
-			 * If we are dealing with a huge pud, just clear it and
-			 * move on.
-			 */
-			clear_pud_entry(kvm, pud, addr);
-			addr = kvm_pud_addr_end(addr, end);
-			continue;
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				pmd_clear(pmd);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pmd));
+			} else {
+				unmap_ptes(kvm, pmd, addr, next);
+			}
 		}
+	} while (pmd++, addr = next, addr != end);
 
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			addr = kvm_pmd_addr_end(addr, end);
-			continue;
-		}
+	if (kvm_pmd_table_empty(start_pmd))
+		clear_pud_entry(kvm, pud, start_addr);
+}
 
-		if (!kvm_pmd_huge(*pmd)) {
-			pte = pte_offset_kernel(pmd, addr);
-			clear_pte_entry(kvm, pte, addr);
-			next = addr + PAGE_SIZE;
-		}
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+		       phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next, start_addr = addr;
+	pud_t *pud, *start_pud;
 
-		/*
-		 * If the pmd entry is to be cleared, walk back up the ladder
-		 */
-		if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
-			clear_pmd_entry(kvm, pmd, addr);
-			next = kvm_pmd_addr_end(addr, end);
-			if (page_empty(pmd) && !page_empty(pud)) {
-				clear_pud_entry(kvm, pud, addr);
-				next = kvm_pud_addr_end(addr, end);
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				pud_clear(pud);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pud));
+			} else {
+				unmap_pmds(kvm, pud, addr, next);
 			}
 		}
+	} while (pud++, addr = next, addr != end);
 
-		addr = next;
-	}
+	if (kvm_pud_table_empty(start_pud))
+		clear_pgd_entry(kvm, pgd, start_addr);
+}
+
+
+static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
+			phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		unmap_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
 }
 
 static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
@@ -748,6 +759,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
 	pfn_t pfn;
+	pgprot_t mem_type = PAGE_S2;
 
 	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -798,6 +810,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
+	if (kvm_is_mmio_pfn(pfn))
+		mem_type = PAGE_S2_DEVICE;
+
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
@@ -805,7 +820,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
 	if (hugetlb) {
-		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
 		if (writable) {
 			kvm_set_s2pmd_writable(&new_pmd);
@@ -814,13 +829,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
-		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+		pte_t new_pte = pfn_pte(pfn, mem_type);
 		if (writable) {
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
+				     mem_type == PAGE_S2_DEVICE);
 	}
 
 
@@ -1100,3 +1116,49 @@ out:
 	free_hyp_pgds();
 	return err;
 }
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
+{
+	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = old->npages << PAGE_SHIFT;
+	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+		spin_lock(&kvm->mmu_lock);
+		unmap_stage2_range(kvm, gpa, size);
+		spin_unlock(&kvm->mmu_lock);
+	}
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
+{
+	return 0;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+			   struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+}
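
The rewritten unmap path repeats one idiom at each page-table level: compute
how far the current entry reaches with a *_addr_end() helper, handle the
entry if present, then advance with "entry++, addr = next". A compilable
single-level sketch of that shape (names and the 2 MiB level size are
illustrative, not from the patch):

    typedef unsigned long long phys_addr_t;

    #define LEVEL_SIZE	(1ULL << 21)	/* e.g. one PMD entry covers 2 MiB */

    /* Clamp to the next entry boundary or to 'end', like kvm_pmd_addr_end(). */
    static phys_addr_t level_addr_end(phys_addr_t addr, phys_addr_t end)
    {
    	phys_addr_t boundary = (addr + LEVEL_SIZE) & ~(LEVEL_SIZE - 1);

    	return boundary - 1 < end - 1 ? boundary : end;
    }

    /* Stand-in for unmap_ptes()/huge-entry clearing at the next level down. */
    static void descend_or_clear(unsigned long *entry, phys_addr_t addr,
    			     phys_addr_t next)
    {
    	(void)addr;
    	(void)next;
    	*entry = 0;
    }

    static void walk_level(unsigned long *table, phys_addr_t addr,
    		       phys_addr_t end)
    {
    	unsigned long *entry = table;
    	phys_addr_t next;

    	do {
    		next = level_addr_end(addr, end);
    		if (*entry)	/* the !p?d_none() checks above */
    			descend_or_clear(entry, addr, next);
    	} while (entry++, addr = next, addr != end);
    }

    int main(void)
    {
    	unsigned long table[8] = { 1, 0, 1, 1, 0, 0, 1, 0 };

    	walk_level(table, 0, 8 * LEVEL_SIZE);
    	return 0;
    }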

+ 14 - 5
arch/arm64/include/asm/debug-monitors.h

@@ -18,6 +18,15 @@
 
 #ifdef __KERNEL__
 
+/* Low-level stepping controls. */
+#define DBG_MDSCR_SS		(1 << 0)
+#define DBG_SPSR_SS		(1 << 21)
+
+/* MDSCR_EL1 enabling bits */
+#define DBG_MDSCR_KDE		(1 << 13)
+#define DBG_MDSCR_MDE		(1 << 15)
+#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+
 #define	DBG_ESR_EVT(x)		(((x) >> 27) & 0x7)
 
 /* AArch64 */
@@ -73,11 +82,6 @@
 
 #define CACHE_FLUSH_IS_SAFE		1
 
-enum debug_el {
-	DBG_ACTIVE_EL0 = 0,
-	DBG_ACTIVE_EL1,
-};
-
 /* AArch32 */
 #define DBG_ESR_EVT_BKPT	0x4
 #define DBG_ESR_EVT_VECC	0x5
@@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook);
 
 u8 debug_monitors_arch(void);
 
+enum debug_el {
+	DBG_ACTIVE_EL0 = 0,
+	DBG_ACTIVE_EL1,
+};
+
 void enable_debug_monitors(enum debug_el el);
 void disable_debug_monitors(enum debug_el el);
 

+ 3 - 2
arch/arm64/include/asm/kvm_arm.h

@@ -76,9 +76,10 @@
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
-			 HCR_AMO | HCR_IMO | HCR_FMO | \
-			 HCR_SWIO | HCR_TIDCP | HCR_RW)
+			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
+
 
 /* Hyp System Control Register (SCTLR_EL2) bits */
 #define SCTLR_EL2_EE	(1 << 25)

+ 43 - 10
arch/arm64/include/asm/kvm_asm.h

@@ -18,6 +18,8 @@
 #ifndef __ARM_KVM_ASM_H__
 #define __ARM_KVM_ASM_H__
 
+#include <asm/virt.h>
+
 /*
  * 0 is reserved as an invalid value.
  * Order *must* be kept in sync with the hyp switch code.
@@ -43,14 +45,25 @@
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
 #define	PAR_EL1		21	/* Physical Address Register */
+#define MDSCR_EL1	22	/* Monitor Debug System Control Register */
+#define DBGBCR0_EL1	23	/* Debug Breakpoint Control Registers (0-15) */
+#define DBGBCR15_EL1	38
+#define DBGBVR0_EL1	39	/* Debug Breakpoint Value Registers (0-15) */
+#define DBGBVR15_EL1	54
+#define DBGWCR0_EL1	55	/* Debug Watchpoint Control Registers (0-15) */
+#define DBGWCR15_EL1	70
+#define DBGWVR0_EL1	71	/* Debug Watchpoint Value Registers (0-15) */
+#define DBGWVR15_EL1	86
+#define MDCCINT_EL1	87	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	22	/* Domain Access Control Register */
-#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	28
+#define	DACR32_EL2	88	/* Domain Access Control Register */
+#define	IFSR32_EL2	89	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	90	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	91	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	92	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	93	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	94
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -82,11 +95,23 @@
 #define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
 #define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-#define NR_CP15_REGS	(NR_SYS_REGS * 2)
+
+#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
+#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
+
+#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
 
+#define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
+#define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
@@ -96,13 +121,21 @@ extern char __kvm_hyp_init_end[];
 
 extern char __kvm_hyp_vector[];
 
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
+#define	__kvm_hyp_code_start	__hyp_text_start
+#define	__kvm_hyp_code_end	__hyp_text_end
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */

+ 2 - 1
arch/arm64/include/asm/kvm_coproc.h

@@ -39,7 +39,8 @@ void kvm_register_target_sys_reg_table(unsigned int target,
 				       struct kvm_sys_reg_target_table *table);
 
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);

+ 22 - 0
arch/arm64/include/asm/kvm_emulate.h

@@ -213,6 +213,17 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
 		default:
 			return be64_to_cpu(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		case 4:
+			return le32_to_cpu(data & 0xffffffff);
+		default:
+			return le64_to_cpu(data);
+		}
 	}
 
 	return data;		/* Leave LE untouched */
@@ -233,6 +244,17 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 		default:
 			return cpu_to_be64(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		case 4:
+			return cpu_to_le32(data & 0xffffffff);
+		default:
+			return cpu_to_le64(data);
+		}
 	}
 
 	return data;		/* Leave LE untouched */

+ 46 - 2
arch/arm64/include/asm/kvm_host.h

@@ -86,7 +86,7 @@ struct kvm_cpu_context {
 	struct kvm_regs	gp_regs;
 	union {
 		u64 sys_regs[NR_SYS_REGS];
-		u32 cp15[NR_CP15_REGS];
+		u32 copro[NR_COPRO_REGS];
 	};
 };
 
@@ -101,6 +101,9 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
+	/* Debug state */
+	u64 debug_flags;
+
 	/* Pointer to host CPU context */
 	kvm_cpu_context_t *host_cpu_context;
 
@@ -138,7 +141,20 @@ struct kvm_vcpu_arch {
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 #define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
-#define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
+/*
+ * CP14 and CP15 live in the same array, as they are backed by the
+ * same system registers.
+ */
+#define vcpu_cp14(v,r)		((v)->arch.ctxt.copro[(r)])
+#define vcpu_cp15(v,r)		((v)->arch.ctxt.copro[(r)])
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r))
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r) + 1)
+#else
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r) + 1)
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r))
+#endif
 
 struct kvm_vm_stat {
 	u32 remote_tlb_flush;
@@ -200,4 +216,32 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 		     hyp_stack_ptr, vector_ptr);
 }
 
+struct vgic_sr_vectors {
+	void	*save_vgic;
+	void	*restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+	switch(vgic->type)
+	{
+	case VGIC_V2:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v2_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v2_state;
+		break;
+
+#ifdef CONFIG_ARM_GIC_V3
+	case VGIC_V3:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v3_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v3_state;
+		break;
+#endif
+
+	default:
+		BUG();
+	}
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
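
The sys_regs[]/copro[] union above is what makes the vcpu_cp15_64_low/high
macros necessary: which u32 slot holds the low half of a u64 system register
depends on host endianness. A standalone model (values illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define NR_SYS_REGS 94

    union regs {
    	uint64_t sys_regs[NR_SYS_REGS];
    	uint32_t copro[NR_SYS_REGS * 2];
    };

    int main(void)
    {
    	union regs r = { .sys_regs = { 0 } };
    	int reg = 2;	/* arbitrary 64-bit register index */

    	r.sys_regs[reg] = 0x1122334455667788ULL;

    	/*
    	 * LE host: copro[2*reg] = 0x55667788 (low),  copro[2*reg+1] = high;
    	 * BE host: copro[2*reg] = 0x11223344 (high), copro[2*reg+1] = low;
    	 * hence the CONFIG_CPU_BIG_ENDIAN variants of the macros above.
    	 */
    	printf("copro[%d]=%08x copro[%d]=%08x\n",
    	       2 * reg, r.copro[2 * reg], 2 * reg + 1, r.copro[2 * reg + 1]);
    	return 0;
    }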

+ 15 - 0
arch/arm64/include/asm/kvm_mmu.h

@@ -125,6 +125,21 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
 
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#ifndef CONFIG_ARM64_64K_PAGES
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#else
+#define kvm_pmd_table_empty(pmdp) (0)
+#endif
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))

+ 4 - 0
arch/arm64/include/asm/virt.h

@@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void)
 	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* ! __ASM__VIRT_H */

+ 19 - 7
arch/arm64/kernel/asm-offsets.c

@@ -120,6 +120,7 @@ int main(void)
   DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
   DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+  DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
@@ -129,13 +130,24 @@ int main(void)
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_SAVE_FN,		offsetof(struct vgic_sr_vectors, save_vgic));
+  DEFINE(VGIC_RESTORE_FN,	offsetof(struct vgic_sr_vectors, restore_vgic));
+  DEFINE(VGIC_SR_VECTOR_SZ,	sizeof(struct vgic_sr_vectors));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+  DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+  DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+  DEFINE(VGIC_V3_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+  DEFINE(VGIC_V3_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+  DEFINE(VGIC_V3_CPU_AP0R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+  DEFINE(VGIC_V3_CPU_AP1R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+  DEFINE(VGIC_V3_CPU_LR,	offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));

+ 0 - 9
arch/arm64/kernel/debug-monitors.c

@@ -30,15 +30,6 @@
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
 
-/* Low-level stepping controls. */
-#define DBG_MDSCR_SS		(1 << 0)
-#define DBG_SPSR_SS		(1 << 21)
-
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE		(1 << 13)
-#define DBG_MDSCR_MDE		(1 << 15)
-#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {

+ 4 - 0
arch/arm64/kvm/Makefile

@@ -20,4 +20,8 @@ kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o

+ 67 - 1
arch/arm64/kvm/guest.c

@@ -135,6 +135,59 @@ static unsigned long num_core_regs(void)
 	return sizeof(struct kvm_regs) / sizeof(__u32);
 }
 
+/**
+ * ARM64 versions of the TIMER registers, always available on arm64
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 /**
  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
  *
@@ -142,7 +195,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+                + NUM_TIMER_REGS;
 }
 
 /**
@@ -154,6 +208,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -161,6 +216,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
 }
 
@@ -174,6 +234,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_sys_reg_get_reg(vcpu, reg);
 }
 
@@ -187,6 +250,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_sys_reg_set_reg(vcpu, reg);
 }
 

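Userspace consumes these new indices through the regular one-reg ioctls. A rough sketch reading the guest's virtual counter (assuming the arm64 uapi headers are in the include path; vcpu_fd comes from KVM_CREATE_VCPU, error handling trimmed):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Fetch KVM_REG_ARM_TIMER_CNT via KVM_GET_ONE_REG. */
static int read_guest_counter(int vcpu_fd, uint64_t *cnt)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM_TIMER_CNT,
		.addr = (uintptr_t)cnt,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}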
+ 2 - 2
arch/arm64/kvm/handle_exit.c

@@ -73,9 +73,9 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_WFI]	= kvm_handle_wfx,
 	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
-	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_32,
 	[ESR_EL2_EC_CP14_LS]	= kvm_handle_cp14_load_store,
-	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_64,
 	[ESR_EL2_EC_HVC32]	= handle_hvc,
 	[ESR_EL2_EC_SMC32]	= handle_smc,
 	[ESR_EL2_EC_HVC64]	= handle_hvc,

+ 494 - 106
arch/arm64/kvm/hyp.S

@@ -16,11 +16,11 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/irqchip/arm-gic.h>
 
 #include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/asm-offsets.h>
+#include <asm/debug-monitors.h>
 #include <asm/fpsimdmacros.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
@@ -36,9 +36,6 @@
 	.pushsection	.hyp.text, "ax"
 	.align	PAGE_SHIFT
 
-__kvm_hyp_code_start:
-	.globl __kvm_hyp_code_start
-
 .macro save_common_regs
 	// x2: base address for cpu context
 	// x3: tmp register
@@ -215,6 +212,7 @@ __kvm_hyp_code_start:
 	mrs	x22, 	amair_el1
 	mrs	x23, 	cntkctl_el1
 	mrs	x24,	par_el1
+	mrs	x25,	mdscr_el1
 
 	stp	x4, x5, [x3]
 	stp	x6, x7, [x3, #16]
@@ -226,7 +224,202 @@ __kvm_hyp_code_start:
 	stp	x18, x19, [x3, #112]
 	stp	x20, x21, [x3, #128]
 	stp	x22, x23, [x3, #144]
-	str	x24, [x3, #160]
+	stp	x24, x25, [x3, #160]
+.endm
+
+.macro save_debug
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbcr15_el1
+	mrs	x19, dbgbcr14_el1
+	mrs	x18, dbgbcr13_el1
+	mrs	x17, dbgbcr12_el1
+	mrs	x16, dbgbcr11_el1
+	mrs	x15, dbgbcr10_el1
+	mrs	x14, dbgbcr9_el1
+	mrs	x13, dbgbcr8_el1
+	mrs	x12, dbgbcr7_el1
+	mrs	x11, dbgbcr6_el1
+	mrs	x10, dbgbcr5_el1
+	mrs	x9, dbgbcr4_el1
+	mrs	x8, dbgbcr3_el1
+	mrs	x7, dbgbcr2_el1
+	mrs	x6, dbgbcr1_el1
+	mrs	x5, dbgbcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbvr15_el1
+	mrs	x19, dbgbvr14_el1
+	mrs	x18, dbgbvr13_el1
+	mrs	x17, dbgbvr12_el1
+	mrs	x16, dbgbvr11_el1
+	mrs	x15, dbgbvr10_el1
+	mrs	x14, dbgbvr9_el1
+	mrs	x13, dbgbvr8_el1
+	mrs	x12, dbgbvr7_el1
+	mrs	x11, dbgbvr6_el1
+	mrs	x10, dbgbvr5_el1
+	mrs	x9, dbgbvr4_el1
+	mrs	x8, dbgbvr3_el1
+	mrs	x7, dbgbvr2_el1
+	mrs	x6, dbgbvr1_el1
+	mrs	x5, dbgbvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwcr15_el1
+	mrs	x19, dbgwcr14_el1
+	mrs	x18, dbgwcr13_el1
+	mrs	x17, dbgwcr12_el1
+	mrs	x16, dbgwcr11_el1
+	mrs	x15, dbgwcr10_el1
+	mrs	x14, dbgwcr9_el1
+	mrs	x13, dbgwcr8_el1
+	mrs	x12, dbgwcr7_el1
+	mrs	x11, dbgwcr6_el1
+	mrs	x10, dbgwcr5_el1
+	mrs	x9, dbgwcr4_el1
+	mrs	x8, dbgwcr3_el1
+	mrs	x7, dbgwcr2_el1
+	mrs	x6, dbgwcr1_el1
+	mrs	x5, dbgwcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwvr15_el1
+	mrs	x19, dbgwvr14_el1
+	mrs	x18, dbgwvr13_el1
+	mrs	x17, dbgwvr12_el1
+	mrs	x16, dbgwvr11_el1
+	mrs	x15, dbgwvr10_el1
+	mrs	x14, dbgwvr9_el1
+	mrs	x13, dbgwvr8_el1
+	mrs	x12, dbgwvr7_el1
+	mrs	x11, dbgwvr6_el1
+	mrs	x10, dbgwvr5_el1
+	mrs	x9, dbgwvr4_el1
+	mrs	x8, dbgwvr3_el1
+	mrs	x7, dbgwvr2_el1
+	mrs	x6, dbgwvr1_el1
+	mrs	x5, dbgwvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	mrs	x21, mdccint_el1
+	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
 .endm
 
 .macro restore_sysregs
@@ -245,7 +438,7 @@ __kvm_hyp_code_start:
 	ldp	x18, x19, [x3, #112]
 	ldp	x20, x21, [x3, #128]
 	ldp	x22, x23, [x3, #144]
-	ldr	x24, [x3, #160]
+	ldp	x24, x25, [x3, #160]
 
 	msr	vmpidr_el2,	x4
 	msr	csselr_el1,	x5
@@ -268,6 +461,198 @@ __kvm_hyp_code_start:
 	msr	amair_el1,	x22
 	msr	cntkctl_el1,	x23
 	msr	par_el1,	x24
+	msr	mdscr_el1,	x25
+.endm
+
+.macro restore_debug
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	msr	dbgbcr15_el1, x20
+	msr	dbgbcr14_el1, x19
+	msr	dbgbcr13_el1, x18
+	msr	dbgbcr12_el1, x17
+	msr	dbgbcr11_el1, x16
+	msr	dbgbcr10_el1, x15
+	msr	dbgbcr9_el1, x14
+	msr	dbgbcr8_el1, x13
+	msr	dbgbcr7_el1, x12
+	msr	dbgbcr6_el1, x11
+	msr	dbgbcr5_el1, x10
+	msr	dbgbcr4_el1, x9
+	msr	dbgbcr3_el1, x8
+	msr	dbgbcr2_el1, x7
+	msr	dbgbcr1_el1, x6
+	msr	dbgbcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	msr	dbgbvr15_el1, x20
+	msr	dbgbvr14_el1, x19
+	msr	dbgbvr13_el1, x18
+	msr	dbgbvr12_el1, x17
+	msr	dbgbvr11_el1, x16
+	msr	dbgbvr10_el1, x15
+	msr	dbgbvr9_el1, x14
+	msr	dbgbvr8_el1, x13
+	msr	dbgbvr7_el1, x12
+	msr	dbgbvr6_el1, x11
+	msr	dbgbvr5_el1, x10
+	msr	dbgbvr4_el1, x9
+	msr	dbgbvr3_el1, x8
+	msr	dbgbvr2_el1, x7
+	msr	dbgbvr1_el1, x6
+	msr	dbgbvr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwcr15_el1, x20
+	msr	dbgwcr14_el1, x19
+	msr	dbgwcr13_el1, x18
+	msr	dbgwcr12_el1, x17
+	msr	dbgwcr11_el1, x16
+	msr	dbgwcr10_el1, x15
+	msr	dbgwcr9_el1, x14
+	msr	dbgwcr8_el1, x13
+	msr	dbgwcr7_el1, x12
+	msr	dbgwcr6_el1, x11
+	msr	dbgwcr5_el1, x10
+	msr	dbgwcr4_el1, x9
+	msr	dbgwcr3_el1, x8
+	msr	dbgwcr2_el1, x7
+	msr	dbgwcr1_el1, x6
+	msr	dbgwcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwvr15_el1, x20
+	msr	dbgwvr14_el1, x19
+	msr	dbgwvr13_el1, x18
+	msr	dbgwvr12_el1, x17
+	msr	dbgwvr11_el1, x16
+	msr	dbgwvr10_el1, x15
+	msr	dbgwvr9_el1, x14
+	msr	dbgwvr8_el1, x13
+	msr	dbgwvr7_el1, x12
+	msr	dbgwvr6_el1, x11
+	msr	dbgwvr5_el1, x10
+	msr	dbgwvr4_el1, x9
+	msr	dbgwvr3_el1, x8
+	msr	dbgwvr2_el1, x7
+	msr	dbgwvr1_el1, x6
+	msr	dbgwvr0_el1, x5
+
+	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+	msr	mdccint_el1, x21
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -282,6 +667,35 @@ __kvm_hyp_code_start:
 	tbz	\tmp, #12, \target
 .endm
 
+.macro skip_debug_state tmp, target
+	ldr	\tmp, [x0, #VCPU_DEBUG_FLAGS]
+	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
+.endm
+
+.macro compute_debug_state target
+	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
+	// is set, we do a full save/restore cycle and disable trapping.
+	add	x25, x0, #VCPU_CONTEXT
+
+	// Check the state of MDSCR_EL1
+	ldr	x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
+	and	x26, x25, #DBG_MDSCR_KDE
+	and	x25, x25, #DBG_MDSCR_MDE
+	adds	xzr, x25, x26
+	b.eq	9998f		// Nothing to see there
+
+	// If any interesting bits were set, we must set the flag
+	mov	x26, #KVM_ARM64_DEBUG_DIRTY
+	str	x26, [x0, #VCPU_DEBUG_FLAGS]
+	b	9999f		// Don't skip restore
+
+9998:
+	// Otherwise load the flags from memory in case we recently
+	// trapped
+	skip_debug_state x25, \target
+9999:
+.endm
+
 .macro save_guest_32bit_state
 	skip_32bit_state x3, 1f
 
@@ -297,10 +711,13 @@ __kvm_hyp_code_start:
 	mrs	x4, dacr32_el2
 	mrs	x5, ifsr32_el2
 	mrs	x6, fpexc32_el2
-	mrs	x7, dbgvcr32_el2
 	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
+	str	x6, [x3, #16]
 
+	skip_debug_state x8, 2f
+	mrs	x7, dbgvcr32_el2
+	str	x7, [x3, #24]
+2:
 	skip_tee_state x8, 1f
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -323,12 +740,15 @@ __kvm_hyp_code_start:
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
 	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
+	ldr	x6, [x3, #16]
 	msr	dacr32_el2, x4
 	msr	ifsr32_el2, x5
 	msr	fpexc32_el2, x6
-	msr	dbgvcr32_el2, x7
 
+	skip_debug_state x8, 2f
+	ldr	x7, [x3, #24]
+	msr	dbgvcr32_el2, x7
+2:
 	skip_tee_state x8, 1f
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -339,11 +759,8 @@ __kvm_hyp_code_start:
 .endm
 
 .macro activate_traps
-	ldr	x2, [x0, #VCPU_IRQ_LINES]
-	ldr	x1, [x0, #VCPU_HCR_EL2]
-	orr	x2, x2, x1
-	msr	hcr_el2, x2
-
+	ldr     x2, [x0, #VCPU_HCR_EL2]
+	msr     hcr_el2, x2
 	ldr	x2, =(CPTR_EL2_TTA)
 	msr	cptr_el2, x2
 
@@ -353,6 +770,14 @@ __kvm_hyp_code_start:
 	mrs	x2, mdcr_el2
 	and	x2, x2, #MDCR_EL2_HPMN_MASK
 	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+	orr	x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
+
+	// Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
+	// if not dirty.
+	ldr	x3, [x0, #VCPU_DEBUG_FLAGS]
+	tbnz	x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
+	orr	x2, x2,  #MDCR_EL2_TDA
+1:
 	msr	mdcr_el2, x2
 .endm
 
@@ -379,100 +804,33 @@ __kvm_hyp_code_start:
 .endm
 
 /*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
+ * Call into the vgic backend for state saving
  */
 .macro save_vgic_state
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* Save all interesting registers */
-	ldr	w4, [x2, #GICH_HCR]
-	ldr	w5, [x2, #GICH_VMCR]
-	ldr	w6, [x2, #GICH_MISR]
-	ldr	w7, [x2, #GICH_EISR0]
-	ldr	w8, [x2, #GICH_EISR1]
-	ldr	w9, [x2, #GICH_ELRSR0]
-	ldr	w10, [x2, #GICH_ELRSR1]
-	ldr	w11, [x2, #GICH_APR]
-CPU_BE(	rev	w4,  w4  )
-CPU_BE(	rev	w5,  w5  )
-CPU_BE(	rev	w6,  w6  )
-CPU_BE(	rev	w7,  w7  )
-CPU_BE(	rev	w8,  w8  )
-CPU_BE(	rev	w9,  w9  )
-CPU_BE(	rev	w10, w10 )
-CPU_BE(	rev	w11, w11 )
-
-	str	w4, [x3, #VGIC_CPU_HCR]
-	str	w5, [x3, #VGIC_CPU_VMCR]
-	str	w6, [x3, #VGIC_CPU_MISR]
-	str	w7, [x3, #VGIC_CPU_EISR]
-	str	w8, [x3, #(VGIC_CPU_EISR + 4)]
-	str	w9, [x3, #VGIC_CPU_ELRSR]
-	str	w10, [x3, #(VGIC_CPU_ELRSR + 4)]
-	str	w11, [x3, #VGIC_CPU_APR]
-
-	/* Clear GICH_HCR */
-	str	wzr, [x2, #GICH_HCR]
-
-	/* Save list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_CPU_LR
-1:	ldr	w5, [x2], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x3], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, VGIC_SAVE_FN]
+	kern_hyp_va	x24
+	blr	x24
+	mrs	x24, hcr_el2
+	mov	x25, #HCR_INT_OVERRIDE
+	neg	x25, x25
+	and	x24, x24, x25
+	msr	hcr_el2, x24
 .endm
 
 /*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
+ * Call into the vgic backend for state restoring
  */
 .macro restore_vgic_state
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* We only restore a minimal set of registers */
-	ldr	w4, [x3, #VGIC_CPU_HCR]
-	ldr	w5, [x3, #VGIC_CPU_VMCR]
-	ldr	w6, [x3, #VGIC_CPU_APR]
-CPU_BE(	rev	w4, w4 )
-CPU_BE(	rev	w5, w5 )
-CPU_BE(	rev	w6, w6 )
-
-	str	w4, [x2, #GICH_HCR]
-	str	w5, [x2, #GICH_VMCR]
-	str	w6, [x2, #GICH_APR]
-
-	/* Restore list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_CPU_LR
-1:	ldr	w5, [x3], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x2], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
+	mrs	x24, hcr_el2
+	ldr	x25, [x0, #VCPU_IRQ_LINES]
+	orr	x24, x24, #HCR_INT_OVERRIDE
+	orr	x24, x24, x25
+	msr	hcr_el2, x24
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, #VGIC_RESTORE_FN]
+	kern_hyp_va	x24
+	blr	x24
 .endm
 
 .macro save_timer_state
@@ -537,6 +895,14 @@ __restore_sysregs:
 	restore_sysregs
 	ret
 
+__save_debug:
+	save_debug
+	ret
+
+__restore_debug:
+	restore_debug
+	ret
+
 __save_fpsimd:
 	save_fpsimd
 	ret
@@ -568,6 +934,9 @@ ENTRY(__kvm_vcpu_run)
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	compute_debug_state 1f
+	bl	__save_debug
+1:
 	activate_traps
 	activate_vm
 
@@ -579,6 +948,10 @@ ENTRY(__kvm_vcpu_run)
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	bl	__restore_debug
+1:
 	restore_guest_32bit_state
 	restore_guest_regs
 
@@ -595,6 +968,10 @@ __kvm_vcpu_return:
 	save_guest_regs
 	bl __save_fpsimd
 	bl __save_sysregs
+
+	skip_debug_state x3, 1f
+	bl	__save_debug
+1:
 	save_guest_32bit_state
 
 	save_timer_state
@@ -609,6 +986,14 @@ __kvm_vcpu_return:
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	// Clear the dirty flag for the next run, as all the state has
+	// already been saved. Note that we nuke the whole 64bit word.
+	// If we ever add more flags, we'll have to be more careful...
+	str	xzr, [x0, #VCPU_DEBUG_FLAGS]
+	bl	__restore_debug
+1:
 	restore_host_regs
 
 	mov	x0, x1
@@ -653,6 +1038,12 @@ ENTRY(__kvm_flush_vm_context)
 	ret
 ENDPROC(__kvm_flush_vm_context)
 
+	// struct vgic_sr_vectors __vgic_sr_vectors;
+	.align 3
+ENTRY(__vgic_sr_vectors)
+	.skip	VGIC_SR_VECTOR_SZ
+ENDPROC(__vgic_sr_vectors)
+
 __kvm_hyp_panic:
 	// Guess the context by looking at VTTBR:
 	// If zero, then we're already a host.
@@ -830,7 +1221,7 @@ el1_trap:
 	mrs	x2, far_el2
 
 2:	mrs	x0, tpidr_el2
-	str	x1, [x0, #VCPU_ESR_EL2]
+	str	w1, [x0, #VCPU_ESR_EL2]
 	str	x2, [x0, #VCPU_FAR_EL2]
 	str	x3, [x0, #VCPU_HPFAR_EL2]
 
@@ -880,7 +1271,4 @@ ENTRY(__kvm_hyp_vector)
 	ventry	el1_error_invalid		// Error 32-bit EL1
 ENDPROC(__kvm_hyp_vector)
 
-__kvm_hyp_code_end:
-	.globl	__kvm_hyp_code_end
-
 	.popsection

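A note on the save_debug/restore_debug macros above: ID_AA64DFR0_EL1's BRPs and WRPs fields encode (number of breakpoints/watchpoints - 1), and the adr/add/br sequences branch (15 - n) four-byte instructions into an unrolled mrs/str ladder so that only implemented registers are touched. The same logic in C, with a hypothetical read_dbgbcr() standing in for the mrs:

#include <stdint.h>

uint64_t read_dbgbcr(unsigned int n);	/* hypothetical stand-in for mrs */

/*
 * brps is the BRPs field of ID_AA64DFR0_EL1 (implemented breakpoints
 * minus one); registers above index brps do not exist, so the copy
 * effectively starts (15 - brps) entries into the unrolled sequence.
 */
static void save_bcrs(uint64_t *dst, unsigned int brps)
{
	int n;

	for (n = brps; n >= 0; n--)
		dst[n] = read_dbgbcr(n);
}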
+ 473 - 73
arch/arm64/kvm/sys_regs.c

@@ -30,6 +30,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/debug-monitors.h>
 #include <trace/events/kvm.h>
 
 #include "sys_regs.h"
@@ -137,10 +138,11 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 	if (!p->is_aarch32) {
 		vcpu_sys_reg(vcpu, r->reg) = val;
 	} else {
-		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
 		if (!p->is_32bit)
-			vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
+			vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
+		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
 	}
+
 	return true;
 }
 
@@ -163,18 +165,9 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-/*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters.  Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- */
-static bool pm_fake(struct kvm_vcpu *vcpu,
-		    const struct sys_reg_params *p,
-		    const struct sys_reg_desc *r)
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
 {
 	if (p->is_write)
 		return ignore_write(vcpu, p);
@@ -182,6 +175,73 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
 		return read_zero(vcpu, p);
 }
 
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = (1 << 3);
+		return true;
+	}
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+				   const struct sys_reg_params *p,
+				   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u32 val;
+		asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
+		*vcpu_reg(vcpu, p->Rt) = val;
+		return true;
+	}
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ * 
+ * - If we've touched any debug register, it is likely that we're
+ *   going to touch more of them. It then makes sense to disable the
+ *   traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ *   then mandatory to save/restore the registers, as the guest
+ *   depends on them.
+ * 
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follows:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ *   disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ *   set the dirty bit, disable the traps, save host registers,
+ *   restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ *   registers and clear the dirty bit. This ensures that the host can
+ *   now use the debug registers.
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+	}
+
+	return true;
+}
+
 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
 	u64 amair;
@@ -198,9 +258,39 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
 }
 
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
+	/* DBGBVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
+	  trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 },		\
+	/* DBGBCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
+	  trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 },		\
+	/* DBGWVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
+	  trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 },		\
+	/* DBGWCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
+	  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters.  Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ *
+ * Debug handling: We do trap most, if not all, debug-related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug interface, nor the
+ * OSLock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
 	/* DC ISW */
@@ -213,12 +303,71 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
 	  access_dcsw },
 
+	DBG_BCR_BVR_WCR_WVR_EL1(0),
+	DBG_BCR_BVR_WCR_WVR_EL1(1),
+	/* MDCCINT_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+	/* MDSCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+	DBG_BCR_BVR_WCR_WVR_EL1(2),
+	DBG_BCR_BVR_WCR_WVR_EL1(3),
+	DBG_BCR_BVR_WCR_WVR_EL1(4),
+	DBG_BCR_BVR_WCR_WVR_EL1(5),
+	DBG_BCR_BVR_WCR_WVR_EL1(6),
+	DBG_BCR_BVR_WCR_WVR_EL1(7),
+	DBG_BCR_BVR_WCR_WVR_EL1(8),
+	DBG_BCR_BVR_WCR_WVR_EL1(9),
+	DBG_BCR_BVR_WCR_WVR_EL1(10),
+	DBG_BCR_BVR_WCR_WVR_EL1(11),
+	DBG_BCR_BVR_WCR_WVR_EL1(12),
+	DBG_BCR_BVR_WCR_WVR_EL1(13),
+	DBG_BCR_BVR_WCR_WVR_EL1(14),
+	DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+	/* MDRAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  trap_raz_wi },
+	/* OSLAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
+	  trap_raz_wi },
+	/* OSLSR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
+	  trap_oslsr_el1 },
+	/* OSDLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGPRCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGCLAIMSET_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGCLAIMCLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGAUTHSTATUS_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
+	  trap_dbgauthstatus_el1 },
+
 	/* TEECR32_EL1 */
 	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, TEECR32_EL1, 0 },
 	/* TEEHBR32_EL1 */
 	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, TEEHBR32_EL1, 0 },
+
+	/* MDCCSR_EL1 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR[TR]X_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
+	  trap_raz_wi },
+
 	/* DBGVCR32_EL2 */
 	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
 	  NULL, reset_val, DBGVCR32_EL2, 0 },
@@ -260,10 +409,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* PMINTENSET_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMINTENCLR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 
 	/* MAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@@ -292,43 +441,43 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* PMCR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCNTENSET_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCNTENCLR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMOVSCLR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMSWINC_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMSELR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCEID0_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCEID1_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCCNTR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMXEVTYPER_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMXEVCNTR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMUSERENR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMOVSSET_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
-	  pm_fake },
+	  trap_raz_wi },
 
 	/* TPIDR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@@ -348,13 +497,161 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+		u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+		u32 el3 = !!((pfr >> 12) & 0xf);
+
+		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
+					  (((dfr >> 12) & 0xf) << 24) |
+					  (((dfr >> 28) & 0xf) << 20) |
+					  (6 << 16) | (el3 << 14) | (el3 << 12));
+		return true;
+	}
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+	}
+
+	return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n)					\
+	/* DBGBVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32,	\
+	  NULL, (cp14_DBGBVR0 + (n) * 2) },			\
+	/* DBGBCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32,	\
+	  NULL, (cp14_DBGBCR0 + (n) * 2) },			\
+	/* DBGWVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32,	\
+	  NULL, (cp14_DBGWVR0 + (n) * 2) },			\
+	/* DBGWCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32,	\
+	  NULL, (cp14_DBGWCR0 + (n) * 2) }
+
+#define DBGBXVR(n)						\
+	{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32,	\
+	  NULL, cp14_DBGBXVR0 + n * 2 }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug registers, on the principle that they don't really make sense
+ * to a guest. Revisit this one day, should this principle change.
+ */
+static const struct sys_reg_desc cp14_regs[] = {
+	/* DBGIDR */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+	/* DBGDTRRXext */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+	DBG_BCR_BVR_WCR_WVR(0),
+	/* DBGDSCRint */
+	{ Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(1),
+	/* DBGDCCINT */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
+	/* DBGDSCRext */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(2),
+	/* DBGDTR[RT]Xint */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+	/* DBGDTR[RT]Xext */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(3),
+	DBG_BCR_BVR_WCR_WVR(4),
+	DBG_BCR_BVR_WCR_WVR(5),
+	/* DBGWFAR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+	/* DBGOSECCR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(6),
+	/* DBGVCR */
+	{ Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(7),
+	DBG_BCR_BVR_WCR_WVR(8),
+	DBG_BCR_BVR_WCR_WVR(9),
+	DBG_BCR_BVR_WCR_WVR(10),
+	DBG_BCR_BVR_WCR_WVR(11),
+	DBG_BCR_BVR_WCR_WVR(12),
+	DBG_BCR_BVR_WCR_WVR(13),
+	DBG_BCR_BVR_WCR_WVR(14),
+	DBG_BCR_BVR_WCR_WVR(15),
+
+	/* DBGDRAR (32bit) */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+	DBGBXVR(0),
+	/* DBGOSLAR */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+	DBGBXVR(1),
+	/* DBGOSLSR */
+	{ Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+	DBGBXVR(2),
+	DBGBXVR(3),
+	/* DBGOSDLR */
+	{ Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+	DBGBXVR(4),
+	/* DBGPRCR */
+	{ Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+	DBGBXVR(5),
+	DBGBXVR(6),
+	DBGBXVR(7),
+	DBGBXVR(8),
+	DBGBXVR(9),
+	DBGBXVR(10),
+	DBGBXVR(11),
+	DBGBXVR(12),
+	DBGBXVR(13),
+	DBGBXVR(14),
+	DBGBXVR(15),
+
+	/* DBGDSAR (32bit) */
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+	/* DBGDEVID2 */
+	{ Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+	/* DBGDEVID1 */
+	{ Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+	/* DBGDEVID */
+	{ Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+	/* DBGCLAIMSET */
+	{ Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+	/* DBGCLAIMCLR */
+	{ Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+	/* DBGAUTHSTATUS */
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
+};
+
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+	/* DBGDRAR (64bit) */
+	{ Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+	/* DBGDSAR (64bit) */
+	{ Op1( 0), CRm( 2), .access = trap_raz_wi },
+};
+
 /*
  * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
  * depending on the way they are accessed (as a 32bit or a 64bit
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
-	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -374,26 +671,30 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
 
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+	/* PMU */
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
 
 	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
 	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+};
 
+static const struct sys_reg_desc cp15_64_regs[] = {
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 };
 
@@ -454,26 +755,29 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
-static void emulate_cp15(struct kvm_vcpu *vcpu,
-			 const struct sys_reg_params *params)
+/*
+ * emulate_cp -- tries to match a sys_reg access in a handling table, and
+ *               calls the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_params *params,
+		      const struct sys_reg_desc *table,
+		      size_t num)
 {
-	size_t num;
-	const struct sys_reg_desc *table, *r;
+	const struct sys_reg_desc *r;
 
-	table = get_target_table(vcpu->arch.target, false, &num);
+	if (!table)
+		return -1;	/* Not handled */
 
-	/* Search target-specific then generic table. */
 	r = find_reg(params, table, num);
-	if (!r)
-		r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
 
-	if (likely(r)) {
+	if (r) {
 		/*
 		 * Not having an accessor means that we have
 		 * configured a trap that we don't know how to
@@ -485,22 +789,51 @@ static void emulate_cp15(struct kvm_vcpu *vcpu,
 		if (likely(r->access(vcpu, params, r))) {
 			/* Skip instruction, since it was emulated */
 			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			return;
 		}
-		/* If access function fails, it should complain. */
+
+		/* Handled */
+		return 0;
 	}
 
-	kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu));
+	/* Not handled */
+	return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+				struct sys_reg_params *params)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+	int cp;
+
+	switch(hsr_ec) {
+	case ESR_EL2_EC_CP15_32:
+	case ESR_EL2_EC_CP15_64:
+		cp = 15;
+		break;
+	case ESR_EL2_EC_CP14_MR:
+	case ESR_EL2_EC_CP14_64:
+		cp = 14;
+		break;
+	default:
+		WARN_ON((cp = -1));
+	}
+
+	kvm_err("Unsupported guest CP%d access at: %08lx\n",
+		cp, *vcpu_pc(vcpu));
 	print_sys_reg_instr(params);
 	kvm_inject_undefined(vcpu);
 }
 
 /**
- * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
@@ -529,8 +862,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		*vcpu_reg(vcpu, params.Rt) = val;
 	}
 
-	emulate_cp15(vcpu, &params);
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		goto out;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		goto out;
 
+	unhandled_cp_access(vcpu, &params);
+
+out:
 	/* Do the opposite hack for the read side */
 	if (!params.is_write) {
 		u64 val = *vcpu_reg(vcpu, params.Rt);
@@ -546,7 +885,11 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
@@ -561,10 +904,51 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op1 = (hsr >> 14) & 0x7;
 	params.Op2 = (hsr >> 17) & 0x7;
 
-	emulate_cp15(vcpu, &params);
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		return 1;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		return 1;
+
+	unhandled_cp_access(vcpu, &params);
 	return 1;
 }
 
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_64(vcpu,
+				cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_32(vcpu,
+				cp15_regs, ARRAY_SIZE(cp15_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_64(vcpu,
+				cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
+				NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_32(vcpu,
+				cp14_regs, ARRAY_SIZE(cp14_regs),
+				NULL, 0);
+}
+
 static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 			   const struct sys_reg_params *params)
 {
@@ -776,17 +1160,15 @@ static struct sys_reg_desc invariant_sys_regs[] = {
 	  NULL, get_ctr_el0 },
 };
 
-static int reg_from_user(void *val, const void __user *uaddr, u64 id)
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
 }
 
-static int reg_to_user(void __user *uaddr, const void *val, u64 id)
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
@@ -962,7 +1344,7 @@ static unsigned int num_demux_regs(void)
 
 static int write_demux_regids(u64 __user *uindices)
 {
-	u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+	u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
 	unsigned int i;
 
 	val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
@@ -1069,14 +1451,32 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 	return write_demux_regids(uindices);
 }
 
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 1; i < n; i++) {
+		if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+			kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 void kvm_sys_reg_table_init(void)
 {
 	unsigned int i;
 	struct sys_reg_desc clidr;
 
 	/* Make sure tables are unique and in order. */
-	for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++)
-		BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0);
+	BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+	BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+	BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
 
 	/* We abuse the reset function to overwrite the table itself. */
 	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)

+ 133 - 0
arch/arm64/kvm/vgic-v2-switch.S

@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+ENTRY(__save_vgic_v2_state)
+__save_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* Save all interesting registers */
+	ldr	w4, [x2, #GICH_HCR]
+	ldr	w5, [x2, #GICH_VMCR]
+	ldr	w6, [x2, #GICH_MISR]
+	ldr	w7, [x2, #GICH_EISR0]
+	ldr	w8, [x2, #GICH_EISR1]
+	ldr	w9, [x2, #GICH_ELRSR0]
+	ldr	w10, [x2, #GICH_ELRSR1]
+	ldr	w11, [x2, #GICH_APR]
+CPU_BE(	rev	w4,  w4  )
+CPU_BE(	rev	w5,  w5  )
+CPU_BE(	rev	w6,  w6  )
+CPU_BE(	rev	w7,  w7  )
+CPU_BE(	rev	w8,  w8  )
+CPU_BE(	rev	w9,  w9  )
+CPU_BE(	rev	w10, w10 )
+CPU_BE(	rev	w11, w11 )
+
+	str	w4, [x3, #VGIC_V2_CPU_HCR]
+	str	w5, [x3, #VGIC_V2_CPU_VMCR]
+	str	w6, [x3, #VGIC_V2_CPU_MISR]
+	str	w7, [x3, #VGIC_V2_CPU_EISR]
+	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_V2_CPU_APR]
+
+	/* Clear GICH_HCR */
+	str	wzr, [x2, #GICH_HCR]
+
+	/* Save list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x2], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x3], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__save_vgic_v2_state)
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+ENTRY(__restore_vgic_v2_state)
+__restore_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* We only restore a minimal set of registers */
+	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_V2_CPU_APR]
+CPU_BE(	rev	w4, w4 )
+CPU_BE(	rev	w5, w5 )
+CPU_BE(	rev	w6, w6 )
+
+	str	w4, [x2, #GICH_HCR]
+	str	w5, [x2, #GICH_VMCR]
+	str	w6, [x2, #GICH_APR]
+
+	/* Restore list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x3], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x2], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__restore_vgic_v2_state)
+
+	.popsection

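These entry points are reached through the __vgic_sr_vectors block that the hyp save_vgic_state/restore_vgic_state macros load their branch target from. Its shape follows from the asm-offsets entries above; the probe-time hook-up sketched here is illustrative rather than the literal code:

/* Dispatch block branched through by the hyp vgic macros. */
struct vgic_sr_vectors {
	void	*save_vgic;
	void	*restore_vgic;
};

extern struct vgic_sr_vectors __vgic_sr_vectors;
extern char __save_vgic_v2_state[], __restore_vgic_v2_state[];

/* Illustrative: point the vectors at the GICv2 backend. */
static void vgic_v2_setup_vectors(void)
{
	__vgic_sr_vectors.save_vgic	= __save_vgic_v2_state;
	__vgic_sr_vectors.restore_vgic	= __restore_vgic_v2_state;
}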
+ 267 - 0
arch/arm64/kvm/vgic-v3-switch.S

@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n)	(VGIC_V3_CPU_LR + (15 - n) * 8)
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro	save_vgic_v3_state
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Make sure stores to the GIC via the memory mapped interface
+	// are now visible to the system register interface
+	dsb	st
+
+	// Save all interesting registers
+	mrs_s	x4, ICH_HCR_EL2
+	mrs_s	x5, ICH_VMCR_EL2
+	mrs_s	x6, ICH_MISR_EL2
+	mrs_s	x7, ICH_EISR_EL2
+	mrs_s	x8, ICH_ELSR_EL2
+
+	str	w4, [x3, #VGIC_V3_CPU_HCR]
+	str	w5, [x3, #VGIC_V3_CPU_VMCR]
+	str	w6, [x3, #VGIC_V3_CPU_MISR]
+	str	w7, [x3, #VGIC_V3_CPU_EISR]
+	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+	msr_s	ICH_HCR_EL2, xzr
+
+	mrs_s	x21, ICH_VTR_EL2
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	mrs_s	x20, ICH_LR15_EL2
+	mrs_s	x19, ICH_LR14_EL2
+	mrs_s	x18, ICH_LR13_EL2
+	mrs_s	x17, ICH_LR12_EL2
+	mrs_s	x16, ICH_LR11_EL2
+	mrs_s	x15, ICH_LR10_EL2
+	mrs_s	x14, ICH_LR9_EL2
+	mrs_s	x13, ICH_LR8_EL2
+	mrs_s	x12, ICH_LR7_EL2
+	mrs_s	x11, ICH_LR6_EL2
+	mrs_s	x10, ICH_LR5_EL2
+	mrs_s	x9, ICH_LR4_EL2
+	mrs_s	x8, ICH_LR3_EL2
+	mrs_s	x7, ICH_LR2_EL2
+	mrs_s	x6, ICH_LR1_EL2
+	mrs_s	x5, ICH_LR0_EL2
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	str	x20, [x3, #LR_OFFSET(15)]
+	str	x19, [x3, #LR_OFFSET(14)]
+	str	x18, [x3, #LR_OFFSET(13)]
+	str	x17, [x3, #LR_OFFSET(12)]
+	str	x16, [x3, #LR_OFFSET(11)]
+	str	x15, [x3, #LR_OFFSET(10)]
+	str	x14, [x3, #LR_OFFSET(9)]
+	str	x13, [x3, #LR_OFFSET(8)]
+	str	x12, [x3, #LR_OFFSET(7)]
+	str	x11, [x3, #LR_OFFSET(6)]
+	str	x10, [x3, #LR_OFFSET(5)]
+	str	x9, [x3, #LR_OFFSET(4)]
+	str	x8, [x3, #LR_OFFSET(3)]
+	str	x7, [x3, #LR_OFFSET(2)]
+	str	x6, [x3, #LR_OFFSET(1)]
+	str	x5, [x3, #LR_OFFSET(0)]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs_s	x20, ICH_AP0R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	mrs_s	x19, ICH_AP0R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+6:	mrs_s	x18, ICH_AP0R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+5:	mrs_s	x17, ICH_AP0R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP0R]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs_s	x20, ICH_AP1R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	mrs_s	x19, ICH_AP1R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+6:	mrs_s	x18, ICH_AP1R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+5:	mrs_s	x17, ICH_AP1R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP1R]
+
+	// Restore SRE_EL1 access and re-enable SRE at EL1.
+	mrs_s	x5, ICC_SRE_EL2
+	orr	x5, x5, #ICC_SRE_EL2_ENABLE
+	msr_s	ICC_SRE_EL2, x5
+	isb
+	mov	x5, #1
+	msr_s	ICC_SRE_EL1, x5
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro	restore_vgic_v3_state
+	// Disable SRE_EL1 access. Necessary, otherwise
+	// ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
+	msr_s	ICC_SRE_EL1, xzr
+	isb
+
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Restore all interesting registers
+	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
+
+	msr_s	ICH_HCR_EL2, x4
+	msr_s	ICH_VMCR_EL2, x5
+
+	mrs_s	x21, ICH_VTR_EL2
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	msr_s	ICH_AP1R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+	msr_s	ICH_AP1R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+	msr_s	ICH_AP1R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
+	msr_s	ICH_AP1R0_EL2, x17
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	msr_s	ICH_AP0R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+	msr_s	ICH_AP0R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+	msr_s	ICH_AP0R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
+	msr_s	ICH_AP0R0_EL2, x17
+
+	and	w22, w21, #0xf
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	ldr	x20, [x3, #LR_OFFSET(15)]
+	ldr	x19, [x3, #LR_OFFSET(14)]
+	ldr	x18, [x3, #LR_OFFSET(13)]
+	ldr	x17, [x3, #LR_OFFSET(12)]
+	ldr	x16, [x3, #LR_OFFSET(11)]
+	ldr	x15, [x3, #LR_OFFSET(10)]
+	ldr	x14, [x3, #LR_OFFSET(9)]
+	ldr	x13, [x3, #LR_OFFSET(8)]
+	ldr	x12, [x3, #LR_OFFSET(7)]
+	ldr	x11, [x3, #LR_OFFSET(6)]
+	ldr	x10, [x3, #LR_OFFSET(5)]
+	ldr	x9, [x3, #LR_OFFSET(4)]
+	ldr	x8, [x3, #LR_OFFSET(3)]
+	ldr	x7, [x3, #LR_OFFSET(2)]
+	ldr	x6, [x3, #LR_OFFSET(1)]
+	ldr	x5, [x3, #LR_OFFSET(0)]
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	msr_s	ICH_LR15_EL2, x20
+	msr_s	ICH_LR14_EL2, x19
+	msr_s	ICH_LR13_EL2, x18
+	msr_s	ICH_LR12_EL2, x17
+	msr_s	ICH_LR11_EL2, x16
+	msr_s	ICH_LR10_EL2, x15
+	msr_s	ICH_LR9_EL2,  x14
+	msr_s	ICH_LR8_EL2,  x13
+	msr_s	ICH_LR7_EL2,  x12
+	msr_s	ICH_LR6_EL2,  x11
+	msr_s	ICH_LR5_EL2,  x10
+	msr_s	ICH_LR4_EL2,   x9
+	msr_s	ICH_LR3_EL2,   x8
+	msr_s	ICH_LR2_EL2,   x7
+	msr_s	ICH_LR1_EL2,   x6
+	msr_s	ICH_LR0_EL2,   x5
+
+	// Ensure that the above will have reached the
+	// (re)distributors. This ensures the guest will read
+	// the correct values from the memory-mapped interface.
+	isb
+	dsb	sy
+
+	// Prevent the guest from touching the GIC system registers
+	mrs_s	x5, ICC_SRE_EL2
+	and	x5, x5, #~ICC_SRE_EL2_ENABLE
+	msr_s	ICC_SRE_EL2, x5
+.endm
+
+ENTRY(__save_vgic_v3_state)
+	save_vgic_v3_state
+	ret
+ENDPROC(__save_vgic_v3_state)
+
+ENTRY(__restore_vgic_v3_state)
+	restore_vgic_v3_state
+	ret
+ENDPROC(__restore_vgic_v3_state)
+
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+	mrs_s	x0, ICH_VTR_EL2
+	ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+	.popsection

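The mvn/ubfiz pair in these macros derives the branch offset into the unrolled ICH_LRn_EL2 ladder from ICH_VTR_EL2, whose low bits hold (number of list registers - 1). Its C equivalent, for reference:

#include <stdint.h>

/*
 * (~x) & 0xf == 15 - (x & 0xf), so this is (15 - ListRegs) * 4:
 * one 4-byte instruction skipped per unimplemented list register.
 */
static inline uint32_t lr_skip_bytes(uint32_t ich_vtr)
{
	return (~ich_vtr & 0xf) << 2;
}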
+ 1 - 0
arch/ia64/kvm/Kconfig

@@ -25,6 +25,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select KVM_APIC_ARCHITECTURE
 	select KVM_MMIO

+ 1 - 1
arch/ia64/kvm/kvm-ia64.c

@@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn)
 	*(int *)rtn = 0;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 
 	int r;

+ 1 - 1
arch/mips/kvm/mips.c

@@ -886,7 +886,7 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 

+ 1 - 3
arch/powerpc/Kconfig.debug

@@ -202,9 +202,7 @@ config PPC_EARLY_DEBUG_BEAT
 
 config PPC_EARLY_DEBUG_44x
 	bool "Early serial debugging for IBM/AMCC 44x CPUs"
-	# PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water
-	# mark, which doesn't work with current 440 KVM.
-	depends on 44x && !KVM
+	depends on 44x
 	help
 	  Select this to enable early debugging for IBM 44x chips via the
 	  inbuilt serial port.  If you enable this, ensure you set

+ 0 - 1
arch/powerpc/configs/ppc44x_defconfig

@@ -127,4 +127,3 @@ CONFIG_CRYPTO_PCBC=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
 CONFIG_VIRTUALIZATION=y
-CONFIG_KVM_440=y

+ 4 - 0
arch/powerpc/include/asm/asm-compat.h

@@ -34,10 +34,14 @@
 #define PPC_MIN_STKFRM	112
 
 #ifdef __BIG_ENDIAN__
+#define LWZX_BE	stringify_in_c(lwzx)
 #define LDX_BE	stringify_in_c(ldx)
+#define STWX_BE	stringify_in_c(stwx)
 #define STDX_BE	stringify_in_c(stdx)
 #else
+#define LWZX_BE	stringify_in_c(lwbrx)
 #define LDX_BE	stringify_in_c(ldbrx)
+#define STWX_BE	stringify_in_c(stwbrx)
 #define STDX_BE	stringify_in_c(stdbrx)
 #endif
 

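The *_BE selectors let common code access fixed big-endian in-memory structures on either kernel endianness: big-endian builds get the plain load/store, little-endian builds the byte-reversing lwbrx/stwbrx forms. A C analogue of the guarantee LWZX_BE provides (helper name made up):

#include <stdint.h>

/* Read a 32-bit big-endian value independent of host endianness. */
static inline uint32_t load_be32(const void *p)
{
	const uint8_t *b = p;

	return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
	       ((uint32_t)b[2] <<  8) |  (uint32_t)b[3];
}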
+ 7 - 0
arch/powerpc/include/asm/cache.h

@@ -3,6 +3,7 @@
 
 #ifdef __KERNEL__
 
+#include <asm/reg.h>
 
 /* bytes per L1 cache line */
 #if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
@@ -39,6 +40,12 @@ struct ppc64_caches {
 };
 
 extern struct ppc64_caches ppc64_caches;
+
+static inline void logmpp(u64 x)
+{
+	asm volatile(PPC_LOGMPP(R1) : : "r" (x));
+}
+
 #endif /* __powerpc64__ && ! __ASSEMBLY__ */
 
 #if defined(__ASSEMBLY__)

+ 6 - 0
arch/powerpc/include/asm/hvcall.h

@@ -279,6 +279,12 @@
 #define H_GET_24X7_DATA		0xF07C
 #define H_GET_PERF_COUNTER_INFO	0xF080
 
+/* Values for 2nd argument to H_SET_MODE */
+#define H_SET_MODE_RESOURCE_SET_CIABR		1
+#define H_SET_MODE_RESOURCE_SET_DAWR		2
+#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
+#define H_SET_MODE_RESOURCE_LE			4
+
 #ifndef __ASSEMBLY__
 
 /**

+ 0 - 67
arch/powerpc/include/asm/kvm_44x.h

@@ -1,67 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#ifndef __ASM_44X_H__
-#define __ASM_44X_H__
-
-#include <linux/kvm_host.h>
-
-#define PPC44x_TLB_SIZE 64
-
-/* If the guest is expecting it, this can be as large as we like; we'd just
- * need to find some way of advertising it. */
-#define KVM44x_GUEST_TLB_SIZE 64
-
-struct kvmppc_44x_tlbe {
-	u32 tid; /* Only the low 8 bits are used. */
-	u32 word0;
-	u32 word1;
-	u32 word2;
-};
-
-struct kvmppc_44x_shadow_ref {
-	struct page *page;
-	u16 gtlb_index;
-	u8 writeable;
-	u8 tid;
-};
-
-struct kvmppc_vcpu_44x {
-	/* Unmodified copy of the guest's TLB. */
-	struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
-
-	/* References to guest pages in the hardware TLB. */
-	struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
-
-	/* State of the shadow TLB at guest context switch time. */
-	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
-	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
-
-	struct kvm_vcpu vcpu;
-};
-
-static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
-{
-	return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
-}
-
-void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
-void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
-
-#endif /* __ASM_44X_H__ */

+ 1 - 1
arch/powerpc/include/asm/kvm_asm.h

@@ -33,7 +33,6 @@
 /* IVPR must be 64KiB-aligned. */
 #define VCPU_SIZE_ORDER 4
 #define VCPU_SIZE_LOG   (VCPU_SIZE_ORDER + 12)
-#define VCPU_TLB_PGSZ   PPC44x_TLB_64K
 #define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG)
 
 #define BOOKE_INTERRUPT_CRITICAL 0
@@ -132,6 +131,7 @@
 #define BOOK3S_HFLAG_NATIVE_PS			0x8
 #define BOOK3S_HFLAG_MULTI_PGSIZE		0x10
 #define BOOK3S_HFLAG_NEW_TLBIE			0x20
+#define BOOK3S_HFLAG_SPLIT_HACK			0x40
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */

+ 19 - 32
arch/powerpc/include/asm/kvm_book3s.h

@@ -83,8 +83,6 @@ struct kvmppc_vcpu_book3s {
 	u64 sdr1;
 	u64 hior;
 	u64 msr_mask;
-	u64 purr_offset;
-	u64 spurr_offset;
 #ifdef CONFIG_PPC_BOOK3S_32
 	u32 vsid_pool[VSID_POOL_SIZE];
 	u32 vsid_next;
@@ -148,9 +146,10 @@ extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *
 extern int kvmppc_mmu_hpte_sysinit(void);
 extern void kvmppc_mmu_hpte_sysexit(void);
 extern int kvmppc_mmu_hv_init(void);
+extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
 
+/* XXX remove this export when load_last_inst() is generic */
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
-extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
 extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 					  unsigned int vec);
@@ -159,13 +158,13 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
-extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
 			bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 			unsigned long *rmap, long pte_index, int realmode);
-extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index);
-void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
+void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
@@ -183,12 +182,16 @@ extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
 			struct kvm_memory_slot *memslot, unsigned long *map);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
 			unsigned long mask);
+extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
 extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
 extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
+extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
+extern int kvmppc_hcall_impl_pr(unsigned long cmd);
+extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
 extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 				 struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
@@ -274,32 +277,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
 	return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE);
 }
 
-static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc)
-{
-	/* Load the instruction manually if it failed to do so in the
-	 * exit path */
-	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
-
-	return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) :
-		vcpu->arch.last_inst;
-}
-
-static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
-{
-	return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu));
-}
-
-/*
- * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
- * Because the sc instruction sets SRR0 to point to the following
- * instruction, we have to fetch from pc - 4.
- */
-static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
-{
-	return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4);
-}
-
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault_dar;
@@ -310,6 +287,13 @@ static inline bool is_kvmppc_resume_guest(int r)
 	return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
 }
 
+static inline bool is_kvmppc_hv_enabled(struct kvm *kvm);
+static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
+{
+	/* Only PR KVM supports the magic page */
+	return !is_kvmppc_hv_enabled(vcpu->kvm);
+}
+
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3			0x113724FA
@@ -322,4 +306,7 @@ static inline bool is_kvmppc_resume_guest(int r)
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS			(LPID_RSVD + 1)
 
+#define SPLIT_HACK_MASK			0xff000000
+#define SPLIT_HACK_OFFS			0xfb000000
+
 #endif /* __ASM_KVM_BOOK3S_H__ */

+ 18 - 11
arch/powerpc/include/asm/kvm_book3s_64.h

@@ -59,20 +59,29 @@ extern unsigned long kvm_rma_pages;
 /* These bits are reserved in the guest view of the HPTE */
 #define HPTE_GR_RESERVED	HPTE_GR_MODIFIED
 
-static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
+static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
 {
 	unsigned long tmp, old;
+	__be64 be_lockbit, be_bits;
+
+	/*
+	 * We load/store in native endian, but the HTAB is in big endian. If
+	 * we byte-swap all data we apply to the PTE, we are implicitly
+	 * correct again.
+	 */
+	be_lockbit = cpu_to_be64(HPTE_V_HVLOCK);
+	be_bits = cpu_to_be64(bits);
 
 	asm volatile("	ldarx	%0,0,%2\n"
 		     "	and.	%1,%0,%3\n"
 		     "	bne	2f\n"
-		     "	ori	%0,%0,%4\n"
+		     "	or	%0,%0,%4\n"
 		     "  stdcx.	%0,0,%2\n"
 		     "	beq+	2f\n"
 		     "	mr	%1,%3\n"
 		     "2:	isync"
 		     : "=&r" (tmp), "=&r" (old)
-		     : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
+		     : "r" (hpte), "r" (be_bits), "r" (be_lockbit)
 		     : "cc", "memory");
 	return old == 0;
 }
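Why `ori` became `or` with a register operand: once the lock bit is byte-swapped to match the big-endian HTAB, it is no longer a small constant on little-endian hosts and cannot live in `ori`'s 16-bit immediate field. Assuming HPTE_V_HVLOCK is 0x40:

	/* on LE: cpu_to_be64(0x40) == 0x4000000000000000, not an ori immediate */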
@@ -110,16 +119,12 @@ static inline int __hpte_actual_psize(unsigned int lp, int psize)
 static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 					     unsigned long pte_index)
 {
-	int b_psize, a_psize;
+	int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
 	unsigned int penc;
 	unsigned long rb = 0, va_low, sllp;
 	unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
 
-	if (!(v & HPTE_V_LARGE)) {
-		/* both base and actual psize is 4k */
-		b_psize = MMU_PAGE_4K;
-		a_psize = MMU_PAGE_4K;
-	} else {
+	if (v & HPTE_V_LARGE) {
 		for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
 
 			/* valid entries have a shift value */
@@ -142,6 +147,8 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	 */
 	/* This covers 14..54 bits of va*/
 	rb = (v & ~0x7fUL) << 16;		/* AVA field */
+
+	rb |= v >> (62 - 8);			/*  B field */
 	/*
 	 * AVA in v had cleared lower 23 bits. We need to derive
 	 * that from pteg index
@@ -172,10 +179,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	{
 		int aval_shift;
 		/*
-		 * remaining 7bits of AVA/LP fields
+		 * remaining bits of AVA/LP fields
 		 * Also contain the rr bits of LP
 		 */
-		rb |= (va_low & 0x7f) << 16;
+		rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
 		/*
 		 * Now clear not needed LP bits based on actual psize
 		 */
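Two fixes in this hunk are worth spelling out: `rb |= v >> (62 - 8)` copies HPTE bits 63:62, the base segment size (B) field, into RB bits 9:8, which tlbie requires and which was previously left zero; and the AVA/LP byte is now derived from va_low shifted by the base page size rather than assuming a fixed 7-bit field, so large base pages compute the correct low-order AVA bits.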

+ 10 - 5
arch/powerpc/include/asm/kvm_booke.h

@@ -69,11 +69,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
 	return false;
 }
 
-static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.last_inst;
-}
-
 static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
 {
 	vcpu->arch.ctr = val;
@@ -108,4 +103,14 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault_dear;
 }
+
+static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
+{
+	/* Magic page is only supported on e500v2 */
+#ifdef CONFIG_KVM_E500V2
+	return true;
+#else
+	return false;
+#endif
+}
 #endif /* __ASM_KVM_BOOKE_H__ */

+ 16 - 12
arch/powerpc/include/asm/kvm_host.h

@@ -34,6 +34,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
+#include <asm/hvcall.h>
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
@@ -48,7 +49,6 @@
 #define KVM_NR_IRQCHIPS          1
 #define KVM_IRQCHIP_NUM_PINS     256
 
-#if !defined(CONFIG_KVM_440)
 #include <linux/mmu_notifier.h>
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -61,8 +61,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
-#endif
-
 #define HPTEG_CACHE_NUM			(1 << 15)
 #define HPTEG_HASH_BITS_PTE		13
 #define HPTEG_HASH_BITS_PTE_LONG	12
@@ -96,7 +94,6 @@ struct kvm_vm_stat {
 struct kvm_vcpu_stat {
 	u32 sum_exits;
 	u32 mmio_exits;
-	u32 dcr_exits;
 	u32 signal_exits;
 	u32 light_exits;
 	/* Account for special types of light exits: */
@@ -113,22 +110,21 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 	u32 dbell_exits;
 	u32 gdbell_exits;
+	u32 ld;
+	u32 st;
 #ifdef CONFIG_PPC_BOOK3S
 	u32 pf_storage;
 	u32 pf_instruc;
 	u32 sp_storage;
 	u32 sp_instruc;
 	u32 queue_intr;
-	u32 ld;
 	u32 ld_slow;
-	u32 st;
 	u32 st_slow;
 #endif
 };
 
 enum kvm_exit_types {
 	MMIO_EXITS,
-	DCR_EXITS,
 	SIGNAL_EXITS,
 	ITLB_REAL_MISS_EXITS,
 	ITLB_VIRT_MISS_EXITS,
@@ -254,7 +250,6 @@ struct kvm_arch {
 	atomic_t hpte_mod_interest;
 	spinlock_t slot_phys_lock;
 	cpumask_t need_tlb_flush;
-	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -263,6 +258,7 @@ struct kvm_arch {
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
 	struct list_head rtas_tokens;
+	DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #endif
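The bitmap exploits the fact that sPAPR hcall numbers are multiples of 4, so opcode/4 gives a dense index. A hedged sketch of the per-hcall check this enables (hcall_enabled is an illustrative name, not from this diff):

	static inline bool hcall_enabled(struct kvm *kvm, unsigned long hc)
	{
		return test_bit(hc / 4, kvm->arch.enabled_hcalls);
	}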
 #ifdef CONFIG_KVM_MPIC
 	struct openpic *mpic;
@@ -271,6 +267,10 @@ struct kvm_arch {
 	struct kvmppc_xics *xics;
 #endif
 	struct kvmppc_ops *kvm_ops;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/* This array can grow quite large, keep it at the end */
+	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
+#endif
 };
 
 /*
@@ -305,6 +305,8 @@ struct kvmppc_vcore {
 	u32 arch_compat;
 	ulong pcr;
 	ulong dpdes;		/* doorbell state (POWER8) */
+	void *mpp_buffer; /* Micro Partition Prefetch buffer */
+	bool mpp_buffer_is_valid;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
@@ -503,8 +505,10 @@ struct kvm_vcpu_arch {
 #ifdef CONFIG_BOOKE
 	u32 decar;
 #endif
-	u32 tbl;
-	u32 tbu;
+	/* Time base value when we entered the guest */
+	u64 entry_tb;
+	u64 entry_vtb;
+	u64 entry_ic;
 	u32 tcr;
 	ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
 	u32 ivor[64];
@@ -580,6 +584,8 @@ struct kvm_vcpu_arch {
 	u32 mmucfg;
 	u32 eptcfg;
 	u32 epr;
+	u64 sprg9;
+	u32 pwrmgtcr0;
 	u32 crit_save;
 	/* guest debug registers*/
 	struct debug_reg dbg_reg;
@@ -593,8 +599,6 @@ struct kvm_vcpu_arch {
 	u8 io_gpr; /* GPR used as IO source/target */
 	u8 mmio_is_bigendian;
 	u8 mmio_sign_extend;
-	u8 dcr_needed;
-	u8 dcr_is_write;
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;

+ 106 - 10
arch/powerpc/include/asm/kvm_ppc.h

@@ -41,12 +41,26 @@
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with MMIO request */
-	EMULATE_DO_DCR,       /* kvm_run filled with DCR request */
 	EMULATE_FAIL,         /* can't emulate this instruction */
 	EMULATE_AGAIN,        /* something went wrong. go again */
 	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
 };
 
+enum instruction_type {
+	INST_GENERIC,
+	INST_SC,		/* system call */
+};
+
+enum xlate_instdata {
+	XLATE_INST,		/* translate instruction address */
+	XLATE_DATA		/* translate data address */
+};
+
+enum xlate_readwrite {
+	XLATE_READ,		/* check for read permissions */
+	XLATE_WRITE		/* check for write permissions */
+};
+
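The instruction/data and read/write enums replace boolean arguments to the now-exported address translation helper. A usage sketch, assuming a zero return on success and the struct kvmppc_pte raddr field used elsewhere in this tree:

	struct kvmppc_pte pte;
	if (kvmppc_xlate(vcpu, eaddr, XLATE_DATA, XLATE_READ, &pte) == 0)
		gpa = pte.raddr;	/* translated guest physical address */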
 extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern void kvmppc_handler_highmem(void);
@@ -62,8 +76,16 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			       u64 val, unsigned int bytes,
 			       int is_default_endian);
 
+extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+				 enum instruction_type type, u32 *inst);
+
+extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+		     bool data);
+extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+		     bool data);
 extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
@@ -86,6 +108,9 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
                               gva_t eaddr);
 extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
+extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr,
+			enum xlate_instdata xlid, enum xlate_readwrite xlrw,
+			struct kvmppc_pte *pte);
 
 extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
                                                 unsigned int id);
@@ -106,6 +131,14 @@ extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong dear_flags,
+					ulong esr_flags);
+extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+					   ulong dear_flags,
+					   ulong esr_flags);
+extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
+					   ulong esr_flags);
 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
 
@@ -228,12 +261,35 @@ struct kvmppc_ops {
 	void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
 	long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
 			      unsigned long arg);
-
+	int (*hcall_implemented)(unsigned long hcall);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
 extern struct kvmppc_ops *kvmppc_pr_ops;
 
+static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu,
+					enum instruction_type type, u32 *inst)
+{
+	int ret = EMULATE_DONE;
+	u32 fetched_inst;
+
+	/* Load the instruction manually if it failed to do so in the
+	 * exit path */
+	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+		ret = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst);
+
+	/* Write fetch_failed unswapped if the fetch failed */
+	if (ret == EMULATE_DONE)
+		fetched_inst = kvmppc_need_byteswap(vcpu) ?
+				swab32(vcpu->arch.last_inst) :
+				vcpu->arch.last_inst;
+	else
+		fetched_inst = vcpu->arch.last_inst;
+
+	*inst = fetched_inst;
+	return ret;
+}
+
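A typical caller pattern for the new two-step fetch, sketched (the RESUME_GUEST handling is an assumption; real callers differ per exit path):

	u32 inst;
	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
		return RESUME_GUEST;	/* fetch faulted; re-enter and retry */
	/* ... decode and emulate inst ... */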
 static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
 {
 	return kvm->arch.kvm_ops == kvmppc_hv_ops;
@@ -392,6 +448,17 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 	{ return 0; }
 #endif
 
+static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+	return mfspr(SPRN_GEPR);
+#elif defined(CONFIG_BOOKE)
+	return vcpu->arch.epr;
+#else
+	return 0;
+#endif
+}
+
 static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -472,8 +539,20 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
 #endif
 }
 
+#define SPRNG_WRAPPER_GET(reg, bookehv_spr)				\
+static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu)		\
+{									\
+	return mfspr(bookehv_spr);					\
+}									\
+
+#define SPRNG_WRAPPER_SET(reg, bookehv_spr)				\
+static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val)	\
+{									\
+	mtspr(bookehv_spr, val);						\
+}									\
+
 #define SHARED_WRAPPER_GET(reg, size)					\
-static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu)	\
+static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu)		\
 {									\
 	if (kvmppc_shared_big_endian(vcpu))				\
 	       return be##size##_to_cpu(vcpu->arch.shared->reg);	\
@@ -494,14 +573,31 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val)	\
 	SHARED_WRAPPER_GET(reg, size)					\
 	SHARED_WRAPPER_SET(reg, size)					\
 
+#define SPRNG_WRAPPER(reg, bookehv_spr)					\
+	SPRNG_WRAPPER_GET(reg, bookehv_spr)				\
+	SPRNG_WRAPPER_SET(reg, bookehv_spr)				\
+
+#ifdef CONFIG_KVM_BOOKE_HV
+
+#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr)			\
+	SPRNG_WRAPPER(reg, bookehv_spr)					\
+
+#else
+
+#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr)			\
+	SHARED_WRAPPER(reg, size)					\
+
+#endif
+
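On CONFIG_KVM_BOOKE_HV the guest's SRR0/SRR1, SPRGs, DEAR and ESR live in hardware guest SPRs, so SHARED_SPRNG_WRAPPER routes the accessors straight to mfspr/mtspr; elsewhere they fall back to the byte-swap-aware shared-page accessors. For example, SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) expands in the HV case to roughly:

	static inline ulong kvmppc_get_srr0(struct kvm_vcpu *vcpu)
	{
		return mfspr(SPRN_GSRR0);
	}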
 SHARED_WRAPPER(critical, 64)
-SHARED_WRAPPER(sprg0, 64)
-SHARED_WRAPPER(sprg1, 64)
-SHARED_WRAPPER(sprg2, 64)
-SHARED_WRAPPER(sprg3, 64)
-SHARED_WRAPPER(srr0, 64)
-SHARED_WRAPPER(srr1, 64)
-SHARED_WRAPPER(dar, 64)
+SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0)
+SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1)
+SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2)
+SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3)
+SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0)
+SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1)
+SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR)
+SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR)
 SHARED_WRAPPER_GET(msr, 64)
 static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val)
 {

+ 7 - 1
arch/powerpc/include/asm/mmu-book3e.h

@@ -40,7 +40,11 @@
 
 /* MAS registers bit definitions */
 
-#define MAS0_TLBSEL(x)		(((x) << 28) & 0x30000000)
+#define MAS0_TLBSEL_MASK	0x30000000
+#define MAS0_TLBSEL_SHIFT	28
+#define MAS0_TLBSEL(x)		(((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
+#define MAS0_GET_TLBSEL(mas0)	(((mas0) & MAS0_TLBSEL_MASK) >> \
+			MAS0_TLBSEL_SHIFT)
 #define MAS0_ESEL_MASK		0x0FFF0000
 #define MAS0_ESEL_SHIFT		16
 #define MAS0_ESEL(x)		(((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK)
@@ -58,6 +62,7 @@
 #define MAS1_TSIZE_MASK		0x00000f80
 #define MAS1_TSIZE_SHIFT	7
 #define MAS1_TSIZE(x)		(((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
+#define MAS1_GET_TSIZE(mas1)	(((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT)
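MAS0_GET_TLBSEL and MAS1_GET_TSIZE are the decode counterparts of the existing encode macros, e.g. MAS1_GET_TSIZE(MAS1_TSIZE(x)) == x for in-range x; they let the e500 code read fields back out of saved MAS register images.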
 
 #define MAS2_EPN		(~0xFFFUL)
 #define MAS2_X0			0x00000040
@@ -86,6 +91,7 @@
 #define MAS3_SPSIZE		0x0000003e
 #define MAS3_SPSIZE_SHIFT	1
 
+#define MAS4_TLBSEL_MASK	MAS0_TLBSEL_MASK
 #define MAS4_TLBSELD(x) 	MAS0_TLBSEL(x)
 #define MAS4_INDD		0x00008000	/* Default IND */
 #define MAS4_TSIZED(x)		MAS1_TSIZE(x)

+ 17 - 0
arch/powerpc/include/asm/ppc-opcode.h

@@ -139,6 +139,7 @@
 #define PPC_INST_ISEL			0x7c00001e
 #define PPC_INST_ISEL_MASK		0xfc00003e
 #define PPC_INST_LDARX			0x7c0000a8
+#define PPC_INST_LOGMPP			0x7c0007e4
 #define PPC_INST_LSWI			0x7c0004aa
 #define PPC_INST_LSWX			0x7c00042a
 #define PPC_INST_LWARX			0x7c000028
@@ -277,6 +278,20 @@
 #define __PPC_EH(eh)	0
 #endif
 
+/* POWER8 Micro Partition Prefetch (MPP) parameters */
+/* Address mask is common for LOGMPP instruction and MPPR SPR */
+#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000
+
+/* Bits 60 and 61 of the MPPR SPR should be set to one of the following */
+/* Aborting the fetch is done by setting 00 in the table size bits */
+#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60)
+#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60)
+
+/* Bits 54 and 55 of the register operand of the LOGMPP instruction should be set to: */
+#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54)
+#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54)
+#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54)
+
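Taken together with the logmpp() helper added to cache.h above and SPRN_MPPR below, the intended Micro Partition Prefetch flow is roughly this sketch (mpp_buffer allocation and alignment to PPC_MPPE_ADDRESS_MASK not shown):

	u64 buf = virt_to_phys(mpp_buffer) & PPC_MPPE_ADDRESS_MASK;
	logmpp(buf | PPC_LOGMPP_LOG_L2);	/* log L2 contents on switch-out */
	/* ... later, when the vcore is switched back in: */
	mtspr(SPRN_MPPR, buf | PPC_MPPR_FETCH_WHOLE_TABLE);	/* prefetch it back */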
 /* Deal with instructions that older assemblers aren't aware of */
 #define	PPC_DCBAL(a, b)		stringify_in_c(.long PPC_INST_DCBAL | \
 					__PPC_RA(a) | __PPC_RB(b))
@@ -285,6 +300,8 @@
 #define PPC_LDARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LDARX | \
 					___PPC_RT(t) | ___PPC_RA(a) | \
 					___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_LOGMPP(b)		stringify_in_c(.long PPC_INST_LOGMPP | \
+					__PPC_RB(b))
 #define PPC_LWARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LWARX | \
 					___PPC_RT(t) | ___PPC_RA(a) | \
 					___PPC_RB(b) | __PPC_EH(eh))

+ 10 - 3
arch/powerpc/include/asm/reg.h

@@ -225,6 +225,7 @@
 #define   CTRL_TE	0x00c00000	/* thread enable */
 #define   CTRL_RUNLATCH	0x1
 #define SPRN_DAWR	0xB4
+#define SPRN_MPPR	0xB8	/* Micro Partition Prefetch Register */
 #define SPRN_RPR	0xBA	/* Relative Priority Register */
 #define SPRN_CIABR	0xBB
 #define   CIABR_PRIV		0x3
@@ -944,9 +945,6 @@
  *      readable variant for reads, which can avoid a fault
  *      with KVM type virtualization.
  *
- *      (*) Under KVM, the host SPRG1 is used to point to
- *      the current VCPU data structure
- *
  * 32-bit 8xx:
  *	- SPRG0 scratch for exception vectors
  *	- SPRG1 scratch for exception vectors
@@ -1203,6 +1201,15 @@
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
 
+static inline unsigned long mfvtb(void)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return mfspr(SPRN_VTB);
+#endif
+	return 0;
+}
+
 #ifdef __powerpc64__
 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
 #define mftb()		({unsigned long rval;				\

+ 9 - 0
arch/powerpc/include/asm/time.h

@@ -102,6 +102,15 @@ static inline u64 get_rtc(void)
 	return (u64)hi * 1000000000 + lo;
 }
 
+static inline u64 get_vtb(void)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return mfvtb();
+#endif
+	return 0;
+}
+
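get_vtb() (and mfvtb() in reg.h) read the POWER8 Virtual Time Base and deliberately return 0 on pre-ISA-2.07 CPUs, so callers can snapshot it unconditionally, e.g. the new entry_vtb bookkeeping in kvm_host.h:

	vcpu->arch.entry_vtb = get_vtb();	/* snapshot at guest entry */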
 #ifdef CONFIG_PPC64
 static inline u64 get_tb(void)
 {

+ 2 - 0
arch/powerpc/include/uapi/asm/kvm.h

@@ -548,6 +548,7 @@ struct kvm_get_htab_header {
 
 #define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
 #define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
+#define KVM_REG_PPC_LPCR_64	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5)
 #define KVM_REG_PPC_PPR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6)
 
 /* Architecture compatibility level */
@@ -555,6 +556,7 @@ struct kvm_get_htab_header {
 
 #define KVM_REG_PPC_DABRX	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
 #define KVM_REG_PPC_WORT	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
+#define KVM_REG_PPC_SPRG9	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs

+ 2 - 0
arch/powerpc/kernel/asm-offsets.c

@@ -491,6 +491,7 @@ int main(void)
 	DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
 	DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
+	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
 	DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
 	DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
@@ -665,6 +666,7 @@ int main(void)
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
 	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
 	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+	DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9));
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));

+ 0 - 237
arch/powerpc/kvm/44x.c

@@ -1,237 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/module.h>
-#include <linux/miscdevice.h>
-
-#include <asm/reg.h>
-#include <asm/cputable.h>
-#include <asm/tlbflush.h>
-#include <asm/kvm_44x.h>
-#include <asm/kvm_ppc.h>
-
-#include "44x_tlb.h"
-#include "booke.h"
-
-static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu)
-{
-	kvmppc_booke_vcpu_load(vcpu, cpu);
-	kvmppc_44x_tlb_load(vcpu);
-}
-
-static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu)
-{
-	kvmppc_44x_tlb_put(vcpu);
-	kvmppc_booke_vcpu_put(vcpu);
-}
-
-int kvmppc_core_check_processor_compat(void)
-{
-	int r;
-
-	if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0)
-		r = 0;
-	else
-		r = -ENOTSUPP;
-
-	return r;
-}
-
-int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
-	int i;
-
-	tlbe->tid = 0;
-	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
-	tlbe->word1 = 0;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
-	tlbe++;
-	tlbe->tid = 0;
-	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
-	tlbe->word1 = 0xef600000;
-	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
-	              | PPC44x_TLB_I | PPC44x_TLB_G;
-
-	/* Since the guest can directly access the timebase, it must know the
-	 * real timebase frequency. Accordingly, it must see the state of
-	 * CCR1[TCS]. */
-	/* XXX CCR1 doesn't exist on all 440 SoCs. */
-	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
-
-	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
-		vcpu_44x->shadow_refs[i].gtlb_index = -1;
-
-	vcpu->arch.cpu_type = KVM_CPU_440;
-	vcpu->arch.pvr = mfspr(SPRN_PVR);
-
-	return 0;
-}
-
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
-int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
-                               struct kvm_translation *tr)
-{
-	int index;
-	gva_t eaddr;
-	u8 pid;
-	u8 as;
-
-	eaddr = tr->linear_address;
-	pid = (tr->linear_address >> 32) & 0xff;
-	as = (tr->linear_address >> 40) & 0x1;
-
-	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
-	if (index == -1) {
-		tr->valid = 0;
-		return 0;
-	}
-
-	tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
-	/* XXX what does "writeable" and "usermode" even mean? */
-	tr->valid = 1;
-
-	return 0;
-}
-
-static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu,
-				      struct kvm_sregs *sregs)
-{
-	return kvmppc_get_sregs_ivor(vcpu, sregs);
-}
-
-static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu,
-				     struct kvm_sregs *sregs)
-{
-	return kvmppc_set_sregs_ivor(vcpu, sregs);
-}
-
-static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
-				  union kvmppc_one_reg *val)
-{
-	return -EINVAL;
-}
-
-static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
-				  union kvmppc_one_reg *val)
-{
-	return -EINVAL;
-}
-
-static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm,
-						    unsigned int id)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x;
-	struct kvm_vcpu *vcpu;
-	int err;
-
-	vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	if (!vcpu_44x) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	vcpu = &vcpu_44x->vcpu;
-	err = kvm_vcpu_init(vcpu, kvm, id);
-	if (err)
-		goto free_vcpu;
-
-	vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-	if (!vcpu->arch.shared)
-		goto uninit_vcpu;
-
-	return vcpu;
-
-uninit_vcpu:
-	kvm_vcpu_uninit(vcpu);
-free_vcpu:
-	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
-out:
-	return ERR_PTR(err);
-}
-
-static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-
-	free_page((unsigned long)vcpu->arch.shared);
-	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
-}
-
-static int kvmppc_core_init_vm_44x(struct kvm *kvm)
-{
-	return 0;
-}
-
-static void kvmppc_core_destroy_vm_44x(struct kvm *kvm)
-{
-}
-
-static struct kvmppc_ops kvm_ops_44x = {
-	.get_sregs = kvmppc_core_get_sregs_44x,
-	.set_sregs = kvmppc_core_set_sregs_44x,
-	.get_one_reg = kvmppc_get_one_reg_44x,
-	.set_one_reg = kvmppc_set_one_reg_44x,
-	.vcpu_load   = kvmppc_core_vcpu_load_44x,
-	.vcpu_put    = kvmppc_core_vcpu_put_44x,
-	.vcpu_create = kvmppc_core_vcpu_create_44x,
-	.vcpu_free   = kvmppc_core_vcpu_free_44x,
-	.mmu_destroy  = kvmppc_mmu_destroy_44x,
-	.init_vm = kvmppc_core_init_vm_44x,
-	.destroy_vm = kvmppc_core_destroy_vm_44x,
-	.emulate_op = kvmppc_core_emulate_op_44x,
-	.emulate_mtspr = kvmppc_core_emulate_mtspr_44x,
-	.emulate_mfspr = kvmppc_core_emulate_mfspr_44x,
-};
-
-static int __init kvmppc_44x_init(void)
-{
-	int r;
-
-	r = kvmppc_booke_init();
-	if (r)
-		goto err_out;
-
-	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
-	if (r)
-		goto err_out;
-	kvm_ops_44x.owner = THIS_MODULE;
-	kvmppc_pr_ops = &kvm_ops_44x;
-
-err_out:
-	return r;
-}
-
-static void __exit kvmppc_44x_exit(void)
-{
-	kvmppc_pr_ops = NULL;
-	kvmppc_booke_exit();
-}
-
-module_init(kvmppc_44x_init);
-module_exit(kvmppc_44x_exit);
-MODULE_ALIAS_MISCDEV(KVM_MINOR);
-MODULE_ALIAS("devname:kvm");

+ 0 - 194
arch/powerpc/kvm/44x_emulate.c

@@ -1,194 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <asm/kvm_ppc.h>
-#include <asm/dcr.h>
-#include <asm/dcr-regs.h>
-#include <asm/disassemble.h>
-#include <asm/kvm_44x.h>
-#include "timing.h"
-
-#include "booke.h"
-#include "44x_tlb.h"
-
-#define XOP_MFDCRX  259
-#define XOP_MFDCR   323
-#define XOP_MTDCRX  387
-#define XOP_MTDCR   451
-#define XOP_TLBSX   914
-#define XOP_ICCCI   966
-#define XOP_TLBWE   978
-
-static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
-{
-	/* emulate some access in kernel */
-	switch (dcrn) {
-	case DCRN_CPR0_CONFIG_ADDR:
-		vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
-		return EMULATE_DONE;
-	default:
-		vcpu->run->dcr.dcrn = dcrn;
-		vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
-		vcpu->run->dcr.is_write = 1;
-		vcpu->arch.dcr_is_write = 1;
-		vcpu->arch.dcr_needed = 1;
-		kvmppc_account_exit(vcpu, DCR_EXITS);
-		return EMULATE_DO_DCR;
-	}
-}
-
-static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
-{
-	/* The guest may access CPR0 registers to determine the timebase
-	 * frequency, and it must know the real host frequency because it
-	 * can directly access the timebase registers.
-	 *
-	 * It would be possible to emulate those accesses in userspace,
-	 * but userspace can really only figure out the end frequency.
-	 * We could decompose that into the factors that compute it, but
-	 * that's tricky math, and it's easier to just report the real
-	 * CPR0 values.
-	 */
-	switch (dcrn) {
-	case DCRN_CPR0_CONFIG_ADDR:
-		kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
-		break;
-	case DCRN_CPR0_CONFIG_DATA:
-		local_irq_disable();
-		mtdcr(DCRN_CPR0_CONFIG_ADDR,
-			  vcpu->arch.cpr0_cfgaddr);
-		kvmppc_set_gpr(vcpu, rt,
-			       mfdcr(DCRN_CPR0_CONFIG_DATA));
-		local_irq_enable();
-		break;
-	default:
-		vcpu->run->dcr.dcrn = dcrn;
-		vcpu->run->dcr.data =  0;
-		vcpu->run->dcr.is_write = 0;
-		vcpu->arch.dcr_is_write = 0;
-		vcpu->arch.io_gpr = rt;
-		vcpu->arch.dcr_needed = 1;
-		kvmppc_account_exit(vcpu, DCR_EXITS);
-		return EMULATE_DO_DCR;
-	}
-
-	return EMULATE_DONE;
-}
-
-int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
-			       unsigned int inst, int *advance)
-{
-	int emulated = EMULATE_DONE;
-	int dcrn = get_dcrn(inst);
-	int ra = get_ra(inst);
-	int rb = get_rb(inst);
-	int rc = get_rc(inst);
-	int rs = get_rs(inst);
-	int rt = get_rt(inst);
-	int ws = get_ws(inst);
-
-	switch (get_op(inst)) {
-	case 31:
-		switch (get_xop(inst)) {
-
-		case XOP_MFDCR:
-			emulated = emulate_mfdcr(vcpu, rt, dcrn);
-			break;
-
-		case XOP_MFDCRX:
-			emulated = emulate_mfdcr(vcpu, rt,
-					kvmppc_get_gpr(vcpu, ra));
-			break;
-
-		case XOP_MTDCR:
-			emulated = emulate_mtdcr(vcpu, rs, dcrn);
-			break;
-
-		case XOP_MTDCRX:
-			emulated = emulate_mtdcr(vcpu, rs,
-					kvmppc_get_gpr(vcpu, ra));
-			break;
-
-		case XOP_TLBWE:
-			emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
-			break;
-
-		case XOP_TLBSX:
-			emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
-			break;
-
-		case XOP_ICCCI:
-			break;
-
-		default:
-			emulated = EMULATE_FAIL;
-		}
-
-		break;
-
-	default:
-		emulated = EMULATE_FAIL;
-	}
-
-	if (emulated == EMULATE_FAIL)
-		emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
-
-	return emulated;
-}
-
-int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
-{
-	int emulated = EMULATE_DONE;
-
-	switch (sprn) {
-	case SPRN_PID:
-		kvmppc_set_pid(vcpu, spr_val); break;
-	case SPRN_MMUCR:
-		vcpu->arch.mmucr = spr_val; break;
-	case SPRN_CCR0:
-		vcpu->arch.ccr0 = spr_val; break;
-	case SPRN_CCR1:
-		vcpu->arch.ccr1 = spr_val; break;
-	default:
-		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
-	}
-
-	return emulated;
-}
-
-int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
-{
-	int emulated = EMULATE_DONE;
-
-	switch (sprn) {
-	case SPRN_PID:
-		*spr_val = vcpu->arch.pid; break;
-	case SPRN_MMUCR:
-		*spr_val = vcpu->arch.mmucr; break;
-	case SPRN_CCR0:
-		*spr_val = vcpu->arch.ccr0; break;
-	case SPRN_CCR1:
-		*spr_val = vcpu->arch.ccr1; break;
-	default:
-		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
-	}
-
-	return emulated;
-}
-

+ 0 - 528
arch/powerpc/kvm/44x_tlb.c

@@ -1,528 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2007
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/highmem.h>
-
-#include <asm/tlbflush.h>
-#include <asm/mmu-44x.h>
-#include <asm/kvm_ppc.h>
-#include <asm/kvm_44x.h>
-#include "timing.h"
-
-#include "44x_tlb.h"
-#include "trace.h"
-
-#ifndef PPC44x_TLBE_SIZE
-#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
-#endif
-
-#define PAGE_SIZE_4K (1<<12)
-#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
-
-#define PPC44x_TLB_UATTR_MASK \
-	(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
-#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
-#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
-
-#ifdef DEBUG
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	struct kvmppc_44x_tlbe *tlbe;
-	int i;
-
-	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
-	printk("| %2s | %3s | %8s | %8s | %8s |\n",
-			"nr", "tid", "word0", "word1", "word2");
-
-	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
-		tlbe = &vcpu_44x->guest_tlb[i];
-		if (tlbe->word0 & PPC44x_TLB_VALID)
-			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
-			       i, tlbe->tid, tlbe->word0, tlbe->word1,
-			       tlbe->word2);
-	}
-}
-#endif
-
-static inline void kvmppc_44x_tlbie(unsigned int index)
-{
-	/* 0 <= index < 64, so the V bit is clear and we can use the index as
-	 * word0. */
-	asm volatile(
-		"tlbwe %[index], %[index], 0\n"
-	:
-	: [index] "r"(index)
-	);
-}
-
-static inline void kvmppc_44x_tlbre(unsigned int index,
-                                    struct kvmppc_44x_tlbe *tlbe)
-{
-	asm volatile(
-		"tlbre %[word0], %[index], 0\n"
-		"mfspr %[tid], %[sprn_mmucr]\n"
-		"andi. %[tid], %[tid], 0xff\n"
-		"tlbre %[word1], %[index], 1\n"
-		"tlbre %[word2], %[index], 2\n"
-		: [word0] "=r"(tlbe->word0),
-		  [word1] "=r"(tlbe->word1),
-		  [word2] "=r"(tlbe->word2),
-		  [tid]   "=r"(tlbe->tid)
-		: [index] "r"(index),
-		  [sprn_mmucr] "i"(SPRN_MMUCR)
-		: "cc"
-	);
-}
-
-static inline void kvmppc_44x_tlbwe(unsigned int index,
-                                    struct kvmppc_44x_tlbe *stlbe)
-{
-	unsigned long tmp;
-
-	asm volatile(
-		"mfspr %[tmp], %[sprn_mmucr]\n"
-		"rlwimi %[tmp], %[tid], 0, 0xff\n"
-		"mtspr %[sprn_mmucr], %[tmp]\n"
-		"tlbwe %[word0], %[index], 0\n"
-		"tlbwe %[word1], %[index], 1\n"
-		"tlbwe %[word2], %[index], 2\n"
-		: [tmp]   "=&r"(tmp)
-		: [word0] "r"(stlbe->word0),
-		  [word1] "r"(stlbe->word1),
-		  [word2] "r"(stlbe->word2),
-		  [tid]   "r"(stlbe->tid),
-		  [index] "r"(index),
-		  [sprn_mmucr] "i"(SPRN_MMUCR)
-	);
-}
-
-static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
-{
-	/* We only care about the guest's permission and user bits. */
-	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
-
-	if (!usermode) {
-		/* Guest is in supervisor mode, so we need to translate guest
-		 * supervisor permissions into user permissions. */
-		attrib &= ~PPC44x_TLB_USER_PERM_MASK;
-		attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3;
-	}
-
-	/* Make sure host can always access this memory. */
-	attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
-
-	/* WIMGE = 0b00100 */
-	attrib |= PPC44x_TLB_M;
-
-	return attrib;
-}
-
-/* Load shadow TLB back into hardware. */
-void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	int i;
-
-	for (i = 0; i <= tlb_44x_hwater; i++) {
-		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
-
-		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
-			kvmppc_44x_tlbwe(i, stlbe);
-	}
-}
-
-static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
-                                         unsigned int i)
-{
-	vcpu_44x->shadow_tlb_mod[i] = 1;
-}
-
-/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
-void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	int i;
-
-	for (i = 0; i <= tlb_44x_hwater; i++) {
-		struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
-
-		if (vcpu_44x->shadow_tlb_mod[i])
-			kvmppc_44x_tlbre(i, stlbe);
-
-		if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
-			kvmppc_44x_tlbie(i);
-	}
-}
-
-
-/* Search the guest TLB for a matching entry. */
-int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
-                         unsigned int as)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	int i;
-
-	/* XXX Replace loop with fancy data structures. */
-	for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
-		struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
-		unsigned int tid;
-
-		if (eaddr < get_tlb_eaddr(tlbe))
-			continue;
-
-		if (eaddr > get_tlb_end(tlbe))
-			continue;
-
-		tid = get_tlb_tid(tlbe);
-		if (tid && (tid != pid))
-			continue;
-
-		if (!get_tlb_v(tlbe))
-			continue;
-
-		if (get_tlb_ts(tlbe) != as)
-			continue;
-
-		return i;
-	}
-
-	return -1;
-}
-
-gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
-                       gva_t eaddr)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
-	unsigned int pgmask = get_tlb_bytes(gtlbe) - 1;
-
-	return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
-}
-
-int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
-{
-	unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
-
-	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-}
-
-int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
-{
-	unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS);
-
-	return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
-}
-
-void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
-{
-}
-
-static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
-                                      unsigned int stlb_index)
-{
-	struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
-
-	if (!ref->page)
-		return;
-
-	/* Discard from the TLB. */
-	/* Note: we could actually invalidate a host mapping, if the host overwrote
-	 * this TLB entry since we inserted a guest mapping. */
-	kvmppc_44x_tlbie(stlb_index);
-
-	/* Now release the page. */
-	if (ref->writeable)
-		kvm_release_page_dirty(ref->page);
-	else
-		kvm_release_page_clean(ref->page);
-
-	ref->page = NULL;
-
-	/* XXX set tlb_44x_index to stlb_index? */
-
-	trace_kvm_stlb_inval(stlb_index);
-}
-
-void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	int i;
-
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_44x_shadow_release(vcpu_44x, i);
-}
-
-/**
- * kvmppc_mmu_map -- create a host mapping for guest memory
- *
- * If the guest wanted a larger page than the host supports, only the first
- * host page is mapped here and the rest are demand faulted.
- *
- * If the guest wanted a smaller page than the host page size, we map only the
- * guest-size page (i.e. not a full host page mapping).
- *
- * Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB.
- */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
-                    unsigned int gtlb_index)
-{
-	struct kvmppc_44x_tlbe stlbe;
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
-	struct kvmppc_44x_shadow_ref *ref;
-	struct page *new_page;
-	hpa_t hpaddr;
-	gfn_t gfn;
-	u32 asid = gtlbe->tid;
-	u32 flags = gtlbe->word2;
-	u32 max_bytes = get_tlb_bytes(gtlbe);
-	unsigned int victim;
-
-	/* Select TLB entry to clobber. Indirectly guard against races with the TLB
-	 * miss handler by disabling interrupts. */
-	local_irq_disable();
-	victim = ++tlb_44x_index;
-	if (victim > tlb_44x_hwater)
-		victim = 0;
-	tlb_44x_index = victim;
-	local_irq_enable();
-
-	/* Get reference to new page. */
-	gfn = gpaddr >> PAGE_SHIFT;
-	new_page = gfn_to_page(vcpu->kvm, gfn);
-	if (is_error_page(new_page)) {
-		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
-			(unsigned long long)gfn);
-		return;
-	}
-	hpaddr = page_to_phys(new_page);
-
-	/* Invalidate any previous shadow mappings. */
-	kvmppc_44x_shadow_release(vcpu_44x, victim);
-
-	/* XXX Make sure (va, size) doesn't overlap any other
-	 * entries. 440x6 user manual says the result would be
-	 * "undefined." */
-
-	/* XXX what about AS? */
-
-	/* Force TS=1 for all guest mappings. */
-	stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
-
-	if (max_bytes >= PAGE_SIZE) {
-		/* Guest mapping is larger than or equal to host page size. We can use
-		 * a "native" host mapping. */
-		stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
-	} else {
-		/* Guest mapping is smaller than host page size. We must restrict the
-		 * size of the mapping to be at most the smaller of the two, but for
-		 * simplicity we fall back to a 4K mapping (this is probably what the
-		 * guest is using anyways). */
-		stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
-
-		/* 'hpaddr' is a host page, which is larger than the mapping we're
-		 * inserting here. To compensate, we must add the in-page offset to the
-		 * sub-page. */
-		hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
-	}
-
-	stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
-	stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
-	                                            vcpu->arch.shared->msr & MSR_PR);
-	stlbe.tid = !(asid & 0xff);
-
-	/* Keep track of the reference so we can properly release it later. */
-	ref = &vcpu_44x->shadow_refs[victim];
-	ref->page = new_page;
-	ref->gtlb_index = gtlb_index;
-	ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
-	ref->tid = stlbe.tid;
-
-	/* Insert shadow mapping into hardware TLB. */
-	kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
-	kvmppc_44x_tlbwe(victim, &stlbe);
-	trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1,
-			     stlbe.word2);
-}
-
-/* For a particular guest TLB entry, invalidate the corresponding host TLB
- * mappings and release the host pages. */
-static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
-                                  unsigned int gtlb_index)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
-		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
-		if (ref->gtlb_index == gtlb_index)
-			kvmppc_44x_shadow_release(vcpu_44x, i);
-	}
-}
-
-void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
-{
-	int usermode = vcpu->arch.shared->msr & MSR_PR;
-
-	vcpu->arch.shadow_pid = !usermode;
-}
-
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	int i;
-
-	if (unlikely(vcpu->arch.pid == new_pid))
-		return;
-
-	vcpu->arch.pid = new_pid;
-
-	/* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
-	 * can't access guest kernel mappings (TID=1). When we switch to a new
-	 * guest PID, which will also use host PID=0, we must discard the old guest
-	 * userspace mappings. */
-	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
-		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
-
-		if (ref->tid == 0)
-			kvmppc_44x_shadow_release(vcpu_44x, i);
-	}
-}
-
-static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
-                             const struct kvmppc_44x_tlbe *tlbe)
-{
-	gpa_t gpa;
-
-	if (!get_tlb_v(tlbe))
-		return 0;
-
-	/* Does it match current guest AS? */
-	/* XXX what about IS != DS? */
-	if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
-		return 0;
-
-	gpa = get_tlb_raddr(tlbe);
-	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
-		/* Mapping is not for RAM. */
-		return 0;
-
-	return 1;
-}
-
-int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
-{
-	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-	struct kvmppc_44x_tlbe *tlbe;
-	unsigned int gtlb_index;
-	int idx;
-
-	gtlb_index = kvmppc_get_gpr(vcpu, ra);
-	if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) {
-		printk("%s: index %d\n", __func__, gtlb_index);
-		kvmppc_dump_vcpu(vcpu);
-		return EMULATE_FAIL;
-	}
-
-	tlbe = &vcpu_44x->guest_tlb[gtlb_index];
-
-	/* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
-	if (tlbe->word0 & PPC44x_TLB_VALID)
-		kvmppc_44x_invalidate(vcpu, gtlb_index);
-
-	switch (ws) {
-	case PPC44x_TLB_PAGEID:
-		tlbe->tid = get_mmucr_stid(vcpu);
-		tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
-		break;
-
-	case PPC44x_TLB_XLAT:
-		tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
-		break;
-
-	case PPC44x_TLB_ATTRIB:
-		tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
-		break;
-
-	default:
-		return EMULATE_FAIL;
-	}
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-	if (tlbe_is_host_safe(vcpu, tlbe)) {
-		gva_t eaddr;
-		gpa_t gpaddr;
-		u32 bytes;
-
-		eaddr = get_tlb_eaddr(tlbe);
-		gpaddr = get_tlb_raddr(tlbe);
-
-		/* Use the advertised page size to mask effective and real addrs. */
-		bytes = get_tlb_bytes(tlbe);
-		eaddr &= ~(bytes - 1);
-		gpaddr &= ~(bytes - 1);
-
-		kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
-	}
-
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
-	trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
-			     tlbe->word2);
-
-	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
-	return EMULATE_DONE;
-}
-
-int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
-{
-	u32 ea;
-	int gtlb_index;
-	unsigned int as = get_mmucr_sts(vcpu);
-	unsigned int pid = get_mmucr_stid(vcpu);
-
-	ea = kvmppc_get_gpr(vcpu, rb);
-	if (ra)
-		ea += kvmppc_get_gpr(vcpu, ra);
-
-	gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
-	if (rc) {
-		u32 cr = kvmppc_get_cr(vcpu);
-
-		if (gtlb_index < 0)
-			kvmppc_set_cr(vcpu, cr & ~0x20000000);
-		else
-			kvmppc_set_cr(vcpu, cr | 0x20000000);
-	}
-	kvmppc_set_gpr(vcpu, rt, gtlb_index);
-
-	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
-	return EMULATE_DONE;
-}

+ 0 - 86
arch/powerpc/kvm/44x_tlb.h

@@ -1,86 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright IBM Corp. 2007
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#ifndef __KVM_POWERPC_TLB_H__
-#define __KVM_POWERPC_TLB_H__
-
-#include <linux/kvm_host.h>
-#include <asm/mmu-44x.h>
-
-extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                unsigned int pid, unsigned int as);
-
-extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
-                                 u8 rc);
-extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
-
-/* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
-{
-	return (tlbe->word0 >> 4) & 0xf;
-}
-
-static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
-{
-	return tlbe->word0 & 0xfffffc00;
-}
-
-static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
-{
-	unsigned int pgsize = get_tlb_size(tlbe);
-	return 1 << 10 << (pgsize << 1);
-}
-
-static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
-{
-	return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
-}
-
-static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
-{
-	u64 word1 = tlbe->word1;
-	return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
-}
-
-static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
-{
-	return tlbe->tid & 0xff;
-}
-
-static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
-{
-	return (tlbe->word0 >> 8) & 0x1;
-}
-
-static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
-{
-	return (tlbe->word0 >> 9) & 0x1;
-}
-
-static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.mmucr & 0xff;
-}
-
-static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
-{
-	return (vcpu->arch.mmucr >> 16) & 0x1;
-}
-
-#endif /* __KVM_POWERPC_TLB_H__ */

+ 4 - 16
arch/powerpc/kvm/Kconfig

@@ -75,7 +75,6 @@ config KVM_BOOK3S_64
 config KVM_BOOK3S_64_HV
 	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
-	depends on !CPU_LITTLE_ENDIAN
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
@@ -113,23 +112,9 @@ config KVM_BOOK3S_64_PR
 config KVM_BOOKE_HV
 	bool
 
-config KVM_440
-	bool "KVM support for PowerPC 440 processors"
-	depends on 44x
-	select KVM
-	select KVM_MMIO
-	---help---
-	  Support running unmodified 440 guest kernels in virtual machines on
-	  440 host processors.
-
-	  This module provides access to the hardware capabilities through
-	  a character device node named /dev/kvm.
-
-	  If unsure, say N.
-
 config KVM_EXIT_TIMING
 	bool "Detailed exit timing"
-	depends on KVM_440 || KVM_E500V2 || KVM_E500MC
+	depends on KVM_E500V2 || KVM_E500MC
 	---help---
 	  Calculate elapsed time for every exit/enter cycle. A per-vcpu
 	  report is available in debugfs kvm/vm#_vcpu#_timing.
@@ -173,6 +158,7 @@ config KVM_MPIC
 	bool "KVM in-kernel MPIC emulation"
 	depends on KVM && E500
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_MSI
 	help
@@ -184,6 +170,8 @@ config KVM_MPIC
 config KVM_XICS
 	bool "KVM in-kernel XICS emulation"
 	depends on KVM_BOOK3S_64 && !KVM_MPIC
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	---help---
 	  Include support for the XICS (eXternal Interrupt Controller
 	  Specification) interrupt controller architecture used on

+ 4 - 14
arch/powerpc/kvm/Makefile

@@ -10,27 +10,17 @@ KVM := ../../../virt/kvm
 common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 		$(KVM)/eventfd.o
 
-CFLAGS_44x_tlb.o  := -I.
 CFLAGS_e500_mmu.o := -I.
 CFLAGS_e500_mmu_host.o := -I.
 CFLAGS_emulate.o  := -I.
+CFLAGS_emulate_loadstore.o  := -I.
 
-common-objs-y += powerpc.o emulate.o
+common-objs-y += powerpc.o emulate.o emulate_loadstore.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
 obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
 
-kvm-440-objs := \
-	$(common-objs-y) \
-	booke.o \
-	booke_emulate.o \
-	booke_interrupts.o \
-	44x.o \
-	44x_tlb.o \
-	44x_emulate.o
-kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs)
-
 kvm-e500-objs := \
 	$(common-objs-y) \
 	booke.o \
@@ -58,6 +48,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
 
 kvm-pr-y := \
 	fpu.o \
+	emulate.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
 	book3s_pr_papr.o \
@@ -100,7 +91,7 @@ kvm-book3s_64-module-objs += \
 	$(KVM)/kvm_main.o \
 	$(KVM)/eventfd.o \
 	powerpc.o \
-	emulate.o \
+	emulate_loadstore.o \
 	book3s.o \
 	book3s_64_vio.o \
 	book3s_rtas.o \
@@ -126,7 +117,6 @@ kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
 
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
-obj-$(CONFIG_KVM_440) += kvm.o
 obj-$(CONFIG_KVM_E500V2) += kvm.o
 obj-$(CONFIG_KVM_E500MC) += kvm.o
 obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o

+ 75 - 81
arch/powerpc/kvm/book3s.c

@@ -72,6 +72,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
+		ulong pc = kvmppc_get_pc(vcpu);
+		if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+			kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+		vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
+
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
 	if (!is_kvmppc_hv_enabled(vcpu->kvm))
@@ -118,6 +129,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
+	kvmppc_unfixup_split_real(vcpu);
 	kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
 	kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);
 	kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
@@ -218,6 +230,23 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
 }
 
+void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
+				    ulong flags)
+{
+	kvmppc_set_dar(vcpu, dar);
+	kvmppc_set_dsisr(vcpu, flags);
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
+}
+
+void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
+{
+	u64 msr = kvmppc_get_msr(vcpu);
+	msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT);
+	msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT);
+	kvmppc_set_msr_fast(vcpu, msr);
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
+}
+
 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
 {
 	int deliver = 1;
@@ -342,18 +371,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
 
-pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
 			bool *writable)
 {
-	ulong mp_pa = vcpu->arch.magic_page_pa;
+	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
 
 	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
 		mp_pa = (uint32_t)mp_pa;
 
 	/* Magic page override */
-	if (unlikely(mp_pa) &&
-	    unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
-		     ((mp_pa & PAGE_MASK) & KVM_PAM))) {
+	gpa &= ~0xFFFULL;
+	if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) {
 		ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
 		pfn_t pfn;
 
@@ -366,11 +395,13 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
 
 	return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
 }
-EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
+EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn);
 
-static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
-			bool iswrite, struct kvmppc_pte *pte)
+int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid,
+		 enum xlate_readwrite xlrw, struct kvmppc_pte *pte)
 {
+	bool data = (xlid == XLATE_DATA);
+	bool iswrite = (xlrw == XLATE_WRITE);
 	int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));
 	int r;
 
@@ -384,88 +415,34 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
 		pte->may_write = true;
 		pte->may_execute = true;
 		r = 0;
+
+		if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR &&
+		    !data) {
+			if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
+			    ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
+				pte->raddr &= ~SPLIT_HACK_MASK;
+		}
 	}
 
 	return r;
 }
 
-static hva_t kvmppc_bad_hva(void)
-{
-	return PAGE_OFFSET;
-}
-
-static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
-			       bool read)
-{
-	hva_t hpage;
-
-	if (read && !pte->may_read)
-		goto err;
-
-	if (!read && !pte->may_write)
-		goto err;
-
-	hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-	if (kvm_is_error_hva(hpage))
-		goto err;
-
-	return hpage | (pte->raddr & ~PAGE_MASK);
-err:
-	return kvmppc_bad_hva();
-}
-
-int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
-	      bool data)
-{
-	struct kvmppc_pte pte;
-
-	vcpu->stat.st++;
-
-	if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte))
-		return -ENOENT;
-
-	*eaddr = pte.raddr;
-
-	if (!pte.may_write)
-		return -EPERM;
-
-	if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
-		return EMULATE_DO_MMIO;
-
-	return EMULATE_DONE;
-}
-EXPORT_SYMBOL_GPL(kvmppc_st);
-
-int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
-		      bool data)
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
+					 u32 *inst)
 {
-	struct kvmppc_pte pte;
-	hva_t hva = *eaddr;
-
-	vcpu->stat.ld++;
-
-	if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte))
-		goto nopte;
-
-	*eaddr = pte.raddr;
-
-	hva = kvmppc_pte_to_hva(vcpu, &pte, true);
-	if (kvm_is_error_hva(hva))
-		goto mmio;
-
-	if (copy_from_user(ptr, (void __user *)hva, size)) {
-		printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
-		goto mmio;
-	}
+	ulong pc = kvmppc_get_pc(vcpu);
+	int r;
 
-	return EMULATE_DONE;
+	if (type == INST_SC)
+		pc -= 4;
 
-nopte:
-	return -ENOENT;
-mmio:
-	return EMULATE_DO_MMIO;
+	r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false);
+	if (r == EMULATE_DONE)
+		return r;
+	else
+		return EMULATE_AGAIN;
 }
-EXPORT_SYMBOL_GPL(kvmppc_ld);
+EXPORT_SYMBOL_GPL(kvmppc_load_last_inst);
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
@@ -646,6 +623,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		case KVM_REG_PPC_BESCR:
 			val = get_reg_val(reg->id, vcpu->arch.bescr);
 			break;
+		case KVM_REG_PPC_VTB:
+			val = get_reg_val(reg->id, vcpu->arch.vtb);
+			break;
+		case KVM_REG_PPC_IC:
+			val = get_reg_val(reg->id, vcpu->arch.ic);
+			break;
 		default:
 			r = -EINVAL;
 			break;
@@ -750,6 +733,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		case KVM_REG_PPC_BESCR:
 			vcpu->arch.bescr = set_reg_val(reg->id, val);
 			break;
+		case KVM_REG_PPC_VTB:
+			vcpu->arch.vtb = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_IC:
+			vcpu->arch.ic = set_reg_val(reg->id, val);
+			break;
 		default:
 			r = -EINVAL;
 			break;
@@ -913,6 +902,11 @@ int kvmppc_core_check_processor_compat(void)
 	return 0;
 }
 
+int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
+{
+	return kvm->arch.kvm_ops->hcall_implemented(hcall);
+}
+
 static int kvmppc_book3s_init(void)
 {
 	int r;

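The reworked kvmppc_gpa_to_pfn() above compares page-aligned, KVM_PAM-masked addresses directly instead of shifting gfns back and forth. A sketch of just that check, under the assumption that addresses fit an unsigned long; is_magic_page is an illustrative name, not a kernel function:

	/* Hedged sketch of the magic-page override test in kvmppc_gpa_to_pfn(). */
	static int is_magic_page(unsigned long gpa, unsigned long magic_page_pa,
				 unsigned long pam_mask)
	{
		unsigned long mp_pa = magic_page_pa & pam_mask;

		gpa &= ~0xFFFUL;		/* drop the page offset */
		return mp_pa && (gpa & pam_mask) == mp_pa;
	}
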
+ 1 - 1
arch/powerpc/kvm/book3s_32_mmu.c

@@ -335,7 +335,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	if (r < 0)
 		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
 						   data, iswrite, true);
-	if (r < 0)
+	if (r == -ENOENT)
 		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
 						   data, iswrite, false);
 

+ 3 - 4
arch/powerpc/kvm/book3s_32_mmu_host.c

@@ -156,11 +156,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 	bool writable;
 
 	/* Get host physical address for gpa */
-	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
-				   iswrite, &writable);
+	hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
 	if (is_error_noslot_pfn(hpaddr)) {
-		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
-				 orig_pte->eaddr);
+		printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
+				 orig_pte->raddr);
 		r = -EINVAL;
 		goto out;
 	}

+ 3 - 2
arch/powerpc/kvm/book3s_64_mmu_host.c

@@ -104,9 +104,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 	smp_rmb();
 
 	/* Get host physical address for gpa */
-	pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable);
+	pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
 	if (is_error_noslot_pfn(pfn)) {
-		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn);
+		printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
+		       orig_pte->raddr);
 		r = -EINVAL;
 		goto out;
 	}

+ 72 - 73
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -448,7 +448,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	unsigned long slb_v;
 	unsigned long pp, key;
 	unsigned long v, gr;
-	unsigned long *hptep;
+	__be64 *hptep;
 	int index;
 	int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
 
@@ -471,13 +471,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 		preempt_enable();
 		return -ENOENT;
 	}
-	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
-	v = hptep[0] & ~HPTE_V_HVLOCK;
+	hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
+	v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
 	gr = kvm->arch.revmap[index].guest_rpte;
 
 	/* Unlock the HPTE */
 	asm volatile("lwsync" : : : "memory");
-	hptep[0] = v;
+	hptep[0] = cpu_to_be64(v);
 	preempt_enable();
 
 	gpte->eaddr = eaddr;
@@ -528,21 +528,14 @@ static int instruction_is_store(unsigned int instr)
 static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				  unsigned long gpa, gva_t ea, int is_store)
 {
-	int ret;
 	u32 last_inst;
-	unsigned long srr0 = kvmppc_get_pc(vcpu);
 
-	/* We try to load the last instruction.  We don't let
-	 * emulate_instruction do it as it doesn't check what
-	 * kvmppc_ld returns.
+	/*
 	 * If we fail, we just return to the guest and try executing it again.
 	 */
-	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
-		ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
-		if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
-			return RESUME_GUEST;
-		vcpu->arch.last_inst = last_inst;
-	}
+	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
+		EMULATE_DONE)
+		return RESUME_GUEST;
 
 	/*
 	 * WARNING: We do not know for sure whether the instruction we just
@@ -556,7 +549,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * we just return and retry the instruction.
 	 */
 
-	if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store)
+	if (instruction_is_store(last_inst) != !!is_store)
 		return RESUME_GUEST;
 
 	/*
@@ -581,7 +574,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				unsigned long ea, unsigned long dsisr)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long *hptep, hpte[3], r;
+	unsigned long hpte[3], r;
+	__be64 *hptep;
 	unsigned long mmu_seq, psize, pte_size;
 	unsigned long gpa_base, gfn_base;
 	unsigned long gpa, gfn, hva, pfn;
@@ -604,16 +598,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	if (ea != vcpu->arch.pgfault_addr)
 		return RESUME_GUEST;
 	index = vcpu->arch.pgfault_index;
-	hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+	hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
 	rev = &kvm->arch.revmap[index];
 	preempt_disable();
 	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
 		cpu_relax();
-	hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
-	hpte[1] = hptep[1];
+	hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+	hpte[1] = be64_to_cpu(hptep[1]);
 	hpte[2] = r = rev->guest_rpte;
 	asm volatile("lwsync" : : : "memory");
-	hptep[0] = hpte[0];
+	hptep[0] = cpu_to_be64(hpte[0]);
 	preempt_enable();
 
 	if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
@@ -729,8 +723,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	preempt_disable();
 	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
 		cpu_relax();
-	if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
-	    rev->guest_rpte != hpte[2])
+	if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] ||
+		be64_to_cpu(hptep[1]) != hpte[1] ||
+		rev->guest_rpte != hpte[2])
 		/* HPTE has been changed under us; let the guest retry */
 		goto out_unlock;
 	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
@@ -750,20 +745,20 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
 	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
 
-	if (hptep[0] & HPTE_V_VALID) {
+	if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) {
 		/* HPTE was previously valid, so we need to invalidate it */
 		unlock_rmap(rmap);
-		hptep[0] |= HPTE_V_ABSENT;
+		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
 		kvmppc_invalidate_hpte(kvm, hptep, index);
 		/* don't lose previous R and C bits */
-		r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
+		r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
 	} else {
 		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
 	}
 
-	hptep[1] = r;
+	hptep[1] = cpu_to_be64(r);
 	eieio();
-	hptep[0] = hpte[0];
+	hptep[0] = cpu_to_be64(hpte[0]);
 	asm volatile("ptesync" : : : "memory");
 	preempt_enable();
 	if (page && hpte_is_writable(r))
@@ -782,7 +777,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return ret;
 
  out_unlock:
-	hptep[0] &= ~HPTE_V_HVLOCK;
+	hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 	preempt_enable();
 	goto out_put;
 }
@@ -858,7 +853,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 {
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long h, i, j;
-	unsigned long *hptep;
+	__be64 *hptep;
 	unsigned long ptel, psize, rcbits;
 
 	for (;;) {
@@ -874,11 +869,11 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		 * rmap chain lock.
 		 */
 		i = *rmapp & KVMPPC_RMAP_INDEX;
-		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
 		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
 			/* unlock rmap before spinning on the HPTE lock */
 			unlock_rmap(rmapp);
-			while (hptep[0] & HPTE_V_HVLOCK)
+			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
 				cpu_relax();
 			continue;
 		}
@@ -897,14 +892,14 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 		/* Now check and modify the HPTE */
 		ptel = rev[i].guest_rpte;
-		psize = hpte_page_size(hptep[0], ptel);
-		if ((hptep[0] & HPTE_V_VALID) &&
+		psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
+		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
 		    hpte_rpn(ptel, psize) == gfn) {
 			if (kvm->arch.using_mmu_notifiers)
-				hptep[0] |= HPTE_V_ABSENT;
+				hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
 			kvmppc_invalidate_hpte(kvm, hptep, i);
 			/* Harvest R and C */
-			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
+			rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
 			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
 			if (rcbits & ~rev[i].guest_rpte) {
 				rev[i].guest_rpte = ptel | rcbits;
@@ -912,7 +907,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			}
 		}
 		unlock_rmap(rmapp);
-		hptep[0] &= ~HPTE_V_HVLOCK;
+		hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 	}
 	return 0;
 }
@@ -959,7 +954,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 {
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long head, i, j;
-	unsigned long *hptep;
+	__be64 *hptep;
 	int ret = 0;
 
  retry:
@@ -975,23 +970,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 	i = head = *rmapp & KVMPPC_RMAP_INDEX;
 	do {
-		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
 		j = rev[i].forw;
 
 		/* If this HPTE isn't referenced, ignore it */
-		if (!(hptep[1] & HPTE_R_R))
+		if (!(be64_to_cpu(hptep[1]) & HPTE_R_R))
 			continue;
 
 		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
 			/* unlock rmap before spinning on the HPTE lock */
 			unlock_rmap(rmapp);
-			while (hptep[0] & HPTE_V_HVLOCK)
+			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
 				cpu_relax();
 			goto retry;
 		}
 
 		/* Now check and modify the HPTE */
-		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
+		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
+		    (be64_to_cpu(hptep[1]) & HPTE_R_R)) {
 			kvmppc_clear_ref_hpte(kvm, hptep, i);
 			if (!(rev[i].guest_rpte & HPTE_R_R)) {
 				rev[i].guest_rpte |= HPTE_R_R;
@@ -999,7 +995,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			}
 			ret = 1;
 		}
-		hptep[0] &= ~HPTE_V_HVLOCK;
+		hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 	} while ((i = j) != head);
 
 	unlock_rmap(rmapp);
@@ -1033,7 +1029,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		do {
 			hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
 			j = rev[i].forw;
-			if (hp[1] & HPTE_R_R)
+			if (be64_to_cpu(hp[1]) & HPTE_R_R)
 				goto out;
 		} while ((i = j) != head);
 	}
@@ -1073,7 +1069,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 	unsigned long head, i, j;
 	unsigned long n;
 	unsigned long v, r;
-	unsigned long *hptep;
+	__be64 *hptep;
 	int npages_dirty = 0;
 
  retry:
@@ -1089,7 +1085,8 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 
 	i = head = *rmapp & KVMPPC_RMAP_INDEX;
 	do {
-		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		unsigned long hptep1;
+		hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
 		j = rev[i].forw;
 
 		/*
@@ -1106,29 +1103,30 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 		 * Otherwise we need to do the tlbie even if C==0 in
 		 * order to pick up any delayed writeback of C.
 		 */
-		if (!(hptep[1] & HPTE_R_C) &&
-		    (!hpte_is_writable(hptep[1]) || vcpus_running(kvm)))
+		hptep1 = be64_to_cpu(hptep[1]);
+		if (!(hptep1 & HPTE_R_C) &&
+		    (!hpte_is_writable(hptep1) || vcpus_running(kvm)))
 			continue;
 
 		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
 			/* unlock rmap before spinning on the HPTE lock */
 			unlock_rmap(rmapp);
-			while (hptep[0] & HPTE_V_HVLOCK)
+			while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK))
 				cpu_relax();
 			goto retry;
 		}
 
 		/* Now check and modify the HPTE */
-		if (!(hptep[0] & HPTE_V_VALID))
+		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
 			continue;
 
 		/* need to make it temporarily absent so C is stable */
-		hptep[0] |= HPTE_V_ABSENT;
+		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
 		kvmppc_invalidate_hpte(kvm, hptep, i);
-		v = hptep[0];
-		r = hptep[1];
+		v = be64_to_cpu(hptep[0]);
+		r = be64_to_cpu(hptep[1]);
 		if (r & HPTE_R_C) {
-			hptep[1] = r & ~HPTE_R_C;
+			hptep[1] = cpu_to_be64(r & ~HPTE_R_C);
 			if (!(rev[i].guest_rpte & HPTE_R_C)) {
 				rev[i].guest_rpte |= HPTE_R_C;
 				note_hpte_modification(kvm, &rev[i]);
@@ -1141,7 +1139,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 		}
 		v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
 		v |= HPTE_V_VALID;
-		hptep[0] = v;
+		hptep[0] = cpu_to_be64(v);
 	} while ((i = j) != head);
 
 	unlock_rmap(rmapp);
@@ -1305,7 +1303,7 @@ struct kvm_htab_ctx {
  * Returns 1 if this HPT entry has been modified or has pending
  * R/C bit changes.
  */
-static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
+static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp)
 {
 	unsigned long rcbits_unset;
 
@@ -1314,13 +1312,14 @@ static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
 
 	/* Also need to consider changes in reference and changed bits */
 	rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
-	if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
+	if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) &&
+	    (be64_to_cpu(hptp[1]) & rcbits_unset))
 		return 1;
 
 	return 0;
 }
 
-static long record_hpte(unsigned long flags, unsigned long *hptp,
+static long record_hpte(unsigned long flags, __be64 *hptp,
 			unsigned long *hpte, struct revmap_entry *revp,
 			int want_valid, int first_pass)
 {
@@ -1335,10 +1334,10 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
 		return 0;
 
 	valid = 0;
-	if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+	if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) {
 		valid = 1;
 		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
-		    !(hptp[0] & HPTE_V_BOLTED))
+		    !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED))
 			valid = 0;
 	}
 	if (valid != want_valid)
@@ -1350,7 +1349,7 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
 		preempt_disable();
 		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
 			cpu_relax();
-		v = hptp[0];
+		v = be64_to_cpu(hptp[0]);
 
 		/* re-evaluate valid and dirty from synchronized HPTE value */
 		valid = !!(v & HPTE_V_VALID);
@@ -1358,9 +1357,9 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
 
 		/* Harvest R and C into guest view if necessary */
 		rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
-		if (valid && (rcbits_unset & hptp[1])) {
-			revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
-				HPTE_GR_MODIFIED;
+		if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) {
+			revp->guest_rpte |= (be64_to_cpu(hptp[1]) &
+				(HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
 			dirty = 1;
 		}
 
@@ -1379,13 +1378,13 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
 			revp->guest_rpte = r;
 		}
 		asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-		hptp[0] &= ~HPTE_V_HVLOCK;
+		hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 		preempt_enable();
 		if (!(valid == want_valid && (first_pass || dirty)))
 			ok = 0;
 	}
-	hpte[0] = v;
-	hpte[1] = r;
+	hpte[0] = cpu_to_be64(v);
+	hpte[1] = cpu_to_be64(r);
 	return ok;
 }
 
@@ -1395,7 +1394,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
 	struct kvm_htab_ctx *ctx = file->private_data;
 	struct kvm *kvm = ctx->kvm;
 	struct kvm_get_htab_header hdr;
-	unsigned long *hptp;
+	__be64 *hptp;
 	struct revmap_entry *revp;
 	unsigned long i, nb, nw;
 	unsigned long __user *lbuf;
@@ -1411,7 +1410,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
 	flags = ctx->flags;
 
 	i = ctx->index;
-	hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+	hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
 	revp = kvm->arch.revmap + i;
 	lbuf = (unsigned long __user *)buf;
 
@@ -1495,7 +1494,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 	unsigned long i, j;
 	unsigned long v, r;
 	unsigned long __user *lbuf;
-	unsigned long *hptp;
+	__be64 *hptp;
 	unsigned long tmp[2];
 	ssize_t nb;
 	long int err, ret;
@@ -1537,7 +1536,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 		    i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
 			break;
 
-		hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+		hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
 		lbuf = (unsigned long __user *)buf;
 		for (j = 0; j < hdr.n_valid; ++j) {
 			err = -EFAULT;
@@ -1549,7 +1548,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 			lbuf += 2;
 			nb += HPTE_SIZE;
 
-			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+			if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
 				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
 			err = -EIO;
 			ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
@@ -1575,7 +1574,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 		}
 
 		for (j = 0; j < hdr.n_invalid; ++j) {
-			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+			if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
 				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
 			++i;
 			hptp += 2;

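Nearly every hunk in this file applies one transformation: HPTEs are architecturally big-endian, so once the host may be little-endian the entries are typed __be64 and every access goes through be64_to_cpu()/cpu_to_be64(). A minimal sketch of the pattern, assuming kernel context for __be64 and the byte-order helpers; hpte_set_flag is an illustrative helper:

	/* Hedged sketch of the BE HPTE access pattern used throughout. */
	static void hpte_set_flag(__be64 *hptep, unsigned long flag)
	{
		unsigned long v = be64_to_cpu(hptep[0]);	/* to host order */

		v |= flag;
		hptep[0] = cpu_to_be64(v);			/* back to BE */
	}

For pure bit operations the patch often uses the cheaper equivalent hptep[0] |= cpu_to_be64(flag), since OR and AND-NOT commute with the byte swap.
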
+ 17 - 11
arch/powerpc/kvm/book3s_emulate.c

@@ -439,12 +439,6 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		    (mfmsr() & MSR_HV))
 			vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
 		break;
-	case SPRN_PURR:
-		to_book3s(vcpu)->purr_offset = spr_val - get_tb();
-		break;
-	case SPRN_SPURR:
-		to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
-		break;
 	case SPRN_GQR0:
 	case SPRN_GQR1:
 	case SPRN_GQR2:
@@ -455,10 +449,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_GQR7:
 		to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
 		break;
+#ifdef CONFIG_PPC_BOOK3S_64
 	case SPRN_FSCR:
-		vcpu->arch.fscr = spr_val;
+		kvmppc_set_fscr(vcpu, spr_val);
 		break;
-#ifdef CONFIG_PPC_BOOK3S_64
 	case SPRN_BESCR:
 		vcpu->arch.bescr = spr_val;
 		break;
@@ -572,10 +566,22 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
 		*spr_val = 0;
 		break;
 	case SPRN_PURR:
-		*spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+		/*
+		 * On exit we would have updated purr
+		 */
+		*spr_val = vcpu->arch.purr;
 		break;
 	case SPRN_SPURR:
-		*spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+		/*
+		 * On exit we would have updated spurr
+		 */
+		*spr_val = vcpu->arch.spurr;
+		break;
+	case SPRN_VTB:
+		*spr_val = vcpu->arch.vtb;
+		break;
+	case SPRN_IC:
+		*spr_val = vcpu->arch.ic;
 		break;
 	case SPRN_GQR0:
 	case SPRN_GQR1:
@@ -587,10 +593,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
 	case SPRN_GQR7:
 		*spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
 		break;
+#ifdef CONFIG_PPC_BOOK3S_64
 	case SPRN_FSCR:
 		*spr_val = vcpu->arch.fscr;
 		break;
-#ifdef CONFIG_PPC_BOOK3S_64
 	case SPRN_BESCR:
 		*spr_val = vcpu->arch.bescr;
 		break;

+ 228 - 43
arch/powerpc/kvm/book3s_hv.c

@@ -35,6 +35,7 @@
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
+#include <asm/cache.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -67,6 +68,15 @@
 /* Used as a "null" value for timebase values */
 #define TB_NIL	(~(u64)0)
 
+static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
+
+#if defined(CONFIG_PPC_64K_PAGES)
+#define MPP_BUFFER_ORDER	0
+#elif defined(CONFIG_PPC_4K_PAGES)
+#define MPP_BUFFER_ORDER	3
+#endif
+
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
@@ -270,7 +280,7 @@ struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
 static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
 {
 	vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
-	vpa->yield_count = 1;
+	vpa->yield_count = cpu_to_be32(1);
 }
 
 static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
@@ -293,8 +303,8 @@ static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
 struct reg_vpa {
 	u32 dummy;
 	union {
-		u16 hword;
-		u32 word;
+		__be16 hword;
+		__be32 word;
 	} length;
 };
 
@@ -333,9 +343,9 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 		if (va == NULL)
 			return H_PARAMETER;
 		if (subfunc == H_VPA_REG_VPA)
-			len = ((struct reg_vpa *)va)->length.hword;
+			len = be16_to_cpu(((struct reg_vpa *)va)->length.hword);
 		else
-			len = ((struct reg_vpa *)va)->length.word;
+			len = be32_to_cpu(((struct reg_vpa *)va)->length.word);
 		kvmppc_unpin_guest_page(kvm, va, vpa, false);
 
 		/* Check length */
@@ -540,21 +550,63 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 		return;
 	memset(dt, 0, sizeof(struct dtl_entry));
 	dt->dispatch_reason = 7;
-	dt->processor_id = vc->pcpu + vcpu->arch.ptid;
-	dt->timebase = now + vc->tb_offset;
-	dt->enqueue_to_dispatch_time = stolen;
-	dt->srr0 = kvmppc_get_pc(vcpu);
-	dt->srr1 = vcpu->arch.shregs.msr;
+	dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
+	dt->timebase = cpu_to_be64(now + vc->tb_offset);
+	dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
+	dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
+	dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
 	++dt;
 	if (dt == vcpu->arch.dtl.pinned_end)
 		dt = vcpu->arch.dtl.pinned_addr;
 	vcpu->arch.dtl_ptr = dt;
 	/* order writing *dt vs. writing vpa->dtl_idx */
 	smp_wmb();
-	vpa->dtl_idx = ++vcpu->arch.dtl_index;
+	vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
 	vcpu->arch.dtl.dirty = true;
 }
 
+static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
+		return true;
+	if ((!vcpu->arch.vcore->arch_compat) &&
+	    cpu_has_feature(CPU_FTR_ARCH_207S))
+		return true;
+	return false;
+}
+
+static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
+			     unsigned long resource, unsigned long value1,
+			     unsigned long value2)
+{
+	switch (resource) {
+	case H_SET_MODE_RESOURCE_SET_CIABR:
+		if (!kvmppc_power8_compatible(vcpu))
+			return H_P2;
+		if (value2)
+			return H_P4;
+		if (mflags)
+			return H_UNSUPPORTED_FLAG_START;
+		/* Guests can't breakpoint the hypervisor */
+		if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
+			return H_P3;
+		vcpu->arch.ciabr  = value1;
+		return H_SUCCESS;
+	case H_SET_MODE_RESOURCE_SET_DAWR:
+		if (!kvmppc_power8_compatible(vcpu))
+			return H_P2;
+		if (mflags)
+			return H_UNSUPPORTED_FLAG_START;
+		if (value2 & DABRX_HYP)
+			return H_P4;
+		vcpu->arch.dawr  = value1;
+		vcpu->arch.dawrx = value2;
+		return H_SUCCESS;
+	default:
+		return H_TOO_HARD;
+	}
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -562,6 +614,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu *tvcpu;
 	int idx, rc;
 
+	if (req <= MAX_HCALL_OPCODE &&
+	    !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
+		return RESUME_HOST;
+
 	switch (req) {
 	case H_ENTER:
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -620,7 +676,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 		/* Send the error out to userspace via KVM_RUN */
 		return rc;
-
+	case H_SET_MODE:
+		ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6),
+					kvmppc_get_gpr(vcpu, 7));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
 	case H_XIRR:
 	case H_CPPR:
 	case H_EOI:
@@ -639,6 +702,29 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+static int kvmppc_hcall_impl_hv(unsigned long cmd)
+{
+	switch (cmd) {
+	case H_CEDE:
+	case H_PROD:
+	case H_CONFER:
+	case H_REGISTER_VPA:
+	case H_SET_MODE:
+#ifdef CONFIG_KVM_XICS
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
+#endif
+		return 1;
+	}
+
+	/* See if it's in the real-mode table */
+	return kvmppc_hcall_impl_hv_realmode(cmd);
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				 struct task_struct *tsk)
 {
@@ -785,7 +871,8 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
+		bool preserve_top32)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	u64 mask;
@@ -820,6 +907,10 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
 	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
 		mask |= LPCR_AIL;
+
+	/* Broken 32-bit version of LPCR must not clear top bits */
+	if (preserve_top32)
+		mask &= 0xFFFFFFFF;
 	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
 	spin_unlock(&vc->lock);
 }
@@ -894,12 +985,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_CIABR:
 		*val = get_reg_val(id, vcpu->arch.ciabr);
 		break;
-	case KVM_REG_PPC_IC:
-		*val = get_reg_val(id, vcpu->arch.ic);
-		break;
-	case KVM_REG_PPC_VTB:
-		*val = get_reg_val(id, vcpu->arch.vtb);
-		break;
 	case KVM_REG_PPC_CSIGR:
 		*val = get_reg_val(id, vcpu->arch.csigr);
 		break;
@@ -939,6 +1024,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
 		break;
 	case KVM_REG_PPC_LPCR:
+	case KVM_REG_PPC_LPCR_64:
 		*val = get_reg_val(id, vcpu->arch.vcore->lpcr);
 		break;
 	case KVM_REG_PPC_PPR:
@@ -1094,12 +1180,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
 			vcpu->arch.ciabr &= ~CIABR_PRIV;	/* disable */
 		break;
-	case KVM_REG_PPC_IC:
-		vcpu->arch.ic = set_reg_val(id, *val);
-		break;
-	case KVM_REG_PPC_VTB:
-		vcpu->arch.vtb = set_reg_val(id, *val);
-		break;
 	case KVM_REG_PPC_CSIGR:
 		vcpu->arch.csigr = set_reg_val(id, *val);
 		break;
@@ -1150,7 +1230,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
 		break;
 	case KVM_REG_PPC_LPCR:
-		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val));
+		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
+		break;
+	case KVM_REG_PPC_LPCR_64:
+		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
 		break;
 	case KVM_REG_PPC_PPR:
 		vcpu->arch.ppr = set_reg_val(id, *val);
@@ -1228,6 +1311,33 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	return r;
 }
 
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+{
+	struct kvmppc_vcore *vcore;
+
+	vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+
+	if (vcore == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&vcore->runnable_threads);
+	spin_lock_init(&vcore->lock);
+	init_waitqueue_head(&vcore->wq);
+	vcore->preempt_tb = TB_NIL;
+	vcore->lpcr = kvm->arch.lpcr;
+	vcore->first_vcpuid = core * threads_per_subcore;
+	vcore->kvm = kvm;
+
+	vcore->mpp_buffer_is_valid = false;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		vcore->mpp_buffer = (void *)__get_free_pages(
+			GFP_KERNEL|__GFP_ZERO,
+			MPP_BUFFER_ORDER);
+
+	return vcore;
+}
+
 static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 						   unsigned int id)
 {
@@ -1279,16 +1389,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	mutex_lock(&kvm->lock);
 	vcore = kvm->arch.vcores[core];
 	if (!vcore) {
-		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
-		if (vcore) {
-			INIT_LIST_HEAD(&vcore->runnable_threads);
-			spin_lock_init(&vcore->lock);
-			init_waitqueue_head(&vcore->wq);
-			vcore->preempt_tb = TB_NIL;
-			vcore->lpcr = kvm->arch.lpcr;
-			vcore->first_vcpuid = core * threads_per_subcore;
-			vcore->kvm = kvm;
-		}
+		vcore = kvmppc_vcore_create(kvm, core);
 		kvm->arch.vcores[core] = vcore;
 		kvm->arch.online_vcores++;
 	}
@@ -1500,6 +1601,33 @@ static int on_primary_thread(void)
 	return 1;
 }
 
+static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc)
+{
+	phys_addr_t phy_addr, mpp_addr;
+
+	phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer);
+	mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK;
+
+	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT);
+	logmpp(mpp_addr | PPC_LOGMPP_LOG_L2);
+
+	vc->mpp_buffer_is_valid = true;
+}
+
+static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
+{
+	phys_addr_t phy_addr, mpp_addr;
+
+	phy_addr = virt_to_phys(vc->mpp_buffer);
+	mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK;
+
+	/* We must abort any in-progress save operations to ensure
+	 * the table is valid so that the prefetch engine knows when
+	 * to stop prefetching. */
+	logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT);
+	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
+}
+
 /*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
@@ -1577,9 +1705,16 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
+	if (vc->mpp_buffer_is_valid)
+		kvmppc_start_restoring_l2_cache(vc);
+
 	__kvmppc_vcore_entry();
 
 	spin_lock(&vc->lock);
+
+	if (vc->mpp_buffer)
+		kvmppc_start_saving_l2_cache(vc);
+
 	/* disable sending of IPIs on virtual external irqs */
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
 		vcpu->cpu = -1;
@@ -1929,12 +2064,6 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 	(*sps)->page_shift = def->shift;
 	(*sps)->slb_enc = def->sllp;
 	(*sps)->enc[0].page_shift = def->shift;
-	/*
-	 * Only return base page encoding. We don't want to return
-	 * all the supporting pte_enc, because our H_ENTER doesn't
-	 * support MPSS yet. Once they do, we can start passing all
-	 * support pte_enc here
-	 */
 	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
 	/*
 	 * Add 16MB MPSS support if host supports it
@@ -2281,6 +2410,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 */
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
+	/* Start out with the default set of hcalls enabled */
+	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
+	       sizeof(kvm->arch.enabled_hcalls));
+
 	kvm->arch.rma = NULL;
 
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -2323,8 +2456,14 @@ static void kvmppc_free_vcores(struct kvm *kvm)
 {
 	long int i;
 
-	for (i = 0; i < KVM_MAX_VCORES; ++i)
+	for (i = 0; i < KVM_MAX_VCORES; ++i) {
+		if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) {
+			struct kvmppc_vcore *vc = kvm->arch.vcores[i];
+			free_pages((unsigned long)vc->mpp_buffer,
+				   MPP_BUFFER_ORDER);
+		}
 		kfree(kvm->arch.vcores[i]);
+	}
 	kvm->arch.online_vcores = 0;
 }
 
@@ -2419,6 +2558,49 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 	return r;
 }
 
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ */
+static unsigned int default_hcall_list[] = {
+	H_REMOVE,
+	H_ENTER,
+	H_READ,
+	H_PROTECT,
+	H_BULK_REMOVE,
+	H_GET_TCE,
+	H_PUT_TCE,
+	H_SET_DABR,
+	H_SET_XDABR,
+	H_CEDE,
+	H_PROD,
+	H_CONFER,
+	H_REGISTER_VPA,
+#ifdef CONFIG_KVM_XICS
+	H_EOI,
+	H_CPPR,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR,
+	H_XIRR_X,
+#endif
+	0
+};
+
+static void init_default_hcalls(void)
+{
+	int i;
+	unsigned int hcall;
+
+	for (i = 0; default_hcall_list[i]; ++i) {
+		hcall = default_hcall_list[i];
+		WARN_ON(!kvmppc_hcall_impl_hv(hcall));
+		__set_bit(hcall / 4, default_enabled_hcalls);
+	}
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -2451,6 +2633,7 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
 	.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
 	.arch_vm_ioctl  = kvm_arch_vm_ioctl_hv,
+	.hcall_implemented = kvmppc_hcall_impl_hv,
 };
 
 static int kvmppc_book3s_init_hv(void)
@@ -2466,6 +2649,8 @@ static int kvmppc_book3s_init_hv(void)
 	kvm_ops_hv.owner = THIS_MODULE;
 	kvmppc_hv_ops = &kvm_ops_hv;
 
+	init_default_hcalls();
+
 	r = kvmppc_mmu_hv_init();
 	return r;
 }

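kvmppc_set_lpcr() grows a preserve_top32 flag above because the original KVM_REG_PPC_LPCR ONE_REG id was defined as 32 bits wide, so a write through it must not clobber LPCR bits 63..32; the new KVM_REG_PPC_LPCR_64 id carries the full register. The masking reduces to the following sketch (update_lpcr is an illustrative name):

	/* Hedged sketch of the preserve_top32 masking in kvmppc_set_lpcr(). */
	static unsigned long update_lpcr(unsigned long old, unsigned long new,
					 unsigned long mask, int preserve_top32)
	{
		if (preserve_top32)
			mask &= 0xFFFFFFFFUL;	/* leave bits 63..32 untouched */
		return (old & ~mask) | (new & mask);
	}
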
+ 13 - 0
arch/powerpc/kvm/book3s_hv_builtin.c

@@ -219,3 +219,16 @@ bool kvm_hv_mode_active(void)
 {
 	return atomic_read(&hv_vm_count) != 0;
 }
+
+extern int hcall_real_table[], hcall_real_table_end[];
+
+int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
+{
+	cmd /= 4;
+	if (cmd < hcall_real_table_end - hcall_real_table &&
+	    hcall_real_table[cmd])
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);

+ 3 - 3
arch/powerpc/kvm/book3s_hv_ras.c

@@ -45,14 +45,14 @@ static void reload_slb(struct kvm_vcpu *vcpu)
 		return;
 
 	/* Sanity check */
-	n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
+	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
 	if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
 		return;
 
 	/* Load up the SLB from that */
 	for (i = 0; i < n; ++i) {
-		unsigned long rb = slb->save_area[i].esid;
-		unsigned long rs = slb->save_area[i].vsid;
+		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
+		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
 
 		rb = (rb & ~0xFFFul) | i;	/* insert entry number */
 		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));

+ 83 - 63
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -154,10 +154,10 @@ static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
 	return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
 }
 
-static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
 {
 	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-	hpte[0] = hpte_v;
+	hpte[0] = cpu_to_be64(hpte_v);
 }
 
 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
@@ -166,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 {
 	unsigned long i, pa, gpa, gfn, psize;
 	unsigned long slot_fn, hva;
-	unsigned long *hpte;
+	__be64 *hpte;
 	struct revmap_entry *rev;
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
@@ -275,9 +275,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		return H_PARAMETER;
 	if (likely((flags & H_EXACT) == 0)) {
 		pte_index &= ~7UL;
-		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 		for (i = 0; i < 8; ++i) {
-			if ((*hpte & HPTE_V_VALID) == 0 &&
+			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
 			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
 					  HPTE_V_ABSENT))
 				break;
@@ -292,11 +292,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			 */
 			hpte -= 16;
 			for (i = 0; i < 8; ++i) {
+				u64 pte;
 				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 					cpu_relax();
-				if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
+				pte = be64_to_cpu(*hpte);
+				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
 					break;
-				*hpte &= ~HPTE_V_HVLOCK;
+				*hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
 				hpte += 2;
 			}
 			if (i == 8)
@@ -304,14 +306,17 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		}
 		pte_index += i;
 	} else {
-		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
 				   HPTE_V_ABSENT)) {
 			/* Lock the slot and check again */
+			u64 pte;
+
 			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 				cpu_relax();
-			if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
-				*hpte &= ~HPTE_V_HVLOCK;
+			pte = be64_to_cpu(*hpte);
+			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+				*hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
 				return H_PTEG_FULL;
 			}
 		}
@@ -347,11 +352,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		}
 	}
 
-	hpte[1] = ptel;
+	hpte[1] = cpu_to_be64(ptel);
 
 	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
 	eieio();
-	hpte[0] = pteh;
+	hpte[0] = cpu_to_be64(pteh);
 	asm volatile("ptesync" : : : "memory");
 
 	*pte_idx_ret = pte_index;
@@ -468,30 +473,35 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 			unsigned long pte_index, unsigned long avpn,
 			unsigned long *hpret)
 {
-	unsigned long *hpte;
+	__be64 *hpte;
 	unsigned long v, r, rb;
 	struct revmap_entry *rev;
+	u64 pte;
 
 	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
-	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 		cpu_relax();
-	if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
-	    ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
-	    ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
-		hpte[0] &= ~HPTE_V_HVLOCK;
+	pte = be64_to_cpu(hpte[0]);
+	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
+	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
+		hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 		return H_NOT_FOUND;
 	}
 
 	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
-	v = hpte[0] & ~HPTE_V_HVLOCK;
+	v = pte & ~HPTE_V_HVLOCK;
 	if (v & HPTE_V_VALID) {
-		hpte[0] &= ~HPTE_V_VALID;
-		rb = compute_tlbie_rb(v, hpte[1], pte_index);
+		u64 pte1;
+
+		pte1 = be64_to_cpu(hpte[1]);
+		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
+		rb = compute_tlbie_rb(v, pte1, pte_index);
 		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/* Read PTE low word after tlbie to get final R/C values */
-		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
+		remove_revmap_chain(kvm, pte_index, rev, v, pte1);
 	}
 	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
 	note_hpte_modification(kvm, rev);
@@ -514,12 +524,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *args = &vcpu->arch.gpr[4];
-	unsigned long *hp, *hptes[4], tlbrb[4];
+	__be64 *hp, *hptes[4];
+	unsigned long tlbrb[4];
 	long int i, j, k, n, found, indexes[4];
 	unsigned long flags, req, pte_index, rcbits;
 	int global;
 	long int ret = H_SUCCESS;
 	struct revmap_entry *rev, *revs[4];
+	u64 hp0;
 
 	global = global_invalidates(kvm, 0);
 	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
@@ -542,8 +554,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 				ret = H_PARAMETER;
 				break;
 			}
-			hp = (unsigned long *)
-				(kvm->arch.hpt_virt + (pte_index << 4));
+			hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
 			/* to avoid deadlock, don't spin except for first */
 			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
 				if (n)
@@ -552,23 +563,24 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 					cpu_relax();
 			}
 			found = 0;
-			if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+			hp0 = be64_to_cpu(hp[0]);
+			if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
 				switch (flags & 3) {
 				case 0:		/* absolute */
 					found = 1;
 					break;
 				case 1:		/* andcond */
-					if (!(hp[0] & args[j + 1]))
+					if (!(hp0 & args[j + 1]))
 						found = 1;
 					break;
 				case 2:		/* AVPN */
-					if ((hp[0] & ~0x7fUL) == args[j + 1])
+					if ((hp0 & ~0x7fUL) == args[j + 1])
 						found = 1;
 					break;
 				}
 			}
 			if (!found) {
-				hp[0] &= ~HPTE_V_HVLOCK;
+				hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 				args[j] = ((0x90 | flags) << 56) + pte_index;
 				continue;
 			}
@@ -577,7 +589,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 			note_hpte_modification(kvm, rev);
 
-			if (!(hp[0] & HPTE_V_VALID)) {
+			if (!(hp0 & HPTE_V_VALID)) {
 				/* insert R and C bits from PTE */
 				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 				args[j] |= rcbits << (56 - 5);
@@ -585,8 +597,10 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 				continue;
 			}
 
-			hp[0] &= ~HPTE_V_VALID;		/* leave it locked */
-			tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+			/* leave it locked */
+			hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
+			tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
+				be64_to_cpu(hp[1]), pte_index);
 			indexes[n] = j;
 			hptes[n] = hp;
 			revs[n] = rev;
@@ -605,7 +619,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 			pte_index = args[j] & ((1ul << 56) - 1);
 			hp = hptes[k];
 			rev = revs[k];
-			remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
+			remove_revmap_chain(kvm, pte_index, rev,
+				be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
 			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 			args[j] |= rcbits << (56 - 5);
 			hp[0] = 0;
@@ -620,23 +635,25 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		      unsigned long va)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long *hpte;
+	__be64 *hpte;
 	struct revmap_entry *rev;
 	unsigned long v, r, rb, mask, bits;
+	u64 pte;
 
 	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 
-	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 		cpu_relax();
-	if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
-	    ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
-		hpte[0] &= ~HPTE_V_HVLOCK;
+	pte = be64_to_cpu(hpte[0]);
+	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
+		hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 		return H_NOT_FOUND;
 	}
 
-	v = hpte[0];
+	v = pte;
 	bits = (flags << 55) & HPTE_R_PP0;
 	bits |= (flags << 48) & HPTE_R_KEY_HI;
 	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
@@ -650,12 +667,12 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		rev->guest_rpte = r;
 		note_hpte_modification(kvm, rev);
 	}
-	r = (hpte[1] & ~mask) | bits;
+	r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
 
 	/* Update HPTE */
 	if (v & HPTE_V_VALID) {
 		rb = compute_tlbie_rb(v, r, pte_index);
-		hpte[0] = v & ~HPTE_V_VALID;
+		hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
 		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/*
 		 * If the host has this page as readonly but the guest
@@ -681,9 +698,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			}
 		}
 	}
-	hpte[1] = r;
+	hpte[1] = cpu_to_be64(r);
 	eieio();
-	hpte[0] = v & ~HPTE_V_HVLOCK;
+	hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
 	asm volatile("ptesync" : : : "memory");
 	return H_SUCCESS;
 }
@@ -692,7 +709,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 		   unsigned long pte_index)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long *hpte, v, r;
+	__be64 *hpte;
+	unsigned long v, r;
 	int i, n = 1;
 	struct revmap_entry *rev = NULL;
 
@@ -704,9 +722,9 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	}
 	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	for (i = 0; i < n; ++i, ++pte_index) {
-		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
-		v = hpte[0] & ~HPTE_V_HVLOCK;
-		r = hpte[1];
+		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+		r = be64_to_cpu(hpte[1]);
 		if (v & HPTE_V_ABSENT) {
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
@@ -721,25 +739,27 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	return H_SUCCESS;
 }
 
-void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 			unsigned long pte_index)
 {
 	unsigned long rb;
 
-	hptep[0] &= ~HPTE_V_VALID;
-	rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
+	rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
+			      pte_index);
 	do_tlbies(kvm, &rb, 1, 1, true);
 }
 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
 
-void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
+void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
 			   unsigned long pte_index)
 {
 	unsigned long rb;
 	unsigned char rbyte;
 
-	rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
-	rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
+	rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
+			      pte_index);
+	rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
 	/* modify only the second-last byte, which contains the ref bit */
 	*((char *)hptep + 14) = rbyte;
 	do_tlbies(kvm, &rb, 1, 1, false);
@@ -765,7 +785,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 	unsigned long somask;
 	unsigned long vsid, hash;
 	unsigned long avpn;
-	unsigned long *hpte;
+	__be64 *hpte;
 	unsigned long mask, val;
 	unsigned long v, r;
 
@@ -797,11 +817,11 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 	val |= avpn;
 
 	for (;;) {
-		hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));
+		hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));
 
 		for (i = 0; i < 16; i += 2) {
 			/* Read the PTE racily */
-			v = hpte[i] & ~HPTE_V_HVLOCK;
+			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
 
 			/* Check valid/absent, hash, segment size and AVPN */
 			if (!(v & valid) || (v & mask) != val)
@@ -810,8 +830,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 			/* Lock the PTE and read it under the lock */
 			while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
 				cpu_relax();
-			v = hpte[i] & ~HPTE_V_HVLOCK;
-			r = hpte[i+1];
+			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+			r = be64_to_cpu(hpte[i+1]);
 
 			/*
 			 * Check the HPTE again, including base page size
@@ -822,7 +842,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 				return (hash << 3) + (i >> 1);
 
 			/* Unlock and move on */
-			hpte[i] = v;
+			hpte[i] = cpu_to_be64(v);
 		}
 
 		if (val & HPTE_V_SECONDARY)
@@ -851,7 +871,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	struct kvm *kvm = vcpu->kvm;
 	long int index;
 	unsigned long v, r, gr;
-	unsigned long *hpte;
+	__be64 *hpte;
 	unsigned long valid;
 	struct revmap_entry *rev;
 	unsigned long pp, key;
@@ -867,9 +887,9 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 			return status;	/* there really was no HPTE */
 		return 0;		/* for prot fault, HPTE disappeared */
 	}
-	hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
-	v = hpte[0] & ~HPTE_V_HVLOCK;
-	r = hpte[1];
+	hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
+	v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+	r = be64_to_cpu(hpte[1]);
 	rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
 	gr = rev->guest_rpte;
 

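The lookup paths above follow a consistent discipline: read the HPTE racily with the HVLOCK bit masked off, filter cheaply, and only then take the lock and re-read before trusting the entry. A sketch of that shape, assuming the kernel's try_lock_hpte(), cpu_relax() and HPTE_V_HVLOCK; find_hpte itself is illustrative:

	/* Hedged sketch of the racy-read-then-lock HPTE lookup discipline. */
	static long find_hpte(__be64 *group, unsigned long mask, unsigned long want)
	{
		int i;
		unsigned long v;

		for (i = 0; i < 16; i += 2) {
			v = be64_to_cpu(group[i]) & ~HPTE_V_HVLOCK;	/* racy read */
			if ((v & mask) != want)
				continue;
			while (!try_lock_hpte(&group[i], HPTE_V_HVLOCK))
				cpu_relax();
			v = be64_to_cpu(group[i]) & ~HPTE_V_HVLOCK;	/* re-check */
			if ((v & mask) == want)
				return i >> 1;			/* caller unlocks */
			group[i] = cpu_to_be64(v);		/* unlock, move on */
		}
		return -1;
	}
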
+ 5 - 0
arch/powerpc/kvm/book3s_hv_rm_xics.c

@@ -401,6 +401,11 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 		icp->rm_action |= XICS_RM_REJECT;
 		icp->rm_reject = irq;
 	}
+
+	if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
+		icp->rm_action |= XICS_RM_NOTIFY_EOI;
+		icp->rm_eoied_irq = irq;
+	}
  bail:
 	return check_too_hard(xics, icp);
 }

+ 46 - 24
arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -32,10 +32,6 @@
 
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
-#ifdef __LITTLE_ENDIAN__
-#error Need to fix lppaca and SLB shadow accesses in little endian mode
-#endif
-
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
@@ -601,9 +597,10 @@ kvmppc_got_guest:
 	ld	r3, VCPU_VPA(r4)
 	cmpdi	r3, 0
 	beq	25f
-	lwz	r5, LPPACA_YIELDCOUNT(r3)
+	li	r6, LPPACA_YIELDCOUNT
+	LWZX_BE	r5, r3, r6
 	addi	r5, r5, 1
-	stw	r5, LPPACA_YIELDCOUNT(r3)
+	STWX_BE	r5, r3, r6
 	li	r6, 1
 	stb	r6, VCPU_VPA_DIRTY(r4)
 25:
@@ -677,9 +674,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 
 	mr	r31, r4
 	addi	r3, r31, VCPU_FPRS_TM
-	bl	.load_fp_state
+	bl	load_fp_state
 	addi	r3, r31, VCPU_VRS_TM
-	bl	.load_vr_state
+	bl	load_vr_state
 	mr	r4, r31
 	lwz	r7, VCPU_VRSAVE_TM(r4)
 	mtspr	SPRN_VRSAVE, r7
@@ -1423,9 +1420,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 
 	/* Save FP/VSX. */
 	addi	r3, r9, VCPU_FPRS_TM
-	bl	.store_fp_state
+	bl	store_fp_state
 	addi	r3, r9, VCPU_VRS_TM
-	bl	.store_vr_state
+	bl	store_vr_state
 	mfspr	r6, SPRN_VRSAVE
 	stw	r6, VCPU_VRSAVE_TM(r9)
 1:
@@ -1448,9 +1445,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
 	cmpdi	r8, 0
 	beq	25f
-	lwz	r3, LPPACA_YIELDCOUNT(r8)
+	li	r4, LPPACA_YIELDCOUNT
+	LWZX_BE	r3, r8, r4
 	addi	r3, r3, 1
-	stw	r3, LPPACA_YIELDCOUNT(r8)
+	STWX_BE	r3, r8, r4
 	li	r3, 1
 	stb	r3, VCPU_VPA_DIRTY(r9)
 25:
@@ -1763,8 +1761,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 33:	ld	r8,PACA_SLBSHADOWPTR(r13)
 
 	.rept	SLB_NUM_BOLTED
-	ld	r5,SLBSHADOW_SAVEAREA(r8)
-	ld	r6,SLBSHADOW_SAVEAREA+8(r8)
+	li	r3, SLBSHADOW_SAVEAREA
+	LDX_BE	r5, r8, r3
+	addi	r3, r3, 8
+	LDX_BE	r6, r8, r3
 	andis.	r7,r5,SLB_ESID_V@h
 	beq	1f
 	slbmte	r6,r5
@@ -1915,12 +1915,23 @@ hcall_try_real_mode:
 	clrrdi	r3,r3,2
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
+	/* See if this hcall is enabled for in-kernel handling */
+	ld	r4, VCPU_KVM(r9)
+	srdi	r0, r3, 8	/* r0 = (r3 / 4) >> 6 */
+	sldi	r0, r0, 3	/* index into kvm->arch.enabled_hcalls[] */
+	add	r4, r4, r0
+	ld	r0, KVM_ENABLED_HCALLS(r4)
+	rlwinm	r4, r3, 32-2, 0x3f	/* r4 = (r3 / 4) & 0x3f */
+	srd	r0, r0, r4
+	andi.	r0, r0, 1
+	beq	guest_exit_cont
+	/* Get pointer to handler, if any, and call it */
 	LOAD_REG_ADDR(r4, hcall_real_table)
 	lwax	r3,r3,r4
 	cmpwi	r3,0
 	beq	guest_exit_cont
-	add	r3,r3,r4
-	mtctr	r3
+	add	r12,r3,r4
+	mtctr	r12
 	mr	r3,r9		/* get vcpu pointer */
 	ld	r4,VCPU_GPR(R4)(r9)
 	bctrl
@@ -2037,6 +2048,7 @@ hcall_real_table:
 	.long	0		/* 0x12c */
 	.long	0		/* 0x130 */
 	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+	.globl	hcall_real_table_end
 hcall_real_table_end:
 
 ignore_hdec:
@@ -2344,7 +2356,18 @@ kvmppc_read_intr:
 	cmpdi	r6, 0
 	beq-	1f
 	lwzcix	r0, r6, r7
-	rlwinm.	r3, r0, 0, 0xffffff
+	/*
+	 * Save XIRR for later. Since we get it in reverse endian on LE
+	 * systems, save it byte reversed and fetch it back in host endian.
+	 */
+	li	r3, HSTATE_SAVED_XIRR
+	STWX_BE	r0, r3, r13
+#ifdef __LITTLE_ENDIAN__
+	lwz	r3, HSTATE_SAVED_XIRR(r13)
+#else
+	mr	r3, r0
+#endif
+	rlwinm.	r3, r3, 0, 0xffffff
 	sync
 	beq	1f			/* if nothing pending in the ICP */
 
@@ -2376,10 +2399,9 @@ kvmppc_read_intr:
 	li	r3, -1
 1:	blr
 
-42:	/* It's not an IPI and it's for the host, stash it in the PACA
-	 * before exit, it will be picked up by the host ICP driver
+42:	/* It's not an IPI and it's for the host. We saved a copy of XIRR in
+	 * the PACA earlier; it will be picked up by the host ICP driver
 	 */
-	stw	r0, HSTATE_SAVED_XIRR(r13)
 	li	r3, 1
 	b	1b
 
@@ -2414,11 +2436,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	mtmsrd	r8
 	isync
 	addi	r3,r3,VCPU_FPRS
-	bl	.store_fp_state
+	bl	store_fp_state
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
 	addi	r3,r31,VCPU_VRS
-	bl	.store_vr_state
+	bl	store_vr_state
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
 	mfspr	r6,SPRN_VRSAVE
@@ -2450,11 +2472,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	mtmsrd	r8
 	isync
 	addi	r3,r4,VCPU_FPRS
-	bl	.load_fp_state
+	bl	load_fp_state
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
 	addi	r3,r31,VCPU_VRS
-	bl	.load_vr_state
+	bl	load_vr_state
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
 	lwz	r7,VCPU_VRSAVE(r31)
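
The hcall_try_real_mode hunk above gates in-kernel handling on the kvm->arch.enabled_hcalls[] bitmap: hcall numbers are multiples of 4, so (hcall / 4) selects the bit, with the upper bits picking the 64-bit word (srdi r0,r3,8, then sldi for the byte offset) and the low six bits the position inside it (rlwinm r4,r3,32-2,0x3f). A standalone C sketch of the same arithmetic, using H_CEDE's PAPR opcode (0xE0) as the example and assuming the bitmap mirrors __set_bit on 64-bit words:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the assembly above:
 *   word index: (hcall / 4) >> 6
 *   bit  index: (hcall / 4) & 0x3f
 */
static int hcall_enabled(const uint64_t *enabled_hcalls, unsigned long hcall)
{
	unsigned long idx = hcall / 4;	/* hcall numbers are multiples of 4 */

	return (enabled_hcalls[idx >> 6] >> (idx & 0x3f)) & 1;
}

int main(void)
{
	uint64_t map[2] = { 0, 0 };
	unsigned long h_cede = 0xE0;	/* PAPR opcode for H_CEDE */

	map[(h_cede / 4) >> 6] |= 1ULL << ((h_cede / 4) & 0x3f);
	printf("H_CEDE enabled: %d\n", hcall_enabled(map, h_cede));	/* 1 */
	printf("0x04 enabled:   %d\n", hcall_enabled(map, 0x04));	/* 0 */
	return 0;
}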

+ 24 - 14
arch/powerpc/kvm/book3s_paired_singles.c

@@ -639,26 +639,36 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
 
 int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
-	u32 inst = kvmppc_get_last_inst(vcpu);
+	u32 inst;
 	enum emulation_result emulated = EMULATE_DONE;
+	int ax_rd, ax_ra, ax_rb, ax_rc;
+	short full_d;
+	u64 *fpr_d, *fpr_a, *fpr_b, *fpr_c;
 
-	int ax_rd = inst_get_field(inst, 6, 10);
-	int ax_ra = inst_get_field(inst, 11, 15);
-	int ax_rb = inst_get_field(inst, 16, 20);
-	int ax_rc = inst_get_field(inst, 21, 25);
-	short full_d = inst_get_field(inst, 16, 31);
-
-	u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd);
-	u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra);
-	u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb);
-	u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc);
-
-	bool rcomp = (inst & 1) ? true : false;
-	u32 cr = kvmppc_get_cr(vcpu);
+	bool rcomp;
+	u32 cr;
 #ifdef DEBUG
 	int i;
 #endif
 
+	emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
+	if (emulated != EMULATE_DONE)
+		return emulated;
+
+	ax_rd = inst_get_field(inst, 6, 10);
+	ax_ra = inst_get_field(inst, 11, 15);
+	ax_rb = inst_get_field(inst, 16, 20);
+	ax_rc = inst_get_field(inst, 21, 25);
+	full_d = inst_get_field(inst, 16, 31);
+
+	fpr_d = &VCPU_FPR(vcpu, ax_rd);
+	fpr_a = &VCPU_FPR(vcpu, ax_ra);
+	fpr_b = &VCPU_FPR(vcpu, ax_rb);
+	fpr_c = &VCPU_FPR(vcpu, ax_rc);
+
+	rcomp = (inst & 1) ? true : false;
+	cr = kvmppc_get_cr(vcpu);
+
 	if (!kvmppc_inst_is_paired_single(vcpu, inst))
 		return EMULATE_FAIL;
 

+ 161 - 62
arch/powerpc/kvm/book3s_pr.c

@@ -62,6 +62,35 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 #define HW_PAGE_SIZE PAGE_SIZE
 #endif
 
+static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
+{
+	ulong msr = kvmppc_get_msr(vcpu);
+	return (msr & (MSR_IR|MSR_DR)) == MSR_DR;
+}
+
+static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
+{
+	ulong msr = kvmppc_get_msr(vcpu);
+	ulong pc = kvmppc_get_pc(vcpu);
+
+	/* We are in DR only split real mode */
+	if ((msr & (MSR_IR|MSR_DR)) != MSR_DR)
+		return;
+
+	/* We have not fixed up the guest already */
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK)
+		return;
+
+	/* The code is in fixupable address space */
+	if (pc & SPLIT_HACK_MASK)
+		return;
+
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK;
+	kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
+}
+
+void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu);
+
 static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -71,10 +100,19 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 	svcpu->in_use = 0;
 	svcpu_put(svcpu);
 #endif
+
+	/* Disable AIL if supported */
+	if (cpu_has_feature(CPU_FTR_HVMODE) &&
+	    cpu_has_feature(CPU_FTR_ARCH_207S))
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+
 	vcpu->cpu = smp_processor_id();
 #ifdef CONFIG_PPC_BOOK3S_32
 	current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
 #endif
+
+	if (kvmppc_is_split_real(vcpu))
+		kvmppc_fixup_split_real(vcpu);
 }
 
 static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
@@ -89,8 +127,17 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 	svcpu_put(svcpu);
 #endif
 
+	if (kvmppc_is_split_real(vcpu))
+		kvmppc_unfixup_split_real(vcpu);
+
 	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
 	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+
+	/* Enable AIL if supported */
+	if (cpu_has_feature(CPU_FTR_HVMODE) &&
+	    cpu_has_feature(CPU_FTR_ARCH_207S))
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+
 	vcpu->cpu = -1;
 }
 
@@ -120,6 +167,14 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 #ifdef CONFIG_PPC_BOOK3S_64
 	svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
 #endif
+	/*
+	 * Now also save the current time base value. We use this
+	 * to find the guest purr and spurr values.
+	 */
+	vcpu->arch.entry_tb = get_tb();
+	vcpu->arch.entry_vtb = get_vtb();
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		vcpu->arch.entry_ic = mfspr(SPRN_IC);
 	svcpu->in_use = true;
 }
 
@@ -166,6 +221,14 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
 #ifdef CONFIG_PPC_BOOK3S_64
 	vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
 #endif
+	/*
+	 * Update purr and spurr using time base on exit.
+	 */
+	vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
+	vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
+	vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb;
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic;
 	svcpu->in_use = false;
 
 out:
@@ -294,6 +357,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 		}
 	}
 
+	if (kvmppc_is_split_real(vcpu))
+		kvmppc_fixup_split_real(vcpu);
+	else
+		kvmppc_unfixup_split_real(vcpu);
+
 	if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
 		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
 		kvmppc_mmu_flush_segments(vcpu);
@@ -443,19 +511,19 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 	put_page(hpage);
 }
 
-static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 {
 	ulong mp_pa = vcpu->arch.magic_page_pa;
 
 	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
 		mp_pa = (uint32_t)mp_pa;
 
-	if (unlikely(mp_pa) &&
-	    unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
+	gpa &= ~0xFFFULL;
+	if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) {
 		return 1;
 	}
 
-	return kvm_is_visible_gfn(vcpu->kvm, gfn);
+	return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
 }
 
 int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -494,6 +562,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
 		break;
 	case MSR_DR:
+		if (!data &&
+		    (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
+		    ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
+			pte.raddr &= ~SPLIT_HACK_MASK;
+		/* fall through */
 	case MSR_IR:
 		vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
 
@@ -541,7 +614,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
 		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
 	} else if (!is_mmio &&
-		   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+		   kvmppc_visible_gpa(vcpu, pte.raddr)) {
 		if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
 			/*
 			 * There is already a host HPTE there, presumably
@@ -637,42 +710,6 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
 #endif
 }
 
-static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
-{
-	ulong srr0 = kvmppc_get_pc(vcpu);
-	u32 last_inst = kvmppc_get_last_inst(vcpu);
-	int ret;
-
-	ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
-	if (ret == -ENOENT) {
-		ulong msr = kvmppc_get_msr(vcpu);
-
-		msr = kvmppc_set_field(msr, 33, 33, 1);
-		msr = kvmppc_set_field(msr, 34, 36, 0);
-		msr = kvmppc_set_field(msr, 42, 47, 0);
-		kvmppc_set_msr_fast(vcpu, msr);
-		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
-		return EMULATE_AGAIN;
-	}
-
-	return EMULATE_DONE;
-}
-
-static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
-{
-
-	/* Need to do paired single emulation? */
-	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
-		return EMULATE_DONE;
-
-	/* Read out the instruction */
-	if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
-		/* Need to emulate */
-		return EMULATE_FAIL;
-
-	return EMULATE_AGAIN;
-}
-
 /* Handle external providers (FPU, Altivec, VSX) */
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 			     ulong msr)
@@ -834,6 +871,15 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
 
 	return RESUME_GUEST;
 }
+
+void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
+{
+	if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
+		/* TAR got dropped, drop it in shadow too */
+		kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+	}
+	vcpu->arch.fscr = fscr;
+}
 #endif
 
 int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -858,6 +904,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
 		vcpu->stat.pf_instruc++;
 
+		if (kvmppc_is_split_real(vcpu))
+			kvmppc_fixup_split_real(vcpu);
+
 #ifdef CONFIG_PPC_BOOK3S_32
 		/* We set segments as unused segments when invalidating them. So
 		 * treat the respective fault as segment fault. */
@@ -960,6 +1009,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_DECREMENTER:
 	case BOOK3S_INTERRUPT_HV_DECREMENTER:
 	case BOOK3S_INTERRUPT_DOORBELL:
+	case BOOK3S_INTERRUPT_H_DOORBELL:
 		vcpu->stat.dec_exits++;
 		r = RESUME_GUEST;
 		break;
@@ -977,15 +1027,24 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	{
 		enum emulation_result er;
 		ulong flags;
+		u32 last_inst;
+		int emul;
 
 program_interrupt:
 		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
+		emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+		if (emul != EMULATE_DONE) {
+			r = RESUME_GUEST;
+			break;
+		}
+
 		if (kvmppc_get_msr(vcpu) & MSR_PR) {
 #ifdef EXIT_DEBUG
-			printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+			pr_info("Userspace triggered 0x700 exception at 0x%lx (0x%x)\n",
+				kvmppc_get_pc(vcpu), last_inst);
 #endif
-			if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
+			if ((last_inst & 0xff0007ff) !=
 			    (INS_DCBZ & 0xfffffff7)) {
 				kvmppc_core_queue_program(vcpu, flags);
 				r = RESUME_GUEST;
@@ -1004,7 +1063,7 @@ program_interrupt:
 			break;
 		case EMULATE_FAIL:
 			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
-			       __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
+			       __func__, kvmppc_get_pc(vcpu), last_inst);
 			kvmppc_core_queue_program(vcpu, flags);
 			r = RESUME_GUEST;
 			break;
@@ -1021,8 +1080,23 @@ program_interrupt:
 		break;
 	}
 	case BOOK3S_INTERRUPT_SYSCALL:
+	{
+		u32 last_sc;
+		int emul;
+
+		/* Get last sc for papr */
+		if (vcpu->arch.papr_enabled) {
+			/* The sc instuction points SRR0 to the next inst */
+			emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc);
+			if (emul != EMULATE_DONE) {
+				kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4);
+				r = RESUME_GUEST;
+				break;
+			}
+		}
+
 		if (vcpu->arch.papr_enabled &&
-		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
+		    (last_sc == 0x44000022) &&
 		    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
 			/* SC 1 papr hypercalls */
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
@@ -1067,36 +1141,51 @@ program_interrupt:
 			r = RESUME_GUEST;
 		}
 		break;
+	}
 	case BOOK3S_INTERRUPT_FP_UNAVAIL:
 	case BOOK3S_INTERRUPT_ALTIVEC:
 	case BOOK3S_INTERRUPT_VSX:
 	{
 		int ext_msr = 0;
+		int emul;
+		u32 last_inst;
+
+		if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) {
+			/* Do paired single instruction emulation */
+			emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
+						    &last_inst);
+			if (emul == EMULATE_DONE)
+				goto program_interrupt;
+			else
+				r = RESUME_GUEST;
 
-		switch (exit_nr) {
-		case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP;  break;
-		case BOOK3S_INTERRUPT_ALTIVEC:    ext_msr = MSR_VEC; break;
-		case BOOK3S_INTERRUPT_VSX:        ext_msr = MSR_VSX; break;
+			break;
 		}
 
-		switch (kvmppc_check_ext(vcpu, exit_nr)) {
-		case EMULATE_DONE:
-			/* everything ok - let's enable the ext */
-			r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
+		/* Enable external provider */
+		switch (exit_nr) {
+		case BOOK3S_INTERRUPT_FP_UNAVAIL:
+			ext_msr = MSR_FP;
 			break;
-		case EMULATE_FAIL:
-			/* we need to emulate this instruction */
-			goto program_interrupt;
+
+		case BOOK3S_INTERRUPT_ALTIVEC:
+			ext_msr = MSR_VEC;
 			break;
-		default:
-			/* nothing to worry about - go again */
+
+		case BOOK3S_INTERRUPT_VSX:
+			ext_msr = MSR_VSX;
 			break;
 		}
+
+		r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
 		break;
 	}
 	case BOOK3S_INTERRUPT_ALIGNMENT:
-		if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
-			u32 last_inst = kvmppc_get_last_inst(vcpu);
+	{
+		u32 last_inst;
+		int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+
+		if (emul == EMULATE_DONE) {
 			u32 dsisr;
 			u64 dar;
 
@@ -1110,6 +1199,7 @@ program_interrupt:
 		}
 		r = RESUME_GUEST;
 		break;
+	}
 #ifdef CONFIG_PPC_BOOK3S_64
 	case BOOK3S_INTERRUPT_FAC_UNAVAIL:
 		kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
@@ -1233,6 +1323,7 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 		*val = get_reg_val(id, to_book3s(vcpu)->hior);
 		break;
 	case KVM_REG_PPC_LPCR:
+	case KVM_REG_PPC_LPCR_64:
 		/*
 		 * We are only interested in the LPCR_ILE bit
 		 */
@@ -1268,6 +1359,7 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 		to_book3s(vcpu)->hior_explicit = true;
 		break;
 	case KVM_REG_PPC_LPCR:
+	case KVM_REG_PPC_LPCR_64:
 		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
 		break;
 	default:
@@ -1310,8 +1402,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
 	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
 	if (!p)
 		goto uninit_vcpu;
-	/* the real shared page fills the last 4k of our page */
-	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
+	vcpu->arch.shared = (void *)p;
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* Always start the shared struct in native endian mode */
 #ifdef __BIG_ENDIAN__
@@ -1568,6 +1659,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 {
 	mutex_init(&kvm->arch.hpt_mutex);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Start out with the default set of hcalls enabled */
+	kvmppc_pr_init_default_hcalls(kvm);
+#endif
+
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		spin_lock(&kvm_global_user_count_lock);
 		if (++kvm_global_user_count == 1)
@@ -1636,6 +1732,9 @@ static struct kvmppc_ops kvm_ops_pr = {
 	.emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
 	.fast_vcpu_kick = kvm_vcpu_kick,
 	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
+#ifdef CONFIG_PPC_BOOK3S_64
+	.hcall_implemented = kvmppc_hcall_impl_pr,
+#endif
 };
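
The kvmppc_copy_to_svcpu/kvmppc_copy_from_svcpu hunks above add PURR/SPURR/VTB accounting for PR guests: the timebase is snapshotted on guest entry and the delta is charged on exit, with PR KVM charging the full timebase delta to both PURR and SPURR. A standalone sketch of that snapshot-and-delta pattern, with get_tb() replaced by a stub for illustration:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the timebase read (get_tb() in the kernel). */
static uint64_t fake_tb;
static uint64_t get_tb_sketch(void) { return fake_tb; }

struct vcpu_times {
	uint64_t purr, spurr;	/* accumulated guest PURR/SPURR */
	uint64_t entry_tb;	/* timebase snapshot taken on guest entry */
};

static void on_guest_entry(struct vcpu_times *t)
{
	t->entry_tb = get_tb_sketch();
}

static void on_guest_exit(struct vcpu_times *t)
{
	uint64_t delta = get_tb_sketch() - t->entry_tb;

	/* PR KVM charges the whole timebase delta to both registers. */
	t->purr += delta;
	t->spurr += delta;
}

int main(void)
{
	struct vcpu_times t = { 0, 0, 0 };

	fake_tb = 1000; on_guest_entry(&t);
	fake_tb = 1750; on_guest_exit(&t);
	printf("purr=%llu spurr=%llu\n",
	       (unsigned long long)t.purr, (unsigned long long)t.spurr);
	return 0;	/* prints purr=750 spurr=750 */
}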
 
 

+ 80 - 12
arch/powerpc/kvm/book3s_pr_papr.c

@@ -40,8 +40,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
 {
 	long flags = kvmppc_get_gpr(vcpu, 4);
 	long pte_index = kvmppc_get_gpr(vcpu, 5);
-	unsigned long pteg[2 * 8];
-	unsigned long pteg_addr, i, *hpte;
+	__be64 pteg[2 * 8];
+	__be64 *hpte;
+	unsigned long pteg_addr, i;
 	long int ret;
 
 	i = pte_index & 7;
@@ -93,8 +94,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
-	pte[0] = be64_to_cpu(pte[0]);
-	pte[1] = be64_to_cpu(pte[1]);
+	pte[0] = be64_to_cpu((__force __be64)pte[0]);
+	pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
 	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
@@ -171,8 +172,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
 
 		pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
 		copy_from_user(pte, (void __user *)pteg, sizeof(pte));
-		pte[0] = be64_to_cpu(pte[0]);
-		pte[1] = be64_to_cpu(pte[1]);
+		pte[0] = be64_to_cpu((__force __be64)pte[0]);
+		pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
 		/* tsl = AVPN */
 		flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
@@ -211,8 +212,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 	pteg = get_pteg_addr(vcpu, pte_index);
 	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
 	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
-	pte[0] = be64_to_cpu(pte[0]);
-	pte[1] = be64_to_cpu(pte[1]);
+	pte[0] = be64_to_cpu((__force __be64)pte[0]);
+	pte[1] = be64_to_cpu((__force __be64)pte[1]);
 
 	ret = H_NOT_FOUND;
 	if ((pte[0] & HPTE_V_VALID) == 0 ||
@@ -231,8 +232,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
 
 	rb = compute_tlbie_rb(v, r, pte_index);
 	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
-	pte[0] = cpu_to_be64(pte[0]);
-	pte[1] = cpu_to_be64(pte[1]);
+	pte[0] = (__force u64)cpu_to_be64(pte[0]);
+	pte[1] = (__force u64)cpu_to_be64(pte[1]);
 	copy_to_user((void __user *)pteg, pte, sizeof(pte));
 	ret = H_SUCCESS;
 
@@ -266,6 +267,12 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
+	int rc, idx;
+
+	if (cmd <= MAX_HCALL_OPCODE &&
+	    !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls))
+		return EMULATE_FAIL;
+
 	switch (cmd) {
 	case H_ENTER:
 		return kvmppc_h_pr_enter(vcpu);
@@ -294,8 +301,11 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		break;
 	case H_RTAS:
 		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
-			return RESUME_HOST;
-		if (kvmppc_rtas_hcall(vcpu))
+			break;
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		rc = kvmppc_rtas_hcall(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		if (rc)
 			break;
 		kvmppc_set_gpr(vcpu, 3, 0);
 		return EMULATE_DONE;
@@ -303,3 +313,61 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 
 	return EMULATE_FAIL;
 }
+
+int kvmppc_hcall_impl_pr(unsigned long cmd)
+{
+	switch (cmd) {
+	case H_ENTER:
+	case H_REMOVE:
+	case H_PROTECT:
+	case H_BULK_REMOVE:
+	case H_PUT_TCE:
+	case H_CEDE:
+#ifdef CONFIG_KVM_XICS
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
+#endif
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ */
+static unsigned int default_hcall_list[] = {
+	H_ENTER,
+	H_REMOVE,
+	H_PROTECT,
+	H_BULK_REMOVE,
+	H_PUT_TCE,
+	H_CEDE,
+#ifdef CONFIG_KVM_XICS
+	H_XIRR,
+	H_CPPR,
+	H_EOI,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR_X,
+#endif
+	0
+};
+
+void kvmppc_pr_init_default_hcalls(struct kvm *kvm)
+{
+	int i;
+	unsigned int hcall;
+
+	for (i = 0; default_hcall_list[i]; ++i) {
+		hcall = default_hcall_list[i];
+		WARN_ON(!kvmppc_hcall_impl_pr(hcall));
+		__set_bit(hcall / 4, kvm->arch.enabled_hcalls);
+	}
+}
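
The (__force __be64) casts above exist to satisfy sparse: pte[] is declared as plain integers but holds big-endian data copied from the guest hash table, so every access has to go through be64_to_cpu()/cpu_to_be64(). A small demonstration of what goes wrong without the conversion, assuming a little-endian host (__builtin_bswap64 stands in for be64_to_cpu):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	/* Eight bytes as copy_from_user() would deliver them from the
	 * guest hash table: big-endian encoding of the value 1. */
	uint8_t wire[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
	uint64_t raw, host;

	memcpy(&raw, wire, sizeof(raw));
	host = __builtin_bswap64(raw);	/* be64_to_cpu() on an LE host */
	printf("raw=%016llx host=%016llx\n",
	       (unsigned long long)raw, (unsigned long long)host);
	/* On little-endian: raw=0100000000000000 host=0000000000000001 */
	return 0;
}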

+ 46 - 9
arch/powerpc/kvm/book3s_xics.c

@@ -64,8 +64,12 @@
 static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 			    u32 new_irq);
 
-static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
-			   bool report_status)
+/*
+ * Return value ideally indicates how the interrupt was handled, but no
+ * callers look at it (given that we don't implement KVM_IRQ_LINE_STATUS),
+ * so just return 0.
+ */
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
 {
 	struct ics_irq_state *state;
 	struct kvmppc_ics *ics;
@@ -82,17 +86,14 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
 	if (!state->exists)
 		return -EINVAL;
 
-	if (report_status)
-		return state->asserted;
-
 	/*
 	 * We set state->asserted locklessly. This should be fine as
 	 * we are the only setter, thus concurrent access is undefined
 	 * to begin with.
 	 */
-	if (level == KVM_INTERRUPT_SET_LEVEL)
+	if (level == 1 || level == KVM_INTERRUPT_SET_LEVEL)
 		state->asserted = 1;
-	else if (level == KVM_INTERRUPT_UNSET) {
+	else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
 		state->asserted = 0;
 		return 0;
 	}
@@ -100,7 +101,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
 	/* Attempt delivery */
 	icp_deliver_irq(xics, NULL, irq);
 
-	return state->asserted;
+	return 0;
 }
 
 static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -772,6 +773,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 	if (state->asserted)
 		icp_deliver_irq(xics, icp, irq);
 
+	kvm_notify_acked_irq(vcpu->kvm, 0, irq);
+
 	return H_SUCCESS;
 }
 
@@ -789,6 +792,8 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
 		icp_check_resend(xics, icp);
 	if (icp->rm_action & XICS_RM_REJECT)
 		icp_deliver_irq(xics, icp, icp->rm_reject);
+	if (icp->rm_action & XICS_RM_NOTIFY_EOI)
+		kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
 
 	icp->rm_action = 0;
 
@@ -1170,7 +1175,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 {
 	struct kvmppc_xics *xics = kvm->arch.xics;
 
-	return ics_deliver_irq(xics, irq, level, line_status);
+	return ics_deliver_irq(xics, irq, level);
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
+		int irq_source_id, int level, bool line_status)
+{
+	if (!level)
+		return -1;
+	return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
+			   level, line_status);
 }
 
 static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
@@ -1301,3 +1315,26 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
 	vcpu->arch.icp = NULL;
 	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
 }
+
+static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id, int level,
+			bool line_status)
+{
+	return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
+}
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
+{
+	entries->gsi = gsi;
+	entries->type = KVM_IRQ_ROUTING_IRQCHIP;
+	entries->set = xics_set_irq;
+	entries->irqchip.irqchip = 0;
+	entries->irqchip.pin = gsi;
+	return 1;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	return pin;
+}
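
kvm_irq_map_gsi() above sets up a flat identity routing for XICS: every GSI maps to irqchip 0 with pin == gsi and xics_set_irq as the setter, so no routing table lookup is needed. A hypothetical mirror of that entry fill, for illustration only:

#include <stdio.h>

struct route_sketch {
	int gsi, irqchip, pin;
};

static int map_gsi_sketch(struct route_sketch *e, int gsi)
{
	e->gsi = gsi;
	e->irqchip = 0;
	e->pin = gsi;
	return 1;	/* number of entries filled, as in the kernel */
}

int main(void)
{
	struct route_sketch e;

	map_gsi_sketch(&e, 42);
	printf("gsi %d -> chip %d pin %d\n", e.gsi, e.irqchip, e.pin);
	return 0;
}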

+ 2 - 0
arch/powerpc/kvm/book3s_xics.h

@@ -71,9 +71,11 @@ struct kvmppc_icp {
 #define XICS_RM_KICK_VCPU	0x1
 #define XICS_RM_CHECK_RESEND	0x2
 #define XICS_RM_REJECT		0x4
+#define XICS_RM_NOTIFY_EOI	0x8
 	u32 rm_action;
 	struct kvm_vcpu *rm_kick_target;
 	u32  rm_reject;
+	u32  rm_eoied_irq;
 
 	/* Debug stuff for real mode */
 	union kvmppc_icp_state rm_dbgstate;

+ 136 - 89
arch/powerpc/kvm/booke.c

@@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers;
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmio",       VCPU_STAT(mmio_exits) },
-	{ "dcr",        VCPU_STAT(dcr_exits) },
 	{ "sig",        VCPU_STAT(signal_exits) },
 	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
 	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
@@ -185,24 +184,28 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
 	set_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
-static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
-                                        ulong dear_flags, ulong esr_flags)
+void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+				 ulong dear_flags, ulong esr_flags)
 {
 	vcpu->arch.queued_dear = dear_flags;
 	vcpu->arch.queued_esr = esr_flags;
 	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 }
 
-static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
-                                           ulong dear_flags, ulong esr_flags)
+void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+				    ulong dear_flags, ulong esr_flags)
 {
 	vcpu->arch.queued_dear = dear_flags;
 	vcpu->arch.queued_esr = esr_flags;
 	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
 }
 
-static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
-                                           ulong esr_flags)
+void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+}
+
+void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong esr_flags)
 {
 	vcpu->arch.queued_esr = esr_flags;
 	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
@@ -266,13 +269,8 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
 
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
-#ifdef CONFIG_KVM_BOOKE_HV
-	mtspr(SPRN_GSRR0, srr0);
-	mtspr(SPRN_GSRR1, srr1);
-#else
-	vcpu->arch.shared->srr0 = srr0;
-	vcpu->arch.shared->srr1 = srr1;
-#endif
+	kvmppc_set_srr0(vcpu, srr0);
+	kvmppc_set_srr1(vcpu, srr1);
 }
 
 static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
@@ -297,51 +295,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 	vcpu->arch.mcsrr1 = srr1;
 }
 
-static unsigned long get_guest_dear(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_KVM_BOOKE_HV
-	return mfspr(SPRN_GDEAR);
-#else
-	return vcpu->arch.shared->dar;
-#endif
-}
-
-static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear)
-{
-#ifdef CONFIG_KVM_BOOKE_HV
-	mtspr(SPRN_GDEAR, dear);
-#else
-	vcpu->arch.shared->dar = dear;
-#endif
-}
-
-static unsigned long get_guest_esr(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_KVM_BOOKE_HV
-	return mfspr(SPRN_GESR);
-#else
-	return vcpu->arch.shared->esr;
-#endif
-}
-
-static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
-{
-#ifdef CONFIG_KVM_BOOKE_HV
-	mtspr(SPRN_GESR, esr);
-#else
-	vcpu->arch.shared->esr = esr;
-#endif
-}
-
-static unsigned long get_guest_epr(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_KVM_BOOKE_HV
-	return mfspr(SPRN_GEPR);
-#else
-	return vcpu->arch.epr;
-#endif
-}
-
 /* Deliver the interrupt of the corresponding priority, if possible. */
 static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
                                         unsigned int priority)
@@ -450,9 +403,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 
 		vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
 		if (update_esr == true)
-			set_guest_esr(vcpu, vcpu->arch.queued_esr);
+			kvmppc_set_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
-			set_guest_dear(vcpu, vcpu->arch.queued_dear);
+			kvmppc_set_dar(vcpu, vcpu->arch.queued_dear);
 		if (update_epr == true) {
 			if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
 				kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
@@ -752,9 +705,8 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		 * they were actually modified by emulation. */
 		return RESUME_GUEST_NV;
 
-	case EMULATE_DO_DCR:
-		run->exit_reason = KVM_EXIT_DCR;
-		return RESUME_HOST;
+	case EMULATE_AGAIN:
+		return RESUME_GUEST;
 
 	case EMULATE_FAIL:
 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
@@ -866,6 +818,28 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+				  enum emulation_result emulated, u32 last_inst)
+{
+	switch (emulated) {
+	case EMULATE_AGAIN:
+		return RESUME_GUEST;
+
+	case EMULATE_FAIL:
+		pr_debug("%s: load instruction from guest address %lx failed\n",
+		       __func__, vcpu->arch.pc);
+		/* For debugging, encode the failing instruction and
+		 * report it to userspace. */
+		run->hw.hardware_exit_reason = ~0ULL << 32;
+		run->hw.hardware_exit_reason |= last_inst;
+		kvmppc_core_queue_program(vcpu, ESR_PIL);
+		return RESUME_HOST;
+
+	default:
+		BUG();
+	}
+}
+
 /**
  * kvmppc_handle_exit
  *
@@ -877,6 +851,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int r = RESUME_HOST;
 	int s;
 	int idx;
+	u32 last_inst = KVM_INST_FETCH_FAILED;
+	enum emulation_result emulated = EMULATE_DONE;
 
 	/* update before a new last_exit_type is rewritten */
 	kvmppc_update_timing_stats(vcpu);
@@ -884,6 +860,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* restart interrupts if they were meant for the host */
 	kvmppc_restart_interrupt(vcpu, exit_nr);
 
+	/*
+	 * Get the last instruction before being preempted.
+	 * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA
+	 */
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+	case BOOKE_INTERRUPT_DTLB_MISS:
+	case BOOKE_INTERRUPT_HV_PRIV:
+		emulated = kvmppc_get_last_inst(vcpu, false, &last_inst);
+		break;
+	default:
+		break;
+	}
+
 	local_irq_enable();
 
 	trace_kvm_exit(exit_nr, vcpu);
@@ -892,6 +882,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
 
+	if (emulated != EMULATE_DONE) {
+		r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst);
+		goto out;
+	}
+
 	switch (exit_nr) {
 	case BOOKE_INTERRUPT_MACHINE_CHECK:
 		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
@@ -1181,6 +1176,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		BUG();
 	}
 
+out:
 	/*
 	 * To avoid clobbering exit_reason, only check for signals if we
 	 * aren't already exiting to userspace for some other reason.
@@ -1265,17 +1261,17 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	regs->lr = vcpu->arch.lr;
 	regs->xer = kvmppc_get_xer(vcpu);
 	regs->msr = vcpu->arch.shared->msr;
-	regs->srr0 = vcpu->arch.shared->srr0;
-	regs->srr1 = vcpu->arch.shared->srr1;
+	regs->srr0 = kvmppc_get_srr0(vcpu);
+	regs->srr1 = kvmppc_get_srr1(vcpu);
 	regs->pid = vcpu->arch.pid;
-	regs->sprg0 = vcpu->arch.shared->sprg0;
-	regs->sprg1 = vcpu->arch.shared->sprg1;
-	regs->sprg2 = vcpu->arch.shared->sprg2;
-	regs->sprg3 = vcpu->arch.shared->sprg3;
-	regs->sprg4 = vcpu->arch.shared->sprg4;
-	regs->sprg5 = vcpu->arch.shared->sprg5;
-	regs->sprg6 = vcpu->arch.shared->sprg6;
-	regs->sprg7 = vcpu->arch.shared->sprg7;
+	regs->sprg0 = kvmppc_get_sprg0(vcpu);
+	regs->sprg1 = kvmppc_get_sprg1(vcpu);
+	regs->sprg2 = kvmppc_get_sprg2(vcpu);
+	regs->sprg3 = kvmppc_get_sprg3(vcpu);
+	regs->sprg4 = kvmppc_get_sprg4(vcpu);
+	regs->sprg5 = kvmppc_get_sprg5(vcpu);
+	regs->sprg6 = kvmppc_get_sprg6(vcpu);
+	regs->sprg7 = kvmppc_get_sprg7(vcpu);
 
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -1293,17 +1289,17 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.lr = regs->lr;
 	kvmppc_set_xer(vcpu, regs->xer);
 	kvmppc_set_msr(vcpu, regs->msr);
-	vcpu->arch.shared->srr0 = regs->srr0;
-	vcpu->arch.shared->srr1 = regs->srr1;
+	kvmppc_set_srr0(vcpu, regs->srr0);
+	kvmppc_set_srr1(vcpu, regs->srr1);
 	kvmppc_set_pid(vcpu, regs->pid);
-	vcpu->arch.shared->sprg0 = regs->sprg0;
-	vcpu->arch.shared->sprg1 = regs->sprg1;
-	vcpu->arch.shared->sprg2 = regs->sprg2;
-	vcpu->arch.shared->sprg3 = regs->sprg3;
-	vcpu->arch.shared->sprg4 = regs->sprg4;
-	vcpu->arch.shared->sprg5 = regs->sprg5;
-	vcpu->arch.shared->sprg6 = regs->sprg6;
-	vcpu->arch.shared->sprg7 = regs->sprg7;
+	kvmppc_set_sprg0(vcpu, regs->sprg0);
+	kvmppc_set_sprg1(vcpu, regs->sprg1);
+	kvmppc_set_sprg2(vcpu, regs->sprg2);
+	kvmppc_set_sprg3(vcpu, regs->sprg3);
+	kvmppc_set_sprg4(vcpu, regs->sprg4);
+	kvmppc_set_sprg5(vcpu, regs->sprg5);
+	kvmppc_set_sprg6(vcpu, regs->sprg6);
+	kvmppc_set_sprg7(vcpu, regs->sprg7);
 
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -1321,8 +1317,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
 	sregs->u.e.csrr0 = vcpu->arch.csrr0;
 	sregs->u.e.csrr1 = vcpu->arch.csrr1;
 	sregs->u.e.mcsr = vcpu->arch.mcsr;
-	sregs->u.e.esr = get_guest_esr(vcpu);
-	sregs->u.e.dear = get_guest_dear(vcpu);
+	sregs->u.e.esr = kvmppc_get_esr(vcpu);
+	sregs->u.e.dear = kvmppc_get_dar(vcpu);
 	sregs->u.e.tsr = vcpu->arch.tsr;
 	sregs->u.e.tcr = vcpu->arch.tcr;
 	sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
@@ -1339,8 +1335,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
 	vcpu->arch.csrr0 = sregs->u.e.csrr0;
 	vcpu->arch.csrr1 = sregs->u.e.csrr1;
 	vcpu->arch.mcsr = sregs->u.e.mcsr;
-	set_guest_esr(vcpu, sregs->u.e.esr);
-	set_guest_dear(vcpu, sregs->u.e.dear);
+	kvmppc_set_esr(vcpu, sregs->u.e.esr);
+	kvmppc_set_dar(vcpu, sregs->u.e.dear);
 	vcpu->arch.vrsave = sregs->u.e.vrsave;
 	kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
 
@@ -1493,7 +1489,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
 		break;
 	case KVM_REG_PPC_EPR: {
-		u32 epr = get_guest_epr(vcpu);
+		u32 epr = kvmppc_get_epr(vcpu);
 		val = get_reg_val(reg->id, epr);
 		break;
 	}
@@ -1788,6 +1784,57 @@ void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
 #endif
 }
 
+int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid,
+		 enum xlate_readwrite xlrw, struct kvmppc_pte *pte)
+{
+	int gtlb_index;
+	gpa_t gpaddr;
+
+#ifdef CONFIG_KVM_E500V2
+	if (!(vcpu->arch.shared->msr & MSR_PR) &&
+	    (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
+		pte->eaddr = eaddr;
+		pte->raddr = (vcpu->arch.magic_page_pa & PAGE_MASK) |
+			     (eaddr & ~PAGE_MASK);
+		pte->vpage = eaddr >> PAGE_SHIFT;
+		pte->may_read = true;
+		pte->may_write = true;
+		pte->may_execute = true;
+
+		return 0;
+	}
+#endif
+
+	/* Check the guest TLB. */
+	switch (xlid) {
+	case XLATE_INST:
+		gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
+		break;
+	case XLATE_DATA:
+		gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
+		break;
+	default:
+		BUG();
+	}
+
+	/* Do we have a TLB entry at all? */
+	if (gtlb_index < 0)
+		return -ENOENT;
+
+	gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+
+	pte->eaddr = eaddr;
+	pte->raddr = (gpaddr & PAGE_MASK) | (eaddr & ~PAGE_MASK);
+	pte->vpage = eaddr >> PAGE_SHIFT;
+
+	/* XXX read permissions from the guest TLB */
+	pte->may_read = true;
+	pte->may_write = true;
+	pte->may_execute = true;
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					 struct kvm_guest_debug *dbg)
 {
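
The new kvmppc_xlate() helper above resolves a guest effective address either through the magic page or the guest TLB, then builds the real address by keeping the in-page offset bits of the EA and taking the page-frame bits from the translated guest-physical address. A standalone sketch of that composition, 4KB pages assumed:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* Frame bits from the guest-physical address, offset bits from the EA. */
static uint64_t compose_raddr(uint64_t gpaddr, uint64_t eaddr)
{
	return (gpaddr & PAGE_MASK) | (eaddr & ~PAGE_MASK);
}

int main(void)
{
	/* e.g. EA 0xc0001234 whose page translates to GPA 0x7f000000 */
	printf("raddr=%llx\n",
	       (unsigned long long)compose_raddr(0x7f000000ULL, 0xc0001234ULL));
	return 0;	/* prints raddr=7f001234 */
}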

+ 0 - 7
arch/powerpc/kvm/booke.h

@@ -99,13 +99,6 @@ enum int_class {
 
 void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
 
-extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
-				      unsigned int inst, int *advance);
-extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn,
-					 ulong spr_val);
-extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn,
-					 ulong *spr_val);
 extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
 				       struct kvm_vcpu *vcpu,

+ 4 - 4
arch/powerpc/kvm/booke_emulate.c

@@ -165,16 +165,16 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	 * guest (PR-mode only).
 	 */
 	case SPRN_SPRG4:
-		vcpu->arch.shared->sprg4 = spr_val;
+		kvmppc_set_sprg4(vcpu, spr_val);
 		break;
 	case SPRN_SPRG5:
-		vcpu->arch.shared->sprg5 = spr_val;
+		kvmppc_set_sprg5(vcpu, spr_val);
 		break;
 	case SPRN_SPRG6:
-		vcpu->arch.shared->sprg6 = spr_val;
+		kvmppc_set_sprg6(vcpu, spr_val);
 		break;
 	case SPRN_SPRG7:
-		vcpu->arch.shared->sprg7 = spr_val;
+		kvmppc_set_sprg7(vcpu, spr_val);
 		break;
 
 	case SPRN_IVPR:

+ 0 - 5
arch/powerpc/kvm/booke_interrupts.S

@@ -21,7 +21,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
-#include <asm/mmu-44x.h>
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
 
@@ -424,10 +423,6 @@ lightweight_exit:
 	mtspr	SPRN_PID1, r3
 #endif
 
-#ifdef CONFIG_44x
-	iccci	0, 0 /* XXX hack */
-#endif
-
 	/* Load some guest volatiles. */
 	lwz	r0, VCPU_GPR(R0)(r4)
 	lwz	r2, VCPU_GPR(R2)(r4)

+ 10 - 50
arch/powerpc/kvm/bookehv_interrupts.S

@@ -24,12 +24,10 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
-#include <asm/mmu-44x.h>
 #include <asm/page.h>
 #include <asm/asm-compat.h>
 #include <asm/asm-offsets.h>
 #include <asm/bitsperlong.h>
-#include <asm/thread_info.h>
 
 #ifdef CONFIG_64BIT
 #include <asm/exception-64e.h>
@@ -122,38 +120,14 @@
 1:
 
 	.if	\flags & NEED_EMU
-	/*
-	 * This assumes you have external PID support.
-	 * To support a bookehv CPU without external PID, you'll
-	 * need to look up the TLB entry and create a temporary mapping.
-	 *
-	 * FIXME: we don't currently handle if the lwepx faults.  PR-mode
-	 * booke doesn't handle it either.  Since Linux doesn't use
-	 * broadcast tlbivax anymore, the only way this should happen is
-	 * if the guest maps its memory execute-but-not-read, or if we
-	 * somehow take a TLB miss in the middle of this entry code and
-	 * evict the relevant entry.  On e500mc, all kernel lowmem is
-	 * bolted into TLB1 large page mappings, and we don't use
-	 * broadcast invalidates, so we should not take a TLB miss here.
-	 *
-	 * Later we'll need to deal with faults here.  Disallowing guest
-	 * mappings that are execute-but-not-read could be an option on
-	 * e500mc, but not on chips with an LRAT if it is used.
-	 */
-
-	mfspr	r3, SPRN_EPLC	/* will already have correct ELPID and EGS */
 	PPC_STL	r15, VCPU_GPR(R15)(r4)
 	PPC_STL	r16, VCPU_GPR(R16)(r4)
 	PPC_STL	r17, VCPU_GPR(R17)(r4)
 	PPC_STL	r18, VCPU_GPR(R18)(r4)
 	PPC_STL	r19, VCPU_GPR(R19)(r4)
-	mr	r8, r3
 	PPC_STL	r20, VCPU_GPR(R20)(r4)
-	rlwimi	r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
 	PPC_STL	r21, VCPU_GPR(R21)(r4)
-	rlwimi	r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
 	PPC_STL	r22, VCPU_GPR(R22)(r4)
-	rlwimi	r8, r10, EPC_EPID_SHIFT, EPC_EPID
 	PPC_STL	r23, VCPU_GPR(R23)(r4)
 	PPC_STL	r24, VCPU_GPR(R24)(r4)
 	PPC_STL	r25, VCPU_GPR(R25)(r4)
@@ -163,33 +137,15 @@
 	PPC_STL	r29, VCPU_GPR(R29)(r4)
 	PPC_STL	r30, VCPU_GPR(R30)(r4)
 	PPC_STL	r31, VCPU_GPR(R31)(r4)
-	mtspr	SPRN_EPLC, r8
-
-	/* disable preemption, so we are sure we hit the fixup handler */
-	CURRENT_THREAD_INFO(r8, r1)
-	li	r7, 1
-	stw	r7, TI_PREEMPT(r8)
-
-	isync
 
 	/*
-	 * In case the read goes wrong, we catch it and write an invalid value
-	 * in LAST_INST instead.
+	 * We don't use external PID support. lwepx faults would need to be
+	 * handled by KVM and this implies additional code in DO_KVM (for
+	 * DTB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS], which
+	 * is too intrusive for the host. Get the last instruction in
+	 * kvmppc_get_last_inst().
 	 */
-1:	lwepx	r9, 0, r5
-2:
-.section .fixup, "ax"
-3:	li	r9, KVM_INST_FETCH_FAILED
-	b	2b
-.previous
-.section __ex_table,"a"
-	PPC_LONG_ALIGN
-	PPC_LONG 1b,3b
-.previous
-
-	mtspr	SPRN_EPLC, r3
-	li	r7, 0
-	stw	r7, TI_PREEMPT(r8)
+	li	r9, KVM_INST_FETCH_FAILED
 	stw	r9, VCPU_LAST_INST(r4)
 	.endif
 
@@ -441,6 +397,7 @@ _GLOBAL(kvmppc_resume_host)
 #ifdef CONFIG_64BIT
 	PPC_LL	r3, PACA_SPRG_VDSO(r13)
 #endif
+	mfspr	r5, SPRN_SPRG9
 	PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
 	mfspr	r8, SPRN_SPRG6
 	PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
@@ -448,6 +405,7 @@ _GLOBAL(kvmppc_resume_host)
 #ifdef CONFIG_64BIT
 	mtspr	SPRN_SPRG_VDSO_WRITE, r3
 #endif
+	PPC_STD(r5, VCPU_SPRG9, r4)
 	PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
 	mfxer	r3
 	PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
@@ -682,7 +640,9 @@ lightweight_exit:
 	mtspr	SPRN_SPRG5W, r6
 	PPC_LD(r8, VCPU_SHARED_SPRG7, r11)
 	mtspr	SPRN_SPRG6W, r7
+	PPC_LD(r5, VCPU_SPRG9, r4)
 	mtspr	SPRN_SPRG7W, r8
+	mtspr	SPRN_SPRG9, r5
 
 	/* Load some guest volatiles. */
 	PPC_LL	r3, VCPU_LR(r4)

+ 12 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -250,6 +250,14 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
 				spr_val);
 		break;
 
+	case SPRN_PWRMGTCR0:
+		/*
+		 * Guest relies on host power management configurations
+		 * Treat the request as a general store
+		 */
+		vcpu->arch.pwrmgtcr0 = spr_val;
+		break;
+
 	/* extra exceptions */
 	case SPRN_IVOR32:
 		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
@@ -368,6 +376,10 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
 		*spr_val = vcpu->arch.eptcfg;
 		break;
 
+	case SPRN_PWRMGTCR0:
+		*spr_val = vcpu->arch.pwrmgtcr0;
+		break;
+
 	/* extra exceptions */
 	case SPRN_IVOR32:
 		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];

+ 102 - 0
arch/powerpc/kvm/e500_mmu_host.c

@@ -107,11 +107,15 @@ static u32 get_host_mas0(unsigned long eaddr)
 {
 	unsigned long flags;
 	u32 mas0;
+	u32 mas4;
 
 	local_irq_save(flags);
 	mtspr(SPRN_MAS6, 0);
+	mas4 = mfspr(SPRN_MAS4);
+	mtspr(SPRN_MAS4, mas4 & ~MAS4_TLBSEL_MASK);
 	asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
 	mas0 = mfspr(SPRN_MAS0);
+	mtspr(SPRN_MAS4, mas4);
 	local_irq_restore(flags);
 
 	return mas0;
@@ -607,6 +611,104 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 	}
 }
 
+#ifdef CONFIG_KVM_BOOKE_HV
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
+			  u32 *instr)
+{
+	gva_t geaddr;
+	hpa_t addr;
+	hfn_t pfn;
+	hva_t eaddr;
+	u32 mas1, mas2, mas3;
+	u64 mas7_mas3;
+	struct page *page;
+	unsigned int addr_space, psize_shift;
+	bool pr;
+	unsigned long flags;
+
+	/* Search TLB for guest pc to get the real address */
+	geaddr = kvmppc_get_pc(vcpu);
+
+	addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG;
+
+	local_irq_save(flags);
+	mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
+	mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
+	asm volatile("tlbsx 0, %[geaddr]\n" : :
+		     [geaddr] "r" (geaddr));
+	mtspr(SPRN_MAS5, 0);
+	mtspr(SPRN_MAS8, 0);
+	mas1 = mfspr(SPRN_MAS1);
+	mas2 = mfspr(SPRN_MAS2);
+	mas3 = mfspr(SPRN_MAS3);
+#ifdef CONFIG_64BIT
+	mas7_mas3 = mfspr(SPRN_MAS7_MAS3);
+#else
+	mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3;
+#endif
+	local_irq_restore(flags);
+
+	/*
+	 * If the TLB entry for guest pc was evicted, return to the guest.
+	 * There is a good chance of finding a valid entry next time.
+	 */
+	if (!(mas1 & MAS1_VALID))
+		return EMULATE_AGAIN;
+
+	/*
+	 * Another thread may rewrite the TLB entry in parallel; don't
+	 * execute from the address if the execute permission is not set.
+	 */
+	pr = vcpu->arch.shared->msr & MSR_PR;
+	if (unlikely((pr && !(mas3 & MAS3_UX)) ||
+		     (!pr && !(mas3 & MAS3_SX)))) {
+		pr_err_ratelimited(
+			"%s: Instuction emulation from guest addres %08lx without execute permission\n",
+			__func__, geaddr);
+		return EMULATE_AGAIN;
+	}
+
+	/*
+	 * The real address will be mapped by a cacheable, memory coherent,
+	 * write-back page. Check for mismatches when LRAT is used.
+	 */
+	if (has_feature(vcpu, VCPU_FTR_MMU_V2) &&
+	    unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) {
+		pr_err_ratelimited(
+			"%s: Instuction emulation from guest addres %08lx mismatches storage attributes\n",
+			__func__, geaddr);
+		return EMULATE_AGAIN;
+	}
+
+	/* Get pfn */
+	psize_shift = MAS1_GET_TSIZE(mas1) + 10;
+	addr = (mas7_mas3 & (~0ULL << psize_shift)) |
+	       (geaddr & ((1ULL << psize_shift) - 1ULL));
+	pfn = addr >> PAGE_SHIFT;
+
+	/* Guard against emulation from devices area */
+	if (unlikely(!page_is_ram(pfn))) {
+		pr_err_ratelimited("%s: Instruction emulation from non-RAM host addres %08llx is not supported\n",
+			 __func__, addr);
+		return EMULATE_AGAIN;
+	}
+
+	/* Map a page and get guest's instruction */
+	page = pfn_to_page(pfn);
+	eaddr = (unsigned long)kmap_atomic(page);
+	*instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK));
+	kunmap_atomic((u32 *)eaddr);
+
+	return EMULATE_DONE;
+}
+#else
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
+			  u32 *instr)
+{
+	return EMULATE_AGAIN;
+}
+#endif
+
 /************* MMU Notifiers *************/
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
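
kvmppc_load_last_inst() above searches the hardware TLB for the guest pc (tlbsx with MAS5/MAS6 set to the guest's LPID and PID), validates execute permission and storage attributes, and only then maps the page to read the instruction. The real address is composed from MAS7||MAS3 and the guest EA using the entry's page-size shift; a standalone sketch of that arithmetic, with a hypothetical 4KB mapping:

#include <stdint.h>
#include <stdio.h>

/* Page-frame bits come from MAS7||MAS3, in-page bits from the guest
 * effective address; psize_shift is derived from the TLB entry's
 * TSIZE field (log2 of the page size). */
static uint64_t real_addr(uint64_t mas7_mas3, uint64_t geaddr,
			  unsigned int psize_shift)
{
	return (mas7_mas3 & (~0ULL << psize_shift)) |
	       (geaddr & ((1ULL << psize_shift) - 1ULL));
}

int main(void)
{
	/* Hypothetical 4KB mapping (psize_shift = 12). */
	uint64_t addr = real_addr(0x000000207f000000ULL, 0xc0000a10ULL, 12);

	printf("addr=%llx pfn=%llx\n",
	       (unsigned long long)addr,
	       (unsigned long long)(addr >> 12));	/* addr=207f000a10 */
	return 0;
}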

+ 23 - 5
arch/powerpc/kvm/e500mc.c

@@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
 {
 }
 
-static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu);
+static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid);
 
 static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
 {
@@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
 	mtspr(SPRN_GESR, vcpu->arch.shared->esr);
 
 	if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
-	    __get_cpu_var(last_vcpu_on_cpu) != vcpu) {
+	    __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) {
 		kvmppc_e500_tlbil_all(vcpu_e500);
-		__get_cpu_var(last_vcpu_on_cpu) = vcpu;
+		__get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu;
 	}
 
 	kvmppc_load_guest_fp(vcpu);
@@ -267,14 +267,32 @@ static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu,
 static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
 			      union kvmppc_one_reg *val)
 {
-	int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+	int r = 0;
+
+	switch (id) {
+	case KVM_REG_PPC_SPRG9:
+		*val = get_reg_val(id, vcpu->arch.sprg9);
+		break;
+	default:
+		r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+	}
+
 	return r;
 }
 
 static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
 			      union kvmppc_one_reg *val)
 {
-	int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+	int r = 0;
+
+	switch (id) {
+	case KVM_REG_PPC_SPRG9:
+		vcpu->arch.sprg9 = set_reg_val(id, *val);
+		break;
+	default:
+		r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+	}
+
 	return r;
 }
 

+ 11 - 195
arch/powerpc/kvm/emulate.c

@@ -207,36 +207,28 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	return emulated;
 }
 
-/* XXX to do:
- * lhax
- * lhaux
- * lswx
- * lswi
- * stswx
- * stswi
- * lha
- * lhau
- * lmw
- * stmw
- *
- */
 /* XXX Should probably auto-generate instruction decoding for a particular core
  * from opcode tables in the future. */
 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
-	u32 inst = kvmppc_get_last_inst(vcpu);
-	int ra = get_ra(inst);
-	int rs = get_rs(inst);
-	int rt = get_rt(inst);
-	int sprn = get_sprn(inst);
-	enum emulation_result emulated = EMULATE_DONE;
+	u32 inst;
+	int rs, rt, sprn;
+	enum emulation_result emulated;
 	int advance = 1;
 
 	/* this default type might be overwritten by subcategories */
 	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
 
+	emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+	if (emulated != EMULATE_DONE)
+		return emulated;
+
 	pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
 
+	rs = get_rs(inst);
+	rt = get_rt(inst);
+	sprn = get_sprn(inst);
+
 	switch (get_op(inst)) {
 	case OP_TRAP:
 #ifdef CONFIG_PPC_BOOK3S
@@ -264,200 +256,24 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 #endif
 			advance = 0;
 			break;
-		case OP_31_XOP_LWZX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-			break;
-
-		case OP_31_XOP_LBZX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-			break;
-
-		case OP_31_XOP_LBZUX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_STWX:
-			emulated = kvmppc_handle_store(run, vcpu,
-						       kvmppc_get_gpr(vcpu, rs),
-			                               4, 1);
-			break;
-
-		case OP_31_XOP_STBX:
-			emulated = kvmppc_handle_store(run, vcpu,
-						       kvmppc_get_gpr(vcpu, rs),
-			                               1, 1);
-			break;
-
-		case OP_31_XOP_STBUX:
-			emulated = kvmppc_handle_store(run, vcpu,
-						       kvmppc_get_gpr(vcpu, rs),
-			                               1, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_LHAX:
-			emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
-			break;
-
-		case OP_31_XOP_LHZX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-			break;
-
-		case OP_31_XOP_LHZUX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
 
 		case OP_31_XOP_MFSPR:
 			emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
 			break;
 
-		case OP_31_XOP_STHX:
-			emulated = kvmppc_handle_store(run, vcpu,
-						       kvmppc_get_gpr(vcpu, rs),
-			                               2, 1);
-			break;
-
-		case OP_31_XOP_STHUX:
-			emulated = kvmppc_handle_store(run, vcpu,
-						       kvmppc_get_gpr(vcpu, rs),
-			                               2, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
 		case OP_31_XOP_MTSPR:
 			emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
 			break;
 
-		case OP_31_XOP_DCBST:
-		case OP_31_XOP_DCBF:
-		case OP_31_XOP_DCBI:
-			/* Do nothing. The guest is performing dcbi because
-			 * hardware DMA is not snooped by the dcache, but
-			 * emulated DMA either goes through the dcache as
-			 * normal writes, or the host kernel has handled dcache
-			 * coherence. */
-			break;
-
-		case OP_31_XOP_LWBRX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
-			break;
-
 		case OP_31_XOP_TLBSYNC:
 			break;
 
-		case OP_31_XOP_STWBRX:
-			emulated = kvmppc_handle_store(run, vcpu,
-						       kvmppc_get_gpr(vcpu, rs),
-			                               4, 0);
-			break;
-
-		case OP_31_XOP_LHBRX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
-			break;
-
-		case OP_31_XOP_STHBRX:
-			emulated = kvmppc_handle_store(run, vcpu,
-						       kvmppc_get_gpr(vcpu, rs),
-			                               2, 0);
-			break;
-
 		default:
 			/* Attempt core-specific emulation below. */
 			emulated = EMULATE_FAIL;
 		}
 		break;
 
-	case OP_LWZ:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-		break;
-
-	/* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */
-	case OP_LD:
-		rt = get_rt(inst);
-		emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
-		break;
-
-	case OP_LWZU:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LBZ:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-		break;
-
-	case OP_LBZU:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_STW:
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               4, 1);
-		break;
-
-	/* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
-	case OP_STD:
-		rs = get_rs(inst);
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               8, 1);
-		break;
-
-	case OP_STWU:
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               4, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_STB:
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               1, 1);
-		break;
-
-	case OP_STBU:
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               1, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LHZ:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-		break;
-
-	case OP_LHZU:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LHA:
-		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
-		break;
-
-	case OP_LHAU:
-		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_STH:
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               2, 1);
-		break;
-
-	case OP_STHU:
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               2, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
 	default:
 		emulated = EMULATE_FAIL;
 	}

+ 272 - 0
arch/powerpc/kvm/emulate_loadstore.c

@@ -0,0 +1,272 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+#include <linux/clockchips.h>
+
+#include <asm/reg.h>
+#include <asm/time.h>
+#include <asm/byteorder.h>
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+#include "timing.h"
+#include "trace.h"
+
+/* XXX to do:
+ * lhax
+ * lhaux
+ * lswx
+ * lswi
+ * stswx
+ * stswi
+ * lha
+ * lhau
+ * lmw
+ * stmw
+ *
+ */
+int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	u32 inst;
+	int ra, rs, rt;
+	enum emulation_result emulated;
+	int advance = 1;
+
+	/* this default type might be overwritten by subcategories */
+	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
+	emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+	if (emulated != EMULATE_DONE)
+		return emulated;
+
+	ra = get_ra(inst);
+	rs = get_rs(inst);
+	rt = get_rt(inst);
+
+	switch (get_op(inst)) {
+	case 31:
+		switch (get_xop(inst)) {
+		case OP_31_XOP_LWZX:
+			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+			break;
+
+		case OP_31_XOP_LBZX:
+			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+			break;
+
+		case OP_31_XOP_LBZUX:
+			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+			break;
+
+		case OP_31_XOP_STWX:
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+			                               4, 1);
+			break;
+
+		case OP_31_XOP_STBX:
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+			                               1, 1);
+			break;
+
+		case OP_31_XOP_STBUX:
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+			                               1, 1);
+			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+			break;
+
+		case OP_31_XOP_LHAX:
+			emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+			break;
+
+		case OP_31_XOP_LHZX:
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+			break;
+
+		case OP_31_XOP_LHZUX:
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+			break;
+
+		case OP_31_XOP_STHX:
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+			                               2, 1);
+			break;
+
+		case OP_31_XOP_STHUX:
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+			                               2, 1);
+			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+			break;
+
+		case OP_31_XOP_DCBST:
+		case OP_31_XOP_DCBF:
+		case OP_31_XOP_DCBI:
+			/* Do nothing. The guest is performing dcbi because
+			 * hardware DMA is not snooped by the dcache, but
+			 * emulated DMA either goes through the dcache as
+			 * normal writes, or the host kernel has handled dcache
+			 * coherence. */
+			break;
+
+		case OP_31_XOP_LWBRX:
+			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
+			break;
+
+		case OP_31_XOP_STWBRX:
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+			                               4, 0);
+			break;
+
+		case OP_31_XOP_LHBRX:
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
+			break;
+
+		case OP_31_XOP_STHBRX:
+			emulated = kvmppc_handle_store(run, vcpu,
+						       kvmppc_get_gpr(vcpu, rs),
+			                               2, 0);
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+
+	case OP_LWZ:
+		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+		break;
+
+	/* TBD: add support for the other 64-bit load variants (ldu, ldux, ldx, ...). */
+	case OP_LD:
+		emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+		break;
+
+	case OP_LWZU:
+		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		break;
+
+	case OP_LBZ:
+		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+		break;
+
+	case OP_LBZU:
+		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		break;
+
+	case OP_STW:
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+		                               4, 1);
+		break;
+
+	/* TBD: add support for the other 64-bit store variants (stdu, stdux, stdx, ...). */
+	case OP_STD:
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+		                               8, 1);
+		break;
+
+	case OP_STWU:
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+		                               4, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		break;
+
+	case OP_STB:
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+		                               1, 1);
+		break;
+
+	case OP_STBU:
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+		                               1, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		break;
+
+	case OP_LHZ:
+		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+		break;
+
+	case OP_LHZU:
+		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		break;
+
+	case OP_LHA:
+		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+		break;
+
+	case OP_LHAU:
+		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		break;
+
+	case OP_STH:
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+		                               2, 1);
+		break;
+
+	case OP_STHU:
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
+		                               2, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+		break;
+	}
+
+	if (emulated == EMULATE_FAIL) {
+		advance = 0;
+		kvmppc_core_queue_program(vcpu, 0);
+	}
+
+	trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated);
+
+	/* Advance past emulated instruction. */
+	if (advance)
+		kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+
+	return emulated;
+}
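
The dispatch above keys off the primary and extended opcode fields of the faulting instruction. For reference, here is a minimal sketch approximating the field accessors it relies on from asm/disassemble.h, using Power ISA big-endian bit numbering (primary opcode in bits 0-5, X-form extended opcode in bits 21-30, RT/RS in bits 6-10, RA in bits 11-15):

#include <stdint.h>

/* Sketch only; the in-tree helpers take the instruction word the same
 * way but use kernel types. RT and RS decode the same bit field:
 * loads name it RT, stores name it RS. */
static inline uint32_t get_op(uint32_t inst)  { return inst >> 26; }
static inline uint32_t get_xop(uint32_t inst) { return (inst >> 1) & 0x3ff; }
static inline uint32_t get_rt(uint32_t inst)  { return (inst >> 21) & 0x1f; }
static inline uint32_t get_rs(uint32_t inst)  { return (inst >> 21) & 0x1f; }
static inline uint32_t get_ra(uint32_t inst)  { return (inst >> 16) & 0x1f; }

The trailing 0/1 argument to kvmppc_handle_load()/kvmppc_handle_store() selects whether the access uses the guest's default endianness, which is why the byte-reversed forms (lwbrx, stwbrx, lhbrx, sthbrx) pass 0.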

+ 1 - 3
arch/powerpc/kvm/mpic.c

@@ -1826,8 +1826,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return 0;
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -1839,7 +1838,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 		e->irqchip.pin = ue->u.irqchip.pin;
 		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
 			goto out;
-		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
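
Dropping the routing-table parameter means arch code now only fills in the kernel-side entry; recording the irqchip pin to GSI mapping is left to the generic layer. A hypothetical, simplified sketch of what the generic setup path might look like after this change (the real code in virt/kvm/irqchip.c also checks for collisions and links the entry into the per-GSI list):

/* Hypothetical simplification: validate via the arch hook, then do the
 * chip[pin] -> GSI bookkeeping that mpic.c used to do itself. */
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
			       struct kvm_kernel_irq_routing_entry *e,
			       const struct kvm_irq_routing_entry *ue)
{
	int r;

	e->gsi = ue->gsi;
	e->type = ue->type;
	r = kvm_set_routing_entry(e, ue);	/* arch-specific validation */
	if (r)
		return r;
	if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
		rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
	return 0;
}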

+ 163 - 18
arch/powerpc/kvm/powerpc.c

@@ -190,6 +190,25 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 		vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
 		vcpu->arch.magic_page_ea = param2 & ~0xfffULL;
 
+#ifdef CONFIG_PPC_64K_PAGES
+		/*
+		 * Make sure the 4k magic page lands at the same offset within
+		 * a 64k page in both the guest and the host's backing page.
+		 */
+		if ((vcpu->arch.magic_page_pa & 0xf000) !=
+		    ((ulong)vcpu->arch.shared & 0xf000)) {
+			void *old_shared = vcpu->arch.shared;
+			ulong shared = (ulong)vcpu->arch.shared;
+			void *new_shared;
+
+			shared &= PAGE_MASK;
+			shared |= vcpu->arch.magic_page_pa & 0xf000;
+			new_shared = (void*)shared;
+			memcpy(new_shared, old_shared, 0x1000);
+			vcpu->arch.shared = new_shared;
+		}
+#endif
+
 		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
 
 		r = EV_SUCCESS;
@@ -198,7 +217,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 	case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
 		r = EV_SUCCESS;
 #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
-		/* XXX Missing magic page on 44x */
 		r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
 #endif
 
@@ -254,13 +272,16 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	enum emulation_result er;
 	int r;
 
-	er = kvmppc_emulate_instruction(run, vcpu);
+	er = kvmppc_emulate_loadstore(vcpu);
 	switch (er) {
 	case EMULATE_DONE:
 		/* Future optimization: only reload non-volatiles if they were
 		 * actually modified. */
 		r = RESUME_GUEST_NV;
 		break;
+	case EMULATE_AGAIN:
+		r = RESUME_GUEST;
+		break;
 	case EMULATE_DO_MMIO:
 		run->exit_reason = KVM_EXIT_MMIO;
 		/* We must reload nonvolatiles because "update" load/store
@@ -270,11 +291,15 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		r = RESUME_HOST_NV;
 		break;
 	case EMULATE_FAIL:
+	{
+		u32 last_inst;
+
+		kvmppc_get_last_inst(vcpu, false, &last_inst);
 		/* XXX Deliver Program interrupt to guest. */
-		printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
-		       kvmppc_get_last_inst(vcpu));
+		pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst);
 		r = RESUME_HOST;
 		break;
+	}
 	default:
 		WARN_ON(1);
 		r = RESUME_GUEST;
@@ -284,6 +309,81 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
 
+int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+	      bool data)
+{
+	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
+	struct kvmppc_pte pte;
+	int r;
+
+	vcpu->stat.st++;
+
+	r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
+			 XLATE_WRITE, &pte);
+	if (r < 0)
+		return r;
+
+	*eaddr = pte.raddr;
+
+	if (!pte.may_write)
+		return -EPERM;
+
+	/* Magic page override */
+	if (kvmppc_supports_magic_page(vcpu) && mp_pa &&
+	    ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+		void *magic = vcpu->arch.shared;
+		magic += pte.eaddr & 0xfff;
+		memcpy(magic, ptr, size);
+		return EMULATE_DONE;
+	}
+
+	if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
+		return EMULATE_DO_MMIO;
+
+	return EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvmppc_st);
+
+int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+		      bool data)
+{
+	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
+	struct kvmppc_pte pte;
+	int rc;
+
+	vcpu->stat.ld++;
+
+	rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
+			  XLATE_READ, &pte);
+	if (rc)
+		return rc;
+
+	*eaddr = pte.raddr;
+
+	if (!pte.may_read)
+		return -EPERM;
+
+	if (!data && !pte.may_execute)
+		return -ENOEXEC;
+
+	/* Magic page override */
+	if (kvmppc_supports_magic_page(vcpu) && mp_pa &&
+	    ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+		void *magic = vcpu->arch.shared;
+		magic += pte.eaddr & 0xfff;
+		memcpy(ptr, magic, size);
+		return EMULATE_DONE;
+	}
+
+	if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size))
+		return EMULATE_DO_MMIO;
+
+	return EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvmppc_ld);
+
 int kvm_arch_hardware_enable(void *garbage)
 {
 	return 0;
@@ -366,14 +466,20 @@ void kvm_arch_sync_events(struct kvm *kvm)
 {
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
-	/* FIXME!!
-	 * Should some of this be vm ioctl ? is it possible now ?
-	 */
+	/* Assume we're using HV mode when the HV module is loaded */
 	int hv_enabled = kvmppc_hv_ops ? 1 : 0;
 
+	if (kvm) {
+		/*
+		 * Hooray - we know which VM type we're running on. Depend on
+		 * that rather than the guess above.
+		 */
+		hv_enabled = is_kvmppc_hv_enabled(kvm);
+	}
+
 	switch (ext) {
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_SREGS:
@@ -387,6 +493,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_UNSET_IRQ:
 	case KVM_CAP_PPC_IRQ_LEVEL:
 	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
@@ -417,6 +524,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_ALLOC_HTAB:
 	case KVM_CAP_PPC_RTAS:
 	case KVM_CAP_PPC_FIXUP_HCALL:
+	case KVM_CAP_PPC_ENABLE_HCALL:
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS:
 #endif
@@ -635,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
-static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
-                                     struct kvm_run *run)
-{
-	kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
-}
-
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
@@ -837,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (!vcpu->mmio_is_write)
 			kvmppc_complete_mmio_load(vcpu, run);
 		vcpu->mmio_needed = 0;
-	} else if (vcpu->arch.dcr_needed) {
-		if (!vcpu->arch.dcr_is_write)
-			kvmppc_complete_dcr_load(vcpu, run);
-		vcpu->arch.dcr_needed = 0;
 	} else if (vcpu->arch.osi_needed) {
 		u64 *gprs = run->osi.gprs;
 		int i;
@@ -1099,6 +1197,42 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 	return 0;
 }
 
+
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+				   struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	case KVM_CAP_PPC_ENABLE_HCALL: {
+		unsigned long hcall = cap->args[0];
+
+		r = -EINVAL;
+		if (hcall > MAX_HCALL_OPCODE || (hcall & 3) ||
+		    cap->args[1] > 1)
+			break;
+		if (!kvmppc_book3s_hcall_implemented(kvm, hcall))
+			break;
+		if (cap->args[1])
+			set_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		else
+			clear_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		r = 0;
+		break;
+	}
+#endif
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
@@ -1118,6 +1252,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 		break;
 	}
+	case KVM_ENABLE_CAP:
+	{
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+		break;
+	}
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CREATE_SPAPR_TCE: {
 		struct kvm_create_spapr_tce create_tce;
@@ -1204,3 +1347,5 @@ void kvm_arch_exit(void)
 {
 
 }
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
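
With KVM_ENABLE_CAP now accepted on the VM file descriptor, userspace gets per-VM control over which sPAPR hypercalls are handled in-kernel. A sketch of how a VMM might flip one hcall on (the token shown is illustrative; per the handler above, args[0] is the hcall token and args[1] is 1 to enable, 0 to disable):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: enable in-kernel handling of one hypercall on this VM.
 * Tokens are multiples of 4, matching the (hcall & 3) check above. */
static int enable_hcall(int vm_fd, unsigned long hcall)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_PPC_ENABLE_HCALL;
	cap.args[0] = hcall;		/* e.g. 0x31c for H_SET_MODE */
	cap.args[1] = 1;		/* 0 would disable it again */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}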

+ 0 - 1
arch/powerpc/kvm/timing.c

@@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
 
 static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
 	[MMIO_EXITS] =              "MMIO",
-	[DCR_EXITS] =               "DCR",
 	[SIGNAL_EXITS] =            "SIGNAL",
 	[ITLB_REAL_MISS_EXITS] =    "ITLBREAL",
 	[ITLB_VIRT_MISS_EXITS] =    "ITLBVIRT",

+ 0 - 3
arch/powerpc/kvm/timing.h

@@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
 	case EMULATED_INST_EXITS:
 		vcpu->stat.emulated_inst_exits++;
 		break;
-	case DCR_EXITS:
-		vcpu->stat.dcr_exits++;
-		break;
 	case DSI_EXITS:
 		vcpu->stat.dsi_exits++;
 		break;

+ 1 - 0
arch/s390/kvm/Kconfig

@@ -26,6 +26,7 @@ config KVM
 	select KVM_ASYNC_PF
 	select KVM_ASYNC_PF_SYNC
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
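
s390 now selects the split-out HAVE_KVM_IRQFD symbol (x86 follows below), so the existing irqfd userspace ABI keeps working. For reference, a minimal sketch of attaching an eventfd to a guest interrupt line (error handling elided):

#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: any write to the eventfd injects `gsi` into the guest via
 * the in-kernel irqchip's routing table. */
static int attach_irqfd(int vm_fd, unsigned int gsi)
{
	struct kvm_irqfd irqfd;
	int efd = eventfd(0, EFD_CLOEXEC);

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd = efd;
	irqfd.gsi = gsi;
	return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}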

+ 1 - 2
arch/s390/kvm/interrupt.c

@@ -1556,8 +1556,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
 	return ret;
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int ret;

+ 1 - 1
arch/s390/kvm/kvm-s390.c

@@ -146,7 +146,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
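Taking a struct kvm here lets capability queries give per-VM answers (HV versus PR on PPC being the motivating case in the powerpc.c hunk above). Assuming the host also exposes KVM_CAP_CHECK_EXTENSION_VM, userspace can ask on the VM file descriptor directly; a sketch:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: query a capability against one VM rather than the module.
 * Returns > 0 if supported (some caps return richer values). */
static int vm_has_cap(int vm_fd, long cap)
{
	return ioctl(vm_fd, KVM_CHECK_EXTENSION, cap);
}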

+ 1 - 0
arch/x86/kvm/Kconfig

@@ -27,6 +27,7 @@ config KVM
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE

+ 1 - 1
arch/x86/kvm/irq.c

@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 
 	vector = kvm_cpu_get_extint(v);
 
-	if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
+	if (vector != -1)
 		return vector;			/* PIC */
 
 	return kvm_get_apic_interrupt(v);	/* APIC */

Some files were not shown because too many files changed in this diff