@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + pgd_index(addr);
+	pgd = pgdp + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+	unsigned int size = kvm_get_hwpgd_size();
+
+	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-	int ret;
 	pgd_t *pgd;
+	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
+	hwpgd = kvm_alloc_hwpgd();
+	if (!hwpgd)
+		return -ENOMEM;
+
+	/* When the kernel uses more levels of page tables than the
+	 * guest, we allocate a fake PGD and pre-populate it to point
+	 * to the next-level page table, which will be the real
+	 * initial page table pointed to by the VTTBR.
+	 *
+	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+	 * the PMD and the kernel will use folded pud.
+	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+	 * pages.
+	 */
 	if (KVM_PREALLOC_LEVEL > 0) {
+		int i;
+
 		/*
 		 * Allocate fake pgd for the page table manipulation macros to
 		 * work. This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		 */
 		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
 				       GFP_KERNEL | __GFP_ZERO);
+
+		if (!pgd) {
+			kvm_free_hwpgd(hwpgd);
+			return -ENOMEM;
+		}
+
+		/* Plug the HW PGD into the fake one. */
+		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+			if (KVM_PREALLOC_LEVEL == 1)
+				pgd_populate(NULL, pgd + i,
+					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+			else if (KVM_PREALLOC_LEVEL == 2)
+				pud_populate(NULL, pud_offset(pgd, 0) + i,
+					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+		}
 	} else {
 		/*
 		 * Allocate actual first-level Stage-2 page table used by the
 		 * hardware for Stage-2 page table walks.
 		 */
-		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+		pgd = (pgd_t *)hwpgd;
 	}
 
-	if (!pgd)
-		return -ENOMEM;
-
-	ret = kvm_prealloc_hwpgd(kvm, pgd);
-	if (ret)
-		goto out_err;
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
-out_err:
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(pgd);
-	else
-		free_pages((unsigned long)pgd, S2_PGD_ORDER);
-	return ret;
 }
 
 /**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm);
+	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
 	if (KVM_PREALLOC_LEVEL > 0)
 		kfree(kvm->arch.pgd);
-	else
-		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
 	kvm->arch.pgd = NULL;
 }
 
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	if (WARN_ON(pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is