@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(start_pte))
+	if (kvm_pte_table_empty(kvm, start_pte))
 		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(start_pmd))
+	if (kvm_pmd_table_empty(kvm, start_pmd))
 		clear_pud_entry(kvm, pud, start_addr);
 }
 
@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(start_pud))
+	if (kvm_pud_table_empty(kvm, start_pud))
 		clear_pgd_entry(kvm, pgd, start_addr);
 }
 
@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-		free_pages((unsigned long)hyp_pgd, pgd_order);
+		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
 	}
 
@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 	return 0;
 }
 
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int ret;
+
+	addr = start;
+	do {
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				return -ENOMEM;
+			}
+			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+		}
+
+		next = pud_addr_end(addr, end);
+		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		if (ret)
+			return ret;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
 static int __create_hyp_mappings(pgd_t *pgdp,
 				 unsigned long start, unsigned long end,
 				 unsigned long pfn, pgprot_t prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 	unsigned long addr, next;
 	int err = 0;
 
@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 	end = PAGE_ALIGN(end);
 	do {
 		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
 
-		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
-			if (!pmd) {
-				kvm_err("Cannot allocate Hyp pmd\n");
+		if (pgd_none(*pgd)) {
+			pud = pud_alloc_one(NULL, addr);
+			if (!pud) {
+				kvm_err("Cannot allocate Hyp pud\n");
 				err = -ENOMEM;
 				goto out;
 			}
-			pud_populate(NULL, pud, pmd);
-			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+			pgd_populate(NULL, pgd, pud);
+			get_page(virt_to_page(pgd));
+			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
 		}
 
 		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
 		if (err)
 			goto out;
 		pfn += (next - addr) >> PAGE_SHIFT;
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+	int ret;
 	pgd_t *pgd;
 
 	if (kvm->arch.pgd != NULL) {
@@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		return -EINVAL;
 	}
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+	if (KVM_PREALLOC_LEVEL > 0) {
+		/*
+		 * Allocate fake pgd for the page table manipulation macros to
+		 * work. This is not used by the hardware and we have no
+		 * alignment requirement for this allocation.
+		 */
+		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+				       GFP_KERNEL | __GFP_ZERO);
+	} else {
+		/*
+		 * Allocate actual first-level Stage-2 page table used by the
+		 * hardware for Stage-2 page table walks.
+		 */
+		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+	}
+
 	if (!pgd)
 		return -ENOMEM;
 
-	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	ret = kvm_prealloc_hwpgd(kvm, pgd);
+	if (ret)
+		goto out_err;
+
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
-
 	return 0;
+out_err:
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(pgd);
+	else
+		free_pages((unsigned long)pgd, S2_PGD_ORDER);
+	return ret;
 }
 
 /**
@@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+	kvm_free_hwpgd(kvm);
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(kvm->arch.pgd);
+	else
+		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
 	kvm->arch.pgd = NULL;
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 
 	pgd = kvm->arch.pgd + pgd_index(addr);
-	pud = pud_offset(pgd, addr);
+	if (WARN_ON(pgd_none(*pgd))) {
+		if (!cache)
+			return NULL;
+		pud = mmu_memory_cache_alloc(cache);
+		pgd_populate(NULL, pgd, pud);
+		get_page(virt_to_page(pgd));
+	}
+
+	return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pud = stage2_get_pud(kvm, cache, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
 			return NULL;
@@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	pmd_t *pmd;
 	pte_t *pte, old_pte;
 
-	/* Create stage-2 page table mapping - Level 1 */
+	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	if (!pmd) {
 		/*
@@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
  * @size:	The size of the mapping
  */
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size)
+			  phys_addr_t pa, unsigned long size, bool writable)
 {
 	phys_addr_t addr, end;
 	int ret = 0;
@@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
-		ret = mmu_topup_memory_cache(&cache, 2, 2);
+		if (writable)
+			kvm_set_s2pte_writable(&pte);
+
+		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+						KVM_NR_MEM_OBJS);
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
@@ -777,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	/* Let's check if we will get back a huge page backed by hugetlbfs */
 	down_read(&current->mm->mmap_sem);
 	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (unlikely(!vma)) {
+		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+		up_read(&current->mm->mmap_sem);
+		return -EFAULT;
+	}
+
 	if (is_vm_hugetlb_page(vma)) {
 		hugetlb = true;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -797,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	up_read(&current->mm->mmap_sem);
 
 	/* We need minimum second+third level pages */
-	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
 	if (ret)
 		return ret;
 
@@ -843,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		}
 		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
-			mem_type == PAGE_S2_DEVICE);
+			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
 
 
@@ -916,6 +1003,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		goto out_unlock;
 	}
 
+	/* Userspace should not be able to register out-of-bounds IPAs */
+	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
 	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
@@ -1072,8 +1162,8 @@ int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
-	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
@@ -1126,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   const struct kvm_memory_slot *old,
 				   enum kvm_mr_change change)
 {
-	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
-	phys_addr_t size = old->npages << PAGE_SHIFT;
-	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-		spin_lock(&kvm->mmu_lock);
-		unmap_stage2_range(kvm, gpa, size);
-		spin_unlock(&kvm->mmu_lock);
-	}
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1140,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_userspace_memory_region *mem,
 				   enum kvm_mr_change change)
 {
-	return 0;
+	hva_t hva = mem->userspace_addr;
+	hva_t reg_end = hva + mem->memory_size;
+	bool writable = !(mem->flags & KVM_MEM_READONLY);
+	int ret = 0;
+
+	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+		return 0;
+
+	/*
+	 * Prevent userspace from creating a memory region outside of the IPA
+	 * space addressable by the KVM guest IPA space.
+	 */
+	if (memslot->base_gfn + memslot->npages >=
+	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
+		return -EFAULT;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we can map
+	 * any of them right now.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 * |               memory region                |
+	 * +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Mapping a read-only VMA is only allowed if the
+		 * memory region is configured as read-only.
+		 */
+		if (writable && !(vma->vm_flags & VM_WRITE)) {
+			ret = -EPERM;
+			break;
+		}
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (vma->vm_flags & VM_PFNMAP) {
+			gpa_t gpa = mem->guest_phys_addr +
+				    (vm_start - mem->userspace_addr);
+			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+					 vm_start - vma->vm_start;
+
+			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+						    vm_end - vm_start,
+						    writable);
+			if (ret)
+				break;
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+
+	if (ret) {
+		spin_lock(&kvm->mmu_lock);
+		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+		spin_unlock(&kvm->mmu_lock);
+	}
+	return ret;
 }
 
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -1165,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
+	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	unmap_stage2_range(kvm, gpa, size);
+	spin_unlock(&kvm->mmu_lock);
 }