@@ -102,6 +102,13 @@ struct vfio_pfn {
 	atomic_t		ref_count;
 };

+struct vfio_regions {
+	struct list_head list;
+	dma_addr_t iova;
+	phys_addr_t phys;
+	size_t len;
+};
+
 #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
 					(!list_empty(&iommu->domain_list))

@@ -397,7 +404,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 {
 	unsigned long pfn = 0;
 	long ret, pinned = 0, lock_acct = 0;
-	bool rsvd;
 	dma_addr_t iova = vaddr - dma->vaddr + dma->iova;

 	/* This code path is only user initiated */
@@ -408,14 +414,23 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	if (ret)
 		return ret;

+	if (is_invalid_reserved_pfn(*pfn_base)) {
+		struct vm_area_struct *vma;
+
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma_intersection(current->mm, vaddr, vaddr + 1);
+		pinned = min_t(long, npage, vma_pages(vma));
+		up_read(&current->mm->mmap_sem);
+		return pinned;
+	}
+
 	pinned++;
-	rsvd = is_invalid_reserved_pfn(*pfn_base);

 	/*
 	 * Reserved pages aren't counted against the user, externally pinned
 	 * pages are already counted against the user.
 	 */
-	if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+	if (!vfio_find_vpfn(dma, iova)) {
 		if (!lock_cap && current->mm->locked_vm + 1 > limit) {
 			put_pfn(*pfn_base, dma->prot);
 			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
@@ -435,13 +450,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 		if (ret)
 			break;

-		if (pfn != *pfn_base + pinned ||
-		    rsvd != is_invalid_reserved_pfn(pfn)) {
+		if (pfn != *pfn_base + pinned) {
 			put_pfn(pfn, dma->prot);
 			break;
 		}

-		if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+		if (!vfio_find_vpfn(dma, iova)) {
 			if (!lock_cap &&
 			    current->mm->locked_vm + lock_acct + 1 > limit) {
 				put_pfn(pfn, dma->prot);
@@ -459,10 +473,8 @@ out:

 unpin_out:
 	if (ret) {
-		if (!rsvd) {
-			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
-				put_pfn(pfn, dma->prot);
-		}
+		for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
+			put_pfn(pfn, dma->prot);

 		return ret;
 	}
@@ -660,11 +672,102 @@ unpin_exit:
 	return i > npage ? npage : (i > 0 ? i : -EINVAL);
 }

+static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
+			    struct list_head *regions)
+{
+	long unlocked = 0;
+	struct vfio_regions *entry, *next;
+
+	iommu_tlb_sync(domain->domain);
+
+	list_for_each_entry_safe(entry, next, regions, list) {
+		unlocked += vfio_unpin_pages_remote(dma,
+						    entry->iova,
+						    entry->phys >> PAGE_SHIFT,
+						    entry->len >> PAGE_SHIFT,
+						    false);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	cond_resched();
+
+	return unlocked;
+}
+
+/*
+ * Generally, VFIO needs to unpin remote pages after each IOTLB flush.
+ * Therefore, when using the IOTLB flush sync interface, VFIO needs to keep
+ * track of these regions (currently using a list).
+ *
+ * This value specifies the maximum number of regions for each IOTLB flush
+ * sync.
+ */
+#define VFIO_IOMMU_TLB_SYNC_MAX		512
+
+static size_t unmap_unpin_fast(struct vfio_domain *domain,
+			       struct vfio_dma *dma, dma_addr_t *iova,
+			       size_t len, phys_addr_t phys, long *unlocked,
+			       struct list_head *unmapped_list,
+			       int *unmapped_cnt)
+{
+	size_t unmapped = 0;
+	struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (entry) {
+		unmapped = iommu_unmap_fast(domain->domain, *iova, len);
+
+		if (!unmapped) {
+			kfree(entry);
+		} else {
+			iommu_tlb_range_add(domain->domain, *iova, unmapped);
+			entry->iova = *iova;
+			entry->phys = phys;
+			entry->len = unmapped;
+			list_add_tail(&entry->list, unmapped_list);
+
+			*iova += unmapped;
+			(*unmapped_cnt)++;
+		}
+	}
+
+	/*
+	 * Sync if the number of fast-unmap regions hits the limit
+	 * or in case of errors.
+	 */
+	if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) {
+		*unlocked += vfio_sync_unpin(dma, domain,
+					     unmapped_list);
+		*unmapped_cnt = 0;
+	}
+
+	return unmapped;
+}
+
+static size_t unmap_unpin_slow(struct vfio_domain *domain,
+			       struct vfio_dma *dma, dma_addr_t *iova,
+			       size_t len, phys_addr_t phys,
+			       long *unlocked)
+{
+	size_t unmapped = iommu_unmap(domain->domain, *iova, len);
+
+	if (unmapped) {
+		*unlocked += vfio_unpin_pages_remote(dma, *iova,
+						     phys >> PAGE_SHIFT,
+						     unmapped >> PAGE_SHIFT,
+						     false);
+		*iova += unmapped;
+		cond_resched();
+	}
+	return unmapped;
+}
+
 static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 			     bool do_accounting)
 {
 	dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
 	struct vfio_domain *domain, *d;
+	LIST_HEAD(unmapped_region_list);
+	int unmapped_region_cnt = 0;
 	long unlocked = 0;

 	if (!dma->size)
@@ -710,20 +813,26 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 			break;
 		}

-		unmapped = iommu_unmap(domain->domain, iova, len);
-		if (WARN_ON(!unmapped))
-			break;
-
-		unlocked += vfio_unpin_pages_remote(dma, iova,
-						    phys >> PAGE_SHIFT,
-						    unmapped >> PAGE_SHIFT,
-						    false);
-		iova += unmapped;
-
-		cond_resched();
+		/*
+		 * First, try to use fast unmap/unpin. In case of failure,
+		 * switch to slow unmap/unpin path.
+		 */
+		unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys,
+					    &unlocked, &unmapped_region_list,
+					    &unmapped_region_cnt);
+		if (!unmapped) {
+			unmapped = unmap_unpin_slow(domain, dma, &iova, len,
+						    phys, &unlocked);
+			if (WARN_ON(!unmapped))
+				break;
+		}
 	}

 	dma->iommu_mapped = false;
+
+	if (unmapped_region_cnt)
+		unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
+
 	if (do_accounting) {
 		vfio_lock_acct(dma->task, -unlocked, NULL);
 		return 0;
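
For reference, the deferred-flush pattern the new helpers build on can be summarized outside the diff. The following is a minimal sketch, not part of the patch: it uses only the IOMMU API calls already seen above (iommu_unmap_fast(), iommu_tlb_range_add(), iommu_tlb_sync()), and the function name is illustrative. The constraint it encodes is that pages may only be unpinned after the IOTLB sync completes, because the device can keep translating through stale IOTLB entries until then.

#include <linux/iommu.h>

/*
 * Illustrative sketch only (not from the patch): unmap one range with the
 * fast interface and defer the IOTLB flush.  The caller must keep the
 * backing pages pinned until iommu_tlb_sync() returns, since the device
 * may still hold IOTLB entries translating into them.
 */
static size_t example_unmap_deferred(struct iommu_domain *domain,
				     unsigned long iova, size_t size)
{
	size_t unmapped;

	/* Clear the page-table entries without flushing the IOTLB. */
	unmapped = iommu_unmap_fast(domain, iova, size);
	if (!unmapped)
		return 0;

	/* Queue the range so one later sync can invalidate it. */
	iommu_tlb_range_add(domain, iova, unmapped);

	/* Flush; only now is it safe to unpin the underlying pages. */
	iommu_tlb_sync(domain);

	return unmapped;
}

Batching many such ranges before a single iommu_tlb_sync(), as unmap_unpin_fast() does for up to VFIO_IOMMU_TLB_SYNC_MAX regions, is what amortizes the flush cost over large unmaps.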