|
@@ -29,6 +29,51 @@
|
|
|
static void tce_iommu_detach_group(void *iommu_data,
|
|
|
struct iommu_group *iommu_group);
|
|
|
|
|
|
+static long try_increment_locked_vm(long npages)
|
|
|
+{
|
|
|
+ long ret = 0, locked, lock_limit;
|
|
|
+
|
|
|
+ if (!current || !current->mm)
|
|
|
+ return -ESRCH; /* process exited */
|
|
|
+
|
|
|
+ if (!npages)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ down_write(&current->mm->mmap_sem);
|
|
|
+ locked = current->mm->locked_vm + npages;
|
|
|
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
|
|
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
|
|
|
+ ret = -ENOMEM;
|
|
|
+ else
|
|
|
+ current->mm->locked_vm += npages;
|
|
|
+
|
|
|
+ pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
|
|
|
+ npages << PAGE_SHIFT,
|
|
|
+ current->mm->locked_vm << PAGE_SHIFT,
|
|
|
+ rlimit(RLIMIT_MEMLOCK),
|
|
|
+ ret ? " - exceeded" : "");
|
|
|
+
|
|
|
+ up_write(&current->mm->mmap_sem);
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static void decrement_locked_vm(long npages)
|
|
|
+{
|
|
|
+ if (!current || !current->mm || !npages)
|
|
|
+ return; /* process exited */
|
|
|
+
|
|
|
+ down_write(&current->mm->mmap_sem);
|
|
|
+ if (WARN_ON_ONCE(npages > current->mm->locked_vm))
|
|
|
+ npages = current->mm->locked_vm;
|
|
|
+ current->mm->locked_vm -= npages;
|
|
|
+ pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
|
|
|
+ npages << PAGE_SHIFT,
|
|
|
+ current->mm->locked_vm << PAGE_SHIFT,
|
|
|
+ rlimit(RLIMIT_MEMLOCK));
|
|
|
+ up_write(&current->mm->mmap_sem);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
|
|
|
*
|
|
@@ -45,6 +90,7 @@ struct tce_container {
|
|
|
struct mutex lock;
|
|
|
struct iommu_table *tbl;
|
|
|
bool enabled;
|
|
|
+ unsigned long locked_pages;
|
|
|
};
|
|
|
|
|
|
static bool tce_page_is_contained(struct page *page, unsigned page_shift)
|
|
@@ -60,7 +106,7 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
|
|
|
static int tce_iommu_enable(struct tce_container *container)
|
|
|
{
|
|
|
int ret = 0;
|
|
|
- unsigned long locked, lock_limit, npages;
|
|
|
+ unsigned long locked;
|
|
|
struct iommu_table *tbl = container->tbl;
|
|
|
|
|
|
if (!container->tbl)
|
|
@@ -89,21 +135,22 @@ static int tce_iommu_enable(struct tce_container *container)
|
|
|
* Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
|
|
|
* that would effectively kill the guest at random points, much better
|
|
|
* enforcing the limit based on the max that the guest can map.
|
|
|
+ *
|
|
|
+ * Unfortunately at the moment it counts whole tables, no matter how
|
|
|
+ * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
|
|
|
+ * each with 2GB DMA window, 8GB will be counted here. The reason for
|
|
|
+ * this is that we cannot tell here the amount of RAM used by the guest
|
|
|
+ * as this information is only available from KVM and VFIO is
|
|
|
+ * KVM agnostic.
|
|
|
*/
|
|
|
- down_write(&current->mm->mmap_sem);
|
|
|
- npages = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
|
|
|
- locked = current->mm->locked_vm + npages;
|
|
|
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
|
|
- if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
|
|
|
- pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
|
|
|
- rlimit(RLIMIT_MEMLOCK));
|
|
|
- ret = -ENOMEM;
|
|
|
- } else {
|
|
|
+ locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
|
|
|
+ ret = try_increment_locked_vm(locked);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
|
|
|
- current->mm->locked_vm += npages;
|
|
|
- container->enabled = true;
|
|
|
- }
|
|
|
- up_write(&current->mm->mmap_sem);
|
|
|
+ container->locked_pages = locked;
|
|
|
+
|
|
|
+ container->enabled = true;
|
|
|
|
|
|
return ret;
|
|
|
}
|
|
@@ -115,13 +162,10 @@ static void tce_iommu_disable(struct tce_container *container)
|
|
|
|
|
|
container->enabled = false;
|
|
|
|
|
|
- if (!container->tbl || !current->mm)
|
|
|
+ if (!current->mm)
|
|
|
return;
|
|
|
|
|
|
- down_write(&current->mm->mmap_sem);
|
|
|
- current->mm->locked_vm -= (container->tbl->it_size <<
|
|
|
- container->tbl->it_page_shift) >> PAGE_SHIFT;
|
|
|
- up_write(¤t->mm->mmap_sem);
|
|
|
+ decrement_locked_vm(container->locked_pages);
|
|
|
}
|
|
|
|
|
|
static void *tce_iommu_open(unsigned long arg)
|