@@ -511,81 +511,38 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
 }
 
-/*
- * Handle a single data segment in a page-fault WQE or RDMA region.
- *
- * Returns number of OS pages retrieved on success. The caller may continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- *  page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- *  abort the page fault handling.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
-					 u32 key, u64 io_virt, size_t bcnt,
-					 u32 *bytes_committed,
-					 u32 *bytes_mapped)
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+			u64 io_virt, size_t bcnt, u32 *bytes_mapped)
 {
-	int srcu_key;
-	unsigned int current_seq = 0;
-	u64 start_idx, page_mask;
-	int npages = 0, ret = 0;
-	struct mlx5_ib_mr *mr;
 	u64 access_mask = ODP_READ_ALLOWED_BIT;
+	int npages = 0, page_shift, np;
+	u64 start_idx, page_mask;
 	struct ib_umem_odp *odp;
-	int implicit = 0;
+	int current_seq;
 	size_t size;
-	int page_shift;
-
-	srcu_key = srcu_read_lock(&dev->mr_srcu);
-	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
-	/*
-	 * If we didn't find the MR, it means the MR was closed while we were
-	 * handling the ODP event. In this case we return -EFAULT so that the
-	 * QP will be closed.
-	 */
-	if (!mr || !mr->ibmr.pd) {
-		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
-			    key);
-		ret = -EFAULT;
-		goto srcu_unlock;
-	}
-	if (!mr->umem->odp_data) {
-		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-			    key);
-		if (bytes_mapped)
-			*bytes_mapped +=
-				(bcnt - *bytes_committed);
-		goto srcu_unlock;
-	}
-
-	/*
-	 * Avoid branches - this code will perform correctly
-	 * in all iterations (in iteration 2 and above,
-	 * bytes_committed == 0).
-	 */
-	io_virt += *bytes_committed;
-	bcnt -= *bytes_committed;
+	int ret;
 
 	if (!mr->umem->odp_data->page_list) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
-		if (IS_ERR(odp)) {
-			ret = PTR_ERR(odp);
-			goto srcu_unlock;
-		}
+		if (IS_ERR(odp))
+			return PTR_ERR(odp);
 		mr = odp->private;
-		implicit = 1;
 
 	} else {
 		odp = mr->umem->odp_data;
 	}
 
+next_mr:
+	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
 	page_shift = mr->umem->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
+	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+	if (mr->umem->writable)
+		access_mask |= ODP_WRITE_ALLOWED_BIT;
 
-next_mr:
 	current_seq = READ_ONCE(odp->notifiers_seq);
 	/*
 	 * Ensure the sequence number is valid for some time before we call
@@ -593,51 +550,43 @@ next_mr:
 	 */
 	smp_rmb();
 
-	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
-	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
-
-	if (mr->umem->writable)
-		access_mask |= ODP_WRITE_ALLOWED_BIT;
-
 	ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
 					access_mask, current_seq);
 
 	if (ret < 0)
-		goto srcu_unlock;
+		goto out;
 
-	if (ret > 0) {
-		int np = ret;
-
-		mutex_lock(&odp->umem_mutex);
-		if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
-			/*
-			 * No need to check whether the MTTs really belong to
-			 * this MR, since ib_umem_odp_map_dma_pages already
-			 * checks this.
-			 */
-			ret = mlx5_ib_update_xlt(mr, start_idx, np,
-						 page_shift,
-						 MLX5_IB_UPD_XLT_ATOMIC);
-		} else {
-			ret = -EAGAIN;
-		}
-		mutex_unlock(&odp->umem_mutex);
-		if (ret < 0) {
-			if (ret != -EAGAIN)
-				mlx5_ib_err(dev, "Failed to update mkey page tables\n");
-			goto srcu_unlock;
-		}
-		if (bytes_mapped) {
-			u32 new_mappings = (np << page_shift) -
-				(io_virt - round_down(io_virt,
-						      1 << page_shift));
-			*bytes_mapped += min_t(u32, new_mappings, size);
-		}
-
+	np = ret;
 
-		npages += np << (page_shift - PAGE_SHIFT);
+	mutex_lock(&odp->umem_mutex);
+	if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+		/*
+		 * No need to check whether the MTTs really belong to
+		 * this MR, since ib_umem_odp_map_dma_pages already
+		 * checks this.
+		 */
+		ret = mlx5_ib_update_xlt(mr, start_idx, np,
+					 page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+	} else {
+		ret = -EAGAIN;
 	}
+	mutex_unlock(&odp->umem_mutex);
 
+	if (ret < 0) {
+		if (ret != -EAGAIN)
+			mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+		goto out;
+	}
+
+	if (bytes_mapped) {
+		u32 new_mappings = (np << page_shift) -
+			(io_virt - round_down(io_virt, 1 << page_shift));
+		*bytes_mapped += min_t(u32, new_mappings, size);
+	}
+
+	npages += np << (page_shift - PAGE_SHIFT);
 	bcnt -= size;
+
 	if (unlikely(bcnt)) {
 		struct ib_umem_odp *next;
 
@@ -646,17 +595,18 @@ next_mr:
 		if (unlikely(!next || next->umem->address != io_virt)) {
 			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
 				    io_virt, next);
-			ret = -EAGAIN;
-			goto srcu_unlock_no_wait;
+			return -EAGAIN;
 		}
 		odp = next;
 		mr = odp->private;
 		goto next_mr;
 	}
 
-srcu_unlock:
+	return npages;
+
+out:
 	if (ret == -EAGAIN) {
-		if (implicit || !odp->dying) {
+		if (mr->parent || !odp->dying) {
 			unsigned long timeout =
 				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
@@ -672,7 +622,61 @@ srcu_unlock:
 		}
 	}
 
-srcu_unlock_no_wait:
+	return ret;
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns number of OS pages retrieved on success. The caller may continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ *  page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ *  abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+					 u32 key, u64 io_virt, size_t bcnt,
+					 u32 *bytes_committed,
+					 u32 *bytes_mapped)
+{
+	int npages = 0, srcu_key, ret = 0;
+	struct mlx5_ib_mr *mr;
+
+	srcu_key = srcu_read_lock(&dev->mr_srcu);
+	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
+	/*
+	 * If we didn't find the MR, it means the MR was closed while we were
+	 * handling the ODP event. In this case we return -EFAULT so that the
+	 * QP will be closed.
+	 */
+	if (!mr || !mr->ibmr.pd) {
+		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+			    key);
+		ret = -EFAULT;
+		goto srcu_unlock;
+	}
+	if (!mr->umem->odp_data) {
+		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+			    key);
+		if (bytes_mapped)
+			*bytes_mapped +=
+				(bcnt - *bytes_committed);
+		goto srcu_unlock;
+	}
+
+	/*
+	 * Avoid branches - this code will perform correctly
+	 * in all iterations (in iteration 2 and above,
+	 * bytes_committed == 0).
+	 */
+	io_virt += *bytes_committed;
+	bcnt -= *bytes_committed;
+
+	npages = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+
+srcu_unlock:
 	srcu_read_unlock(&dev->mr_srcu, srcu_key);
 	*bytes_committed = 0;
 	return ret ? ret : npages;
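
The core of pagefault_mr is a sequence-count retry against concurrent mmu
notifier invalidations: sample odp->notifiers_seq, order that read with
smp_rmb(), do the slow page-faulting work without the lock, then re-check the
sequence under umem_mutex before committing translations to the device. If an
invalidation ran in between, the work is discarded and the fault is retried
(the out: path waits on notifier_completion first). A userspace analogue of
the pattern, illustrative only -- the kernel helper
ib_umem_mmu_notifier_retry() also refuses while an invalidation is still in
flight, which this sketch folds into a single counter:

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>

static atomic_uint notifiers_seq;
static pthread_mutex_t umem_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Invalidation side, akin to the mmu notifier callback: bump the
 * sequence under the lock so in-flight faults see their work is stale. */
static void invalidate_range(void)
{
	pthread_mutex_lock(&umem_mutex);
	atomic_fetch_add(&notifiers_seq, 1);
	/* ... tear down the mappings ... */
	pthread_mutex_unlock(&umem_mutex);
}

/* Fault side: returns 0 on success, -EAGAIN when it raced an invalidation. */
static int fault_one_range(void)
{
	unsigned int seq = atomic_load(&notifiers_seq);	/* sample first */
	int ret = 0;

	/* ... long, lockless work: pin pages, build translations ... */

	pthread_mutex_lock(&umem_mutex);
	if (atomic_load(&notifiers_seq) != seq)
		ret = -EAGAIN;	/* raced: discard the work and retry */
	/* else: commit the translations to hardware while still locked */
	pthread_mutex_unlock(&umem_mutex);
	return ret;
}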
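
The *bytes_mapped arithmetic is worth a worked example: the mapping step
returns np whole pages starting at the page that contains io_virt, but only
the bytes from io_virt to the end of that run satisfy the faulting request,
and never more than this leaf's size. A standalone illustration with
hypothetical values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12;		/* 4 KiB pages */
	uint64_t io_virt = 0x7f0000001800ULL;	/* 0x800 bytes into its page */
	uint32_t np = 3;			/* pages the mapping step returned */
	uint64_t page_start = io_virt & ~((1ULL << page_shift) - 1);
	uint32_t new_mappings = (np << page_shift) -
				(uint32_t)(io_virt - page_start);

	/* 3 * 4096 - 0x800 = 10240 bytes usable from io_virt onward;
	 * pagefault_mr then clamps it: min_t(u32, new_mappings, size). */
	printf("new_mappings = %u\n", new_mappings);
	return 0;
}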
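
The comment block above pagefault_single_data_segment is the contract its
callers rely on. A minimal caller sketch -- struct pf_segment and
handle_segments are hypothetical names, not part of the patch; the real
handler walks scatter entries decoded from the faulting WQE:

struct pf_segment {
	u32	lkey;
	u64	va;
	size_t	len;
};

static int handle_segments(struct mlx5_ib_dev *dev, struct pf_segment *seg,
			   int nseg, u32 *bytes_committed, u32 *bytes_mapped)
{
	int i, ret, total = 0;

	for (i = 0; i < nseg; i++) {
		ret = pagefault_single_data_segment(dev, seg[i].lkey,
						    seg[i].va, seg[i].len,
						    bytes_committed,
						    bytes_mapped);
		if (ret == -EAGAIN)
			return ret;	/* temporary: rerun the whole fault */
		if (ret < 0)
			return ret;	/* e.g. -EFAULT: abort the fault */
		total += ret;		/* pages retrieved; next segment */
	}
	return total;
}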
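
Finally, the "avoid branches" comment: on entry, *bytes_committed holds how
many bytes of the faulting request were already resolved before this handler
ran, and the function zeroes it on exit, so from the second call onward the
two unconditional adjustments are harmless no-ops instead of a branch. With
hypothetical numbers:

	/* First call: 0x1000 of a 0x3000-byte segment already committed. */
	io_virt = 0x10000; bcnt = 0x3000;	/* *bytes_committed == 0x1000 */
	io_virt += *bytes_committed;		/* 0x11000: skip resolved prefix */
	bcnt -= *bytes_committed;		/* 0x2000 bytes left to fault */
	/* Later calls see *bytes_committed == 0: same statements, no effect. */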