@@ -74,6 +74,7 @@ struct ucma_file {
 	struct list_head	ctx_list;
 	struct list_head	event_list;
 	wait_queue_head_t	poll_wait;
+	struct workqueue_struct	*close_wq;
 };
 
 struct ucma_context {
@@ -89,6 +90,13 @@ struct ucma_context {
 	struct list_head	list;
 	struct list_head	mc_list;
+	/* mark that device is in the process of destroying the internal HW
+	 * resources, protected by the global mut
+	 */
+	int			closing;
+	/* sync between removal event and id destroy, protected by file mut */
+	int			destroying;
+	struct work_struct	close_work;
 };
 
 struct ucma_multicast {
@@ -107,6 +115,7 @@ struct ucma_event {
 	struct list_head	list;
 	struct rdma_cm_id	*cm_id;
 	struct rdma_ucm_event_resp resp;
+	struct work_struct	close_work;
 };
 
 static DEFINE_MUTEX(mut);
@@ -132,8 +141,12 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
 
 	mutex_lock(&mut);
 	ctx = _ucma_find_context(id, file);
-	if (!IS_ERR(ctx))
-		atomic_inc(&ctx->ref);
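+	/* A context marked as closing is already being torn down on the
+	 * device-removal path; report -EIO rather than handing out a
+	 * reference to a dying id.
+	 */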
+	if (!IS_ERR(ctx)) {
+		if (ctx->closing)
+			ctx = ERR_PTR(-EIO);
+		else
+			atomic_inc(&ctx->ref);
+	}
 	mutex_unlock(&mut);
 	return ctx;
 }
@@ -144,6 +157,28 @@ static void ucma_put_ctx(struct ucma_context *ctx)
 		complete(&ctx->comp);
 }
 
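+/* Tear down an inflight connect-request id that was never delivered to
+ * userspace; runs from the file's close_wq.
+ */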
+static void ucma_close_event_id(struct work_struct *work)
+{
+	struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
+
+	rdma_destroy_id(uevent_close->cm_id);
+	kfree(uevent_close);
+}
+
+static void ucma_close_id(struct work_struct *work)
+{
+	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
+
+	/* Once all inflight tasks are finished, we close all underlying
+	 * resources. The context is still alive until its creator
+	 * explicitly destroys it.
+	 */
+	ucma_put_ctx(ctx);
+	wait_for_completion(&ctx->comp);
+	/* No new events will be generated after destroying the id. */
+	rdma_destroy_id(ctx->cm_id);
+}
+
 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 {
 	struct ucma_context *ctx;
@@ -152,6 +187,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 	if (!ctx)
 		return NULL;
 
+	INIT_WORK(&ctx->close_work, ucma_close_id);
 	atomic_set(&ctx->ref, 1);
 	init_completion(&ctx->comp);
 	INIT_LIST_HEAD(&ctx->mc_list);
@@ -242,6 +278,44 @@ static void ucma_set_event_context(struct ucma_context *ctx,
 	}
 }
 
+/* Called with file->mut locked for the relevant context. */
+static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+{
+	struct ucma_context *ctx = cm_id->context;
+	struct ucma_event *con_req_eve;
+	int event_found = 0;
+
+	if (ctx->destroying)
+		return;
+
+	/* The context owns the cm_id, and may queue it to be closed, only
+	 * if ctx->cm_id points to it; otherwise that cm_id is an inflight
+	 * one that is part of that context's event list, pending to be
+	 * detached and reattached to its new context as part of
+	 * ucma_get_event, and is handled separately below.
+	 */
+	if (ctx->cm_id == cm_id) {
+		mutex_lock(&mut);
+		ctx->closing = 1;
+		mutex_unlock(&mut);
+		queue_work(ctx->file->close_wq, &ctx->close_work);
+		return;
+	}
+
+	list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
+		if (con_req_eve->cm_id == cm_id &&
+		    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
+			list_del(&con_req_eve->list);
+			INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
+			queue_work(ctx->file->close_wq, &con_req_eve->close_work);
+			event_found = 1;
+			break;
+		}
+	}
+	if (!event_found)
+		printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n");
+}
+
 static int ucma_event_handler(struct rdma_cm_id *cm_id,
 			      struct rdma_cm_event *event)
 {
@@ -276,14 +350,21 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 		 * We ignore events for new connections until userspace has set
 		 * their context. This can only happen if an error occurs on a
 		 * new connection before the user accepts it. This is okay,
-		 * since the accept will just fail later.
+		 * since the accept will just fail later. However, we do need
+		 * to release the underlying HW resources in case of a device
+		 * removal event.
 		 */
+		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+			ucma_removal_event_handler(cm_id);
+
 		kfree(uevent);
 		goto out;
 	}
 
 	list_add_tail(&uevent->list, &ctx->file->event_list);
 	wake_up_interruptible(&ctx->file->poll_wait);
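+	/* Hand device removal to the hot-removal handler here as well, so
+	 * the id's HW resources can be released without waiting for
+	 * userspace to consume the event.
+	 */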
+	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+		ucma_removal_event_handler(cm_id);
 out:
 	mutex_unlock(&ctx->file->mut);
 	return ret;
@@ -442,9 +523,15 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 }
 
 /*
- * We cannot hold file->mut when calling rdma_destroy_id() or we can
- * deadlock. We also acquire file->mut in ucma_event_handler(), and
- * rdma_destroy_id() will wait until all callbacks have completed.
+ * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
+ * this point, no new events will be reported from the hardware. However, we
+ * still need to clean up the UCMA context for this ID. Specifically, there
+ * might be events that have not yet been consumed by the userspace software.
+ * These might include pending connect requests which we have not completed
+ * processing. We cannot call rdma_destroy_id while holding the lock of the
+ * context (file->mut), as it might cause a deadlock. We therefore extract all
+ * relevant events from the context's pending event list while holding the
+ * mutex. After that we release them as needed.
 */
 static int ucma_free_ctx(struct ucma_context *ctx)
 {
@@ -452,8 +539,6 @@ static int ucma_free_ctx(struct ucma_context *ctx)
 	struct ucma_event *uevent, *tmp;
 	LIST_HEAD(list);
 
-	/* No new events will be generated after destroying the id. */
-	rdma_destroy_id(ctx->cm_id);
 
 	ucma_cleanup_multicast(ctx);
@@ -501,10 +586,24 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	ucma_put_ctx(ctx);
-	wait_for_completion(&ctx->comp);
-	resp.events_reported = ucma_free_ctx(ctx);
+	mutex_lock(&ctx->file->mut);
+	ctx->destroying = 1;
+	mutex_unlock(&ctx->file->mut);
+
+	flush_workqueue(ctx->file->close_wq);
+	/* At this point it is guaranteed that there is no inflight
+	 * closing task. */
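+	/* If the device-removal path marked the context as closing, the
+	 * close work we just flushed has already destroyed the id.
+	 */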
+	mutex_lock(&mut);
+	if (!ctx->closing) {
+		mutex_unlock(&mut);
+		ucma_put_ctx(ctx);
+		wait_for_completion(&ctx->comp);
+		rdma_destroy_id(ctx->cm_id);
+	} else {
+		mutex_unlock(&mut);
+	}
 
+	resp.events_reported = ucma_free_ctx(ctx);
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
 			 &resp, sizeof(resp)))
 		ret = -EFAULT;
@@ -1529,6 +1628,7 @@ static int ucma_open(struct inode *inode, struct file *filp)
 	INIT_LIST_HEAD(&file->ctx_list);
 	init_waitqueue_head(&file->poll_wait);
 	mutex_init(&file->mut);
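+	/* Per-file, single-threaded workqueue: close work queued for this
+	 * file's ids runs serialized.
+	 */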
+	file->close_wq = create_singlethread_workqueue("ucma_close_id");
 
 	filp->private_data = file;
 	file->filp = filp;
@@ -1543,16 +1643,34 @@ static int ucma_close(struct inode *inode, struct file *filp)
 
 	mutex_lock(&file->mut);
 	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
+		ctx->destroying = 1;
 		mutex_unlock(&file->mut);
 
 		mutex_lock(&mut);
 		idr_remove(&ctx_idr, ctx->id);
 		mutex_unlock(&mut);
 
+		flush_workqueue(file->close_wq);
+		/* Once the ctx has been marked as destroying and the
+		 * workqueue flushed, we are safe from any inflight handlers
+		 * that might queue another closing task.
+		 */
+		mutex_lock(&mut);
+		if (!ctx->closing) {
+			mutex_unlock(&mut);
+			/* rdma_destroy_id ensures that no event handlers are
+			 * inflight for that id before releasing it.
+			 */
+			rdma_destroy_id(ctx->cm_id);
+		} else {
+			mutex_unlock(&mut);
+		}
+
 		ucma_free_ctx(ctx);
 		mutex_lock(&file->mut);
 	}
 	mutex_unlock(&file->mut);
+	destroy_workqueue(file->close_wq);
 	kfree(file);
 	return 0;
 }