@@ -998,49 +998,64 @@ done:
 	return rq->tag != -1;
 }
 
-static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
-				void *key)
+static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
+				int flags, void *key)
 {
 	struct blk_mq_hw_ctx *hctx;
 
 	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
 
-	list_del(&wait->entry);
-	clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
+	list_del_init(&wait->entry);
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
 }
 
-static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
+static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
+				     struct request *rq)
 {
+	struct blk_mq_hw_ctx *this_hctx = *hctx;
+	wait_queue_entry_t *wait = &this_hctx->dispatch_wait;
 	struct sbq_wait_state *ws;
 
+	if (!list_empty_careful(&wait->entry))
+		return false;
+
+	spin_lock(&this_hctx->lock);
+	if (!list_empty(&wait->entry)) {
+		spin_unlock(&this_hctx->lock);
+		return false;
+	}
+
+	ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
+	add_wait_queue(&ws->wait, wait);
+
 	/*
-	 * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
-	 * The thread which wins the race to grab this bit adds the hardware
-	 * queue to the wait queue.
+	 * It's possible that a tag was freed in the window between the
+	 * allocation failure and adding the hardware queue to the wait
+	 * queue.
 	 */
-	if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
-	    test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
+	if (!blk_mq_get_driver_tag(rq, hctx, false)) {
+		spin_unlock(&this_hctx->lock);
 		return false;
-
-	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
-	ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
+	}
 
 	/*
-	 * As soon as this returns, it's no longer safe to fiddle with
-	 * hctx->dispatch_wait, since a completion can wake up the wait queue
-	 * and unlock the bit.
+	 * We got a tag, remove ourselves from the wait queue to ensure
+	 * someone else gets the wakeup.
 	 */
-	add_wait_queue(&ws->wait, &hctx->dispatch_wait);
+	spin_lock_irq(&ws->wait.lock);
+	list_del_init(&wait->entry);
+	spin_unlock_irq(&ws->wait.lock);
+	spin_unlock(&this_hctx->lock);
 	return true;
 }
 
 bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
-		bool got_budget)
+			     bool got_budget)
 {
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq, *nxt;
+	bool no_tag = false;
 	int errors, queued;
 
 	if (list_empty(list))
@@ -1060,22 +1075,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
 			/*
 			 * The initial allocation attempt failed, so we need to
-			 * rerun the hardware queue when a tag is freed.
+			 * rerun the hardware queue when a tag is freed. The
+			 * waitqueue takes care of that. If the queue is run
+			 * before we add this entry back on the dispatch list,
+			 * we'll re-run it below.
 			 */
-			if (!blk_mq_dispatch_wait_add(hctx)) {
-				if (got_budget)
-					blk_mq_put_dispatch_budget(hctx);
-				break;
-			}
-
-			/*
-			 * It's possible that a tag was freed in the window
-			 * between the allocation failure and adding the
-			 * hardware queue to the wait queue.
-			 */
-			if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+			if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
 				if (got_budget)
 					blk_mq_put_dispatch_budget(hctx);
+				no_tag = true;
 				break;
 			}
 		}
@@ -1140,10 +1148,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		 * it is no longer set that means that it was cleared by another
 		 * thread and hence that a queue rerun is needed.
 		 *
-		 * If TAG_WAITING is set that means that an I/O scheduler has
-		 * been configured and another thread is waiting for a driver
-		 * tag. To guarantee fairness, do not rerun this hardware queue
-		 * but let the other thread grab the driver tag.
+		 * If 'no_tag' is set, that means that we failed getting
+		 * a driver tag with an I/O scheduler attached. If our dispatch
+		 * waitqueue is no longer active, ensure that we run the queue
+		 * AFTER adding our entries back to the list.
 		 *
 		 * If no I/O scheduler has been configured it is possible that
 		 * the hardware queue got stopped and restarted before requests
@@ -1155,8 +1163,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
 		 * and dm-rq.
 		 */
-		if (!blk_mq_sched_needs_restart(hctx) &&
-		    !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
+		if (!blk_mq_sched_needs_restart(hctx) ||
+		    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
 			blk_mq_run_hw_queue(hctx, true);
 	}
 
@@ -2020,6 +2028,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
 	hctx->nr_ctx = 0;
 
+	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
+	INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
+
 	if (set->ops->init_hctx &&
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
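
The core of the change is the ordering in blk_mq_dispatch_wait_add(): register on the tag waitqueue first, then retry the tag allocation under this_hctx->lock, so a tag freed between the allocation failure and the enqueue is picked up by the retry rather than lost. Below is a minimal, self-contained userspace sketch of that pattern, assuming invented names (tag_pool, try_get_tag, wait_add_or_retry) and a pthread mutex standing in for hctx->lock; it models the logic only and is not the kernel code.

/*
 * Userspace model of the enqueue-then-retry pattern from the patch.
 * All names here are hypothetical. A tag freed in the window between
 * the failed allocation and the waiter registration is caught by the
 * retry, which is the lost-wakeup hang the patch closes.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct tag_pool {
	pthread_mutex_t lock;
	int free_tags;		/* tags currently available */
	bool waiter_queued;	/* models !list_empty(&wait->entry) */
};

/* Plain allocation attempt; stands in for blk_mq_get_driver_tag(). */
static bool try_get_tag(struct tag_pool *p)
{
	bool ok = false;

	pthread_mutex_lock(&p->lock);
	if (p->free_tags > 0) {
		p->free_tags--;
		ok = true;
	}
	pthread_mutex_unlock(&p->lock);
	return ok;
}

/*
 * Models blk_mq_dispatch_wait_add(): returns true if the retry got a
 * tag, false if the caller must stop and wait for a wakeup.
 */
static bool wait_add_or_retry(struct tag_pool *p)
{
	pthread_mutex_lock(&p->lock);
	if (p->waiter_queued) {		/* someone is already queued */
		pthread_mutex_unlock(&p->lock);
		return false;
	}
	p->waiter_queued = true;	/* models add_wait_queue() */

	/* Retry: a tag may have been freed before we were queued. */
	if (p->free_tags > 0) {
		p->free_tags--;
		/* models list_del_init(): pass the next wakeup on */
		p->waiter_queued = false;
		pthread_mutex_unlock(&p->lock);
		return true;
	}
	pthread_mutex_unlock(&p->lock);
	return false;			/* stay queued; wakeup reruns the queue */
}

int main(void)
{
	struct tag_pool pool = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.free_tags = 0,
	};

	printf("first try: %d\n", try_get_tag(&pool));	/* 0: no tags */
	pool.free_tags = 1;	/* a completion frees a tag in the window */
	printf("wait+retry: %d\n", wait_add_or_retry(&pool));	/* 1 */
	return 0;
}

Compile with cc -pthread. The second call succeeds because the retry runs only after the waiter is registered, mirroring the second blk_mq_get_driver_tag() call inside blk_mq_dispatch_wait_add() in the patch.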