@@ -2132,6 +2132,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	 */
 	q->nr_requests = set->queue_depth;
 
+	/*
+	 * Default to classic polling
+	 */
+	q->poll_nsec = -1;
+
 	if (set->ops->complete)
 		blk_queue_softirq_done(q, set->ops->complete);
 
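The hunk above only changes the default: q->poll_nsec = -1 keeps newly allocated queues on classic (pure busy-wait) polling, matching the -1/0/>0 semantics documented further down. Switching modes at runtime is expected to go through the queue's io_poll_delay sysfs attribute from the companion patch in this series, which is not shown in this diff. A rough userspace sketch of that knob; the attribute name, path, device, and helper are illustrative assumptions:

/*
 * Hypothetical helper: flip a request queue between classic polling
 * ("-1") and adaptive hybrid polling ("0") via the io_poll_delay
 * sysfs attribute added elsewhere in this series.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int set_poll_delay(const char *disk, const char *val)
{
	char path[256];
	int fd, ret;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/io_poll_delay", disk);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val)) < 0 ? -1 : 0;
	close(fd);
	return ret;
}

int main(void)
{
	/* "-1" = classic polling (the new default), "0" = adaptive hybrid */
	return set_poll_delay("nvme0n1", "0");
}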
@@ -2469,14 +2474,70 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
+				       struct blk_mq_hw_ctx *hctx,
+				       struct request *rq)
+{
+	struct blk_rq_stat stat[2];
+	unsigned long ret = 0;
+
+	/*
+	 * If stats collection isn't on, don't sleep but turn it on for
+	 * future users
+	 */
+	if (!blk_stat_enable(q))
+		return 0;
+
+	/*
+	 * We don't have to do this once per IO, should optimize this
+	 * to just use the current window of stats until it changes
+	 */
+	memset(&stat, 0, sizeof(stat));
+	blk_hctx_stat_get(hctx, stat);
+
+	/*
+	 * As an optimistic guess, use half of the mean service time
+	 * for this type of request. We can (and should) make this smarter.
+	 * For instance, if the completion latencies are tight, we can
+	 * get closer than just half the mean. This is especially
+	 * important on devices where the completion latencies are longer
+	 * than ~10 usec.
+	 */
+	if (req_op(rq) == REQ_OP_READ && stat[BLK_STAT_READ].nr_samples)
+		ret = (stat[BLK_STAT_READ].mean + 1) / 2;
+	else if (req_op(rq) == REQ_OP_WRITE && stat[BLK_STAT_WRITE].nr_samples)
+		ret = (stat[BLK_STAT_WRITE].mean + 1) / 2;
+
+	return ret;
+}
+
 static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
+				     struct blk_mq_hw_ctx *hctx,
 				     struct request *rq)
 {
 	struct hrtimer_sleeper hs;
 	enum hrtimer_mode mode;
+	unsigned int nsecs;
 	ktime_t kt;
 
-	if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+	if (test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+		return false;
+
+	/*
+	 * poll_nsec can be:
+	 *
+	 * -1:	don't ever hybrid sleep
+	 *  0:	use half of prev avg
+	 * >0:	use this specific value
+	 */
+	if (q->poll_nsec == -1)
+		return false;
+	else if (q->poll_nsec > 0)
+		nsecs = q->poll_nsec;
+	else
+		nsecs = blk_mq_poll_nsecs(q, hctx, rq);
+
+	if (!nsecs)
 		return false;
 
 	set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
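To make the selection logic above concrete, here is a small standalone sketch of how the sleep target falls out of poll_nsec and the per-hctx completion stats. The toy_* names, types, and sample numbers are invented for illustration and are not kernel code:

#include <stdio.h>

/* Toy stand-in for the kernel's per-hctx completion statistics. */
struct toy_stat {
	unsigned long nr_samples;
	unsigned long mean;	/* mean completion time, in nanoseconds */
};

/* Mirrors blk_mq_poll_nsecs(): optimistic guess of half the mean. */
static unsigned long toy_poll_nsecs(const struct toy_stat *st)
{
	return st->nr_samples ? (st->mean + 1) / 2 : 0;
}

/* Mirrors the -1 / 0 / >0 decision in blk_mq_poll_hybrid_sleep(). */
static unsigned long toy_sleep_target(long poll_nsec, const struct toy_stat *st)
{
	if (poll_nsec == -1)
		return 0;		/* never hybrid sleep */
	if (poll_nsec > 0)
		return poll_nsec;	/* fixed, user-supplied value */
	return toy_poll_nsecs(st);	/* adaptive: half of previous mean */
}

int main(void)
{
	struct toy_stat reads = { .nr_samples = 128, .mean = 8000 };

	/* Adaptive mode with an 8 usec mean -> sleep ~4 usec before polling. */
	printf("%lu\n", toy_sleep_target(0, &reads));	/* prints 4000 */
	printf("%lu\n", toy_sleep_target(-1, &reads));	/* prints 0 */
	return 0;
}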
@@ -2485,7 +2546,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	 * This will be replaced with the stats tracking code, using
 	 * 'avg_completion_time / 2' as the pre-sleep target.
 	 */
-	kt = ktime_set(0, q->poll_nsec);
+	kt = ktime_set(0, nsecs);
 
 	mode = HRTIMER_MODE_REL;
 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
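The one-line change above arms the pre-poll sleep with the computed nsecs instead of the fixed q->poll_nsec; ktime_set(0, nsecs) just builds a relative expiry of 0 seconds plus nsecs nanoseconds. A loose userspace analogue of "sleep roughly nsecs, then fall through to busy polling" (illustration only; the kernel path uses an on-stack hrtimer_sleeper):

#define _POSIX_C_SOURCE 200112L
#include <time.h>

/* Block for roughly 'nsecs' nanoseconds on the monotonic clock. */
static void pre_poll_sleep(unsigned int nsecs)
{
	struct timespec ts = {
		.tv_sec  = nsecs / 1000000000U,
		.tv_nsec = nsecs % 1000000000U,
	};

	clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
}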
@@ -2520,7 +2581,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	 * the IO isn't complete, we'll get called again and will go
 	 * straight to the busy poll loop.
 	 */
-	if (blk_mq_poll_hybrid_sleep(q, rq))
+	if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
 		return true;
 
 	hctx->poll_considered++;
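For context on the final hunk, __blk_mq_poll() is reached from synchronous polled I/O, so the hybrid sleep applies to requests submitted with RWF_HIPRI on an O_DIRECT file descriptor while the queue's io_poll flag is enabled. A minimal sketch that would exercise this path, assuming a glibc with preadv2()/RWF_HIPRI; the device path is a placeholder:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

/*
 * Issue one polled 4K read: O_DIRECT + RWF_HIPRI sends completion
 * through blk_mq_poll(), where the hybrid sleep above applies
 * (provided /sys/block/<dev>/queue/io_poll is enabled).
 */
int main(void)
{
	struct iovec iov;
	void *buf;
	int fd;

	fd = open("/dev/nvme0n1", O_RDONLY | O_DIRECT);
	if (fd < 0)
		return 1;

	if (posix_memalign(&buf, 4096, 4096))
		return 1;

	iov.iov_base = buf;
	iov.iov_len = 4096;

	if (preadv2(fd, &iov, 1, 0, RWF_HIPRI) < 0)
		return 1;

	close(fd);
	free(buf);
	return 0;
}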