@@ -4,6 +4,7 @@
  * Copyright (C) 2016 Jens Axboe
  */
 #include <linux/kernel.h>
+#include <linux/rculist.h>
 #include <linux/blk-mq.h>
 
 #include "blk-stat.h"
@@ -11,6 +12,24 @@
 
 #define BLK_RQ_STAT_BATCH 64
 
+struct blk_queue_stats {
+	struct list_head callbacks;
+	spinlock_t lock;
+};
+
+unsigned int blk_stat_rq_ddir(const struct request *rq)
+{
+	return rq_data_dir(rq);
+}
+EXPORT_SYMBOL_GPL(blk_stat_rq_ddir);
+
+static void blk_stat_init(struct blk_rq_stat *stat)
+{
+	stat->min = -1ULL;
+	stat->max = stat->nr_samples = stat->mean = 0;
+	stat->batch = stat->nr_batch = 0;
+}
+
 static void blk_stat_flush_batch(struct blk_rq_stat *stat)
 {
 	const s32 nr_batch = READ_ONCE(stat->nr_batch);
@@ -50,207 +69,171 @@ static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
 	dst->nr_samples += src->nr_samples;
 }
 
-static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
 {
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	uint64_t latest = 0;
-	int i, j, nr;
-
-	blk_stat_init(&dst[READ]);
-	blk_stat_init(&dst[WRITE]);
-
-	nr = 0;
-	do {
-		uint64_t newest = 0;
-
-		queue_for_each_hw_ctx(q, hctx, i) {
-			hctx_for_each_ctx(hctx, ctx, j) {
-				blk_stat_flush_batch(&ctx->stat[READ]);
-				blk_stat_flush_batch(&ctx->stat[WRITE]);
-
-				if (!ctx->stat[READ].nr_samples &&
-				    !ctx->stat[WRITE].nr_samples)
-					continue;
-				if (ctx->stat[READ].time > newest)
-					newest = ctx->stat[READ].time;
-				if (ctx->stat[WRITE].time > newest)
-					newest = ctx->stat[WRITE].time;
-			}
-		}
+	stat->min = min(stat->min, value);
+	stat->max = max(stat->max, value);
 
-		/*
-		 * No samples
-		 */
-		if (!newest)
-			break;
-
-		if (newest > latest)
-			latest = newest;
-
-		queue_for_each_hw_ctx(q, hctx, i) {
-			hctx_for_each_ctx(hctx, ctx, j) {
-				if (ctx->stat[READ].time == newest) {
-					blk_stat_sum(&dst[READ],
-						     &ctx->stat[READ]);
-					nr++;
-				}
-				if (ctx->stat[WRITE].time == newest) {
-					blk_stat_sum(&dst[WRITE],
-						     &ctx->stat[WRITE]);
-					nr++;
-				}
-			}
-		}
-		/*
-		 * If we race on finding an entry, just loop back again.
-		 * Should be very rare.
-		 */
-	} while (!nr);
+	if (stat->batch + value < stat->batch ||
+	    stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
+		blk_stat_flush_batch(stat);
 
-	dst[READ].time = dst[WRITE].time = latest;
+	stat->batch += value;
+	stat->nr_batch++;
 }
 
-void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
+void blk_stat_add(struct request *rq)
 {
-	if (q->mq_ops)
-		blk_mq_stat_get(q, dst);
-	else {
-		blk_stat_flush_batch(&q->rq_stats[READ]);
-		blk_stat_flush_batch(&q->rq_stats[WRITE]);
-		memcpy(&dst[READ], &q->rq_stats[READ],
-		       sizeof(struct blk_rq_stat));
-		memcpy(&dst[WRITE], &q->rq_stats[WRITE],
-		       sizeof(struct blk_rq_stat));
+	struct request_queue *q = rq->q;
+	struct blk_stat_callback *cb;
+	struct blk_rq_stat *stat;
+	int bucket;
+	s64 now, value;
+
+	now = __blk_stat_time(ktime_to_ns(ktime_get()));
+	if (now < blk_stat_time(&rq->issue_stat))
+		return;
+
+	value = now - blk_stat_time(&rq->issue_stat);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
+		if (blk_stat_is_active(cb)) {
+			bucket = cb->bucket_fn(rq);
+			stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
+			__blk_stat_add(stat, value);
+		}
 	}
+	rcu_read_unlock();
 }
 
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
+static void blk_stat_timer_fn(unsigned long data)
 {
-	struct blk_mq_ctx *ctx;
-	unsigned int i, nr;
+	struct blk_stat_callback *cb = (void *)data;
+	unsigned int bucket;
+	int cpu;
 
-	nr = 0;
-	do {
-		uint64_t newest = 0;
+	for (bucket = 0; bucket < cb->buckets; bucket++)
+		blk_stat_init(&cb->stat[bucket]);
 
-		hctx_for_each_ctx(hctx, ctx, i) {
-			blk_stat_flush_batch(&ctx->stat[READ]);
-			blk_stat_flush_batch(&ctx->stat[WRITE]);
+	for_each_online_cpu(cpu) {
+		struct blk_rq_stat *cpu_stat;
 
-			if (!ctx->stat[READ].nr_samples &&
-			    !ctx->stat[WRITE].nr_samples)
-				continue;
-
-			if (ctx->stat[READ].time > newest)
-				newest = ctx->stat[READ].time;
-			if (ctx->stat[WRITE].time > newest)
-				newest = ctx->stat[WRITE].time;
+		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+		for (bucket = 0; bucket < cb->buckets; bucket++) {
+			blk_stat_sum(&cb->stat[bucket], &cpu_stat[bucket]);
+			blk_stat_init(&cpu_stat[bucket]);
 		}
+	}
 
-		if (!newest)
-			break;
-
-		hctx_for_each_ctx(hctx, ctx, i) {
-			if (ctx->stat[READ].time == newest) {
-				blk_stat_sum(&dst[READ], &ctx->stat[READ]);
-				nr++;
-			}
-			if (ctx->stat[WRITE].time == newest) {
-				blk_stat_sum(&dst[WRITE], &ctx->stat[WRITE]);
-				nr++;
-			}
-		}
-		/*
-		 * If we race on finding an entry, just loop back again.
-		 * Should be very rare, as the window is only updated
-		 * occasionally
-		 */
-	} while (!nr);
+	cb->timer_fn(cb);
 }
 
-static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
+			unsigned int (*bucket_fn)(const struct request *),
+			unsigned int buckets, void *data)
 {
-	stat->min = -1ULL;
-	stat->max = stat->nr_samples = stat->mean = 0;
-	stat->batch = stat->nr_batch = 0;
-	stat->time = time_now & BLK_STAT_NSEC_MASK;
-}
+	struct blk_stat_callback *cb;
 
-void blk_stat_init(struct blk_rq_stat *stat)
-{
-	__blk_stat_init(stat, ktime_to_ns(ktime_get()));
-}
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb)
+		return NULL;
 
-static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
-{
-	return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
-}
+	cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat),
+				 GFP_KERNEL);
+	if (!cb->stat) {
+		kfree(cb);
+		return NULL;
+	}
+	cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat),
+				      __alignof__(struct blk_rq_stat));
+	if (!cb->cpu_stat) {
+		kfree(cb->stat);
+		kfree(cb);
+		return NULL;
+	}
 
-bool blk_stat_is_current(struct blk_rq_stat *stat)
-{
-	return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
+	cb->timer_fn = timer_fn;
+	cb->bucket_fn = bucket_fn;
+	cb->data = data;
+	cb->buckets = buckets;
+	setup_timer(&cb->timer, blk_stat_timer_fn, (unsigned long)cb);
+
+	return cb;
 }
+EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
 
-void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
+void blk_stat_add_callback(struct request_queue *q,
+			   struct blk_stat_callback *cb)
 {
-	s64 now, value;
+	unsigned int bucket;
+	int cpu;
 
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq->issue_stat))
-		return;
+	for_each_possible_cpu(cpu) {
+		struct blk_rq_stat *cpu_stat;
 
-	if (!__blk_stat_is_current(stat, now))
-		__blk_stat_init(stat, now);
+		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+		for (bucket = 0; bucket < cb->buckets; bucket++)
+			blk_stat_init(&cpu_stat[bucket]);
+	}
 
-	value = now - blk_stat_time(&rq->issue_stat);
-	if (value > stat->max)
-		stat->max = value;
-	if (value < stat->min)
-		stat->min = value;
+	spin_lock(&q->stats->lock);
+	list_add_tail_rcu(&cb->list, &q->stats->callbacks);
+	set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+	spin_unlock(&q->stats->lock);
+}
+EXPORT_SYMBOL_GPL(blk_stat_add_callback);
 
-	if (stat->batch + value < stat->batch ||
-	    stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
-		blk_stat_flush_batch(stat);
+void blk_stat_remove_callback(struct request_queue *q,
+			      struct blk_stat_callback *cb)
+{
+	spin_lock(&q->stats->lock);
+	list_del_rcu(&cb->list);
+	if (list_empty(&q->stats->callbacks))
+		clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+	spin_unlock(&q->stats->lock);
 
-	stat->batch += value;
-	stat->nr_batch++;
+	del_timer_sync(&cb->timer);
 }
+EXPORT_SYMBOL_GPL(blk_stat_remove_callback);
 
-void blk_stat_clear(struct request_queue *q)
+static void blk_stat_free_callback_rcu(struct rcu_head *head)
 {
-	if (q->mq_ops) {
-		struct blk_mq_hw_ctx *hctx;
-		struct blk_mq_ctx *ctx;
-		int i, j;
-
-		queue_for_each_hw_ctx(q, hctx, i) {
-			hctx_for_each_ctx(hctx, ctx, j) {
-				blk_stat_init(&ctx->stat[READ]);
-				blk_stat_init(&ctx->stat[WRITE]);
-			}
-		}
-	} else {
-		blk_stat_init(&q->rq_stats[READ]);
-		blk_stat_init(&q->rq_stats[WRITE]);
-	}
+	struct blk_stat_callback *cb;
+
+	cb = container_of(head, struct blk_stat_callback, rcu);
+	free_percpu(cb->cpu_stat);
+	kfree(cb->stat);
+	kfree(cb);
 }
 
-void blk_stat_set_issue_time(struct blk_issue_stat *stat)
+void blk_stat_free_callback(struct blk_stat_callback *cb)
 {
-	stat->time = (stat->time & BLK_STAT_MASK) |
-		(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
+	call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
 }
+EXPORT_SYMBOL_GPL(blk_stat_free_callback);
 
-/*
- * Enable stat tracking, return whether it was enabled
- */
-bool blk_stat_enable(struct request_queue *q)
+struct blk_queue_stats *blk_alloc_queue_stats(void)
 {
-	if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-		set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
-		return false;
-	}
+	struct blk_queue_stats *stats;
+
+	stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return NULL;
+
+	INIT_LIST_HEAD(&stats->callbacks);
+	spin_lock_init(&stats->lock);
+
+	return stats;
+}
+
+void blk_free_queue_stats(struct blk_queue_stats *stats)
+{
+	if (!stats)
+		return;
+
+	WARN_ON(!list_empty(&stats->callbacks));
 
-	return true;
+	kfree(stats);
 }
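
For readers coming to the new interface cold, a minimal consumer sketch follows. It is not part of the patch: it assumes the declarations this series adds to blk-stat.h, plus a timer re-arm helper along the lines of blk_stat_activate_msecs(); the my_* names and MY_WINDOW_MSECS are hypothetical.

/*
 * Illustrative sketch only: bucket completions by data direction,
 * aggregate over a polling window, and tear down on exit.
 */
#include <linux/blkdev.h>

#include "blk-stat.h"

#define MY_WINDOW_MSECS	100	/* hypothetical aggregation window */

static void my_timer_fn(struct blk_stat_callback *cb)
{
	/* cb->stat[] holds the freshly summed per-bucket window stats */
	pr_debug("read mean %llu ns, write mean %llu ns\n",
		 (unsigned long long)cb->stat[READ].mean,
		 (unsigned long long)cb->stat[WRITE].mean);

	/* keep sampling; assumed blk-stat.h helper that re-arms cb->timer */
	blk_stat_activate_msecs(cb, MY_WINDOW_MSECS);
}

static struct blk_stat_callback *my_stats_install(struct request_queue *q)
{
	struct blk_stat_callback *cb;

	/* two buckets, indexed by rq_data_dir() via blk_stat_rq_ddir() */
	cb = blk_stat_alloc_callback(my_timer_fn, blk_stat_rq_ddir, 2, NULL);
	if (!cb)
		return NULL;

	blk_stat_add_callback(q, cb);
	blk_stat_activate_msecs(cb, MY_WINDOW_MSECS);
	return cb;
}

static void my_stats_remove(struct request_queue *q,
			    struct blk_stat_callback *cb)
{
	blk_stat_remove_callback(q, cb);	/* unlinks and del_timer_sync()s */
	blk_stat_free_callback(cb);		/* freed after an RCU grace period */
}

The shape mirrors the lifecycle the patch defines: blk_stat_add() feeds a callback's per-CPU buckets only while blk_stat_is_active(cb), blk_stat_timer_fn() folds the per-CPU buckets into cb->stat[] before invoking timer_fn, and blk_stat_free_callback() defers the actual free past an RCU grace period so a concurrent blk_stat_add() walking the callback list cannot touch freed memory.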