@@ -115,9 +115,21 @@ struct child_latency_info {
 	atomic_t scale_cookie;
 };
 
+struct percentile_stats {
+	u64 total;
+	u64 missed;
+};
+
+struct latency_stat {
+	union {
+		struct percentile_stats ps;
+		struct blk_rq_stat rqs;
+	};
+};
+
 struct iolatency_grp {
 	struct blkg_policy_data pd;
-	struct blk_rq_stat __percpu *stats;
+	struct latency_stat __percpu *stats;
 	struct blk_iolatency *blkiolat;
 	struct rq_depth rq_depth;
 	struct rq_wait rq_wait;
@@ -132,6 +144,7 @@ struct iolatency_grp {
 	/* Our current number of IO's for the last summation. */
 	u64 nr_samples;
 
+	bool ssd;
 	struct child_latency_info child_lat;
 };
 
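A note on the union above: both representations share one storage slot, so the per-CPU footprint is the size of the larger member, and the new iolat->ssd flag selects which member is live. A standalone user-space sketch of that property; the *_demo types are stand-ins for the kernel structs, which are not available outside the kernel:

	#include <stdio.h>

	struct percentile_stats_demo {
		unsigned long long total;	/* IOs seen in the window */
		unsigned long long missed;	/* IOs over the latency target */
	};

	struct blk_rq_stat_demo {		/* stand-in for blk_rq_stat */
		unsigned long long mean;
		unsigned long long nr_samples;
	};

	struct latency_stat_demo {
		union {
			struct percentile_stats_demo ps;	/* ssd path */
			struct blk_rq_stat_demo rqs;		/* rotational path */
		};
	};

	int main(void)
	{
		/* One allocation serves both modes; the slot is only as
		 * large as the bigger of the two members. */
		printf("per-CPU slot: %zu bytes\n",
		       sizeof(struct latency_stat_demo));
		return 0;
	}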
@@ -172,6 +185,80 @@ static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
 	return pd_to_blkg(&iolat->pd);
 }
 
+static inline void latency_stat_init(struct iolatency_grp *iolat,
+				     struct latency_stat *stat)
+{
+	if (iolat->ssd) {
+		stat->ps.total = 0;
+		stat->ps.missed = 0;
+	} else
+		blk_rq_stat_init(&stat->rqs);
+}
+
+static inline void latency_stat_sum(struct iolatency_grp *iolat,
+				    struct latency_stat *sum,
+				    struct latency_stat *stat)
+{
+	if (iolat->ssd) {
+		sum->ps.total += stat->ps.total;
+		sum->ps.missed += stat->ps.missed;
+	} else
+		blk_rq_stat_sum(&sum->rqs, &stat->rqs);
+}
+
+static inline void latency_stat_record_time(struct iolatency_grp *iolat,
+					    u64 req_time)
+{
+	struct latency_stat *stat = get_cpu_ptr(iolat->stats);
+	if (iolat->ssd) {
+		if (req_time >= iolat->min_lat_nsec)
+			stat->ps.missed++;
+		stat->ps.total++;
+	} else
+		blk_rq_stat_add(&stat->rqs, req_time);
+	put_cpu_ptr(stat);
+}
+
+static inline bool latency_sum_ok(struct iolatency_grp *iolat,
+				  struct latency_stat *stat)
+{
+	if (iolat->ssd) {
+		u64 thresh = div64_u64(stat->ps.total, 10);
+		thresh = max(thresh, 1ULL);
+		return stat->ps.missed < thresh;
+	}
+	return stat->rqs.mean <= iolat->min_lat_nsec;
+}
+
+static inline u64 latency_stat_samples(struct iolatency_grp *iolat,
+				       struct latency_stat *stat)
+{
+	if (iolat->ssd)
+		return stat->ps.total;
+	return stat->rqs.nr_samples;
+}
+
+static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
+					      struct latency_stat *stat)
+{
+	int exp_idx;
+
+	if (iolat->ssd)
+		return;
+
+	/*
+	 * CALC_LOAD takes in a number stored in fixed point representation.
+	 * Because we are using this for IO time in ns, the values stored
+	 * are significantly larger than the FIXED_1 denominator (2048).
+	 * Therefore, rounding errors in the calculation are negligible and
+	 * can be ignored.
+	 */
+	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
+			div64_u64(iolat->cur_win_nsec,
+				  BLKIOLATENCY_EXP_BUCKET_SIZE));
+	CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat->rqs.mean);
+}
+
 static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
 				       wait_queue_entry_t *wait,
 				       bool first_block)
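A quick worked example of the ssd branch in latency_sum_ok() above: a window passes while fewer than max(total / 10, 1) IOs missed the target, which is roughly a 90th-percentile test, and the max() means a window with fewer than ten samples tolerates zero misses. A minimal user-space mirror of the same arithmetic (sum_ok is a hypothetical stand-in, not a kernel function):

	#include <stdbool.h>
	#include <stdio.h>

	static bool sum_ok(unsigned long long total, unsigned long long missed)
	{
		unsigned long long thresh = total / 10;

		if (thresh < 1)
			thresh = 1;
		return missed < thresh;
	}

	int main(void)
	{
		printf("%d\n", sum_ok(42, 3));	/* 1: 3 < max(4, 1) */
		printf("%d\n", sum_ok(42, 4));	/* 0: ~90th percentile exceeded */
		printf("%d\n", sum_ok(5, 1));	/* 0: tiny windows allow no misses */
		return 0;
	}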
@@ -418,7 +505,6 @@ static void iolatency_record_time(struct iolatency_grp *iolat,
 				  struct bio_issue *issue, u64 now,
 				  bool issue_as_root)
 {
-	struct blk_rq_stat *rq_stat;
 	u64 start = bio_issue_time(issue);
 	u64 req_time;
 
@@ -444,9 +530,7 @@ static void iolatency_record_time(struct iolatency_grp *iolat,
 		return;
 	}
 
-	rq_stat = get_cpu_ptr(iolat->stats);
-	blk_rq_stat_add(rq_stat, req_time);
-	put_cpu_ptr(rq_stat);
+	latency_stat_record_time(iolat, req_time);
 }
 
 #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
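The record path above is where the two accounting styles diverge per request, and it illustrates why a percentile beats a mean on fast devices: a mean can stay under the target while individual IOs stall badly. A self-contained sketch with made-up numbers (nine 0.1 ms IOs plus one 10 ms stall against a 2 ms target):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long lat_ns[10] = { 100000, 100000, 100000,
						  100000, 100000, 100000,
						  100000, 100000, 100000,
						  10000000 };
		unsigned long long min_lat_nsec = 2000000;
		unsigned long long sum = 0, missed = 0, total = 0, thresh;
		int i;

		for (i = 0; i < 10; i++) {
			sum += lat_ns[i];
			if (lat_ns[i] >= min_lat_nsec)
				missed++;
			total++;
		}
		/* Mean check passes (1.09 ms <= 2 ms), hiding the stall. */
		printf("mean=%llu ok=%d\n", sum / total,
		       sum / total <= min_lat_nsec);
		/* Percentile check fails: 1 missed is not < max(1, 1). */
		thresh = total / 10 ? total / 10 : 1;
		printf("missed=%llu ok=%d\n", missed, missed < thresh);
		return 0;
	}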
@@ -457,17 +541,17 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
 	struct blkcg_gq *blkg = lat_to_blkg(iolat);
 	struct iolatency_grp *parent;
 	struct child_latency_info *lat_info;
-	struct blk_rq_stat stat;
+	struct latency_stat stat;
 	unsigned long flags;
-	int cpu, exp_idx;
+	int cpu;
 
-	blk_rq_stat_init(&stat);
+	latency_stat_init(iolat, &stat);
 	preempt_disable();
 	for_each_online_cpu(cpu) {
-		struct blk_rq_stat *s;
+		struct latency_stat *s;
 		s = per_cpu_ptr(iolat->stats, cpu);
-		blk_rq_stat_sum(&stat, s);
-		blk_rq_stat_init(s);
+		latency_stat_sum(iolat, &stat, s);
+		latency_stat_init(iolat, s);
 	}
 	preempt_enable();
 
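The loop above follows a common percpu pattern: disable preemption, fold each CPU's slot into a local accumulator, then re-initialize the slot so the next window starts from zero. A rough user-space analogue using a plain array in place of per-CPU storage (no synchronization shown; the kernel code can tolerate a little raciness here because these are statistics):

	#include <stdio.h>

	#define NR_CPUS_DEMO 4	/* stand-in for the online-CPU count */

	struct window_stat { unsigned long long total, missed; };

	static struct window_stat per_cpu_demo[NR_CPUS_DEMO];

	/* Fold every slot into *sum and reset it for the next window. */
	static void sum_and_reset(struct window_stat *sum)
	{
		int cpu;

		sum->total = sum->missed = 0;
		for (cpu = 0; cpu < NR_CPUS_DEMO; cpu++) {
			sum->total += per_cpu_demo[cpu].total;
			sum->missed += per_cpu_demo[cpu].missed;
			per_cpu_demo[cpu] = (struct window_stat){ 0, 0 };
		}
	}

	int main(void)
	{
		struct window_stat sum;

		per_cpu_demo[0] = (struct window_stat){ 10, 1 };
		per_cpu_demo[3] = (struct window_stat){ 32, 3 };
		sum_and_reset(&sum);
		printf("total=%llu missed=%llu\n", sum.total, sum.missed);
		return 0;
	}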
@@ -477,41 +561,33 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
 
 	lat_info = &parent->child_lat;
 
-	/*
-	 * CALC_LOAD takes in a number stored in fixed point representation.
-	 * Because we are using this for IO time in ns, the values stored
-	 * are significantly larger than the FIXED_1 denominator (2048).
-	 * Therefore, rounding errors in the calculation are negligible and
-	 * can be ignored.
-	 */
-	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
-			div64_u64(iolat->cur_win_nsec,
-				  BLKIOLATENCY_EXP_BUCKET_SIZE));
-	CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat.mean);
+	iolat_update_total_lat_avg(iolat, &stat);
 
 	/* Everything is ok and we don't need to adjust the scale. */
-	if (stat.mean <= iolat->min_lat_nsec &&
+	if (latency_sum_ok(iolat, &stat) &&
 	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
 		return;
 
 	/* Somebody beat us to the punch, just bail. */
 	spin_lock_irqsave(&lat_info->lock, flags);
 	lat_info->nr_samples -= iolat->nr_samples;
-	lat_info->nr_samples += stat.nr_samples;
-	iolat->nr_samples = stat.nr_samples;
+	lat_info->nr_samples += latency_stat_samples(iolat, &stat);
+	iolat->nr_samples = latency_stat_samples(iolat, &stat);
 
 	if ((lat_info->last_scale_event >= now ||
 	    now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) &&
 	    lat_info->scale_lat <= iolat->min_lat_nsec)
 		goto out;
 
-	if (stat.mean <= iolat->min_lat_nsec &&
-	    stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) {
+	if (latency_sum_ok(iolat, &stat)) {
+		if (latency_stat_samples(iolat, &stat) <
+		    BLKIOLATENCY_MIN_GOOD_SAMPLES)
+			goto out;
 		if (lat_info->scale_grp == iolat) {
 			lat_info->last_scale_event = now;
 			scale_cookie_change(iolat->blkiolat, lat_info, true);
 		}
-	} else if (stat.mean > iolat->min_lat_nsec) {
+	} else {
 		lat_info->last_scale_event = now;
 		if (!lat_info->scale_grp ||
 		    lat_info->scale_lat > iolat->min_lat_nsec) {
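For the rotational path, iolat_update_total_lat_avg() still feeds the window mean into CALC_LOAD's exponentially weighted average. CALC_LOAD is the kernel's fixed-point EWMA with FIXED_1 = 1 << 11; roughly what it computes, as a plain-C approximation (calc_load_demo mimics the macro and, like the patch comment says, the >> 11 rounding loss is negligible for ns-sized samples):

	#include <stdio.h>

	#define FSHIFT_DEMO	11			/* kernel FSHIFT */
	#define FIXED_1_DEMO	(1 << FSHIFT_DEMO)	/* 2048 == fixed-point 1.0 */

	/* new = (old * exp + sample * (FIXED_1 - exp)) / FIXED_1 */
	static unsigned long long calc_load_demo(unsigned long long load,
						 unsigned long long exp,
						 unsigned long long sample)
	{
		load *= exp;
		load += sample * (FIXED_1_DEMO - exp);
		return load >> FSHIFT_DEMO;
	}

	int main(void)
	{
		/* Feed a steady 2 ms mean into an average that starts at 0;
		 * exp = 1835 (~0.9 * FIXED_1) decays old history slowly. */
		unsigned long long avg = 0;
		int i;

		for (i = 0; i < 50; i++)
			avg = calc_load_demo(avg, 1835, 2000000);
		printf("avg ~= %llu ns\n", avg);	/* converges toward 2000000 */
		return 0;
	}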
@@ -808,13 +884,43 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
 	return 0;
 }
 
+static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
+				 size_t size)
+{
+	struct latency_stat stat;
+	int cpu;
+
+	latency_stat_init(iolat, &stat);
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		struct latency_stat *s;
+		s = per_cpu_ptr(iolat->stats, cpu);
+		latency_stat_sum(iolat, &stat, s);
+	}
+	preempt_enable();
+
+	if (iolat->rq_depth.max_depth == UINT_MAX)
+		return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
+				 (unsigned long long)stat.ps.missed,
+				 (unsigned long long)stat.ps.total);
+	return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
+			 (unsigned long long)stat.ps.missed,
+			 (unsigned long long)stat.ps.total,
+			 iolat->rq_depth.max_depth);
+}
+
 static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
 				size_t size)
 {
 	struct iolatency_grp *iolat = pd_to_lat(pd);
-	unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
-	unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
+	unsigned long long avg_lat;
+	unsigned long long cur_win;
+
+	if (iolat->ssd)
+		return iolatency_ssd_stat(iolat, buf, size);
 
+	avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
+	cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
 	if (iolat->rq_depth.max_depth == UINT_MAX)
 		return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
 				 avg_lat, cur_win);
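With this change a group on an ssd reports " missed=... total=... depth=..." instead of avg_lat/win. Assuming these fields surface on the cgroup io.stat line the way other blk-iolatency stats do (an assumption about surrounding plumbing, not something shown in this hunk), a consumer could pull them out with sscanf; the device numbers and values below are made up, and depth prints as "max" when the group is unthrottled:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char *line = "259:0 missed=4 total=42 depth=32";
		unsigned long long missed = 0, total = 0;
		const char *m = strstr(line, "missed=");
		const char *t = strstr(line, "total=");

		if (m && t && sscanf(m, "missed=%llu", &missed) == 1 &&
		    sscanf(t, "total=%llu", &total) == 1 && total)
			printf("miss ratio: %.1f%%\n", 100.0 * missed / total);
		return 0;
	}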
@@ -831,8 +937,8 @@ static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
 	iolat = kzalloc_node(sizeof(*iolat), gfp, node);
 	if (!iolat)
 		return NULL;
-	iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat),
-				__alignof__(struct blk_rq_stat), gfp);
+	iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
+				__alignof__(struct latency_stat), gfp);
 	if (!iolat->stats) {
 		kfree(iolat);
 		return NULL;
@@ -849,10 +955,15 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
+	if (blk_queue_nonrot(blkg->q))
+		iolat->ssd = true;
+	else
+		iolat->ssd = false;
+
 	for_each_possible_cpu(cpu) {
-		struct blk_rq_stat *stat;
+		struct latency_stat *stat;
 		stat = per_cpu_ptr(iolat->stats, cpu);
-		blk_rq_stat_init(stat);
+		latency_stat_init(iolat, stat);
 	}
 
 	rq_wait_init(&iolat->rq_wait);
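blk_queue_nonrot() reflects the same flag that sysfs exposes as the queue's rotational attribute, so you can check from user space which accounting path a device will take. A small sketch; the device name is hard-coded purely as an example:

	#include <stdio.h>

	int main(void)
	{
		/* 0 means non-rotational, i.e. iolat->ssd would be true. */
		FILE *f = fopen("/sys/block/sda/queue/rotational", "r");
		int rot;

		if (f && fscanf(f, "%d", &rot) == 1)
			printf("sda: %s path\n",
			       rot ? "rotational (avg_lat)" : "ssd (percentile)");
		if (f)
			fclose(f);
		return 0;
	}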