Эх сурвалжийг харах

block: Track DISCARD statistics and output them in stat and diskstat

Add tracking of REQ_OP_DISCARD ios to the partition statistics and
append them to the various stat files in /sys as well as
/proc/diskstats.  These are tracked with the same four stats as reads
and writes:

Number of discard ios completed.
Number of discard ios merged
Number of discard sectors completed
Milliseconds spent on discard requests

This is done by adding a new STAT_DISCARD entry to enum stat_group in
blk_types.h and then using it to index the stat fields for discard
requests.

tj: Refreshed on top of v4.17 and other previous updates.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andy Newell <newella@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Michael Callahan 7 жил өмнө
parent
commit
bdca3c87fb

+ 10 - 0
Documentation/ABI/testing/procfs-diskstats

@@ -5,6 +5,7 @@ Description:
 		The /proc/diskstats file displays the I/O statistics
 		The /proc/diskstats file displays the I/O statistics
 		of block devices. Each line contains the following 14
 		of block devices. Each line contains the following 14
 		fields:
 		fields:
+
 		 1 - major number
 		 1 - major number
 		 2 - minor number
 		 2 - minor number
 		 3 - device name
 		 3 - device name
@@ -19,4 +20,13 @@ Description:
 		12 - I/Os currently in progress
 		12 - I/Os currently in progress
 		13 - time spent doing I/Os (ms)
 		13 - time spent doing I/Os (ms)
 		14 - weighted time spent doing I/Os (ms)
 		14 - weighted time spent doing I/Os (ms)
+
+		Kernel 4.18+ appends four more fields for discard
+		tracking putting the total at 18:
+
+		15 - discards completed successfully
+		16 - discards merged
+		17 - sectors discarded
+		18 - time spent discarding (ms)
+
 		For more details refer to Documentation/iostats.txt
 		For more details refer to Documentation/iostats.txt

+ 16 - 12
Documentation/block/stat.txt

@@ -31,28 +31,32 @@ write ticks     milliseconds  total wait time for write requests
 in_flight       requests      number of I/Os currently in flight
 in_flight       requests      number of I/Os currently in flight
 io_ticks        milliseconds  total time this block device has been active
 io_ticks        milliseconds  total time this block device has been active
 time_in_queue   milliseconds  total wait time for all requests
 time_in_queue   milliseconds  total wait time for all requests
+discard I/Os    requests      number of discard I/Os processed
+discard merges  requests      number of discard I/Os merged with in-queue I/O
+discard sectors sectors       number of sectors discarded
+discard ticks   milliseconds  total wait time for discard requests
 
 
-read I/Os, write I/Os
-=====================
+read I/Os, write I/Os, discard I/Os
+===================================
 
 
 These values increment when an I/O request completes.
 These values increment when an I/O request completes.
 
 
-read merges, write merges
-=========================
+read merges, write merges, discard merges
+=========================================
 
 
 These values increment when an I/O request is merged with an
 These values increment when an I/O request is merged with an
 already-queued I/O request.
 already-queued I/O request.
 
 
-read sectors, write sectors
-===========================
+read sectors, write sectors, discard sectors
+============================================
 
 
-These values count the number of sectors read from or written to this
-block device.  The "sectors" in question are the standard UNIX 512-byte
-sectors, not any device- or filesystem-specific block size.  The
-counters are incremented when the I/O completes.
+These values count the number of sectors read from, written to, or
+discarded from this block device.  The "sectors" in question are the
+standard UNIX 512-byte sectors, not any device- or filesystem-specific
+block size.  The counters are incremented when the I/O completes.
 
 
-read ticks, write ticks
-=======================
+read ticks, write ticks, discard ticks
+======================================
 
 
 These values count the number of milliseconds that I/O requests have
 These values count the number of milliseconds that I/O requests have
 waited on this block device.  If there are multiple I/O requests waiting,
 waited on this block device.  If there are multiple I/O requests waiting,

+ 15 - 0
Documentation/iostats.txt

@@ -31,6 +31,9 @@ Here are examples of these different formats::
       3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
       3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
       3    1   hda1 35486 38030 38030 38030
       3    1   hda1 35486 38030 38030 38030
 
 
+   4.18+ diskstats:
+      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0
+
 On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
 On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
 a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
 a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
 
 
@@ -101,6 +104,18 @@ Field 11 -- weighted # of milliseconds spent doing I/Os
     last update of this field.  This can provide an easy measure of both
     last update of this field.  This can provide an easy measure of both
     I/O completion time and the backlog that may be accumulating.
     I/O completion time and the backlog that may be accumulating.
 
 
+Field 12 -- # of discards completed
+    This is the total number of discards completed successfully.
+
+Field 13 -- # of discards merged
+    See the description of field 2
+
+Field 14 -- # of sectors discarded
+    This is the total number of sectors discarded successfully.
+
+Field 15 -- # of milliseconds spent discarding
+    This is the total number of milliseconds spent by all discards (as
+    measured from __make_request() to end_that_request_last()).
 
 
 To avoid introducing performance bottlenecks, no locks are held while
 To avoid introducing performance bottlenecks, no locks are held while
 modifying these counters.  This implies that minor inaccuracies may be
 modifying these counters.  This implies that minor inaccuracies may be

+ 10 - 3
block/genhd.c

@@ -1333,8 +1333,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 		part_round_stats(gp->queue, cpu, hd);
 		part_round_stats(gp->queue, cpu, hd);
 		part_stat_unlock();
 		part_stat_unlock();
 		part_in_flight(gp->queue, hd, inflight);
 		part_in_flight(gp->queue, hd, inflight);
-		seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
-			   "%u %lu %lu %lu %u %u %u %u\n",
+		seq_printf(seqf, "%4d %7d %s "
+			   "%lu %lu %lu %u "
+			   "%lu %lu %lu %u "
+			   "%u %u %u "
+			   "%lu %lu %lu %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
 			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[STAT_READ]),
 			   part_stat_read(hd, ios[STAT_READ]),
@@ -1347,7 +1350,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
 			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
 			   inflight[0],
 			   inflight[0],
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
-			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
+			   jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
+			   part_stat_read(hd, ios[STAT_DISCARD]),
+			   part_stat_read(hd, merges[STAT_DISCARD]),
+			   part_stat_read(hd, sectors[STAT_DISCARD]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD]))
 			);
 			);
 	}
 	}
 	disk_part_iter_exit(&piter);
 	disk_part_iter_exit(&piter);

+ 7 - 2
block/partition-generic.c

@@ -130,7 +130,8 @@ ssize_t part_stat_show(struct device *dev,
 	return sprintf(buf,
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8u %8u %8u "
+		"%8lu %8lu %8llu %8u"
 		"\n",
 		"\n",
 		part_stat_read(p, ios[STAT_READ]),
 		part_stat_read(p, ios[STAT_READ]),
 		part_stat_read(p, merges[STAT_READ]),
 		part_stat_read(p, merges[STAT_READ]),
@@ -142,7 +143,11 @@ ssize_t part_stat_show(struct device *dev,
 		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
 		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
 		inflight[0],
 		inflight[0],
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
-		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+		part_stat_read(p, ios[STAT_DISCARD]),
+		part_stat_read(p, merges[STAT_DISCARD]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD])));
 }
 }
 
 
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,

+ 8 - 0
include/linux/blk_types.h

@@ -360,6 +360,7 @@ enum req_flag_bits {
 enum stat_group {
 enum stat_group {
 	STAT_READ,
 	STAT_READ,
 	STAT_WRITE,
 	STAT_WRITE,
+	STAT_DISCARD,
 
 
 	NR_STAT_GROUPS
 	NR_STAT_GROUPS
 };
 };
@@ -401,8 +402,15 @@ static inline bool op_is_sync(unsigned int op)
 		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
 }
 
 
+static inline bool op_is_discard(unsigned int op)
+{
+	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
+}
+
 static inline int op_stat_group(unsigned int op)
 static inline int op_stat_group(unsigned int op)
 {
 {
+	if (op_is_discard(op))
+		return STAT_DISCARD;
 	return op_is_write(op);
 	return op_is_write(op);
 }
 }
 
 

+ 2 - 1
include/linux/genhd.h

@@ -356,7 +356,8 @@ static inline void free_part_stats(struct hd_struct *part)
 
 
 #define part_stat_read_accum(part, field)				\
 #define part_stat_read_accum(part, field)				\
 	(part_stat_read(part, field[STAT_READ]) +			\
 	(part_stat_read(part, field[STAT_READ]) +			\
-	 part_stat_read(part, field[STAT_WRITE]))
+	 part_stat_read(part, field[STAT_WRITE]) +			\
+	 part_stat_read(part, field[STAT_DISCARD]))
 
 
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
 	__part_stat_add((cpu), (part), field, addnd);			\