@@ -58,6 +58,7 @@
 #include <linux/sched/signal.h>
 
 #include <trace/events/block.h>
+#include <linux/list_sort.h>
 
 #include "md.h"
 #include "raid5.h"
@@ -878,41 +879,107 @@ static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
 	return 1;
 }
 
-static void flush_deferred_bios(struct r5conf *conf)
+static void dispatch_bio_list(struct bio_list *tmp)
 {
-	struct bio_list tmp;
 	struct bio *bio;
 
-	if (!conf->batch_bio_dispatch || !conf->group_cnt)
+	while ((bio = bio_list_pop(tmp)))
+		generic_make_request(bio);
+}
+
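+/* list_sort() comparator: order pending stripe groups by start sector */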
+static int cmp_stripe(void *priv, struct list_head *a, struct list_head *b)
+{
+	const struct r5pending_data *da = list_entry(a,
+				struct r5pending_data, sibling);
+	const struct r5pending_data *db = list_entry(b,
+				struct r5pending_data, sibling);
+	if (da->sector > db->sector)
+		return 1;
+	if (da->sector < db->sector)
+		return -1;
+	return 0;
+}
+
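+/*
+ * Dispatch up to @target pending stripe groups in sector-sorted order,
+ * resuming round-robin from where the previous flush stopped
+ * (conf->next_pending_data).
+ */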
+static void dispatch_defer_bios(struct r5conf *conf, int target,
+				struct bio_list *list)
+{
+	struct r5pending_data *data;
+	struct list_head *first, *next = NULL;
+	int cnt = 0;
+
+	if (conf->pending_data_cnt == 0)
+		return;
+
+	list_sort(NULL, &conf->pending_list, cmp_stripe);
+
+	first = conf->pending_list.next;
+
+	/* temporarily move the head */
+	if (conf->next_pending_data)
+		list_move_tail(&conf->pending_list,
+				&conf->next_pending_data->sibling);
+
+	while (!list_empty(&conf->pending_list)) {
+		data = list_first_entry(&conf->pending_list,
+			struct r5pending_data, sibling);
+		if (&data->sibling == first)
+			first = data->sibling.next;
+		next = data->sibling.next;
+
+		bio_list_merge(list, &data->bios);
+		list_move(&data->sibling, &conf->free_list);
+		cnt++;
+		if (cnt >= target)
+			break;
+	}
+	conf->pending_data_cnt -= cnt;
+	BUG_ON(conf->pending_data_cnt < 0 || cnt < target);
+
+	if (next != &conf->pending_list)
+		conf->next_pending_data = list_entry(next,
+				struct r5pending_data, sibling);
+	else
+		conf->next_pending_data = NULL;
+	/* list isn't empty */
+	if (first != &conf->pending_list)
+		list_move_tail(&conf->pending_list, first);
+}
+
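+/* drain every deferred stripe group and issue its bios */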
+static void flush_deferred_bios(struct r5conf *conf)
+{
+	struct bio_list tmp = BIO_EMPTY_LIST;
+
+	if (conf->pending_data_cnt == 0)
 		return;
 
-	bio_list_init(&tmp);
 	spin_lock(&conf->pending_bios_lock);
-	bio_list_merge(&tmp, &conf->pending_bios);
-	bio_list_init(&conf->pending_bios);
+	dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp);
+	BUG_ON(conf->pending_data_cnt != 0);
 	spin_unlock(&conf->pending_bios_lock);
 
-	while ((bio = bio_list_pop(&tmp)))
-		generic_make_request(bio);
+	dispatch_bio_list(&tmp);
 }
 
-static void defer_bio_issue(struct r5conf *conf, struct bio *bio)
+static void defer_issue_bios(struct r5conf *conf, sector_t sector,
+				struct bio_list *bios)
 {
-	/*
-	 * change group_cnt will drain all bios, so this is safe
-	 *
-	 * A read generally means a read-modify-write, which usually means a
-	 * randwrite, so we don't delay it
-	 */
-	if (!conf->batch_bio_dispatch || !conf->group_cnt ||
-	    bio_op(bio) == REQ_OP_READ) {
-		generic_make_request(bio);
-		return;
-	}
+	struct bio_list tmp = BIO_EMPTY_LIST;
+	struct r5pending_data *ent;
+
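+	/*
+	 * Park this stripe's writes on a free r5pending_data entry keyed by
+	 * the stripe sector; dispatch happens later in sorted batches.
+	 */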
 	spin_lock(&conf->pending_bios_lock);
-	bio_list_add(&conf->pending_bios, bio);
+	ent = list_first_entry(&conf->free_list, struct r5pending_data,
+							sibling);
+	list_move_tail(&ent->sibling, &conf->pending_list);
+	ent->sector = sector;
+	bio_list_init(&ent->bios);
+	bio_list_merge(&ent->bios, bios);
+	conf->pending_data_cnt++;
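+	/* pool exhausted: dispatch one batch now to refill free_list */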
+	if (conf->pending_data_cnt >= PENDING_IO_MAX)
+		dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp);
+
 	spin_unlock(&conf->pending_bios_lock);
-	md_wakeup_thread(conf->mddev->thread);
+
+	dispatch_bio_list(&tmp);
 }
 
 static void
@@ -925,6 +992,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 	struct r5conf *conf = sh->raid_conf;
 	int i, disks = sh->disks;
 	struct stripe_head *head_sh = sh;
+	struct bio_list pending_bios = BIO_EMPTY_LIST;
+	bool should_defer;
 
 	might_sleep();
 
@@ -941,6 +1010,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 		}
 	}
 
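+	/*
+	 * Only writes are worth batching: a read usually implies
+	 * read-modify-write, i.e. random I/O, so it is issued immediately.
+	 */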
+	should_defer = conf->batch_bio_dispatch && conf->group_cnt;
+
 	for (i = disks; i--; ) {
 		int op, op_flags = 0;
 		int replace_only = 0;
@@ -1095,7 +1166,10 @@ again:
 			trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
 					      bi, disk_devt(conf->mddev->gendisk),
 					      sh->dev[i].sector);
-			defer_bio_issue(conf, bi);
+			if (should_defer && op_is_write(op))
+				bio_list_add(&pending_bios, bi);
+			else
+				generic_make_request(bi);
 		}
 		if (rrdev) {
 			if (s->syncing || s->expanding || s->expanded
@@ -1140,7 +1214,10 @@ again:
 			trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
 					      rbi, disk_devt(conf->mddev->gendisk),
 					      sh->dev[i].sector);
-			defer_bio_issue(conf, rbi);
+			if (should_defer && op_is_write(op))
+				bio_list_add(&pending_bios, rbi);
+			else
+				generic_make_request(rbi);
 		}
 		if (!rdev && !rrdev) {
 			if (op_is_write(op))
@@ -1158,6 +1235,9 @@ again:
 		if (sh != head_sh)
 			goto again;
 	}
+
+	if (should_defer && !bio_list_empty(&pending_bios))
+		defer_issue_bios(conf, head_sh->sector, &pending_bios);
 }
 
 static struct dma_async_tx_descriptor *
@@ -6678,6 +6758,7 @@ static void free_conf(struct r5conf *conf)
 		put_page(conf->disks[i].extra_page);
 	kfree(conf->disks);
 	kfree(conf->stripe_hashtbl);
+	kfree(conf->pending_data);
 	kfree(conf);
 }
 
@@ -6787,6 +6868,14 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
 	if (conf == NULL)
 		goto abort;
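+	/* preallocate the r5pending_data pool; entries start on free_list */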
+	INIT_LIST_HEAD(&conf->free_list);
+	INIT_LIST_HEAD(&conf->pending_list);
+	conf->pending_data = kzalloc(sizeof(struct r5pending_data) *
+		PENDING_IO_MAX, GFP_KERNEL);
+	if (!conf->pending_data)
+		goto abort;
+	for (i = 0; i < PENDING_IO_MAX; i++)
+		list_add(&conf->pending_data[i].sibling, &conf->free_list);
 	/* Don't enable multi-threading by default*/
 	if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
 				 &new_group)) {
@@ -6811,7 +6900,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	atomic_set(&conf->active_stripes, 0);
 	atomic_set(&conf->preread_active_stripes, 0);
 	atomic_set(&conf->active_aligned_reads, 0);
-	bio_list_init(&conf->pending_bios);
 	spin_lock_init(&conf->pending_bios_lock);
 	conf->batch_bio_dispatch = true;
 	rdev_for_each(rdev, mddev) {