@@ -111,22 +111,30 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
 /*
  * Key building.
  */
-static void build_data_key(struct dm_thin_device *td,
-			   dm_block_t b, struct dm_cell_key *key)
+enum lock_space {
+	VIRTUAL,
+	PHYSICAL
+};
+
+static void build_key(struct dm_thin_device *td, enum lock_space ls,
+		      dm_block_t b, dm_block_t e, struct dm_cell_key *key)
 {
-	key->virtual = 0;
+	key->virtual = (ls == VIRTUAL);
 	key->dev = dm_thin_dev_id(td);
 	key->block_begin = b;
-	key->block_end = b + 1ULL;
+	key->block_end = e;
+}
+
+static void build_data_key(struct dm_thin_device *td, dm_block_t b,
+			   struct dm_cell_key *key)
+{
+	build_key(td, PHYSICAL, b, b + 1llu, key);
 }
 
 static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
 			      struct dm_cell_key *key)
 {
-	key->virtual = 1;
-	key->dev = dm_thin_dev_id(td);
-	key->block_begin = b;
-	key->block_end = b + 1ULL;
+	build_key(td, VIRTUAL, b, b + 1llu, key);
 }
 
 /*----------------------------------------------------------------*/
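
For orientation: bio prison keys now describe half-open block ranges [b, e) in either the virtual or the physical lock space. A minimal sketch of the range form, mirroring what process_discard_bio() does further down in this patch (tc, bio, begin and end come from the surrounding context):

	struct dm_cell_key virt_key;
	struct dm_bio_prison_cell *virt_cell;

	/* lock the whole virtual block range covered by the discard */
	build_key(tc->td, VIRTUAL, begin, end, &virt_key);
	if (bio_detain(tc->pool, &virt_key, bio, &virt_cell))
		return;	/* another holder owns part of the range */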
@@ -312,6 +320,138 @@ struct thin_c {
 
 /*----------------------------------------------------------------*/
 
+/**
+ * __blkdev_issue_discard_async - queue a discard with async completion
+ * @bdev: blockdev to issue discard for
+ * @sector: start sector
+ * @nr_sects: number of sectors to discard
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @flags: BLKDEV_IFL_* flags to control behaviour
+ * @parent_bio: parent discard bio that all sub discards get chained to
+ *
+ * Description:
+ *    Asynchronously issue a discard request for the sectors in question.
+ *    NOTE: this variant of blk-core's blkdev_issue_discard() is a stop-gap
+ *    that is being kept local to DM thinp until the block changes to allow
+ *    late bio splitting land upstream.
+ */
+static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sector,
+					sector_t nr_sects, gfp_t gfp_mask, unsigned long flags,
+					struct bio *parent_bio)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	int type = REQ_WRITE | REQ_DISCARD;
+	unsigned int max_discard_sectors, granularity;
+	int alignment;
+	struct bio *bio;
+	int ret = 0;
+	struct blk_plug plug;
+
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_discard(q))
+		return -EOPNOTSUPP;
+
+	/* Zero-sector (unknown) and one-sector granularities are the same. */
+	granularity = max(q->limits.discard_granularity >> 9, 1U);
+	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
+
+	/*
+	 * Ensure that max_discard_sectors is of the proper
+	 * granularity, so that requests stay aligned after a split.
+	 */
+	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	max_discard_sectors -= max_discard_sectors % granularity;
+	if (unlikely(!max_discard_sectors)) {
+		/* Avoid infinite loop below. Being cautious never hurts. */
+		return -EOPNOTSUPP;
+	}
+
+	if (flags & BLKDEV_DISCARD_SECURE) {
+		if (!blk_queue_secdiscard(q))
+			return -EOPNOTSUPP;
+		type |= REQ_SECURE;
+	}
+
+	blk_start_plug(&plug);
+	while (nr_sects) {
+		unsigned int req_sects;
+		sector_t end_sect, tmp;
+
+		/*
+		 * Required bio_put occurs in bio_endio thanks to bio_chain below
+		 */
+		bio = bio_alloc(gfp_mask, 1);
+		if (!bio) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		req_sects = min_t(sector_t, nr_sects, max_discard_sectors);
+
+		/*
+		 * If splitting a request, and the next starting sector would be
+		 * misaligned, stop the discard at the previous aligned sector.
+		 */
+		end_sect = sector + req_sects;
+		tmp = end_sect;
+		if (req_sects < nr_sects &&
+		    sector_div(tmp, granularity) != alignment) {
+			end_sect = end_sect - alignment;
+			sector_div(end_sect, granularity);
+			end_sect = end_sect * granularity + alignment;
+			req_sects = end_sect - sector;
+		}
+
+		bio_chain(bio, parent_bio);
+
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_bdev = bdev;
+
+		bio->bi_iter.bi_size = req_sects << 9;
+		nr_sects -= req_sects;
+		sector = end_sect;
+
+		submit_bio(type, bio);
+
+		/*
+		 * We can loop for a long time in here, if someone does
+		 * full device discards (like mkfs). Be nice and allow
+		 * us to schedule out to avoid softlocking if preempt
+		 * is disabled.
+		 */
+		cond_resched();
+	}
+	blk_finish_plug(&plug);
+
+	return ret;
+}
+
+static bool block_size_is_power_of_two(struct pool *pool)
+{
+	return pool->sectors_per_block_shift >= 0;
+}
+
+static sector_t block_to_sectors(struct pool *pool, dm_block_t b)
+{
+	return block_size_is_power_of_two(pool) ?
+		(b << pool->sectors_per_block_shift) :
+		(b * pool->sectors_per_block);
+}
+
+static int issue_discard(struct thin_c *tc, dm_block_t data_b, dm_block_t data_e,
+			 struct bio *parent_bio)
+{
+	sector_t s = block_to_sectors(tc->pool, data_b);
+	sector_t len = block_to_sectors(tc->pool, data_e - data_b);
+
+	return __blkdev_issue_discard_async(tc->pool_dev->bdev, s, len,
+					    GFP_NOWAIT, 0, parent_bio);
+}
+
+/*----------------------------------------------------------------*/
+
 /*
  * wake_worker() is used when new work is queued and when pool_resume is
  * ready to continue deferred IO processing.
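
A worked example of the alignment trimming in __blkdev_issue_discard_async() above, using hypothetical queue limits (the numbers are illustrative only and are not taken from the patch or any particular device):

	/*
	 * Suppose discard_granularity is 8 sectors (4KiB), the device's
	 * discard alignment offset works out to alignment = 5, and the
	 * current chunk starts at sector = 10 with req_sects = 192
	 * (already a multiple of the granularity).
	 *
	 *   end_sect = 10 + 192 = 202;  202 % 8 = 2 != 5, so trim:
	 *   end_sect = ((202 - 5) / 8) * 8 + 5 = 24 * 8 + 5 = 197
	 *   req_sects = 197 - 10 = 187
	 *
	 * The chunk now ends at sector 197, which is congruent to the
	 * alignment offset modulo the granularity, so the next sub
	 * discard starts on a discard-aligned boundary of the device.
	 */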
@@ -461,6 +601,7 @@ struct dm_thin_endio_hook {
 	struct dm_deferred_entry *all_io_entry;
 	struct dm_thin_new_mapping *overwrite_mapping;
 	struct rb_node rb_node;
+	struct dm_bio_prison_cell *cell;
 };
 
 static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
@@ -541,11 +682,6 @@ static void error_retry_list(struct pool *pool)
  * target.
  */
 
-static bool block_size_is_power_of_two(struct pool *pool)
-{
-	return pool->sectors_per_block_shift >= 0;
-}
-
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
 	struct pool *pool = tc->pool;
@@ -559,6 +695,34 @@ static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 	return block_nr;
 }
 
+/*
+ * Returns the _complete_ blocks that this bio covers.
+ */
+static void get_bio_block_range(struct thin_c *tc, struct bio *bio,
+				dm_block_t *begin, dm_block_t *end)
+{
+	struct pool *pool = tc->pool;
+	sector_t b = bio->bi_iter.bi_sector;
+	sector_t e = b + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
+
+	b += pool->sectors_per_block - 1ull; /* so we round up */
+
+	if (block_size_is_power_of_two(pool)) {
+		b >>= pool->sectors_per_block_shift;
+		e >>= pool->sectors_per_block_shift;
+	} else {
+		(void) sector_div(b, pool->sectors_per_block);
+		(void) sector_div(e, pool->sectors_per_block);
+	}
+
+	if (e < b)
+		/* Can happen if the bio is within a single block. */
+		e = b;
+
+	*begin = b;
+	*end = e;
+}
+
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 {
 	struct pool *pool = tc->pool;
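
To see the rounding in get_bio_block_range() with concrete, made-up numbers, assume 128-sector (64KiB) pool blocks:

	/*
	 * Discard covering sectors [100, 900):
	 *   begin = (100 + 127) >> 7 = 1,  end = 900 >> 7 = 7
	 *   -> complete blocks [1, 7), i.e. blocks 1..6; the partial
	 *      head (sectors 100..127) and tail (896..899) are left alone.
	 *
	 * Discard covering sectors [200, 250), entirely inside block 1:
	 *   begin = (200 + 127) >> 7 = 2,  end = 250 >> 7 = 1
	 *   -> e < b, so begin == end and the caller simply completes
	 *      the bio without discarding anything.
	 */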
@@ -647,7 +811,7 @@ struct dm_thin_new_mapping {
 	struct list_head list;
 
 	bool pass_discard:1;
-	bool definitely_not_shared:1;
+	bool maybe_shared:1;
 
 	/*
	 * Track quiescing, copying and zeroing preparation actions. When this
@@ -658,9 +822,9 @@ struct dm_thin_new_mapping {
 
 	int err;
 	struct thin_c *tc;
-	dm_block_t virt_block;
+	dm_block_t virt_begin, virt_end;
 	dm_block_t data_block;
-	struct dm_bio_prison_cell *cell, *cell2;
+	struct dm_bio_prison_cell *cell;
 
 	/*
 	 * If the bio covers the whole area of a block then we can avoid
@@ -817,7 +981,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 	 * Any I/O for this block arriving after this point will get
 	 * remapped to it directly.
 	 */
-	r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
+	r = dm_thin_insert_block(tc->td, m->virt_begin, m->data_block);
 	if (r) {
 		metadata_operation_failed(pool, "dm_thin_insert_block", r);
 		cell_error(pool, m->cell);
@@ -844,50 +1008,112 @@ out:
 	mempool_free(m, pool->mapping_pool);
 }
 
-static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
+/*----------------------------------------------------------------*/
+
+static void free_discard_mapping(struct dm_thin_new_mapping *m)
 {
 	struct thin_c *tc = m->tc;
+	if (m->cell)
+		cell_defer_no_holder(tc, m->cell);
+	mempool_free(m, tc->pool->mapping_pool);
+}
 
+static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
+{
 	bio_io_error(m->bio);
+	free_discard_mapping(m);
+}
+
+static void process_prepared_discard_success(struct dm_thin_new_mapping *m)
+{
+	bio_endio(m->bio, 0);
+	free_discard_mapping(m);
+}
+
+static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)
+{
+	int r;
+	struct thin_c *tc = m->tc;
+
+	r = dm_thin_remove_range(tc->td, m->cell->key.block_begin, m->cell->key.block_end);
+	if (r) {
+		metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
+		bio_io_error(m->bio);
+	} else
+		bio_endio(m->bio, 0);
+
 	cell_defer_no_holder(tc, m->cell);
-	cell_defer_no_holder(tc, m->cell2);
 	mempool_free(m, tc->pool->mapping_pool);
 }
 
-static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
+static int passdown_double_checking_shared_status(struct dm_thin_new_mapping *m)
 {
+	/*
+	 * We've already unmapped this range of blocks, but before we
+	 * passdown we have to check that these blocks are now unused.
+	 */
+	int r;
+	bool used = true;
 	struct thin_c *tc = m->tc;
+	struct pool *pool = tc->pool;
+	dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
 
-	inc_all_io_entry(tc->pool, m->bio);
-	cell_defer_no_holder(tc, m->cell);
-	cell_defer_no_holder(tc, m->cell2);
+	while (b != end) {
+		/* find start of unmapped run */
+		for (; b < end; b++) {
+			r = dm_pool_block_is_used(pool->pmd, b, &used);
+			if (r)
+				return r;
 
-	if (m->pass_discard)
-		if (m->definitely_not_shared)
-			remap_and_issue(tc, m->bio, m->data_block);
-		else {
-			bool used = false;
-			if (dm_pool_block_is_used(tc->pool->pmd, m->data_block, &used) || used)
-				bio_endio(m->bio, 0);
-			else
-				remap_and_issue(tc, m->bio, m->data_block);
+			if (!used)
+				break;
 		}
-	else
-		bio_endio(m->bio, 0);
 
-	mempool_free(m, tc->pool->mapping_pool);
+		if (b == end)
+			break;
+
+		/* find end of run */
+		for (e = b + 1; e != end; e++) {
+			r = dm_pool_block_is_used(pool->pmd, e, &used);
+			if (r)
+				return r;
+
+			if (used)
+				break;
+		}
+
+		r = issue_discard(tc, b, e, m->bio);
+		if (r)
+			return r;
+
+		b = e;
+	}
+
+	return 0;
 }
 
-static void process_prepared_discard(struct dm_thin_new_mapping *m)
+static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
 {
 	int r;
 	struct thin_c *tc = m->tc;
+	struct pool *pool = tc->pool;
 
-	r = dm_thin_remove_block(tc->td, m->virt_block);
+	r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
 	if (r)
-		DMERR_LIMIT("dm_thin_remove_block() failed");
+		metadata_operation_failed(pool, "dm_thin_remove_range", r);
+
+	else if (m->maybe_shared)
+		r = passdown_double_checking_shared_status(m);
+	else
+		r = issue_discard(tc, m->data_block, m->data_block + (m->virt_end - m->virt_begin), m->bio);
 
-	process_prepared_discard_passdown(m);
+	/*
	 * Even if r is set, there could be sub discards in flight that we
	 * need to wait for.
	 */
+	bio_endio(m->bio, r);
+	cell_defer_no_holder(tc, m->cell);
+	mempool_free(m, pool->mapping_pool);
 }
 
 static void process_prepared(struct pool *pool, struct list_head *head,
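
As a concrete (made-up) illustration of the run splitting in passdown_double_checking_shared_status():

	/*
	 * Say the mapping covered data blocks 100..104 (end = 105) and,
	 * after dm_thin_remove_range(), dm_pool_block_is_used() reports:
	 *
	 *   block:  100  101  102  103  104
	 *   used:   yes  no   no   yes  no
	 *
	 * The loop then issues two sub discards chained to m->bio:
	 *   issue_discard(tc, 101, 103, m->bio);  (blocks 101-102)
	 *   issue_discard(tc, 104, 105, m->bio);  (block 104)
	 * Blocks 100 and 103, still referenced elsewhere, are skipped.
	 */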
@@ -971,7 +1197,7 @@ static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
 }
 
 static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
-				      dm_block_t data_block,
+				      dm_block_t data_begin,
 				      struct dm_thin_new_mapping *m)
 {
 	struct pool *pool = tc->pool;
@@ -981,7 +1207,7 @@ static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
 	m->bio = bio;
 	save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
 	inc_all_io_entry(pool, bio);
-	remap_and_issue(tc, bio, data_block);
+	remap_and_issue(tc, bio, data_begin);
 }
 
 /*
@@ -998,7 +1224,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
 	m->tc = tc;
-	m->virt_block = virt_block;
+	m->virt_begin = virt_block;
+	m->virt_end = virt_block + 1u;
 	m->data_block = data_dest;
 	m->cell = cell;
 
@@ -1077,7 +1304,8 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 
 	atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
 	m->tc = tc;
-	m->virt_block = virt_block;
+	m->virt_begin = virt_block;
+	m->virt_end = virt_block + 1u;
 	m->data_block = data_block;
 	m->cell = cell;
 
@@ -1284,99 +1512,149 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
 		retry_on_resume(bio);
 }
 
-static void process_discard_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+static void process_discard_cell_no_passdown(struct thin_c *tc,
+					     struct dm_bio_prison_cell *virt_cell)
 {
-	int r;
-	struct bio *bio = cell->holder;
 	struct pool *pool = tc->pool;
-	struct dm_bio_prison_cell *cell2;
-	struct dm_cell_key key2;
-	dm_block_t block = get_bio_block(tc, bio);
-	struct dm_thin_lookup_result lookup_result;
-	struct dm_thin_new_mapping *m;
+	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
-	if (tc->requeue_mode) {
-		cell_requeue(pool, cell);
-		return;
-	}
+	/*
	 * We don't need to lock the data blocks, since there's no
	 * passdown. We only lock data blocks for allocation and breaking sharing.
	 */
+	m->tc = tc;
+	m->virt_begin = virt_cell->key.block_begin;
+	m->virt_end = virt_cell->key.block_end;
+	m->cell = virt_cell;
+	m->bio = virt_cell->holder;
 
-	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
-	switch (r) {
-	case 0:
-		/*
-		 * Check nobody is fiddling with this pool block. This can
-		 * happen if someone's in the process of breaking sharing
-		 * on this block.
-		 */
-		build_data_key(tc->td, lookup_result.block, &key2);
-		if (bio_detain(tc->pool, &key2, bio, &cell2)) {
-			cell_defer_no_holder(tc, cell);
-			break;
-		}
+	if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
+		pool->process_prepared_discard(m);
+}
 
-		if (io_overlaps_block(pool, bio)) {
-			/*
-			 * IO may still be going to the destination block. We must
-			 * quiesce before we can do the removal.
-			 */
-			m = get_next_mapping(pool);
-			m->tc = tc;
-			m->pass_discard = pool->pf.discard_passdown;
-			m->definitely_not_shared = !lookup_result.shared;
-			m->virt_block = block;
-			m->data_block = lookup_result.block;
-			m->cell = cell;
-			m->cell2 = cell2;
-			m->bio = bio;
-
-			if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
-				pool->process_prepared_discard(m);
+/*
+ * FIXME: DM local hack to defer parent bios's end_io until we
+ * _know_ all chained sub range discard bios have completed.
+ * Will go away once late bio splitting lands upstream!
+ */
+static inline void __bio_inc_remaining(struct bio *bio)
+{
+	bio->bi_flags |= (1 << BIO_CHAIN);
+	smp_mb__before_atomic();
+	atomic_inc(&bio->__bi_remaining);
+}
 
-		} else {
-			inc_all_io_entry(pool, bio);
-			cell_defer_no_holder(tc, cell);
-			cell_defer_no_holder(tc, cell2);
+static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t end,
+				 struct bio *bio)
+{
+	struct pool *pool = tc->pool;
+
+	int r;
+	bool maybe_shared;
+	struct dm_cell_key data_key;
+	struct dm_bio_prison_cell *data_cell;
+	struct dm_thin_new_mapping *m;
+	dm_block_t virt_begin, virt_end, data_begin;
+
+	while (begin != end) {
+		r = ensure_next_mapping(pool);
+		if (r)
+			/* we did our best */
+			return;
 
+		r = dm_thin_find_mapped_range(tc->td, begin, end, &virt_begin, &virt_end,
+					      &data_begin, &maybe_shared);
+		if (r)
 			/*
-			 * The DM core makes sure that the discard doesn't span
-			 * a block boundary. So we submit the discard of a
-			 * partial block appropriately.
+			 * Silently fail, letting any mappings we've
+			 * created complete.
 			 */
-			if ((!lookup_result.shared) && pool->pf.discard_passdown)
-				remap_and_issue(tc, bio, lookup_result.block);
-			else
-				bio_endio(bio, 0);
+			break;
+
+		build_key(tc->td, PHYSICAL, data_begin, data_begin + (virt_end - virt_begin), &data_key);
+		if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) {
+			/* contention, we'll give up with this range */
+			begin = virt_end;
+			continue;
 		}
-		break;
 
-	case -ENODATA:
 		/*
-		 * It isn't provisioned, just forget it.
+		 * IO may still be going to the destination block. We must
+		 * quiesce before we can do the removal.
 		 */
-		cell_defer_no_holder(tc, cell);
-		bio_endio(bio, 0);
-		break;
+		m = get_next_mapping(pool);
+		m->tc = tc;
+		m->maybe_shared = maybe_shared;
+		m->virt_begin = virt_begin;
+		m->virt_end = virt_end;
+		m->data_block = data_begin;
+		m->cell = data_cell;
+		m->bio = bio;
 
-	default:
-		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
-			    __func__, r);
-		cell_defer_no_holder(tc, cell);
-		bio_io_error(bio);
-		break;
+		/*
		 * The parent bio must not complete before sub discard bios are
		 * chained to it (see __blkdev_issue_discard_async's bio_chain)!
		 *
		 * This per-mapping bi_remaining increment is paired with
		 * the implicit decrement that occurs via bio_endio() in
		 * process_prepared_discard_{passdown,no_passdown}.
		 */
+		__bio_inc_remaining(bio);
+		if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
+			pool->process_prepared_discard(m);
+
+		begin = virt_end;
 	}
 }
 
+static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_prison_cell *virt_cell)
+{
+	struct bio *bio = virt_cell->holder;
+	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
+
+	/*
	 * The virt_cell will only get freed once the origin bio completes.
	 * This means it will remain locked while all the individual
	 * passdown bios are in flight.
	 */
+	h->cell = virt_cell;
+	break_up_discard_bio(tc, virt_cell->key.block_begin, virt_cell->key.block_end, bio);
+
+	/*
	 * We complete the bio now, knowing that the bi_remaining field
	 * will prevent completion until the sub range discards have
	 * completed.
	 */
+	bio_endio(bio, 0);
+}
+
 static void process_discard_bio(struct thin_c *tc, struct bio *bio)
 {
-	struct dm_bio_prison_cell *cell;
-	struct dm_cell_key key;
-	dm_block_t block = get_bio_block(tc, bio);
+	dm_block_t begin, end;
+	struct dm_cell_key virt_key;
+	struct dm_bio_prison_cell *virt_cell;
 
-	build_virtual_key(tc->td, block, &key);
-	if (bio_detain(tc->pool, &key, bio, &cell))
+	get_bio_block_range(tc, bio, &begin, &end);
+	if (begin == end) {
+		/*
		 * The discard covers less than a block.
		 */
+		bio_endio(bio, 0);
 		return;
+	}
 
-	process_discard_cell(tc, cell);
+	build_key(tc->td, VIRTUAL, begin, end, &virt_key);
+	if (bio_detain(tc->pool, &virt_key, bio, &virt_cell))
+		/*
		 * Potential starvation issue: We're relying on the
		 * fs/application being well behaved, and not trying to
		 * send IO to a region at the same time as discarding it.
		 * If they do this persistently then it's possible this
		 * cell will never be granted.
		 */
+		return;
+
+	tc->pool->process_discard_cell(tc, virt_cell);
 }
 
 static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
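
A rough sketch of the parent bio accounting for a passdown discard whose virtual range maps to two provisioned runs (illustrative counts; __bi_remaining starts at 1 for a submitted bio, and each chained sub discard from __blkdev_issue_discard_async() takes and releases its own reference via bio_chain()):

	/*
	 *   process_discard_cell_passdown()
	 *     break_up_discard_bio()             __bi_remaining: 1
	 *       __bio_inc_remaining() x2   ->                    3
	 *     bio_endio(bio, 0)            ->                    2  (no completion yet)
	 *   process_prepared_discard_passdown() #1
	 *     bio_endio(m->bio, r)         ->                    1
	 *   process_prepared_discard_passdown() #2
	 *     bio_endio(m->bio, r)         ->                    0  -> parent completes,
	 *                                        thin_endio() then releases h->cell
	 */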
@@ -2092,6 +2370,24 @@ static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
 	       dm_device_name(pool->pool_md), new_mode);
 }
 
+static bool passdown_enabled(struct pool_c *pt)
+{
+	return pt->adjusted_pf.discard_passdown;
+}
+
+static void set_discard_callbacks(struct pool *pool)
+{
+	struct pool_c *pt = pool->ti->private;
+
+	if (passdown_enabled(pt)) {
+		pool->process_discard_cell = process_discard_cell_passdown;
+		pool->process_prepared_discard = process_prepared_discard_passdown;
+	} else {
+		pool->process_discard_cell = process_discard_cell_no_passdown;
+		pool->process_prepared_discard = process_prepared_discard_no_passdown;
+	}
+}
+
 static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 {
 	struct pool_c *pt = pool->ti->private;
@@ -2143,7 +2439,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		pool->process_cell = process_cell_read_only;
 		pool->process_discard_cell = process_cell_success;
 		pool->process_prepared_mapping = process_prepared_mapping_fail;
-		pool->process_prepared_discard = process_prepared_discard_passdown;
+		pool->process_prepared_discard = process_prepared_discard_success;
 
 		error_retry_list(pool);
 		break;
@@ -2162,9 +2458,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_discard_bio;
 		pool->process_cell = process_cell_read_only;
-		pool->process_discard_cell = process_discard_cell;
 		pool->process_prepared_mapping = process_prepared_mapping;
-		pool->process_prepared_discard = process_prepared_discard;
+		set_discard_callbacks(pool);
 
 		if (!pool->pf.error_if_no_space && no_space_timeout)
 			queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
@@ -2177,9 +2472,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		pool->process_bio = process_bio;
 		pool->process_discard = process_discard_bio;
 		pool->process_cell = process_cell;
-		pool->process_discard_cell = process_discard_cell;
 		pool->process_prepared_mapping = process_prepared_mapping;
-		pool->process_prepared_discard = process_prepared_discard;
+		set_discard_callbacks(pool);
 		break;
 	}
 
@@ -2268,6 +2562,7 @@ static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
 	h->shared_read_entry = NULL;
 	h->all_io_entry = NULL;
 	h->overwrite_mapping = NULL;
+	h->cell = NULL;
 }
 
 /*
@@ -2415,7 +2710,6 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
 	struct pool *pool = pt->pool;
 	struct block_device *data_bdev = pt->data_dev->bdev;
 	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
-	sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
 	const char *reason = NULL;
 	char buf[BDEVNAME_SIZE];
 
@@ -2428,12 +2722,6 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
 	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
 		reason = "max discard sectors smaller than a block";
 
-	else if (data_limits->discard_granularity > block_size)
-		reason = "discard granularity larger than a block";
-
-	else if (!is_factor(block_size, data_limits->discard_granularity))
-		reason = "discard granularity not a factor of block size";
-
 	if (reason) {
 		DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
 		pt->adjusted_pf.discard_passdown = false;
@@ -3566,24 +3854,6 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
-static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
-{
-	struct pool *pool = pt->pool;
-	struct queue_limits *data_limits;
-
-	limits->max_discard_sectors = pool->sectors_per_block;
-
-	/*
-	 * discard_granularity is just a hint, and not enforced.
-	 */
-	if (pt->adjusted_pf.discard_passdown) {
-		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
-		limits->discard_granularity = max(data_limits->discard_granularity,
-						  pool->sectors_per_block << SECTOR_SHIFT);
-	} else
-		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
-}
-
 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct pool_c *pt = ti->private;
@@ -3638,14 +3908,17 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 	disable_passdown_if_not_supported(pt);
 
-	set_discard_limits(pt, limits);
+	/*
	 * The pool uses the same discard limits as the underlying data
	 * device. DM core has already set this up.
	 */
 }
 
 static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 14, 0},
+	.version = {1, 15, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3804,8 +4077,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
 		ti->num_discard_bios = 1;
-		/* Discard bios must be split on a block boundary */
-		ti->split_discard_bios = true;
+		ti->split_discard_bios = false;
 	}
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
@@ -3892,6 +4164,9 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 		}
 	}
 
+	if (h->cell)
+		cell_defer_no_holder(h->tc, h->cell);
+
 	return 0;
 }
 
@@ -4019,9 +4294,18 @@ static int thin_iterate_devices(struct dm_target *ti,
 	return 0;
 }
 
+static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
+
+	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
+	limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */
+}
+
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 14, 0},
+	.version = {1, 15, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
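
For reference, the limits set in thin_io_hints() work out as follows (assuming, for illustration, a 128-sector pool block; the granularity is simply whatever block size the pool was created with):

	/*
	 * discard_granularity = 128 << 9 = 64KiB (one pool block), and
	 * max_discard_sectors = 2048 * 1024 * 16 sectors
	 *                     = 2048 sectors/MiB * 1024 * 16
	 *                     = 16 GiB per discard bio,
	 * now that the target splits discards itself rather than relying
	 * on ti->split_discard_bios.
	 */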
@@ -4033,6 +4317,7 @@ static struct target_type thin_target = {
 	.status = thin_status,
 	.merge = thin_merge,
 	.iterate_devices = thin_iterate_devices,
+	.io_hints = thin_io_hints,
 };
 
 /*----------------------------------------------------------------*/