@@ -106,14 +106,6 @@ struct dm_rq_clone_bio_info {
 	struct bio clone;
 };
 
-union map_info *dm_get_rq_mapinfo(struct request *rq)
-{
-	if (rq && rq->end_io_data)
-		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
-
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -128,29 +120,19 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_DEFERRED_REMOVE 6
 #define DMF_SUSPENDED_INTERNALLY 7
 
-/*
- * A dummy definition to make RCU happy.
- * struct dm_table should never be dereferenced in this file.
- */
-struct dm_table {
-	int undefined__;
-};
-
 /*
  * Work processed by per-device workqueue.
  */
 struct mapped_device {
 	struct srcu_struct io_barrier;
 	struct mutex suspend_lock;
-	atomic_t holders;
-	atomic_t open_count;
 
 	/*
-	 * The current mapping.
+	 * The current mapping (struct dm_table *).
	 * Use dm_get_live_table{_fast} or take suspend_lock for
	 * dereference.
	 */
-	struct dm_table __rcu *map;
+	void __rcu *map;
 
 	struct list_head table_devices;
 	struct mutex table_devices_lock;
@@ -158,10 +140,16 @@ struct mapped_device {
 	unsigned long flags;
 
 	struct request_queue *queue;
+	int numa_node_id;
+
 	unsigned type;
 	/* Protect queue and type against concurrent access. */
 	struct mutex type_lock;
 
+	atomic_t holders;
+	atomic_t open_count;
+
+	struct dm_target *immutable_target;
 	struct target_type *immutable_target_type;
 
 	struct gendisk *disk;
@@ -175,8 +163,20 @@ struct mapped_device {
 	atomic_t pending[2];
 	wait_queue_head_t wait;
 	struct work_struct work;
-	struct bio_list deferred;
 	spinlock_t deferred_lock;
+	struct bio_list deferred;
+
+	/*
+	 * Event handling.
+	 */
+	wait_queue_head_t eventq;
+	atomic_t event_nr;
+	atomic_t uevent_seq;
+	struct list_head uevent_list;
+	spinlock_t uevent_lock; /* Protect access to uevent_list */
+
+	/* the number of internal suspends */
+	unsigned internal_suspend_count;
 
 	/*
	 * Processing queue (flush)
@@ -191,33 +191,22 @@ struct mapped_device {
 
 	struct bio_set *bs;
 
-	/*
-	 * Event handling.
-	 */
-	atomic_t event_nr;
-	wait_queue_head_t eventq;
-	atomic_t uevent_seq;
-	struct list_head uevent_list;
-	spinlock_t uevent_lock; /* Protect access to uevent_list */
-
 	/*
	 * freeze/thaw support require holding onto a super block
	 */
 	struct super_block *frozen_sb;
-	struct block_device *bdev;
 
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
+	struct block_device *bdev;
+
 	/* kobject and completion */
 	struct dm_kobject_holder kobj_holder;
 
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
 
-	/* the number of internal suspends */
-	unsigned internal_suspend_count;
-
 	struct dm_stats stats;
 
 	struct kthread_worker kworker;
@@ -230,8 +219,9 @@ struct mapped_device {
 	ktime_t last_rq_start_time;
 
 	/* for blk-mq request-based DM support */
-	struct blk_mq_tag_set tag_set;
-	bool use_blk_mq;
+	struct blk_mq_tag_set *tag_set;
+	bool use_blk_mq:1;
+	bool init_tio_pdu:1;
 };
 
 #ifdef CONFIG_DM_MQ_DEFAULT
@@ -240,10 +230,19 @@ static bool use_blk_mq = true;
 static bool use_blk_mq = false;
 #endif
 
+#define DM_MQ_NR_HW_QUEUES 1
+#define DM_MQ_QUEUE_DEPTH 2048
+#define DM_NUMA_NODE NUMA_NO_NODE
+
+static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
+static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
+static int dm_numa_node = DM_NUMA_NODE;
+
 bool dm_use_blk_mq(struct mapped_device *md)
 {
 	return md->use_blk_mq;
 }
+EXPORT_SYMBOL_GPL(dm_use_blk_mq);
 
 /*
  * For mempools pre-allocation at the table loading time.
@@ -277,6 +276,27 @@ static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
  */
 static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
 
+static int __dm_get_module_param_int(int *module_param, int min, int max)
+{
+	int param = ACCESS_ONCE(*module_param);
+	int modified_param = 0;
+	bool modified = true;
+
+	if (param < min)
+		modified_param = min;
+	else if (param > max)
+		modified_param = max;
+	else
+		modified = false;
+
+	if (modified) {
+		(void)cmpxchg(module_param, param, modified_param);
+		param = modified_param;
+	}
+
+	return param;
+}
+
 static unsigned __dm_get_module_param(unsigned *module_param,
				      unsigned def, unsigned max)
 {
@@ -310,6 +330,23 @@ unsigned dm_get_reserved_rq_based_ios(void)
 }
 EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
 
+static unsigned dm_get_blk_mq_nr_hw_queues(void)
+{
+	return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
+}
+
+static unsigned dm_get_blk_mq_queue_depth(void)
+{
+	return __dm_get_module_param(&dm_mq_queue_depth,
+				     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
+}
+
+static unsigned dm_get_numa_node(void)
+{
+	return __dm_get_module_param_int(&dm_numa_node,
+					 DM_NUMA_NODE, num_online_nodes() - 1);
+}
+
 static int __init local_init(void)
 {
 	int r = -ENOMEM;
@@ -323,7 +360,7 @@ static int __init local_init(void)
 	if (!_rq_tio_cache)
 		goto out_free_io_cache;
 
-	_rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
+	_rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request),
				      __alignof__(struct request), 0, NULL);
 	if (!_rq_cache)
 		goto out_free_rq_tio_cache;
@@ -556,16 +593,17 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return dm_get_geometry(md, geo);
 }
 
-static int dm_get_live_table_for_ioctl(struct mapped_device *md,
-		struct dm_target **tgt, struct block_device **bdev,
-		fmode_t *mode, int *srcu_idx)
+static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
+				  struct block_device **bdev,
+				  fmode_t *mode)
 {
+	struct dm_target *tgt;
 	struct dm_table *map;
-	int r;
+	int srcu_idx, r;
 
 retry:
 	r = -ENOTTY;
-	map = dm_get_live_table(md, srcu_idx);
+	map = dm_get_live_table(md, &srcu_idx);
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
@@ -573,9 +611,8 @@ retry:
 	if (dm_table_get_num_targets(map) != 1)
 		goto out;
 
-	*tgt = dm_table_get_target(map, 0);
-
-	if (!(*tgt)->type->prepare_ioctl)
+	tgt = dm_table_get_target(map, 0);
+	if (!tgt->type->prepare_ioctl)
 		goto out;
 
 	if (dm_suspended_md(md)) {
@@ -583,14 +620,16 @@
 		goto out;
 	}
 
-	r = (*tgt)->type->prepare_ioctl(*tgt, bdev, mode);
+	r = tgt->type->prepare_ioctl(tgt, bdev, mode);
 	if (r < 0)
 		goto out;
 
+	bdgrab(*bdev);
+	dm_put_live_table(md, srcu_idx);
 	return r;
 
 out:
-	dm_put_live_table(md, *srcu_idx);
+	dm_put_live_table(md, srcu_idx);
 	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
 		msleep(10);
 		goto retry;
@@ -602,11 +641,9 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
-	struct dm_target *tgt;
-	struct block_device *tgt_bdev = NULL;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &tgt_bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -621,9 +658,9 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 		goto out;
 	}
 
-	r = __blkdev_driver_ioctl(tgt_bdev, mode, cmd, arg);
+	r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 out:
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -642,24 +679,24 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 	bio_put(&tio->clone);
 }
 
-static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
-					    gfp_t gfp_mask)
+static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
						gfp_t gfp_mask)
 {
 	return mempool_alloc(md->io_pool, gfp_mask);
 }
 
-static void free_rq_tio(struct dm_rq_target_io *tio)
+static void free_old_rq_tio(struct dm_rq_target_io *tio)
 {
 	mempool_free(tio, tio->md->io_pool);
 }
 
-static struct request *alloc_clone_request(struct mapped_device *md,
-					   gfp_t gfp_mask)
+static struct request *alloc_old_clone_request(struct mapped_device *md,
+					       gfp_t gfp_mask)
 {
 	return mempool_alloc(md->rq_pool, gfp_mask);
 }
 
-static void free_clone_request(struct mapped_device *md, struct request *rq)
+static void free_old_clone_request(struct mapped_device *md, struct request *rq)
 {
 	mempool_free(rq, md->rq_pool);
 }
@@ -827,7 +864,7 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
 	mutex_lock(&md->table_devices_lock);
 	td = find_table_device(&md->table_devices, dev, mode);
 	if (!td) {
-		td = kmalloc(sizeof(*td), GFP_KERNEL);
+		td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
 		if (!td) {
 			mutex_unlock(&md->table_devices_lock);
 			return -ENOMEM;
@@ -1109,12 +1146,8 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
	 * back into ->request_fn() could deadlock attempting to grab the
	 * queue lock again.
	 */
-	if (run_queue) {
-		if (md->queue->mq_ops)
-			blk_mq_run_hw_queues(md->queue, true);
-		else
-			blk_run_queue_async(md->queue);
-	}
+	if (!md->queue->mq_ops && run_queue)
+		blk_run_queue_async(md->queue);
 
 	/*
	 * dm_put() must be at the end of this function. See the comment above
@@ -1134,15 +1167,10 @@ static void free_rq_clone(struct request *clone)
 		tio->ti->type->release_clone_rq(clone);
 	else if (!md->queue->mq_ops)
 		/* request_fn queue stacked on request_fn queue(s) */
-		free_clone_request(md, clone);
-	/*
-	 * NOTE: for the blk-mq queue stacked on request_fn queue(s) case:
-	 * no need to call free_clone_request() because we leverage blk-mq by
-	 * allocating the clone at the end of the blk-mq pdu (see: clone_rq)
-	 */
+		free_old_clone_request(md, clone);
 
 	if (!md->queue->mq_ops)
-		free_rq_tio(tio);
+		free_old_rq_tio(tio);
 }
 
 /*
@@ -1192,13 +1220,13 @@ static void dm_unprep_request(struct request *rq)
 	if (clone)
 		free_rq_clone(clone);
 	else if (!tio->md->queue->mq_ops)
-		free_rq_tio(tio);
+		free_old_rq_tio(tio);
 }
 
 /*
  * Requeue the original request of a clone.
  */
-static void old_requeue_request(struct request *rq)
+static void dm_old_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
@@ -1209,45 +1237,57 @@ static void old_requeue_request(struct request *rq)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+static void dm_mq_requeue_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	unsigned long flags;
+
+	blk_mq_requeue_request(rq);
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!blk_queue_stopped(q))
+		blk_mq_kick_requeue_list(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
 static void dm_requeue_original_request(struct mapped_device *md,
					struct request *rq)
 {
 	int rw = rq_data_dir(rq);
 
+	rq_end_stats(md, rq);
 	dm_unprep_request(rq);
 
-	rq_end_stats(md, rq);
 	if (!rq->q->mq_ops)
-		old_requeue_request(rq);
-	else {
-		blk_mq_requeue_request(rq);
-		blk_mq_kick_requeue_list(rq->q);
-	}
+		dm_old_requeue_request(rq);
+	else
+		dm_mq_requeue_request(rq);
 
 	rq_completed(md, rw, false);
 }
 
-static void old_stop_queue(struct request_queue *q)
+static void dm_old_stop_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
-	if (blk_queue_stopped(q))
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (blk_queue_stopped(q)) {
+		spin_unlock_irqrestore(q->queue_lock, flags);
 		return;
+	}
 
-	spin_lock_irqsave(q->queue_lock, flags);
 	blk_stop_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void stop_queue(struct request_queue *q)
+static void dm_stop_queue(struct request_queue *q)
 {
 	if (!q->mq_ops)
-		old_stop_queue(q);
+		dm_old_stop_queue(q);
 	else
 		blk_mq_stop_hw_queues(q);
 }
 
-static void old_start_queue(struct request_queue *q)
+static void dm_old_start_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
@@ -1257,12 +1297,14 @@ static void old_start_queue(struct request_queue *q)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void start_queue(struct request_queue *q)
+static void dm_start_queue(struct request_queue *q)
 {
 	if (!q->mq_ops)
-		old_start_queue(q);
-	else
+		dm_old_start_queue(q);
+	else {
 		blk_mq_start_stopped_hw_queues(q, true);
+		blk_mq_kick_requeue_list(q);
+	}
 }
 
 static void dm_done(struct request *clone, int error, bool mapped)
@@ -1313,7 +1355,7 @@ static void dm_softirq_done(struct request *rq)
 	if (!rq->q->mq_ops) {
 		blk_end_request_all(rq, tio->error);
 		rq_completed(tio->md, rw, false);
-		free_rq_tio(tio);
+		free_old_rq_tio(tio);
 	} else {
 		blk_mq_end_request(rq, tio->error);
 		rq_completed(tio->md, rw, false);
@@ -1336,7 +1378,10 @@ static void dm_complete_request(struct request *rq, int error)
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
 	tio->error = error;
-	blk_complete_request(rq);
+	if (!rq->q->mq_ops)
+		blk_complete_request(rq);
+	else
+		blk_mq_complete_request(rq, error);
 }
 
 /*
@@ -1352,7 +1397,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 }
 
 /*
- * Called with the clone's queue lock held (for non-blk-mq)
+ * Called with the clone's queue lock held (in the case of .request_fn)
  */
 static void end_clone_request(struct request *clone, int error)
 {
@@ -1522,21 +1567,26 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
 /*
  * Creates a bio that consists of range of complete bvecs.
  */
-static void clone_bio(struct dm_target_io *tio, struct bio *bio,
-		      sector_t sector, unsigned len)
+static int clone_bio(struct dm_target_io *tio, struct bio *bio,
+		     sector_t sector, unsigned len)
 {
 	struct bio *clone = &tio->clone;
 
 	__bio_clone_fast(clone, bio);
 
-	if (bio_integrity(bio))
-		bio_integrity_clone(clone, bio, GFP_NOIO);
+	if (bio_integrity(bio)) {
+		int r = bio_integrity_clone(clone, bio, GFP_NOIO);
+		if (r < 0)
+			return r;
+	}
 
 	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
 	clone->bi_iter.bi_size = to_bytes(len);
 
 	if (bio_integrity(bio))
 		bio_integrity_trim(clone, 0, len);
+
+	return 0;
 }
 
 static struct dm_target_io *alloc_tio(struct clone_info *ci,
@@ -1593,13 +1643,14 @@ static int __send_empty_flush(struct clone_info *ci)
 	return 0;
 }
 
-static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
+static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
				     sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
 	unsigned target_bio_nr;
 	unsigned num_target_bios = 1;
+	int r = 0;
 
 	/*
	 * Does the target want to receive duplicate copies of the bio?
@@ -1610,9 +1661,13 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
 	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
 		tio = alloc_tio(ci, ti, target_bio_nr);
 		tio->len_ptr = len;
-		clone_bio(tio, bio, sector, *len);
+		r = clone_bio(tio, bio, sector, *len);
+		if (r < 0)
+			break;
 		__map_bio(tio);
 	}
+
+	return r;
 }
 
 typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
@@ -1689,6 +1744,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 	struct bio *bio = ci->bio;
 	struct dm_target *ti;
 	unsigned len;
+	int r;
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD))
 		return __send_discard(ci);
@@ -1701,7 +1757,9 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 
 	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
-	__clone_and_map_data_bio(ci, ti, ci->sector, &len);
+	r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
+	if (r < 0)
+		return r;
 
 	ci->sector += len;
 	ci->sector_count -= len;
@@ -1839,28 +1897,22 @@ static int setup_clone(struct request *clone, struct request *rq,
 	return 0;
 }
 
-static struct request *clone_rq(struct request *rq, struct mapped_device *md,
-				struct dm_rq_target_io *tio, gfp_t gfp_mask)
+static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
+				    struct dm_rq_target_io *tio, gfp_t gfp_mask)
 {
 	/*
-	 * Do not allocate a clone if tio->clone was already set
-	 * (see: dm_mq_queue_rq).
+	 * Create clone for use with .request_fn request_queue
	 */
-	bool alloc_clone = !tio->clone;
 	struct request *clone;
 
-	if (alloc_clone) {
-		clone = alloc_clone_request(md, gfp_mask);
-		if (!clone)
-			return NULL;
-	} else
-		clone = tio->clone;
+	clone = alloc_old_clone_request(md, gfp_mask);
+	if (!clone)
+		return NULL;
 
 	blk_rq_init(NULL, clone);
 	if (setup_clone(clone, rq, tio, gfp_mask)) {
 		/* -ENOMEM */
-		if (alloc_clone)
-			free_clone_request(md, clone);
+		free_old_clone_request(md, clone);
 		return NULL;
 	}
 
@@ -1877,29 +1929,40 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
 	tio->clone = NULL;
 	tio->orig = rq;
 	tio->error = 0;
-	memset(&tio->info, 0, sizeof(tio->info));
+	/*
+	 * Avoid initializing info for blk-mq; it passes
+	 * target-specific data through info.ptr
+	 * (see: dm_mq_init_request)
+	 */
+	if (!md->init_tio_pdu)
+		memset(&tio->info, 0, sizeof(tio->info));
 	if (md->kworker_task)
 		init_kthread_work(&tio->work, map_tio_request);
 }
 
-static struct dm_rq_target_io *prep_tio(struct request *rq,
-					struct mapped_device *md, gfp_t gfp_mask)
+static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
+					       struct mapped_device *md,
+					       gfp_t gfp_mask)
 {
 	struct dm_rq_target_io *tio;
 	int srcu_idx;
 	struct dm_table *table;
 
-	tio = alloc_rq_tio(md, gfp_mask);
+	tio = alloc_old_rq_tio(md, gfp_mask);
 	if (!tio)
 		return NULL;
 
 	init_tio(tio, rq, md);
 
 	table = dm_get_live_table(md, &srcu_idx);
+	/*
+	 * Must clone a request if this .request_fn DM device
+	 * is stacked on .request_fn device(s).
+	 */
 	if (!dm_table_mq_request_based(table)) {
-		if (!clone_rq(rq, md, tio, gfp_mask)) {
+		if (!clone_old_rq(rq, md, tio, gfp_mask)) {
 			dm_put_live_table(md, srcu_idx);
-			free_rq_tio(tio);
+			free_old_rq_tio(tio);
 			return NULL;
 		}
 	}
@@ -1911,7 +1974,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 /*
  * Called with the queue lock held.
  */
-static int dm_prep_fn(struct request_queue *q, struct request *rq)
+static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
 {
 	struct mapped_device *md = q->queuedata;
 	struct dm_rq_target_io *tio;
@@ -1921,7 +1984,7 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 		return BLKPREP_KILL;
 	}
 
-	tio = prep_tio(rq, md, GFP_ATOMIC);
+	tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
 	if (!tio)
 		return BLKPREP_DEFER;
 
@@ -2079,12 +2142,18 @@ static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
 static void dm_request_fn(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	int srcu_idx;
-	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-	struct dm_target *ti;
+	struct dm_target *ti = md->immutable_target;
 	struct request *rq;
 	struct dm_rq_target_io *tio;
-	sector_t pos;
+	sector_t pos = 0;
+
+	if (unlikely(!ti)) {
+		int srcu_idx;
+		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+
+		ti = dm_table_find_target(map, pos);
+		dm_put_live_table(md, srcu_idx);
+	}
 
 	/*
	 * For suspend, check blk_queue_stopped() and increment
@@ -2095,33 +2164,21 @@ static void dm_request_fn(struct request_queue *q)
 	while (!blk_queue_stopped(q)) {
 		rq = blk_peek_request(q);
 		if (!rq)
-			goto out;
+			return;
 
 		/* always use block 0 to find the target for flushes for now */
 		pos = 0;
 		if (!(rq->cmd_flags & REQ_FLUSH))
 			pos = blk_rq_pos(rq);
 
-		ti = dm_table_find_target(map, pos);
-		if (!dm_target_is_valid(ti)) {
-			/*
-			 * Must perform setup, that rq_completed() requires,
-			 * before calling dm_kill_unmapped_request
-			 */
-			DMERR_LIMIT("request attempted access beyond the end of device");
-			dm_start_request(md, rq);
-			dm_kill_unmapped_request(rq, -EIO);
-			continue;
+		if ((dm_request_peeked_before_merge_deadline(md) &&
+		     md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
+		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
+		    (ti->type->busy && ti->type->busy(ti))) {
+			blk_delay_queue(q, HZ / 100);
+			return;
 		}
 
-		if (dm_request_peeked_before_merge_deadline(md) &&
-		    md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
-		    md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
-			goto delay_and_out;
-
-		if (ti->type->busy && ti->type->busy(ti))
-			goto delay_and_out;
-
 		dm_start_request(md, rq);
 
 		tio = tio_from_request(rq);
@@ -2130,13 +2187,6 @@ static void dm_request_fn(struct request_queue *q)
 		queue_kthread_work(&md->kworker, &tio->work);
 		BUG_ON(!irqs_disabled());
 	}
-
-	goto out;
-
-delay_and_out:
-	blk_delay_queue(q, HZ / 100);
-out:
-	dm_put_live_table(md, srcu_idx);
 }
 
 static int dm_any_congested(void *congested_data, int bdi_bits)
@@ -2146,19 +2196,18 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct dm_table *map;
 
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		map = dm_get_live_table_fast(md);
-		if (map) {
+		if (dm_request_based(md)) {
 			/*
-			 * Request-based dm cares about only own queue for
-			 * the query about congestion status of request_queue
+			 * With request-based DM we only need to check the
+			 * top-level queue for congestion.
			 */
-			if (dm_request_based(md))
-				r = md->queue->backing_dev_info.wb.state &
-				    bdi_bits;
-			else
+			r = md->queue->backing_dev_info.wb.state & bdi_bits;
+		} else {
+			map = dm_get_live_table_fast(md);
+			if (map)
				r = dm_table_any_congested(map, bdi_bits);
+			dm_put_live_table_fast(md);
 		}
-		dm_put_live_table_fast(md);
 	}
 
 	return r;
@@ -2238,7 +2287,7 @@ static void dm_init_md_queue(struct mapped_device *md)
 	md->queue->backing_dev_info.congested_data = md;
 }
 
-static void dm_init_old_md_queue(struct mapped_device *md)
+static void dm_init_normal_md_queue(struct mapped_device *md)
 {
 	md->use_blk_mq = false;
 	dm_init_md_queue(md);
@@ -2285,10 +2334,11 @@ static void cleanup_mapped_device(struct mapped_device *md)
  */
 static struct mapped_device *alloc_dev(int minor)
 {
-	int r;
-	struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
+	int r, numa_node_id = dm_get_numa_node();
+	struct mapped_device *md;
 	void *old_md;
 
+	md = kzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
 	if (!md) {
 		DMWARN("unable to allocate device, out of memory.");
 		return NULL;
@@ -2309,7 +2359,9 @@ static struct mapped_device *alloc_dev(int minor)
 	if (r < 0)
 		goto bad_io_barrier;
 
+	md->numa_node_id = numa_node_id;
 	md->use_blk_mq = use_blk_mq;
+	md->init_tio_pdu = false;
 	md->type = DM_TYPE_NONE;
 	mutex_init(&md->suspend_lock);
 	mutex_init(&md->type_lock);
@@ -2323,13 +2375,13 @@ static struct mapped_device *alloc_dev(int minor)
 	INIT_LIST_HEAD(&md->table_devices);
 	spin_lock_init(&md->uevent_lock);
 
-	md->queue = blk_alloc_queue(GFP_KERNEL);
+	md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
 	if (!md->queue)
 		goto bad;
 
 	dm_init_md_queue(md);
 
-	md->disk = alloc_disk(1);
+	md->disk = alloc_disk_node(1, numa_node_id);
 	if (!md->disk)
 		goto bad;
 
@@ -2393,8 +2445,10 @@ static void free_dev(struct mapped_device *md)
 	unlock_fs(md);
 
 	cleanup_mapped_device(md);
-	if (md->use_blk_mq)
-		blk_mq_free_tag_set(&md->tag_set);
+	if (md->tag_set) {
+		blk_mq_free_tag_set(md->tag_set);
+		kfree(md->tag_set);
+	}
 
 	free_table_devices(&md->table_devices);
 	dm_stats_cleanup(&md->stats);
@@ -2502,13 +2556,20 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
	 * This must be done before setting the queue restrictions,
	 * because request-based dm may be run just after the setting.
	 */
-	if (dm_table_request_based(t))
-		stop_queue(q);
+	if (dm_table_request_based(t)) {
+		dm_stop_queue(q);
+		/*
+		 * Leverage the fact that request-based DM targets are
+		 * immutable singletons and establish md->immutable_target
+		 * - used to optimize both dm_request_fn and dm_mq_queue_rq
+		 */
+		md->immutable_target = dm_table_get_immutable_target(t);
+	}
 
 	__bind_mempools(md, t);
 
 	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
-	rcu_assign_pointer(md->map, t);
+	rcu_assign_pointer(md->map, (void *)t);
 	md->immutable_target_type = dm_table_get_immutable_target_type(t);
 
 	dm_table_set_restrictions(t, q, limits);
@@ -2574,7 +2635,6 @@ void dm_set_md_type(struct mapped_device *md, unsigned type)
 
 unsigned dm_get_md_type(struct mapped_device *md)
 {
-	BUG_ON(!mutex_is_locked(&md->type_lock));
 	return md->type;
 }
 
@@ -2594,7 +2654,7 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
 
-static void init_rq_based_worker_thread(struct mapped_device *md)
+static void dm_old_init_rq_based_worker_thread(struct mapped_device *md)
 {
 	/* Initialize the request-based DM worker thread */
 	init_kthread_worker(&md->kworker);
@@ -2603,26 +2663,22 @@ static void init_rq_based_worker_thread(struct mapped_device *md)
 }
 
 /*
- * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
+ * Fully initialize a .request_fn request-based queue.
  */
-static int dm_init_request_based_queue(struct mapped_device *md)
+static int dm_old_init_request_queue(struct mapped_device *md)
 {
-	struct request_queue *q = NULL;
-
 	/* Fully initialize the queue */
-	q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
-	if (!q)
+	if (!blk_init_allocated_queue(md->queue, dm_request_fn, NULL))
 		return -EINVAL;
 
 	/* disable dm_request_fn's merge heuristic by default */
 	md->seq_rq_merge_deadline_usecs = 0;
 
-	md->queue = q;
-	dm_init_old_md_queue(md);
+	dm_init_normal_md_queue(md);
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
-	blk_queue_prep_rq(md->queue, dm_prep_fn);
+	blk_queue_prep_rq(md->queue, dm_old_prep_fn);
 
-	init_rq_based_worker_thread(md);
+	dm_old_init_rq_based_worker_thread(md);
 
 	elv_register_queue(md->queue);
 
@@ -2642,6 +2698,11 @@ static int dm_mq_init_request(void *data, struct request *rq,
	 */
 	tio->md = md;
 
+	if (md->init_tio_pdu) {
+		/* target-specific per-io data is immediately after the tio */
+		tio->info.ptr = tio + 1;
+	}
+
 	return 0;
 }
 
@@ -2651,28 +2712,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct request *rq = bd->rq;
 	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
 	struct mapped_device *md = tio->md;
-	int srcu_idx;
-	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-	struct dm_target *ti;
-	sector_t pos;
+	struct dm_target *ti = md->immutable_target;
 
-	/* always use block 0 to find the target for flushes for now */
-	pos = 0;
-	if (!(rq->cmd_flags & REQ_FLUSH))
-		pos = blk_rq_pos(rq);
+	if (unlikely(!ti)) {
+		int srcu_idx;
+		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
 
-	ti = dm_table_find_target(map, pos);
-	if (!dm_target_is_valid(ti)) {
+		ti = dm_table_find_target(map, 0);
 		dm_put_live_table(md, srcu_idx);
-		DMERR_LIMIT("request attempted access beyond the end of device");
-		/*
-		 * Must perform setup, that rq_completed() requires,
-		 * before returning BLK_MQ_RQ_QUEUE_ERROR
-		 */
-		dm_start_request(md, rq);
-		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
-	dm_put_live_table(md, srcu_idx);
 
 	if (ti->type->busy && ti->type->busy(ti))
 		return BLK_MQ_RQ_QUEUE_BUSY;
@@ -2688,20 +2736,12 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
	 */
 	tio->ti = ti;
 
-	/* Clone the request if underlying devices aren't blk-mq */
-	if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
-		/* clone request is allocated at the end of the pdu */
-		tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
-		(void) clone_rq(rq, md, tio, GFP_ATOMIC);
-		queue_kthread_work(&md->kworker, &tio->work);
-	} else {
-		/* Direct call is fine since .queue_rq allows allocations */
-		if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
-			/* Undo dm_start_request() before requeuing */
-			rq_end_stats(md, rq);
-			rq_completed(md, rq_data_dir(rq), false);
-			return BLK_MQ_RQ_QUEUE_BUSY;
-		}
+	/* Direct call is fine since .queue_rq allows allocations */
+	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+		/* Undo dm_start_request() before requeuing */
+		rq_end_stats(md, rq);
+		rq_completed(md, rq_data_dir(rq), false);
+		return BLK_MQ_RQ_QUEUE_BUSY;
 	}
 
 	return BLK_MQ_RQ_QUEUE_OK;
@@ -2714,47 +2754,56 @@ static struct blk_mq_ops dm_mq_ops = {
 	.init_request = dm_mq_init_request,
 };
 
-static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
+static int dm_mq_init_request_queue(struct mapped_device *md,
+				    struct dm_target *immutable_tgt)
 {
-	unsigned md_type = dm_get_md_type(md);
 	struct request_queue *q;
 	int err;
 
-	memset(&md->tag_set, 0, sizeof(md->tag_set));
-	md->tag_set.ops = &dm_mq_ops;
-	md->tag_set.queue_depth = BLKDEV_MAX_RQ;
-	md->tag_set.numa_node = NUMA_NO_NODE;
-	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
-	md->tag_set.nr_hw_queues = 1;
-	if (md_type == DM_TYPE_REQUEST_BASED) {
-		/* make the memory for non-blk-mq clone part of the pdu */
-		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
-	} else
-		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
-	md->tag_set.driver_data = md;
-
-	err = blk_mq_alloc_tag_set(&md->tag_set);
+	if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) {
+		DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
+		return -EINVAL;
+	}
+
+	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
+	if (!md->tag_set)
+		return -ENOMEM;
+
+	md->tag_set->ops = &dm_mq_ops;
+	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
+	md->tag_set->numa_node = md->numa_node_id;
+	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
+	md->tag_set->driver_data = md;
+
+	md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
+	if (immutable_tgt && immutable_tgt->per_io_data_size) {
+		/* any target-specific per-io data is immediately after the tio */
+		md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
+		md->init_tio_pdu = true;
+	}
+
+	err = blk_mq_alloc_tag_set(md->tag_set);
 	if (err)
-		return err;
+		goto out_kfree_tag_set;
 
-	q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
+	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
 	if (IS_ERR(q)) {
 		err = PTR_ERR(q);
 		goto out_tag_set;
 	}
-	md->queue = q;
 	dm_init_md_queue(md);
 
 	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
 	blk_mq_register_disk(md->disk);
 
-	if (md_type == DM_TYPE_REQUEST_BASED)
-		init_rq_based_worker_thread(md);
-
 	return 0;
 
 out_tag_set:
-	blk_mq_free_tag_set(&md->tag_set);
+	blk_mq_free_tag_set(md->tag_set);
+out_kfree_tag_set:
+	kfree(md->tag_set);
+
 	return err;
 }
 
@@ -2769,28 +2818,28 @@ static unsigned filter_md_type(unsigned type, struct mapped_device *md)
 /*
  * Setup the DM device's queue based on md's type
  */
-int dm_setup_md_queue(struct mapped_device *md)
+int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 {
 	int r;
 	unsigned md_type = filter_md_type(dm_get_md_type(md), md);
 
 	switch (md_type) {
 	case DM_TYPE_REQUEST_BASED:
-		r = dm_init_request_based_queue(md);
+		r = dm_old_init_request_queue(md);
 		if (r) {
-			DMWARN("Cannot initialize queue for request-based mapped device");
+			DMERR("Cannot initialize queue for request-based mapped device");
 			return r;
 		}
 		break;
 	case DM_TYPE_MQ_REQUEST_BASED:
-		r = dm_init_request_based_blk_mq_queue(md);
+		r = dm_mq_init_request_queue(md, dm_table_get_immutable_target(t));
 		if (r) {
-			DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
+			DMERR("Cannot initialize queue for request-based dm-mq mapped device");
 			return r;
 		}
 		break;
 	case DM_TYPE_BIO_BASED:
-		dm_init_old_md_queue(md);
+		dm_init_normal_md_queue(md);
 		blk_queue_make_request(md->queue, dm_make_request);
 		/*
		 * DM handles splitting bios as needed. Free the bio_split bioset
@@ -3133,7 +3182,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
	 * dm defers requests to md->wq from md->queue.
	 */
 	if (dm_request_based(md)) {
-		stop_queue(md->queue);
+		dm_stop_queue(md->queue);
 		if (md->kworker_task)
 			flush_kthread_worker(&md->kworker);
 	}
@@ -3157,7 +3206,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 		dm_queue_flush(md);
 
 		if (dm_request_based(md))
-			start_queue(md->queue);
+			dm_start_queue(md->queue);
 
 		unlock_fs(md);
 		dm_table_presuspend_undo_targets(map);
@@ -3236,7 +3285,7 @@ static int __dm_resume(struct mapped_device *md, struct dm_table *map)
	 * Request-based dm is queueing the deferred I/Os in its request_queue.
	 */
 	if (dm_request_based(md))
-		start_queue(md->queue);
+		dm_start_queue(md->queue);
 
 	unlock_fs(md);
 
@@ -3482,9 +3531,9 @@ int dm_noflush_suspending(struct dm_target *ti)
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
 struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
-					    unsigned integrity, unsigned per_bio_data_size)
+					    unsigned integrity, unsigned per_io_data_size)
 {
-	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
+	struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
 	struct kmem_cache *cachep = NULL;
 	unsigned int pool_size = 0;
 	unsigned int front_pad;
@@ -3498,7 +3547,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 	case DM_TYPE_BIO_BASED:
 		cachep = _io_cache;
 		pool_size = dm_get_reserved_bio_based_ios();
-		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
+		front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
 		break;
 	case DM_TYPE_REQUEST_BASED:
 		cachep = _rq_tio_cache;
@@ -3511,8 +3560,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 		if (!pool_size)
 			pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
-		/* per_bio_data_size is not used. See __bind_mempools(). */
-		WARN_ON(per_bio_data_size != 0);
+		/* per_io_data_size is used for blk-mq pdu at queue allocation */
 		break;
 	default:
 		BUG();
@@ -3554,15 +3602,14 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
 }
 
 static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
-		u32 flags)
+			  u32 flags)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3572,20 +3619,19 @@ static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
 static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
-		u32 flags)
+			 u32 flags)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3595,7 +3641,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -3603,11 +3649,10 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3617,20 +3662,19 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
 static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
-		enum pr_type type, bool abort)
+			 enum pr_type type, bool abort)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3640,7 +3684,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -3648,11 +3692,10 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3662,7 +3705,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -3701,6 +3744,15 @@ MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"
 module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
 
+module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");
+
+module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
+
+module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
+
 MODULE_DESCRIPTION(DM_NAME " driver");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");