dm-rq.c

/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048

static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS 256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

#ifdef CONFIG_DM_MQ_DEFAULT
static bool use_blk_mq = true;
#else
static bool use_blk_mq = false;
#endif

bool dm_use_blk_mq_default(void)
{
	return use_blk_mq;
}

bool dm_use_blk_mq(struct mapped_device *md)
{
	return md->use_blk_mq;
}
EXPORT_SYMBOL_GPL(dm_use_blk_mq);

unsigned dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_module_param(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
	return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned dm_get_blk_mq_queue_depth(void)
{
	return __dm_get_module_param(&dm_mq_queue_depth,
				     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
	return blk_queue_stackable(md->queue);
}

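/*
 * Queue start/stop helpers: dm_start_queue() and dm_stop_queue() pick the
 * legacy .request_fn path or the blk-mq path based on q->mq_ops.
 */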
static void dm_old_start_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_queue_stopped(q))
		blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

void dm_start_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		dm_old_start_queue(q);
	else {
		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q);
		blk_mq_start_stopped_hw_queues(q, true);
		blk_mq_kick_requeue_list(q);
	}
}

static void dm_old_stop_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_queue_stopped(q)) {
		spin_unlock_irqrestore(q->queue_lock, flags);
		return;
	}

	blk_stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

void dm_stop_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		dm_old_stop_queue(q);
	else {
		spin_lock_irq(q->queue_lock);
		queue_flag_set(QUEUE_FLAG_STOPPED, q);
		spin_unlock_irq(q->queue_lock);

		blk_mq_cancel_requeue_work(q);
		blk_mq_stop_hw_queues(q);
	}
}

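/*
 * Mempool-backed allocation helpers used only on the legacy (.request_fn)
 * path; blk-mq embeds the tio in the request PDU instead.
 */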
static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
						gfp_t gfp_mask)
{
	return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_old_rq_tio(struct dm_rq_target_io *tio)
{
	mempool_free(tio, tio->md->io_pool);
}

static struct request *alloc_old_clone_request(struct mapped_device *md,
					       gfp_t gfp_mask)
{
	return mempool_alloc(md->rq_pool, gfp_mask);
}

static void free_old_clone_request(struct mapped_device *md, struct request *rq)
{
	mempool_free(rq, md->rq_pool);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
	struct dm_rq_clone_bio_info *info =
		container_of(clone, struct dm_rq_clone_bio_info, clone);
	struct dm_rq_target_io *tio = info->tio;
	struct bio *bio = info->orig;
	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
	int error = clone->bi_error;

	bio_put(clone);

	if (tio->error)
		/*
		 * An error has already been detected on the request.
		 * Once an error has occurred, just let clone->end_io() handle
		 * the remainder.
		 */
		return;
	else if (error) {
		/*
		 * Don't report the error to the upper layer yet.
		 * The error handling decision is made by the target driver
		 * when the request is completed.
		 */
		tio->error = error;
		return;
	}

	/*
	 * I/O for the bio successfully completed.
	 * Report the data completion to the upper layer.
	 */

	/*
	 * bios are processed from the head of the list.
	 * So the completing bio should always be rq->bio.
	 * If it's not, something wrong is happening.
	 */
	if (tio->orig->bio != bio)
		DMERR("bio completion is going in the middle of the request");

	/*
	 * Update the original request.
	 * Do not use blk_end_request() here, because it may complete
	 * the original request before the clone, and break the ordering.
	 */
	blk_update_request(tio->orig, 0, nr_bytes);
}

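/* The tio lives in the request PDU for blk-mq, in rq->special otherwise. */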
static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
}

static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies - tio->duration_jiffies;
		dm_stats_account_io(&md->stats, rq_data_dir(orig),
				    blk_rq_pos(orig), tio->n_sectors, true,
				    tio->duration_jiffies, &tio->stats_aux);
	}
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Alternatively, do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
	atomic_dec(&md->pending[rw]);

	/* nudge anyone waiting on suspend queue */
	if (!md_in_flight(md))
		wake_up(&md->wait);

	/*
	 * Run this off this callpath, as drivers could invoke end_io while
	 * inside their request_fn (and holding the queue lock). Calling
	 * back into ->request_fn() could deadlock attempting to grab the
	 * queue lock again.
	 */
	if (!md->queue->mq_ops && run_queue)
		blk_run_queue_async(md->queue);

	/*
	 * dm_put() must be at the end of this function. See the comment above.
	 */
	dm_put(md);
}

static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;

	blk_rq_unprep_clone(clone);

	/*
	 * It is possible for a clone_old_rq() allocated clone to
	 * get passed in -- it may not yet have a request_queue.
	 * This is known to occur if the error target replaces
	 * a multipath target that has a request_fn queue stacked
	 * on blk-mq queue(s).
	 */
	if (clone->q && clone->q->mq_ops)
		/* stacked on blk-mq queue(s) */
		tio->ti->type->release_clone_rq(clone);
	else if (!md->queue->mq_ops)
		/* request_fn queue stacked on request_fn queue(s) */
		free_old_clone_request(md, clone);

	if (!md->queue->mq_ops)
		free_old_rq_tio(tio);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, int error)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		rq->errors = clone->errors;
		rq->resid_len = clone->resid_len;

		if (rq->sense)
			/*
			 * We are using the sense buffer of the original
			 * request.
			 * So setting the length of the sense data is enough.
			 */
			rq->sense_len = clone->sense_len;
	}

	free_rq_clone(clone);
	rq_end_stats(md, rq);
	if (!rq->q->mq_ops)
		blk_end_request_all(rq, error);
	else
		blk_mq_end_request(rq, error);
	rq_completed(md, rw, true);
}

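/*
 * Release the clone and per-request state so the original request can be
 * requeued and prepared again.
 */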
static void dm_unprep_request(struct request *rq)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;

	if (!rq->q->mq_ops) {
		rq->special = NULL;
		rq->cmd_flags &= ~REQ_DONTPREP;
	}

	if (clone)
		free_rq_clone(clone);
	else if (!tio->md->queue->mq_ops)
		free_old_rq_tio(tio);
}

/*
 * Requeue the original request of a clone.
 */
static void dm_old_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	blk_run_queue_async(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_mq_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	blk_mq_requeue_request(rq);
	spin_lock_irqsave(q->queue_lock, flags);
	if (!blk_queue_stopped(q))
		blk_mq_kick_requeue_list(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

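/* Unprep and requeue the original request, then drop the in-flight accounting. */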
static void dm_requeue_original_request(struct mapped_device *md,
					struct request *rq)
{
	int rw = rq_data_dir(rq);

	rq_end_stats(md, rq);
	dm_unprep_request(rq);

	if (!rq->q->mq_ops)
		dm_old_requeue_request(rq);
	else
		dm_mq_requeue_request(rq);

	rq_completed(md, rw, false);
}

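/*
 * Complete the clone according to the target's rq_end_io() verdict:
 * finish it, leave it incomplete, or requeue the original request.
 */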
static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
		     !clone->q->limits.max_write_same_sectors))
		disable_write_same(tio->md);

	if (r <= 0)
		/* The target wants to complete the I/O */
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		/* The target will handle the I/O */
		return;
	else if (r == DM_ENDIO_REQUEUE)
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(tio->md, tio->orig);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;
	int rw;

	if (!clone) {
		rq_end_stats(tio->md, rq);
		rw = rq_data_dir(rq);
		if (!rq->q->mq_ops) {
			blk_end_request_all(rq, tio->error);
			rq_completed(tio->md, rw, false);
			free_old_rq_tio(tio);
		} else {
			blk_mq_end_request(rq, tio->error);
			rq_completed(tio->md, rw, false);
		}
		return;
	}

	if (rq->cmd_flags & REQ_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, int error)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);

	tio->error = error;
	if (!rq->q->mq_ops)
		blk_complete_request(rq);
	else
		blk_mq_complete_request(rq, error);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * Target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
 */
static void dm_kill_unmapped_request(struct request *rq, int error)
{
	rq->cmd_flags |= REQ_FAILED;
	dm_complete_request(rq, error);
}

/*
 * Called with the clone's queue lock held (in the case of .request_fn)
 */
static void end_clone_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	if (!clone->q->mq_ops) {
		/*
		 * Just clean up the bookkeeping of the queue in which the
		 * clone was dispatched.
		 * The clone is *not* actually freed here because it was
		 * allocated from dm's own mempool (REQ_ALLOCED isn't set).
		 */
		__blk_put_request(clone->q, clone);
	}

	/*
	 * Actual request completion is done in a softirq context which doesn't
	 * hold the clone's queue lock. Otherwise, deadlock could occur because:
	 *  - another request may be submitted by the upper level driver
	 *    of the stacking during the completion
	 *  - the submission which requires queue lock may be done
	 *    against this clone's queue
	 */
	dm_complete_request(tio->orig, error);
}

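/* Hand the prepared clone to the underlying device's request_queue. */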
static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
	int r;

	if (blk_queue_io_stat(clone->q))
		clone->cmd_flags |= REQ_IO_STAT;

	clone->start_time = jiffies;
	r = blk_insert_cloned_request(clone->q, clone);
	if (r)
		/* must complete clone in terms of original request */
		dm_complete_request(rq, r);
}

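/*
 * Clone construction: dm_rq_bio_constructor() wires each cloned bio to
 * end_clone_bio(), and setup_clone() copies command data and completion
 * hooks from the original request onto the clone.
 */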
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->cmd = rq->cmd;
	clone->cmd_len = rq->cmd_len;
	clone->sense = rq->sense;
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
				    struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	/*
	 * Create clone for use with .request_fn request_queue
	 */
	struct request *clone;

	clone = alloc_old_clone_request(md, gfp_mask);
	if (!clone)
		return NULL;

	blk_rq_init(NULL, clone);
	if (setup_clone(clone, rq, tio, gfp_mask)) {
		/* -ENOMEM */
		free_old_clone_request(md, clone);
		return NULL;
	}

	return clone;
}

static void map_tio_request(struct kthread_work *work);

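/* Common per-request tio initialization for both the old and blk-mq paths. */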
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
		     struct mapped_device *md)
{
	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	/*
	 * Avoid initializing info for blk-mq; it passes
	 * target-specific data through info.ptr
	 * (see: dm_mq_init_request)
	 */
	if (!md->init_tio_pdu)
		memset(&tio->info, 0, sizeof(tio->info));
	if (md->kworker_task)
		init_kthread_work(&tio->work, map_tio_request);
}

static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
					       struct mapped_device *md,
					       gfp_t gfp_mask)
{
	struct dm_rq_target_io *tio;
	int srcu_idx;
	struct dm_table *table;

	tio = alloc_old_rq_tio(md, gfp_mask);
	if (!tio)
		return NULL;

	init_tio(tio, rq, md);

	table = dm_get_live_table(md, &srcu_idx);
	/*
	 * Must clone a request if this .request_fn DM device
	 * is stacked on .request_fn device(s).
	 */
	if (!dm_table_all_blk_mq_devices(table)) {
		if (!clone_old_rq(rq, md, tio, gfp_mask)) {
			dm_put_live_table(md, srcu_idx);
			free_old_rq_tio(tio);
			return NULL;
		}
	}
	dm_put_live_table(md, srcu_idx);

	return tio;
}

/*
 * Called with the queue lock held.
 */
static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
{
	struct mapped_device *md = q->queuedata;
	struct dm_rq_target_io *tio;

	if (unlikely(rq->special)) {
		DMWARN("Already has something in rq->special.");
		return BLKPREP_KILL;
	}

	tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
	if (!tio)
		return BLKPREP_DEFER;

	rq->special = tio;
	rq->cmd_flags |= REQ_DONTPREP;

	return BLKPREP_OK;
}

/*
 * Returns:
 * 0                : the request has been processed
 * DM_MAPIO_REQUEUE : the original request needs to be requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio, struct request *rq,
		       struct mapped_device *md)
{
	int r;
	struct dm_target *ti = tio->ti;
	struct request *clone = NULL;

	if (tio->clone) {
		clone = tio->clone;
		r = ti->type->map_rq(ti, clone, &tio->info);
	} else {
		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
		if (r < 0) {
			/* The target wants to complete the I/O */
			dm_kill_unmapped_request(rq, r);
			return r;
		}
		if (r != DM_MAPIO_REMAPPED)
			return r;
		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
			/* -ENOMEM */
			ti->type->release_clone_rq(clone);
			return DM_MAPIO_REQUEUE;
		}
	}

	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* The target has taken the I/O to submit by itself later */
		break;
	case DM_MAPIO_REMAPPED:
		/* The target has remapped the I/O so dispatch it */
		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		dm_dispatch_clone_request(clone, rq);
		break;
	case DM_MAPIO_REQUEUE:
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(md, tio->orig);
		break;
	default:
		if (r > 0) {
			DMWARN("unimplemented target map return value: %d", r);
			BUG();
		}

		/* The target wants to complete the I/O */
		dm_kill_unmapped_request(rq, r);
		return r;
	}

	return 0;
}

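/*
 * Account the request as started: in-flight counters, merge-deadline
 * bookkeeping, dm-stats, and an md reference held for the I/O's lifetime.
 */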
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	if (!orig->q->mq_ops)
		blk_start_request(orig);
	else
		blk_mq_start_request(orig);
	atomic_inc(&md->pending[rq_data_dir(orig)]);

	if (md->seq_rq_merge_deadline_usecs) {
		md->last_rq_pos = rq_end_sector(orig);
		md->last_rq_rw = rq_data_dir(orig);
		md->last_rq_start_time = ktime_get();
	}

	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies;
		tio->n_sectors = blk_rq_sectors(orig);
		dm_stats_account_io(&md->stats, rq_data_dir(orig),
				    blk_rq_pos(orig), tio->n_sectors, false, 0,
				    &tio->stats_aux);
	}

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count by device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);
}

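/* kthread worker function that maps a previously prepared request. */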
static void map_tio_request(struct kthread_work *work)
{
	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
	struct request *rq = tio->orig;
	struct mapped_device *md = tio->md;

	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
		dm_requeue_original_request(md, rq);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
	return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}

#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count)
{
	unsigned deadline;

	if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
		return count;

	if (kstrtouint(buf, 10, &deadline))
		return -EINVAL;

	if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
		deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;

	md->seq_rq_merge_deadline_usecs = deadline;

	return count;
}

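/*
 * Returns true if the merge deadline set via sysfs has not yet expired
 * for the most recently started request.
 */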
static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
{
	ktime_t kt_deadline;

	if (!md->seq_rq_merge_deadline_usecs)
		return false;

	kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
	kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);

	return !ktime_after(ktime_get(), kt_deadline);
}

/*
 * q->request_fn for old request-based dm.
 * Called with the queue lock held.
 */
static void dm_old_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_target *ti = md->immutable_target;
	struct request *rq;
	struct dm_rq_target_io *tio;
	sector_t pos = 0;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		ti = dm_table_find_target(map, pos);
		dm_put_live_table(md, srcu_idx);
	}

	/*
	 * For suspend, check blk_queue_stopped() and increment
	 * ->pending within a single queue_lock not to increment the
	 * number of in-flight I/Os after the queue is stopped in
	 * dm_suspend().
	 */
	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			return;

		/* always use block 0 to find the target for flushes for now */
		pos = 0;
		if (req_op(rq) != REQ_OP_FLUSH)
			pos = blk_rq_pos(rq);

		if ((dm_old_request_peeked_before_merge_deadline(md) &&
		     md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
		    (ti->type->busy && ti->type->busy(ti))) {
			blk_delay_queue(q, 10);
			return;
		}

		dm_start_request(md, rq);

		tio = tio_from_request(rq);
		/* Establish tio->ti before queuing work (map_tio_request) */
		tio->ti = ti;
		queue_kthread_work(&md->kworker, &tio->work);
		BUG_ON(!irqs_disabled());
	}
}

/*
 * Fully initialize a .request_fn request-based queue.
 */
int dm_old_init_request_queue(struct mapped_device *md)
{
	/* Fully initialize the queue */
	if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
		return -EINVAL;

	/* disable dm_old_request_fn's merge heuristic by default */
	md->seq_rq_merge_deadline_usecs = 0;

	dm_init_normal_md_queue(md);
	blk_queue_softirq_done(md->queue, dm_softirq_done);
	blk_queue_prep_rq(md->queue, dm_old_prep_fn);

	/* Initialize the request-based DM worker thread */
	init_kthread_worker(&md->kworker);
	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
				       "kdmwork-%s", dm_device_name(md));
	if (IS_ERR(md->kworker_task))
		return PTR_ERR(md->kworker_task);

	elv_register_queue(md->queue);

	return 0;
}

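/* .init_request callback: set up the per-request tio embedded in the PDU. */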
static int dm_mq_init_request(void *data, struct request *rq,
			      unsigned int hctx_idx, unsigned int request_idx,
			      unsigned int numa_node)
{
	struct mapped_device *md = data;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

	/*
	 * Must initialize md member of tio, otherwise it won't
	 * be available in dm_mq_queue_rq.
	 */
	tio->md = md;

	if (md->init_tio_pdu) {
		/* target-specific per-io data is immediately after the tio */
		tio->info.ptr = tio + 1;
	}

	return 0;
}

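/* .queue_rq callback: map and dispatch a request on the blk-mq path. */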
static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
	struct mapped_device *md = tio->md;
	struct dm_target *ti = md->immutable_target;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		ti = dm_table_find_target(map, 0);
		dm_put_live_table(md, srcu_idx);
	}

	/*
	 * On suspend dm_stop_queue() handles stopping the blk-mq
	 * request_queue BUT: even though the hw_queues are marked
	 * BLK_MQ_S_STOPPED at that point there is still a race that
	 * is allowing block/blk-mq.c to call ->queue_rq against a
	 * hctx that it really shouldn't. The following check guards
	 * against this rarity (albeit _not_ race-free).
	 */
	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
		return BLK_MQ_RQ_QUEUE_BUSY;

	if (ti->type->busy && ti->type->busy(ti))
		return BLK_MQ_RQ_QUEUE_BUSY;

	dm_start_request(md, rq);

	/* Init tio using md established in .init_request */
	init_tio(tio, rq, md);

	/* Establish tio->ti before calling map_request() */
	tio->ti = ti;

	/* Direct call is fine since .queue_rq allows allocations */
	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
		/* Undo dm_start_request() before requeuing */
		rq_end_stats(md, rq);
		rq_completed(md, rq_data_dir(rq), false);
		return BLK_MQ_RQ_QUEUE_BUSY;
	}

	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops dm_mq_ops = {
	.queue_rq = dm_mq_queue_rq,
	.map_queue = blk_mq_map_queue,
	.complete = dm_softirq_done,
	.init_request = dm_mq_init_request,
};

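/*
 * Allocate a tag set and initialize the blk-mq request_queue for a
 * request-based dm-mq device.
 */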
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
	struct request_queue *q;
	struct dm_target *immutable_tgt;
	int err;

	if (!dm_table_all_blk_mq_devices(t)) {
		DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
		return -EINVAL;
	}

	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
	if (!md->tag_set)
		return -ENOMEM;

	md->tag_set->ops = &dm_mq_ops;
	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
	md->tag_set->numa_node = md->numa_node_id;
	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
	md->tag_set->driver_data = md;

	md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
	immutable_tgt = dm_table_get_immutable_target(t);
	if (immutable_tgt && immutable_tgt->per_io_data_size) {
		/* any target-specific per-io data is immediately after the tio */
		md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
		md->init_tio_pdu = true;
	}

	err = blk_mq_alloc_tag_set(md->tag_set);
	if (err)
		goto out_kfree_tag_set;

	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_tag_set;
	}
	dm_init_md_queue(md);

	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
	blk_mq_register_disk(md->disk);

	return 0;

out_tag_set:
	blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
	kfree(md->tag_set);

	return err;
}

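/* Free the tag set allocated by dm_mq_init_request_queue(), if any. */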
void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
	if (md->tag_set) {
		blk_mq_free_tag_set(md->tag_set);
		kfree(md->tag_set);
	}
}

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");