null_blk.c

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>

struct nullb_cmd {
        struct list_head list;
        struct llist_node ll_list;
        struct call_single_data csd;
        struct request *rq;
        struct bio *bio;
        unsigned int tag;
        struct nullb_queue *nq;
};

struct nullb_queue {
        unsigned long *tag_map;
        wait_queue_head_t wait;
        unsigned int queue_depth;

        struct nullb_cmd *cmds;
};

struct nullb {
        struct list_head list;
        unsigned int index;
        struct request_queue *q;
        struct gendisk *disk;
        struct blk_mq_tag_set tag_set;
        struct hrtimer timer;
        unsigned int queue_depth;
        spinlock_t lock;

        struct nullb_queue *queues;
        unsigned int nr_queues;
};

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;

struct completion_queue {
        struct llist_head list;
        struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

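/*
 * Completion modes (selected by the irqmode parameter) and block interfaces
 * (selected by the queue_mode parameter) supported by the driver.
 */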
enum {
        NULL_IRQ_NONE           = 0,
        NULL_IRQ_SOFTIRQ        = 1,
        NULL_IRQ_TIMER          = 2,
};

enum {
        NULL_Q_BIO              = 0,
        NULL_Q_RQ               = 1,
        NULL_Q_MQ               = 2,
};

static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;

static int null_param_store_val(const char *str, int *val, int min, int max)
{
        int ret, new_val;

        ret = kstrtoint(str, 10, &new_val);
        if (ret)
                return -EINVAL;

        if (new_val < min || new_val > max)
                return -EINVAL;

        *val = new_val;
        return 0;
}

static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
        return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}

static const struct kernel_param_ops null_queue_mode_param_ops = {
        .set    = null_set_queue_mode,
        .get    = param_get_int,
};

device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");

static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static int irqmode = NULL_IRQ_SOFTIRQ;

static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
        return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
                                        NULL_IRQ_TIMER);
}

static const struct kernel_param_ops null_irqmode_param_ops = {
        .set    = null_set_irqmode,
        .get    = param_get_int,
};

device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");

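/*
 * Per-queue tag handling: free command slots are tracked in a small bitmap
 * (nq->tag_map); allocators that may sleep wait on nq->wait until a tag is
 * released.
 */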
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
        clear_bit_unlock(tag, nq->tag_map);

        if (waitqueue_active(&nq->wait))
                wake_up(&nq->wait);
}

static unsigned int get_tag(struct nullb_queue *nq)
{
        unsigned int tag;

        do {
                tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
                if (tag >= nq->queue_depth)
                        return -1U;
        } while (test_and_set_bit_lock(tag, nq->tag_map));

        return tag;
}

static void free_cmd(struct nullb_cmd *cmd)
{
        put_tag(cmd->nq, cmd->tag);
}

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
        struct nullb_cmd *cmd;
        unsigned int tag;

        tag = get_tag(nq);
        if (tag != -1U) {
                cmd = &nq->cmds[tag];
                cmd->tag = tag;
                cmd->nq = nq;
                return cmd;
        }

        return NULL;
}

static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
        struct nullb_cmd *cmd;
        DEFINE_WAIT(wait);

        cmd = __alloc_cmd(nq);
        if (cmd || !can_wait)
                return cmd;

        do {
                prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
                cmd = __alloc_cmd(nq);
                if (cmd)
                        break;

                io_schedule();
        } while (1);

        finish_wait(&nq->wait, &wait);
        return cmd;
}

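/*
 * Command completion: end_cmd() finishes a command on whichever block
 * interface is in use; in timer mode, completions are queued on a per-cpu
 * llist and drained when the hrtimer fires.
 */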
static void end_cmd(struct nullb_cmd *cmd)
{
        switch (queue_mode) {
        case NULL_Q_MQ:
                blk_mq_end_request(cmd->rq, 0);
                return;
        case NULL_Q_RQ:
                INIT_LIST_HEAD(&cmd->rq->queuelist);
                blk_end_request_all(cmd->rq, 0);
                break;
        case NULL_Q_BIO:
                bio_endio(cmd->bio, 0);
                break;
        }

        free_cmd(cmd);
}

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
        struct completion_queue *cq;
        struct llist_node *entry;
        struct nullb_cmd *cmd;

        cq = &per_cpu(completion_queues, smp_processor_id());

        while ((entry = llist_del_all(&cq->list)) != NULL) {
                entry = llist_reverse_order(entry);
                do {
                        cmd = container_of(entry, struct nullb_cmd, ll_list);
                        entry = entry->next;
                        end_cmd(cmd);
                } while (entry);
        }

        return HRTIMER_NORESTART;
}

static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
        struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

        cmd->ll_list.next = NULL;
        if (llist_add(&cmd->ll_list, &cq->list)) {
                ktime_t kt = ktime_set(0, completion_nsec);

                hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
        }

        put_cpu();
}

static void null_softirq_done_fn(struct request *rq)
{
        if (queue_mode == NULL_Q_MQ)
                end_cmd(blk_mq_rq_to_pdu(rq));
        else
                end_cmd(rq->special);
}

static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
        /* Complete IO by inline, softirq or timer */
        switch (irqmode) {
        case NULL_IRQ_SOFTIRQ:
                switch (queue_mode) {
                case NULL_Q_MQ:
                        blk_mq_complete_request(cmd->rq);
                        break;
                case NULL_Q_RQ:
                        blk_complete_request(cmd->rq);
                        break;
                case NULL_Q_BIO:
                        /*
                         * XXX: no proper submitting cpu information available.
                         */
                        end_cmd(cmd);
                        break;
                }
                break;
        case NULL_IRQ_NONE:
                end_cmd(cmd);
                break;
        case NULL_IRQ_TIMER:
                null_cmd_end_timer(cmd);
                break;
        }
}

static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
        int index = 0;

        if (nullb->nr_queues != 1)
                index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

        return &nullb->queues[index];
}

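/*
 * I/O submission paths: bio-based ->make_request (NULL_Q_BIO), the legacy
 * request_fn path (NULL_Q_RQ) and blk-mq ->queue_rq (NULL_Q_MQ). Each path
 * only hands the command to null_handle_cmd(); no data is ever transferred.
 */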
static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
        struct nullb *nullb = q->queuedata;
        struct nullb_queue *nq = nullb_to_queue(nullb);
        struct nullb_cmd *cmd;

        cmd = alloc_cmd(nq, 1);
        cmd->bio = bio;

        null_handle_cmd(cmd);
}

static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
        struct nullb *nullb = q->queuedata;
        struct nullb_queue *nq = nullb_to_queue(nullb);
        struct nullb_cmd *cmd;

        cmd = alloc_cmd(nq, 0);
        if (cmd) {
                cmd->rq = req;
                req->special = cmd;
                return BLKPREP_OK;
        }

        return BLKPREP_DEFER;
}

static void null_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                struct nullb_cmd *cmd = rq->special;

                spin_unlock_irq(q->queue_lock);
                null_handle_cmd(cmd);
                spin_lock_irq(q->queue_lock);
        }
}

static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
{
        struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

        cmd->rq = bd->rq;
        cmd->nq = hctx->driver_data;

        blk_mq_start_request(bd->rq);

        null_handle_cmd(cmd);
        return BLK_MQ_RQ_QUEUE_OK;
}

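/*
 * blk-mq setup: each hardware context is bound to one nullb_queue when the
 * queue's hardware contexts are initialized.
 */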
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
        BUG_ON(!nullb);
        BUG_ON(!nq);

        init_waitqueue_head(&nq->wait);
        nq->queue_depth = nullb->queue_depth;
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
                          unsigned int index)
{
        struct nullb *nullb = data;
        struct nullb_queue *nq = &nullb->queues[index];

        hctx->driver_data = nq;
        null_init_queue(nullb, nq);
        nullb->nr_queues++;

        return 0;
}

static struct blk_mq_ops null_mq_ops = {
        .queue_rq       = null_queue_rq,
        .map_queue      = blk_mq_map_queue,
        .init_hctx      = null_init_hctx,
        .complete       = null_softirq_done_fn,
};

static void null_del_dev(struct nullb *nullb)
{
        list_del_init(&nullb->list);

        del_gendisk(nullb->disk);
        blk_cleanup_queue(nullb->q);
        if (queue_mode == NULL_Q_MQ)
                blk_mq_free_tag_set(&nullb->tag_set);
        put_disk(nullb->disk);
        kfree(nullb);
}

static int null_open(struct block_device *bdev, fmode_t mode)
{
        return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
        .owner          = THIS_MODULE,
        .open           = null_open,
        .release        = null_release,
};

static int setup_commands(struct nullb_queue *nq)
{
        struct nullb_cmd *cmd;
        int i, tag_size;

        nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
        if (!nq->cmds)
                return -ENOMEM;

        tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
        nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
        if (!nq->tag_map) {
                kfree(nq->cmds);
                return -ENOMEM;
        }

        for (i = 0; i < nq->queue_depth; i++) {
                cmd = &nq->cmds[i];
                INIT_LIST_HEAD(&cmd->list);
                cmd->ll_list.next = NULL;
                cmd->tag = -1U;
        }

        return 0;
}

static void cleanup_queue(struct nullb_queue *nq)
{
        kfree(nq->tag_map);
        kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
        int i;

        for (i = 0; i < nullb->nr_queues; i++)
                cleanup_queue(&nullb->queues[i]);

        kfree(nullb->queues);
}

static int setup_queues(struct nullb *nullb)
{
        nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
                                GFP_KERNEL);
        if (!nullb->queues)
                return -ENOMEM;

        nullb->nr_queues = 0;
        nullb->queue_depth = hw_queue_depth;

        return 0;
}

static int init_driver_queues(struct nullb *nullb)
{
        struct nullb_queue *nq;
        int i, ret = 0;

        for (i = 0; i < submit_queues; i++) {
                nq = &nullb->queues[i];

                null_init_queue(nullb, nq);

                ret = setup_commands(nq);
                if (ret)
                        return ret;
                nullb->nr_queues++;
        }
        return 0;
}

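/*
 * Allocate and register one null block device: per-queue state, the request
 * queue for the selected queue_mode (plus a blk-mq tag set in MQ mode), and
 * the gendisk itself.
 */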
static int null_add_dev(void)
{
        struct gendisk *disk;
        struct nullb *nullb;
        sector_t size;
        int rv;

        nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
        if (!nullb) {
                rv = -ENOMEM;
                goto out;
        }

        spin_lock_init(&nullb->lock);

        if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
                submit_queues = nr_online_nodes;

        rv = setup_queues(nullb);
        if (rv)
                goto out_free_nullb;

        if (queue_mode == NULL_Q_MQ) {
                nullb->tag_set.ops = &null_mq_ops;
                nullb->tag_set.nr_hw_queues = submit_queues;
                nullb->tag_set.queue_depth = hw_queue_depth;
                nullb->tag_set.numa_node = home_node;
                nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
                nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
                nullb->tag_set.driver_data = nullb;

                rv = blk_mq_alloc_tag_set(&nullb->tag_set);
                if (rv)
                        goto out_cleanup_queues;

                nullb->q = blk_mq_init_queue(&nullb->tag_set);
                if (IS_ERR(nullb->q)) {
                        rv = -ENOMEM;
                        goto out_cleanup_tags;
                }
        } else if (queue_mode == NULL_Q_BIO) {
                nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
                if (!nullb->q) {
                        rv = -ENOMEM;
                        goto out_cleanup_queues;
                }
                blk_queue_make_request(nullb->q, null_queue_bio);
                rv = init_driver_queues(nullb);
                if (rv)
                        goto out_cleanup_blk_queue;
        } else {
                nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
                if (!nullb->q) {
                        rv = -ENOMEM;
                        goto out_cleanup_queues;
                }
                blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
                blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
                rv = init_driver_queues(nullb);
                if (rv)
                        goto out_cleanup_blk_queue;
        }

        nullb->q->queuedata = nullb;
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
        queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);

        disk = nullb->disk = alloc_disk_node(1, home_node);
        if (!disk) {
                rv = -ENOMEM;
                goto out_cleanup_blk_queue;
        }

        mutex_lock(&lock);
        list_add_tail(&nullb->list, &nullb_list);
        nullb->index = nullb_indexes++;
        mutex_unlock(&lock);

        blk_queue_logical_block_size(nullb->q, bs);
        blk_queue_physical_block_size(nullb->q, bs);

        size = gb * 1024 * 1024 * 1024ULL;
        sector_div(size, bs);
        set_capacity(disk, size);

        disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
        disk->major = null_major;
        disk->first_minor = nullb->index;
        disk->fops = &null_fops;
        disk->private_data = nullb;
        disk->queue = nullb->q;
        sprintf(disk->disk_name, "nullb%d", nullb->index);
        add_disk(disk);
        return 0;

out_cleanup_blk_queue:
        blk_cleanup_queue(nullb->q);
out_cleanup_tags:
        if (queue_mode == NULL_Q_MQ)
                blk_mq_free_tag_set(&nullb->tag_set);
out_cleanup_queues:
        cleanup_queues(nullb);
out_free_nullb:
        kfree(nullb);
out:
        return rv;
}

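/*
 * Module init: clamp the block size and submit_queues parameters, set up the
 * per-cpu completion queues (and their hrtimers in timer mode), register the
 * "nullb" block major and create nr_devices devices.
 */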
static int __init null_init(void)
{
        unsigned int i;

        if (bs > PAGE_SIZE) {
                pr_warn("null_blk: invalid block size\n");
                pr_warn("null_blk: defaulting block size to %lu\n", PAGE_SIZE);
                bs = PAGE_SIZE;
        }

        if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
                if (submit_queues < nr_online_nodes) {
                        pr_warn("null_blk: submit_queues param is set to %u.\n",
                                nr_online_nodes);
                        submit_queues = nr_online_nodes;
                }
        } else if (submit_queues > nr_cpu_ids)
                submit_queues = nr_cpu_ids;
        else if (!submit_queues)
                submit_queues = 1;

        mutex_init(&lock);

        /* Initialize a separate list for each CPU for issuing softirqs */
        for_each_possible_cpu(i) {
                struct completion_queue *cq = &per_cpu(completion_queues, i);

                init_llist_head(&cq->list);

                if (irqmode != NULL_IRQ_TIMER)
                        continue;

                hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                cq->timer.function = null_cmd_timer_expired;
        }

        null_major = register_blkdev(0, "nullb");
        if (null_major < 0)
                return null_major;

        for (i = 0; i < nr_devices; i++) {
                if (null_add_dev()) {
                        unregister_blkdev(null_major, "nullb");
                        return -EINVAL;
                }
        }

        pr_info("null: module loaded\n");
        return 0;
}

static void __exit null_exit(void)
{
        struct nullb *nullb;

        unregister_blkdev(null_major, "nullb");

        mutex_lock(&lock);
        while (!list_empty(&nullb_list)) {
                nullb = list_entry(nullb_list.next, struct nullb, list);
                null_del_dev(nullb);
        }
        mutex_unlock(&lock);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");
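
/*
 * Usage sketch (not part of the original file; assumes the driver is built
 * as null_blk.ko): the parameters declared above can be set at load time,
 * for example:
 *
 *     modprobe null_blk nr_devices=4 queue_mode=2 hw_queue_depth=128
 *
 * which creates /dev/nullb0 .. /dev/nullb3 using the blk-mq path. The
 * devices discard writes and complete reads without transferring data, so
 * they are mainly useful for benchmarking the block layer itself.
 */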