/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"
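
/*
 * Count the physical segments a bio chain resolves to.  When the queue
 * supports clustering, adjacent biovecs that are physically contiguous,
 * stay under queue_max_segment_size() and do not straddle the segment
 * boundary mask are collapsed into one segment: e.g. two physically
 * adjacent 4K pages count as a single 8K segment rather than two.
 */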
static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
                                             struct bio *bio)
{
        struct bio_vec bv, bvprv = { NULL };
        int cluster, high, highprv = 1;
        unsigned int seg_size, nr_phys_segs;
        struct bio *fbio, *bbio;
        struct bvec_iter iter;

        if (!bio)
                return 0;

        fbio = bio;
        cluster = blk_queue_cluster(q);
        seg_size = 0;
        nr_phys_segs = 0;
        for_each_bio(bio) {
                bio_for_each_segment(bv, bio, iter) {
                        /*
                         * The trick here is making sure that a high page
                         * is never considered part of another segment,
                         * since that might change with the bounce page.
                         */
                        high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
                        if (!high && !highprv && cluster) {
                                if (seg_size + bv.bv_len
                                    > queue_max_segment_size(q))
                                        goto new_segment;
                                if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
                                        goto new_segment;
                                if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
                                        goto new_segment;

                                seg_size += bv.bv_len;
                                bvprv = bv;
                                continue;
                        }
new_segment:
                        if (nr_phys_segs == 1 && seg_size >
                            fbio->bi_seg_front_size)
                                fbio->bi_seg_front_size = seg_size;

                        nr_phys_segs++;
                        bvprv = bv;
                        seg_size = bv.bv_len;
                        highprv = high;
                }
                bbio = bio;
        }

        if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
                fbio->bi_seg_front_size = seg_size;
        if (seg_size > bbio->bi_seg_back_size)
                bbio->bi_seg_back_size = seg_size;

        return nr_phys_segs;
}
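
/*
 * blk_recalc_rq_segments() refreshes rq->nr_phys_segments for a whole
 * request; blk_recount_segments() does the same for a single bio (the
 * bi_next link is cleared temporarily so only this bio is counted) and
 * caches the result behind BIO_SEG_VALID so merge paths can skip the
 * recount until the bio changes.
 */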
void blk_recalc_rq_segments(struct request *rq)
{
        rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
        struct bio *nxt = bio->bi_next;

        bio->bi_next = NULL;
        bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
        bio->bi_next = nxt;

        bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
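
/*
 * Check whether the tail segment of @bio and the head segment of @nxt
 * can be folded into one physical segment: combined they must fit the
 * queue's max segment size, be physically contiguous and not straddle
 * the segment boundary mask.
 */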
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
                                   struct bio *nxt)
{
        struct bio_vec end_bv = { NULL }, nxt_bv;
        struct bvec_iter iter;

        if (!blk_queue_cluster(q))
                return 0;

        if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
            queue_max_segment_size(q))
                return 0;

        if (!bio_has_data(bio))
                return 1;

        bio_for_each_segment(end_bv, bio, iter)
                if (end_bv.bv_len == iter.bi_size)
                        break;

        nxt_bv = bio_iovec(nxt);

        if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
                return 0;

        /*
         * bio and nxt are contiguous in memory; check if the queue allows
         * these two to be merged into one
         */
        if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
                return 1;

        return 0;
}
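
/*
 * Add one biovec to the scatterlist under construction: extend the
 * current sg entry when clustering permits, otherwise advance to a
 * fresh entry.  *sg tracks the entry being filled and *nsegs the
 * number of entries used so far.
 */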
static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
                     struct scatterlist *sglist, struct bio_vec *bvprv,
                     struct scatterlist **sg, int *nsegs, int *cluster)
{
        int nbytes = bvec->bv_len;

        if (*sg && *cluster) {
                if ((*sg)->length + nbytes > queue_max_segment_size(q))
                        goto new_segment;

                if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
                        goto new_segment;
                if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
                        goto new_segment;

                (*sg)->length += nbytes;
        } else {
new_segment:
                if (!*sg)
                        *sg = sglist;
                else {
                        /*
                         * If the driver previously mapped a shorter
                         * list, we could see a termination bit
                         * prematurely unless it fully inits the sg
                         * table on each mapping. We KNOW that there
                         * must be more entries here or the driver
                         * would be buggy, so force clear the
                         * termination bit to avoid doing a full
                         * sg_init_table() in drivers for each command.
                         */
                        sg_unmark_end(*sg);
                        *sg = sg_next(*sg);
                }

                sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
                (*nsegs)++;
        }
        *bvprv = *bvec;
}
/*
 * Map a request to a scatterlist and return the number of sg entries
 * set up.  The caller must make sure the scatterlist can hold
 * rq->nr_phys_segments entries.
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
                  struct scatterlist *sglist)
{
        struct bio_vec bvec, bvprv = { NULL };
        struct req_iterator iter;
        struct scatterlist *sg;
        int nsegs, cluster;

        nsegs = 0;
        cluster = blk_queue_cluster(q);

        /*
         * for each bio in rq
         */
        sg = NULL;
        rq_for_each_segment(bvec, rq, iter) {
                __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
                                     &nsegs, &cluster);
        } /* segments in rq */

        if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
            (blk_rq_bytes(rq) & q->dma_pad_mask)) {
                unsigned int pad_len =
                        (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

                sg->length += pad_len;
                rq->extra_len += pad_len;
        }

        if (q->dma_drain_size && q->dma_drain_needed(rq)) {
                if (rq->cmd_flags & REQ_WRITE)
                        memset(q->dma_drain_buffer, 0, q->dma_drain_size);

                sg_unmark_end(sg);
                sg = sg_next(sg);
                sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
                            q->dma_drain_size,
                            ((unsigned long)q->dma_drain_buffer) &
                            (PAGE_SIZE - 1));
                nsegs++;
                rq->extra_len += q->dma_drain_size;
        }

        if (sg)
                sg_mark_end(sg);

        return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);
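
/*
 * Typical blk_rq_map_sg() use from a driver's request setup path,
 * sketched below.  This is an illustration only; the GFP flags, device
 * pointer and error handling are assumptions, not taken from this file:
 *
 *      struct scatterlist *sgl;
 *      int nents;
 *
 *      sgl = kmalloc_array(rq->nr_phys_segments, sizeof(*sgl), GFP_ATOMIC);
 *      if (!sgl)
 *              return -ENOMEM;
 *      sg_init_table(sgl, rq->nr_phys_segments);
 *      nents = blk_rq_map_sg(q, rq, sgl);
 *      nents = dma_map_sg(dev, sgl, nents,
 *                         rq_data_dir(rq) == READ ? DMA_FROM_DEVICE
 *                                                 : DMA_TO_DEVICE);
 */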
/**
 * blk_bio_map_sg - map a bio to a scatterlist
 * @q: request_queue in question
 * @bio: bio being mapped
 * @sglist: scatterlist being mapped
 *
 * Note:
 *    Caller must make sure sg can hold bio->bi_phys_segments entries
 *
 * Will return the number of sg entries setup
 */
int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
                   struct scatterlist *sglist)
{
        struct bio_vec bvec, bvprv = { NULL };
        struct scatterlist *sg;
        int nsegs, cluster;
        struct bvec_iter iter;

        nsegs = 0;
        cluster = blk_queue_cluster(q);

        sg = NULL;
        bio_for_each_segment(bvec, bio, iter) {
                __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
                                     &nsegs, &cluster);
        } /* segments in bio */

        if (sg)
                sg_mark_end(sg);

        BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);

        return nsegs;
}
EXPORT_SYMBOL(blk_bio_map_sg);
static inline int ll_new_hw_segment(struct request_queue *q,
                                    struct request *req,
                                    struct bio *bio)
{
        int nr_phys_segs = bio_phys_segments(q, bio);

        if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
                goto no_merge;

        if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio))
                goto no_merge;

        /*
         * This will form the start of a new hw segment.  Bump the
         * physical segment counter.
         */
        req->nr_phys_segments += nr_phys_segs;
        return 1;

no_merge:
        req->cmd_flags |= REQ_NOMERGE;
        if (req == q->last_merge)
                q->last_merge = NULL;
        return 0;
}
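
/*
 * ll_back_merge_fn() decides whether @bio may be appended to the tail
 * of @req; ll_front_merge_fn() decides whether it may be prepended.
 * Both reject the merge if the combined size would exceed the request's
 * max_sectors limit, then defer to ll_new_hw_segment() for the segment
 * accounting.
 */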
int ll_back_merge_fn(struct request_queue *q, struct request *req,
                     struct bio *bio)
{
        if (blk_rq_sectors(req) + bio_sectors(bio) >
            blk_rq_get_max_sectors(req)) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
        }
        if (!bio_flagged(req->biotail, BIO_SEG_VALID))
                blk_recount_segments(q, req->biotail);
        if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);

        return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
                      struct bio *bio)
{
        if (blk_rq_sectors(req) + bio_sectors(bio) >
            blk_rq_get_max_sectors(req)) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
        }
        if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);
        if (!bio_flagged(req->bio, BIO_SEG_VALID))
                blk_recount_segments(q, req->bio);

        return ll_new_hw_segment(q, req, bio);
}
/*
 * blk-mq uses req->special to carry normal driver per-request payload;
 * it does not indicate a prepared command that we cannot merge with.
 */
static bool req_no_special_merge(struct request *req)
{
        struct request_queue *q = req->q;

        return !q->mq_ops && req->special;
}
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
                                struct request *next)
{
        int total_phys_segments;
        unsigned int seg_size =
                req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

        /*
         * First check if either of the requests is a re-queued
         * request.  Can't merge them if they are.
         */
        if (req_no_special_merge(req) || req_no_special_merge(next))
                return 0;

        /*
         * Will it become too large?
         */
        if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
            blk_rq_get_max_sectors(req))
                return 0;

        total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
        if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
                if (req->nr_phys_segments == 1)
                        req->bio->bi_seg_front_size = seg_size;
                if (next->nr_phys_segments == 1)
                        next->biotail->bi_seg_back_size = seg_size;
                total_phys_segments--;
        }

        if (total_phys_segments > queue_max_segments(q))
                return 0;

        if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next))
                return 0;

        /* Merge is OK... */
        req->nr_phys_segments = total_phys_segments;
        return 1;
}
/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
        unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
        struct bio *bio;

        if (rq->cmd_flags & REQ_MIXED_MERGE)
                return;

        /*
         * @rq will no longer represent mixable attributes for all the
         * contained bios.  It will just track those of the first one.
         * Distribute the attributes to each bio.
         */
        for (bio = rq->bio; bio; bio = bio->bi_next) {
                WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
                             (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
                bio->bi_rw |= ff;
        }
        rq->cmd_flags |= REQ_MIXED_MERGE;
}
static void blk_account_io_merge(struct request *req)
{
        if (blk_do_io_stat(req)) {
                struct hd_struct *part;
                int cpu;

                cpu = part_stat_lock();
                part = req->part;

                part_round_stats(cpu, part);
                part_dec_in_flight(part, rq_data_dir(req));

                hd_struct_put(part);
                part_stat_unlock();
        }
}
/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
                         struct request *next)
{
        if (!rq_mergeable(req) || !rq_mergeable(next))
                return 0;

        if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
                return 0;

        /*
         * not contiguous
         */
        if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
                return 0;

        if (rq_data_dir(req) != rq_data_dir(next)
            || req->rq_disk != next->rq_disk
            || req_no_special_merge(next))
                return 0;

        if (req->cmd_flags & REQ_WRITE_SAME &&
            !blk_write_same_mergeable(req->bio, next->bio))
                return 0;

        /*
         * If we are allowed to merge, then append the bio list
         * from next to rq and release next.  merge_requests_fn
         * will have updated segment counts; update sector
         * counts here.
         */
        if (!ll_merge_requests_fn(q, req, next))
                return 0;

        /*
         * If failfast settings disagree or any of the two is already
         * a mixed merge, mark both as mixed before proceeding.  This
         * makes sure that all involved bios have mixable attributes
         * set properly.
         */
        if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
            (req->cmd_flags & REQ_FAILFAST_MASK) !=
            (next->cmd_flags & REQ_FAILFAST_MASK)) {
                blk_rq_set_mixed_merge(req);
                blk_rq_set_mixed_merge(next);
        }

        /*
         * At this point we have either done a back merge
         * or front merge.  We need the smaller start_time of
         * the merged requests to be the current request
         * for accounting purposes.
         */
        if (time_after(req->start_time, next->start_time))
                req->start_time = next->start_time;

        req->biotail->bi_next = next->bio;
        req->biotail = next->biotail;

        req->__data_len += blk_rq_bytes(next);

        elv_merge_requests(q, req, next);

        /*
         * 'next' is going away, so update stats accordingly
         */
        blk_account_io_merge(next);

        req->ioprio = ioprio_best(req->ioprio, next->ioprio);
        if (blk_rq_cpu_valid(next))
                req->cpu = next->cpu;

        /* ownership of bio passed from next to req */
        next->bio = NULL;
        __blk_put_request(q, next);
        return 1;
}
int attempt_back_merge(struct request_queue *q, struct request *rq)
{
        struct request *next = elv_latter_request(q, rq);

        if (next)
                return attempt_merge(q, rq, next);

        return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
        struct request *prev = elv_former_request(q, rq);

        if (prev)
                return attempt_merge(q, prev, rq);

        return 0;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
                          struct request *next)
{
        return attempt_merge(q, rq, next);
}
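
/*
 * blk_rq_merge_ok() is the policy gate for merging a bio into an
 * existing request: mergeable flags, data direction, disk, integrity
 * profile and WRITE SAME buffer must all line up before the elevator
 * even considers a positional match.
 */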
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
        if (!rq_mergeable(rq) || !bio_mergeable(bio))
                return false;

        if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
                return false;

        /* different data direction or already started, don't merge */
        if (bio_data_dir(bio) != rq_data_dir(rq))
                return false;

        /* must be same device and not a special request */
        if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
                return false;

        /* only merge integrity protected bio into ditto rq */
        if (bio_integrity(bio) != blk_integrity_rq(rq))
                return false;

        /* must be using the same buffer */
        if (rq->cmd_flags & REQ_WRITE_SAME &&
            !blk_write_same_mergeable(rq->bio, bio))
                return false;

        return true;
}
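
/*
 * Classify the positional relationship: a back merge means @bio starts
 * exactly where @rq ends (e.g. rq covering sectors 0-7 and a bio
 * starting at sector 8), a front merge means @bio ends exactly where
 * @rq starts.  Callers such as the elevator are expected to validate
 * the pair with blk_rq_merge_ok() first.
 */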
int blk_try_merge(struct request *rq, struct bio *bio)
{
        if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
                return ELEVATOR_BACK_MERGE;
        else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
                return ELEVATOR_FRONT_MERGE;
        return ELEVATOR_NO_MERGE;
}