/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/percpu_counter.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP
enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,
	BLKG_RWSTAT_DISCARD,

	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
	struct cgroup_subsys_state css;
	spinlock_t lock;

	struct radix_tree_root blkg_tree;
	struct blkcg_gq __rcu *blkg_hint;
	struct hlist_head blkg_list;

	struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];

	struct list_head all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head cgwb_list;
#endif
};
/*
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
 * recursive. Used to carry stats of dead children, and, for blkg_rwstat,
 * to carry result values from read and sum operations.
 */
struct blkg_stat {
	struct percpu_counter cpu_cnt;
	atomic64_t aux_cnt;
};

struct blkg_rwstat {
	struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
	atomic64_t aux_cnt[BLKG_RWSTAT_NR];
};
/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q). This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods. A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq *blkg;
	int plid;
	bool offline;
};

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods. A policy can allocate a private data area by allocating a larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg *blkcg;
	int plid;
};
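
/*
 * Illustrative sketch only (not part of the original header): a policy
 * would typically embed the per-blkg data at the start of its own private
 * structure and convert back with container_of(). The my_* identifiers
 * below are hypothetical.
 *
 *	struct my_blkg_data {
 *		struct blkg_policy_data pd;	// embedded first, as noted above
 *		u64 my_private_counter;
 *	};
 *
 *	static struct my_blkg_data *pd_to_my(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct my_blkg_data, pd) : NULL;
 *	}
 */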
/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue *q;
	struct list_head q_node;
	struct hlist_node blkcg_node;
	struct blkcg *blkcg;

	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested *wb_congested;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq *parent;

	/* request allocation list for this blkcg-q pair */
	struct request_list rl;

	/* reference count */
	atomic_t refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool online;

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;

	struct blkg_policy_data *pd[BLKCG_MAX_POLS];

	struct rcu_head rcu_head;

	atomic_t use_delay;
	atomic64_t delay_nsec;
	atomic64_t delay_start;
	u64 last_delay;
	int last_use;
};
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
		size_t size);

struct blkcg_policy {
	int plid;
	/* cgroup files for the policy */
	struct cftype *dfl_cftypes;
	struct cftype *legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
	blkcg_pol_init_cpd_fn *cpd_init_fn;
	blkcg_pol_free_cpd_fn *cpd_free_fn;
	blkcg_pol_bind_cpd_fn *cpd_bind_fn;

	blkcg_pol_alloc_pd_fn *pd_alloc_fn;
	blkcg_pol_init_pd_fn *pd_init_fn;
	blkcg_pol_online_pd_fn *pd_online_fn;
	blkcg_pol_offline_pd_fn *pd_offline_fn;
	blkcg_pol_free_pd_fn *pd_free_fn;
	blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn *pd_stat_fn;
};
extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
		struct request_queue *q, bool update_hint);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
		struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
		const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
		const struct blkcg_policy *pol);
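
/*
 * Illustrative sketch only: a hypothetical policy (all my_* names are made
 * up) fills in a blkcg_policy with its pd callbacks and registers it once;
 * plid is assigned by blkcg_policy_register(), and per-queue activation is
 * then done with blkcg_activate_policy().
 *
 *	static struct blkcg_policy my_blkcg_policy = {
 *		.pd_alloc_fn	= my_pd_alloc,
 *		.pd_init_fn	= my_pd_init,
 *		.pd_free_fn	= my_pd_free,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return blkcg_policy_register(&my_blkcg_policy);
 *	}
 */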
  168. const char *blkg_dev_name(struct blkcg_gq *blkg);
  169. void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
  170. u64 (*prfill)(struct seq_file *,
  171. struct blkg_policy_data *, int),
  172. const struct blkcg_policy *pol, int data,
  173. bool show_total);
  174. u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
  175. u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
  176. const struct blkg_rwstat *rwstat);
  177. u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
  178. u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
  179. int off);
  180. int blkg_print_stat_bytes(struct seq_file *sf, void *v);
  181. int blkg_print_stat_ios(struct seq_file *sf, void *v);
  182. int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
  183. int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
  184. u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
  185. struct blkcg_policy *pol, int off);
  186. struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
  187. struct blkcg_policy *pol, int off);
  188. struct blkg_conf_ctx {
  189. struct gendisk *disk;
  190. struct blkcg_gq *blkg;
  191. char *body;
  192. };
  193. int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
  194. char *input, struct blkg_conf_ctx *ctx);
  195. void blkg_conf_finish(struct blkg_conf_ctx *ctx);
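
/*
 * Illustrative sketch only: the usual pattern in a policy's cftype write
 * handler (my_blkcg_policy and my_parse_and_apply are hypothetical) is to
 * let blkg_conf_prep() parse the "MAJ:MIN <body>" input and look up the
 * blkg, apply the setting, then drop the locks/refs with blkg_conf_finish().
 *
 *	struct blkg_conf_ctx ctx;
 *	int ret;
 *
 *	ret = blkg_conf_prep(blkcg, &my_blkcg_policy, buf, &ctx);
 *	if (ret)
 *		return ret;
 *	ret = my_parse_and_apply(ctx.blkg, ctx.body);
 *	blkg_conf_finish(&ctx);
 *	return ret;
 */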
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	struct cgroup_subsys_state *css;

	if (bio && bio->bi_css)
		return css_to_blkcg(bio->bi_css);
	css = kthread_blkcg();
	if (css)
		return css_to_blkcg(css);
	return css_to_blkcg(task_css(current, io_cgrp_id));
}

static inline bool blk_cgroup_congested(void)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = kthread_blkcg();
	if (!css)
		css = task_css(current, io_cgrp_id);
	while (css) {
		if (atomic_read(&css->cgroup->congestion_count)) {
			ret = true;
			break;
		}
		css = css->parent;
	}
	rcu_read_unlock();
	return ret;
}
/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: bio being checked
 *
 * Returns %true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg. The idea is we do bio_blkcg() to look up the actual context for the
 * bio and attach the appropriate blkg to the bio. Then we call this helper and
 * if it is true run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg. Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
	return css_to_blkcg(blkcg->css.parent);
}
/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state. If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair. This function should be called
 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
 * - see blk_queue_bypass_start() for details.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (unlikely(blk_queue_bypass(q)))
		return NULL;
	return __blkg_lookup(blkcg, q, false);
}
/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data. Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}
/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	atomic_inc(&blkg->refcnt);
}

/**
 * blkg_try_get - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg. We may be in the
 * midst of freeing this blkg, so we can only use it if the refcnt is not
 * zero.
 */
static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
{
	if (atomic_inc_not_zero(&blkg->refcnt))
		return blkg;
	return NULL;
}

void __blkg_release_rcu(struct rcu_head *rcu);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	if (atomic_dec_and_test(&blkg->refcnt))
		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}
/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU
 * read locked. If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs. The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead. Synchronization rules are the same. @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))
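
/*
 * Illustrative sketch only: walking all online descendant blkgs of a
 * parent blkg under RCU (parent_blkg and my_update_blkg are hypothetical).
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(blkg, pos_css, parent_blkg)
 *		my_update_blkg(blkg);
 *	rcu_read_unlock();
 */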
/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio. Find
 * the request_list to use and obtain a reference on it. Should be called
 * under queue_lock. This function is guaranteed to return a non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();

	blkcg = bio_blkcg(bio);

	/* bypass blkg lookup and use @q->root_rl directly for root */
	if (blkcg == &blkcg_root)
		goto root_rl;

	/*
	 * Try to use blkg->rl. blkg lookup may fail under memory pressure
	 * or if either the blkcg or queue is going away. Fall back to
	 * root_rl in such cases.
	 */
	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg))
		goto root_rl;

	blkg_get(blkg);
	rcu_read_unlock();
	return &blkg->rl;
root_rl:
	rcu_read_unlock();
	return &q->root_rl;
}

/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl(). Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
	if (rl->blkg->blkcg != &blkcg_root)
		blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Associate @rq with @rl so that accounting and freeing can know the
 * request_list @rq came from.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
	rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list @rq is allocated from.
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
	return rq->rl;
}
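
/*
 * Illustrative sketch only: how these request_list helpers might pair up
 * in a (simplified, hypothetical) allocation path; my_alloc_request_from
 * is made up and error handling is omitted.
 *
 *	struct request_list *rl;
 *	struct request *rq;
 *
 *	rl = blk_get_rl(q, bio);		// under queue_lock
 *	rq = my_alloc_request_from(rl);
 *	blk_rq_set_rl(rq, rl);			// remember where rq came from
 *	...
 *	blk_put_rl(blk_rq_rl(rq));		// on free, drop the reference
 */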
struct request_list *__blk_queue_next_rl(struct request_list *rl,
		struct request_queue *q);

/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 *
 * Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;
	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static inline void blkg_stat_exit(struct blkg_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat. The caller must ensure that IRQs on the same CPU
 * don't re-enter this function for the same counter.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}

/**
 * blkg_stat_add_aux - add a blkg_stat into another's aux count
 * @to: the destination blkg_stat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_stat_add_aux(struct blkg_stat *to,
				     struct blkg_stat *from)
{
	atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}
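
/*
 * Illustrative sketch only: a typical blkg_stat lifecycle inside a policy's
 * per-blkg data (my_pd, my_stat and sf are hypothetical).
 *
 *	if (blkg_stat_init(&my_pd->my_stat, gfp))	// in pd_alloc
 *		return -ENOMEM;
 *	blkg_stat_add(&my_pd->my_stat, 1);		// on each event
 *	seq_printf(sf, "%llu\n",			// when printing stats
 *		   (unsigned long long)blkg_stat_read(&my_pd->my_stat));
 *	blkg_stat_exit(&my_pd->my_stat);		// in pd_free
 */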
static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int i, ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
		if (ret) {
			while (--i >= 0)
				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
			return ret;
		}
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
	return 0;
}

static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}

/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP and flags
 * @val: value to add
 *
 * Add @val to @rwstat. The counters are chosen according to @op. The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   unsigned int op, uint64_t val)
{
	struct percpu_counter *cnt;

	if (op_is_discard(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
	else if (op_is_write(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);

	if (op_is_sync(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
}
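
/*
 * Illustrative example: for a synchronous write (op = REQ_OP_WRITE |
 * REQ_SYNC), the call below bumps both the WRITE and the SYNC buckets of
 * some blkg's stat_bytes by @bio_bytes (a hypothetical byte count).
 *
 *	blkg_rwstat_add(&blkg->stat_bytes, REQ_OP_WRITE | REQ_SYNC, bio_bytes);
 */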
/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Read the current snapshot of @rwstat and return it in the aux counts.
 */
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat result;
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_set(&result.aux_cnt[i],
			     percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
	return result;
}

/**
 * blkg_rwstat_total - read the total count of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction. This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);

	return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
}

/**
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
				       struct blkg_rwstat *from)
{
	u64 sum[BLKG_RWSTAT_NR];
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
			     &to->aux_cnt[i]);
}
#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();
	blkcg = bio_blkcg(bio);

	/* associate blkcg if bio hasn't attached one */
	bio_associate_blkcg(bio, &blkcg->css);

	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg)) {
		spin_lock_irq(q->queue_lock);
		blkg = blkg_lookup_create(blkcg, q);
		if (IS_ERR(blkg))
			blkg = NULL;
		spin_unlock_irq(q->queue_lock);
	}

	throtl = blk_throtl_bio(q, blkg, bio);

	if (!throtl) {
		blkg = blkg ?: q->root_blkg;
		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
				bio->bi_iter.bi_size);
		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
	}

	rcu_read_unlock();
	return !throtl;
}
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay. If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble. We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (!old)
		return;

	/* We only want 1 person clearing the congestion count for this blkg. */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
		if (cur == old) {
			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
			break;
		}
		old = cur;
	}
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
#else	/* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q) \
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */