/* rstat.c */

#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);

static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
        return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}

/**
 * cgroup_rstat_updated - keep track of updated rstat_cpu
 * @cgrp: target cgroup
 * @cpu: cpu on which rstat_cpu was updated
 *
 * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
 * rstat_cpu->updated_children list. See the comment on top of
 * cgroup_rstat_cpu definition for details.
 */
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
        raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
        struct cgroup *parent;
        unsigned long flags;

        /* nothing to do for root */
        if (!cgroup_parent(cgrp))
                return;

        /*
         * Paired with the one in cgroup_rstat_cpu_pop_updated(). Either we
         * see NULL updated_next or they see our updated stat.
         */
        smp_mb();

        /*
         * Because @parent's updated_children is terminated with @parent
         * instead of NULL, we can tell whether @cgrp is on the list by
         * testing the next pointer for NULL.
         */
        if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
                return;

        raw_spin_lock_irqsave(cpu_lock, flags);

        /* put @cgrp and all ancestors on the corresponding updated lists */
        for (parent = cgroup_parent(cgrp); parent;
             cgrp = parent, parent = cgroup_parent(cgrp)) {
                struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
                struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);

                /*
                 * Both additions and removals are bottom-up. If a cgroup
                 * is already in the tree, all ancestors are.
                 */
                if (rstatc->updated_next)
                        break;

                rstatc->updated_next = prstatc->updated_children;
                prstatc->updated_children = cgrp;
        }

        raw_spin_unlock_irqrestore(cpu_lock, flags);
}
EXPORT_SYMBOL_GPL(cgroup_rstat_updated);
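/*
 * Illustrative usage sketch (not part of this file): a controller keeping a
 * per-cpu counter would bump it with the cpu pinned and then call
 * cgroup_rstat_updated() so a later flush knows to visit this cgroup on
 * this cpu. my_counter_css() and its ->pcpu member are hypothetical.
 *
 *      static void my_counter_charge(struct cgroup_subsys_state *css, u64 amt)
 *      {
 *              struct my_counter_pcpu *pc = get_cpu_ptr(my_counter_css(css)->pcpu);
 *
 *              pc->val += amt;
 *              cgroup_rstat_updated(css->cgroup, smp_processor_id());
 *              put_cpu_ptr(my_counter_css(css)->pcpu);
 *      }
 *
 * This mirrors cgroup_base_stat_cputime_account_begin/end() below: the
 * write side stays per-cpu and cheap, aggregation is deferred to flush time.
 */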
/**
 * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts
 * the traversal and %NULL return indicates the end. During traversal,
 * each returned cgroup is unlinked from the tree. Must be called with the
 * matching cgroup_rstat_cpu_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
                                                   struct cgroup *root, int cpu)
{
        struct cgroup_rstat_cpu *rstatc;
        struct cgroup *parent;

        if (pos == root)
                return NULL;

        /*
         * We're gonna walk down to the first leaf and visit/remove it. We
         * can pick any unvisited node as the starting point.
         */
        if (!pos)
                pos = root;
        else
                pos = cgroup_parent(pos);

        /* walk down to the first leaf */
        while (true) {
                rstatc = cgroup_rstat_cpu(pos, cpu);
                if (rstatc->updated_children == pos)
                        break;
                pos = rstatc->updated_children;
        }

        /*
         * Unlink @pos from the tree. As the updated_children list is
         * singly linked, we have to walk it to find the removal point.
         * However, due to the way we traverse, @pos will be the first
         * child in most cases. The only exception is @root.
         */
        parent = cgroup_parent(pos);
        if (parent && rstatc->updated_next) {
                struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
                struct cgroup_rstat_cpu *nrstatc;
                struct cgroup **nextp;

                nextp = &prstatc->updated_children;
                while (true) {
                        nrstatc = cgroup_rstat_cpu(*nextp, cpu);
                        if (*nextp == pos)
                                break;

                        WARN_ON_ONCE(*nextp == parent);
                        nextp = &nrstatc->updated_next;
                }
                *nextp = rstatc->updated_next;
                rstatc->updated_next = NULL;

                /*
                 * Paired with the one in cgroup_rstat_updated(). Either
                 * they see NULL updated_next or we see their updated stat.
                 */
                smp_mb();
        }

        return pos;
}
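/*
 * Minimal model of the per-cpu updated tree (illustrative only, not part of
 * this file): each node's updated_children list is terminated with the node
 * itself rather than NULL, so "updated_next != NULL" doubles as the
 * "already queued?" test that cgroup_rstat_updated() relies on.
 *
 *      struct node {
 *              struct node *parent;
 *              struct node *updated_children;  terminated with the node itself
 *              struct node *updated_next;      NULL when not queued
 *      };
 *
 *      static void mark_updated(struct node *n)
 *      {
 *              for (; n->parent && !n->updated_next; n = n->parent) {
 *                      n->updated_next = n->parent->updated_children;
 *                      n->parent->updated_children = n;
 *              }
 *      }
 *
 * With a hierarchy root <- a <- b, mark_updated(b) queues b on a and a on
 * root; popping with the function above then visits b, a, root in that
 * order, matching the child-before-parent guarantee documented above.
 */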
/* see cgroup_rstat_flush() */
static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
        __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
{
        int cpu;

        lockdep_assert_held(&cgroup_rstat_lock);

        for_each_possible_cpu(cpu) {
                raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
                                                       cpu);
                struct cgroup *pos = NULL;

                raw_spin_lock(cpu_lock);
                while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
                        struct cgroup_subsys_state *css;

                        cgroup_base_stat_flush(pos, cpu);

                        rcu_read_lock();
                        list_for_each_entry_rcu(css, &pos->rstat_css_list,
                                                rstat_css_node)
                                css->ss->css_rstat_flush(css, cpu);
                        rcu_read_unlock();
                }
                raw_spin_unlock(cpu_lock);

                /* if @may_sleep, play nice and yield if necessary */
                if (may_sleep && (need_resched() ||
                                  spin_needbreak(&cgroup_rstat_lock))) {
                        spin_unlock_irq(&cgroup_rstat_lock);
                        if (!cond_resched())
                                cpu_relax();
                        spin_lock_irq(&cgroup_rstat_lock);
                }
        }
}
/**
 * cgroup_rstat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards. After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 *
 * This function may block.
 */
void cgroup_rstat_flush(struct cgroup *cgrp)
{
        might_sleep();

        spin_lock_irq(&cgroup_rstat_lock);
        cgroup_rstat_flush_locked(cgrp, true);
        spin_unlock_irq(&cgroup_rstat_lock);
}
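/*
 * Illustrative usage sketch (not part of this file): a controller's seq_file
 * handler flushes before reading its aggregated totals so the output
 * includes whatever was still sitting in per-cpu buffers. my_counter_css()
 * and ->total are hypothetical.
 *
 *      static int my_counter_show(struct seq_file *seq, void *v)
 *      {
 *              struct cgroup_subsys_state *css = seq_css(seq);
 *
 *              cgroup_rstat_flush(css->cgroup);
 *              seq_printf(seq, "%llu\n", my_counter_css(css)->total);
 *              return 0;
 *      }
 *
 * Contexts that cannot sleep would use cgroup_rstat_flush_irqsafe() below
 * instead.
 */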
/**
 * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
 * @cgrp: target cgroup
 *
 * This function can be called from any context.
 */
void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
{
        unsigned long flags;

        spin_lock_irqsave(&cgroup_rstat_lock, flags);
        cgroup_rstat_flush_locked(cgrp, false);
        spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
}
/**
 * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
 * @cgrp: target cgroup
 *
 * Flush stats in @cgrp's subtree and prevent further flushes. Must be
 * paired with cgroup_rstat_flush_release().
 *
 * This function may block.
 */
void cgroup_rstat_flush_hold(struct cgroup *cgrp)
        __acquires(&cgroup_rstat_lock)
{
        might_sleep();
        spin_lock_irq(&cgroup_rstat_lock);
        cgroup_rstat_flush_locked(cgrp, true);
}

/**
 * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
 */
void cgroup_rstat_flush_release(void)
        __releases(&cgroup_rstat_lock)
{
        spin_unlock_irq(&cgroup_rstat_lock);
}
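/*
 * Illustrative usage sketch (not part of this file): the hold/release pair
 * keeps other flushers out while several fields are sampled consistently,
 * which is how cgroup_base_stat_cputime_show() below uses it.
 *
 *      cgroup_rstat_flush_hold(cgrp);
 *      utime = cgrp->bstat.cputime.utime;
 *      stime = cgrp->bstat.cputime.stime;
 *      cgroup_rstat_flush_release();
 *
 * Both values come from the same flushed state because cgroup_rstat_lock is
 * held across the reads.
 */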
int cgroup_rstat_init(struct cgroup *cgrp)
{
        int cpu;

        /* the root cgrp has rstat_cpu preallocated */
        if (!cgrp->rstat_cpu) {
                cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
                if (!cgrp->rstat_cpu)
                        return -ENOMEM;
        }

        /* ->updated_children list is self terminated */
        for_each_possible_cpu(cpu) {
                struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

                rstatc->updated_children = cgrp;
                u64_stats_init(&rstatc->bsync);
        }

        return 0;
}

void cgroup_rstat_exit(struct cgroup *cgrp)
{
        int cpu;

        cgroup_rstat_flush(cgrp);

        /* sanity check */
        for_each_possible_cpu(cpu) {
                struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

                if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
                    WARN_ON_ONCE(rstatc->updated_next))
                        return;
        }

        free_percpu(cgrp->rstat_cpu);
        cgrp->rstat_cpu = NULL;
}

void __init cgroup_rstat_boot(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));

        BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp));
}
/*
 * Functions for cgroup basic resource statistics implemented on top of
 * rstat.
 */
static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat,
                                        struct cgroup_base_stat *src_bstat)
{
        dst_bstat->cputime.utime += src_bstat->cputime.utime;
        dst_bstat->cputime.stime += src_bstat->cputime.stime;
        dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
}

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
{
        struct cgroup *parent = cgroup_parent(cgrp);
        struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
        struct task_cputime *last_cputime = &rstatc->last_bstat.cputime;
        struct task_cputime cputime;
        struct cgroup_base_stat delta;
        unsigned seq;

        /* fetch the current per-cpu values */
        do {
                seq = __u64_stats_fetch_begin(&rstatc->bsync);
                cputime = rstatc->bstat.cputime;
        } while (__u64_stats_fetch_retry(&rstatc->bsync, seq));

        /* calculate the delta to propagate */
        delta.cputime.utime = cputime.utime - last_cputime->utime;
        delta.cputime.stime = cputime.stime - last_cputime->stime;
        delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
                                         last_cputime->sum_exec_runtime;
        *last_cputime = cputime;

        /* transfer the pending stat into delta */
        cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat);
        memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat));

        /* propagate delta into the global stat and the parent's pending */
        cgroup_base_stat_accumulate(&cgrp->bstat, &delta);
        if (parent)
                cgroup_base_stat_accumulate(&parent->pending_bstat, &delta);
}
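/*
 * Worked example (illustrative numbers): if this cpu's bstat.cputime.utime
 * reads 700ns and last_bstat recorded 500ns at the previous flush, the delta
 * is 200ns and last_bstat becomes 700ns. That 200ns is added to @cgrp's
 * global bstat and parked in the parent's pending_bstat; since parents are
 * always popped after their children (see cgroup_rstat_cpu_pop_updated()),
 * the parent's own flush folds the pending 200ns into its delta and pushes
 * it one more level up.
 */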
static struct cgroup_rstat_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
{
        struct cgroup_rstat_cpu *rstatc;

        rstatc = get_cpu_ptr(cgrp->rstat_cpu);
        u64_stats_update_begin(&rstatc->bsync);
        return rstatc;
}

static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
                                                 struct cgroup_rstat_cpu *rstatc)
{
        u64_stats_update_end(&rstatc->bsync);
        cgroup_rstat_updated(cgrp, smp_processor_id());
        put_cpu_ptr(rstatc);
}

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
        struct cgroup_rstat_cpu *rstatc;

        rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
        rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
        cgroup_base_stat_cputime_account_end(cgrp, rstatc);
}
void __cgroup_account_cputime_field(struct cgroup *cgrp,
                                    enum cpu_usage_stat index, u64 delta_exec)
{
        struct cgroup_rstat_cpu *rstatc;

        rstatc = cgroup_base_stat_cputime_account_begin(cgrp);

        switch (index) {
        case CPUTIME_USER:
        case CPUTIME_NICE:
                rstatc->bstat.cputime.utime += delta_exec;
                break;
        case CPUTIME_SYSTEM:
        case CPUTIME_IRQ:
        case CPUTIME_SOFTIRQ:
                rstatc->bstat.cputime.stime += delta_exec;
                break;
        default:
                break;
        }

        cgroup_base_stat_cputime_account_end(cgrp, rstatc);
}
void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
        struct cgroup *cgrp = seq_css(seq)->cgroup;
        u64 usage, utime, stime;

        if (!cgroup_parent(cgrp))
                return;

        cgroup_rstat_flush_hold(cgrp);
        usage = cgrp->bstat.cputime.sum_exec_runtime;
        cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
        cgroup_rstat_flush_release();

        do_div(usage, NSEC_PER_USEC);
        do_div(utime, NSEC_PER_USEC);
        do_div(stime, NSEC_PER_USEC);

        seq_printf(seq, "usage_usec %llu\n"
                   "user_usec %llu\n"
                   "system_usec %llu\n",
                   usage, utime, stime);
}