stat.c

#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

static DEFINE_MUTEX(cgroup_stat_mutex);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);

static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->cpu_stat, cpu);
}
/**
 * cgroup_cpu_stat_updated - keep track of updated cpu_stat
 * @cgrp: target cgroup
 * @cpu: cpu on which cpu_stat was updated
 *
 * @cgrp's cpu_stat on @cpu was updated.  Put it on the parent's matching
 * cpu_stat->updated_children list.  See the comment on top of
 * cgroup_cpu_stat definition for details.
 */
static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
	struct cgroup *parent;
	unsigned long flags;

	/*
	 * Speculative already-on-list test.  This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
		return;

	raw_spin_lock_irqsave(cpu_lock, flags);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	for (parent = cgroup_parent(cgrp); parent;
	     cgrp = parent, parent = cgroup_parent(cgrp)) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);

		/*
		 * Both additions and removals are bottom-up.  If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (cstat->updated_next)
			break;

		cstat->updated_next = pcstat->updated_children;
		pcstat->updated_children = cgrp;
	}

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}
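/*
 * Illustrative example (hypothetical cgroups, not from this file): with a
 * parent A and children B and C which were both updated on @cpu, B first
 * and then C, the per-cpu linkage built above ends up as:
 *
 *   cgroup_cpu_stat(A, cpu)->updated_children == C
 *   cgroup_cpu_stat(C, cpu)->updated_next     == B
 *   cgroup_cpu_stat(B, cpu)->updated_next     == A   (terminated by @parent)
 *
 * B's and C's own updated_children still point back at themselves because
 * nothing below them has been updated.
 */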
/**
 * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated cpu_stat tree on @cpu from @root.  %NULL @pos starts
 * the traversal and %NULL return indicates the end.  During traversal,
 * each returned cgroup is unlinked from the tree.  Must be called with the
 * matching cgroup_cpu_stat_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
						  struct cgroup *root, int cpu)
{
	struct cgroup_cpu_stat *cstat;
	struct cgroup *parent;

	if (pos == root)
		return NULL;

	/*
	 * We're going to walk down to the first leaf and visit/remove it.
	 * We can pick any unvisited node as the starting point.
	 */
	if (!pos)
		pos = root;
	else
		pos = cgroup_parent(pos);

	/* walk down to the first leaf */
	while (true) {
		cstat = cgroup_cpu_stat(pos, cpu);
		if (cstat->updated_children == pos)
			break;
		pos = cstat->updated_children;
	}

	/*
	 * Unlink @pos from the tree.  As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 * However, due to the way we traverse, @pos will be the first
	 * child in most cases.  The only exception is @root.
	 */
	parent = cgroup_parent(pos);
	if (parent && cstat->updated_next) {
		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
		struct cgroup_cpu_stat *ncstat;
		struct cgroup **nextp;

		nextp = &pcstat->updated_children;
		while (true) {
			ncstat = cgroup_cpu_stat(*nextp, cpu);
			if (*nextp == pos)
				break;

			WARN_ON_ONCE(*nextp == parent);
			nextp = &ncstat->updated_next;
		}

		*nextp = cstat->updated_next;
		cstat->updated_next = NULL;
	}

	return pos;
}
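/*
 * Continuing the hypothetical A/B/C example above: starting a traversal
 * with @pos == NULL and @root == A pops C first, then B, and finally A
 * itself, after which the next call returns NULL.  Children are always
 * popped before their parent, which is what lets the flush code below
 * propagate deltas bottom-up in a single pass.
 */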
static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
				   struct cgroup_stat *src_stat)
{
	dst_stat->cputime.utime += src_stat->cputime.utime;
	dst_stat->cputime.stime += src_stat->cputime.stime;
	dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime;
}
static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
	struct task_cputime *last_cputime = &cstat->last_cputime;
	struct task_cputime cputime;
	struct cgroup_stat delta;
	unsigned seq;

	lockdep_assert_held(&cgroup_stat_mutex);

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&cstat->sync);
		cputime = cstat->cputime;
	} while (__u64_stats_fetch_retry(&cstat->sync, seq));

	/* accumulate the deltas to propagate */
	delta.cputime.utime = cputime.utime - last_cputime->utime;
	delta.cputime.stime = cputime.stime - last_cputime->stime;
	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
					 last_cputime->sum_exec_runtime;
	*last_cputime = cputime;

	/* transfer the pending stat into delta */
	cgroup_stat_accumulate(&delta, &cgrp->pending_stat);
	memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat));

	/* propagate delta into the global stat and the parent's pending */
	cgroup_stat_accumulate(&cgrp->stat, &delta);
	if (parent)
		cgroup_stat_accumulate(&parent->pending_stat, &delta);
}
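/*
 * Rough sketch of one flush of a single cgroup on one cpu, using made-up
 * numbers: if the per-cpu counter has grown from a last snapshot of 100
 * to 130 and an already-flushed child left 20 in ->pending_stat, then
 * delta ends up at 50, ->stat grows by 50, and the same 50 is queued in
 * the parent's ->pending_stat so it gets picked up when the parent is
 * flushed later in the same traversal.
 */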
/* see cgroup_stat_flush() */
static void cgroup_stat_flush_locked(struct cgroup *cgrp)
{
	int cpu;

	lockdep_assert_held(&cgroup_stat_mutex);

	for_each_possible_cpu(cpu) {
		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
		struct cgroup *pos = NULL;

		raw_spin_lock_irq(cpu_lock);
		while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
			cgroup_cpu_stat_flush_one(pos, cpu);
		raw_spin_unlock_irq(cpu_lock);
	}
}
/**
 * cgroup_stat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards.  After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 */
void cgroup_stat_flush(struct cgroup *cgrp)
{
	mutex_lock(&cgroup_stat_mutex);
	cgroup_stat_flush_locked(cgrp);
	mutex_unlock(&cgroup_stat_mutex);
}
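/*
 * Usage sketch: anyone who wants a consistent view of cgrp->stat is
 * expected to call cgroup_stat_flush(cgrp) (or, while already holding
 * cgroup_stat_mutex, cgroup_stat_flush_locked()) before reading, the way
 * cgroup_stat_show_cputime() below does.
 */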
static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
{
	struct cgroup_cpu_stat *cstat;

	cstat = get_cpu_ptr(cgrp->cpu_stat);
	u64_stats_update_begin(&cstat->sync);
	return cstat;
}

static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
					struct cgroup_cpu_stat *cstat)
{
	u64_stats_update_end(&cstat->sync);
	cgroup_cpu_stat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(cstat);
}
void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_cpu_stat *cstat;

	cstat = cgroup_cpu_stat_account_begin(cgrp);
	cstat->cputime.sum_exec_runtime += delta_exec;
	cgroup_cpu_stat_account_end(cgrp, cstat);
}

void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec)
{
	struct cgroup_cpu_stat *cstat;

	cstat = cgroup_cpu_stat_account_begin(cgrp);

	switch (index) {
	case CPUTIME_USER:
	case CPUTIME_NICE:
		cstat->cputime.utime += delta_exec;
		break;
	case CPUTIME_SYSTEM:
	case CPUTIME_IRQ:
	case CPUTIME_SOFTIRQ:
		cstat->cputime.stime += delta_exec;
		break;
	default:
		break;
	}

	cgroup_cpu_stat_account_end(cgrp, cstat);
}
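/*
 * Note: the two accounting hooks above are hot-path entry points.  They
 * are meant to be reached through the cgroup_account_cputime() and
 * cgroup_account_cputime_field() inline wrappers in
 * include/linux/cgroup.h, which (in the matching kernel tree, not shown
 * here) look up the task's cgroup under RCU and only call in here when
 * the cgroup has a parent to propagate to.
 */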
void cgroup_stat_show_cputime(struct seq_file *seq)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	u64 usage, utime, stime;

	if (!cgroup_parent(cgrp))
		return;

	mutex_lock(&cgroup_stat_mutex);

	cgroup_stat_flush_locked(cgrp);

	usage = cgrp->stat.cputime.sum_exec_runtime;
	cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
		       &utime, &stime);

	mutex_unlock(&cgroup_stat_mutex);

	do_div(usage, NSEC_PER_USEC);
	do_div(utime, NSEC_PER_USEC);
	do_div(stime, NSEC_PER_USEC);

	seq_printf(seq, "usage_usec %llu\n"
		   "user_usec %llu\n"
		   "system_usec %llu\n",
		   usage, utime, stime);
}
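/*
 * The nanosecond counters are converted to microseconds above, so the
 * resulting seq_file output reads, for example (illustrative numbers
 * only):
 *
 *   usage_usec 702943
 *   user_usec 415061
 *   system_usec 287882
 */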
int cgroup_stat_init(struct cgroup *cgrp)
{
	int cpu;

	/* the root cgrp has cpu_stat preallocated */
	if (!cgrp->cpu_stat) {
		cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
		if (!cgrp->cpu_stat)
			return -ENOMEM;
	}

	/* ->updated_children list is self terminated */
	for_each_possible_cpu(cpu) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);

		cstat->updated_children = cgrp;
		u64_stats_init(&cstat->sync);
	}

	prev_cputime_init(&cgrp->stat.prev_cputime);

	return 0;
}
void cgroup_stat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_stat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);

		if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
		    WARN_ON_ONCE(cstat->updated_next))
			return;
	}

	free_percpu(cgrp->cpu_stat);
	cgrp->cpu_stat = NULL;
}
void __init cgroup_stat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));

	BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
}