psi.c

/*
 * Pressure stall information for CPU, memory and IO
 *
 * Copyright (c) 2018 Facebook, Inc.
 * Author: Johannes Weiner <hannes@cmpxchg.org>
 *
 * When CPU, memory and IO are contended, tasks experience delays that
 * reduce throughput and introduce latencies into the workload. Memory
 * and IO contention, in addition, can cause a full loss of forward
 * progress in which the CPU goes idle.
 *
 * This code aggregates individual task delays into resource pressure
 * metrics that indicate problems with both workload health and
 * resource utilization.
 *
 * Model
 *
 * The time in which a task can execute on a CPU is our baseline for
 * productivity. Pressure expresses the amount of time in which this
 * potential cannot be realized due to resource contention.
 *
 * This concept of productivity has two components: the workload and
 * the CPU. To measure the impact of pressure on both, we define two
 * contention states for a resource: SOME and FULL.
 *
 * In the SOME state of a given resource, one or more tasks are
 * delayed on that resource. This affects the workload's ability to
 * perform work, but the CPU may still be executing other tasks.
 *
 * In the FULL state of a given resource, all non-idle tasks are
 * delayed on that resource such that nobody is advancing and the CPU
 * goes idle. This leaves both workload and CPU unproductive.
 *
 * (Naturally, the FULL state doesn't exist for the CPU resource.)
 *
 *   SOME = nr_delayed_tasks != 0
 *   FULL = nr_delayed_tasks != 0 && nr_running_tasks == 0
 *
 * The percentage of wallclock time spent in those compound stall
 * states gives pressure numbers between 0 and 100 for each resource,
 * where the SOME percentage indicates workload slowdowns and the FULL
 * percentage indicates reduced CPU utilization:
 *
 *   %SOME = time(SOME) / period
 *   %FULL = time(FULL) / period
 *
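 * For example, if at least one task was delayed on a resource for a
 * total of 2 out of 10 seconds of wallclock time, that resource's
 * SOME pressure over the window is 2/10 = 20%.
 *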
 * Multiple CPUs
 *
 * The more tasks and available CPUs there are, the more work can be
 * performed concurrently. This means that the potential that can go
 * unrealized due to resource contention *also* scales with non-idle
 * tasks and CPUs.
 *
 * Consider a scenario where 257 number crunching tasks are trying to
 * run concurrently on 256 CPUs. If we simply aggregated the task
 * states, we would have to conclude a CPU SOME pressure number of
 * 100%, since *somebody* is waiting on a runqueue at all
 * times. However, that is clearly not the amount of contention the
 * workload is experiencing: only one out of 256 possible execution
 * threads will be contended at any given time, or about 0.4%.
 *
 * Conversely, consider a scenario of 4 tasks and 4 CPUs where at any
 * given time *one* of the tasks is delayed due to a lack of memory.
 * Again, looking purely at the task state would yield a memory FULL
 * pressure number of 0%, since *somebody* is always making forward
 * progress. But again this wouldn't capture the amount of execution
 * potential lost, which is 1 out of 4 CPUs, or 25%.
 *
 * To calculate wasted potential (pressure) with multiple processors,
 * we have to base our calculation on the number of non-idle tasks in
 * conjunction with the number of available CPUs, which is the number
 * of potential execution threads. SOME becomes then the proportion of
 * delayed tasks to possible threads, and FULL is the share of possible
 * threads that are unproductive due to delays:
 *
 *   threads = min(nr_nonidle_tasks, nr_cpus)
 *      SOME = min(nr_delayed_tasks / threads, 1)
 *      FULL = (threads - min(nr_running_tasks, threads)) / threads
 *
 * For the 257 number crunchers on 256 CPUs, this yields:
 *
 *   threads = min(257, 256)
 *      SOME = min(1 / 256, 1)             = 0.4%
 *      FULL = (256 - min(257, 256)) / 256 = 0%
 *
 * For the 1 out of 4 memory-delayed tasks, this yields:
 *
 *   threads = min(4, 4)
 *      SOME = min(1 / 4, 1)       = 25%
 *      FULL = (4 - min(3, 4)) / 4 = 25%
 *
 * [ Substitute nr_cpus with 1, and you can see that it's a natural
 *   extension of the single-CPU model. ]
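 *
 * [ Worked out, with nr_cpus = 1 and the CPU non-idle:
 *
 *   threads = min(nr_nonidle_tasks, 1) = 1
 *      SOME = min(nr_delayed_tasks / 1, 1)   -> 1 iff a task is delayed
 *      FULL = (1 - min(nr_running_tasks, 1)) -> 1 iff nothing is running
 *
 *   i.e. the SOME/FULL definitions of the single-CPU model above. ]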
 *
 * Implementation
 *
 * To assess the precise time spent in each such state, we would have
 * to freeze the system on task changes and start/stop the state
 * clocks accordingly. Obviously that doesn't scale in practice.
 *
 * Because the scheduler aims to distribute the compute load evenly
 * among the available CPUs, we can track task state locally to each
 * CPU and, at much lower frequency, extrapolate the global state for
 * the cumulative stall times and the running averages.
 *
 * For each runqueue, we track:
 *
 *   tSOME[cpu] = time(nr_delayed_tasks[cpu] != 0)
 *   tFULL[cpu] = time(nr_delayed_tasks[cpu] && !nr_running_tasks[cpu])
 *   tNONIDLE[cpu] = time(nr_nonidle_tasks[cpu] != 0)
 *
 * and then periodically aggregate:
 *
 *   tNONIDLE = sum(tNONIDLE[i])
 *
 *   tSOME = sum(tSOME[i] * tNONIDLE[i]) / tNONIDLE
 *   tFULL = sum(tFULL[i] * tNONIDLE[i]) / tNONIDLE
 *
 *   %SOME = tSOME / period
 *   %FULL = tFULL / period
 *
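 * As an illustration (numbers are made up): over a 1s window, CPU0
 * was non-idle for the full second with 0.5s of SOME time, while CPU1
 * was non-idle for only 0.5s with no SOME time. Then:
 *
 *   tNONIDLE = 1.0 + 0.5               = 1.5
 *   tSOME    = (0.5*1.0 + 0*0.5) / 1.5 = 0.33s
 *   %SOME    = 0.33s / 1s              = 33%
 *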
 * This gives us an approximation of pressure that is practical
 * cost-wise, yet way more sensitive and accurate than periodic
 * sampling of the aggregate task states would be.
 */
#include <linux/sched/loadavg.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/seqlock.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/psi.h>
#include "sched.h"

static int psi_bug __read_mostly;

bool psi_disabled __read_mostly;
core_param(psi_disabled, psi_disabled, bool, 0644);

/* Running averages - we need to be higher-res than loadavg */
#define PSI_FREQ        (2*HZ+1)        /* 2 sec intervals */
#define EXP_10s         1677            /* 1/exp(2s/10s) as fixed-point */
#define EXP_60s         1981            /* 1/exp(2s/60s) */
#define EXP_300s        2034            /* 1/exp(2s/300s) */
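
/*
 * The EXP_* constants above are e^(-2s/window) in FIXED_1 fixed-point
 * (FIXED_1 = 2048, from <linux/sched/loadavg.h>):
 *
 *   e^(-2/10)  * 2048 = 0.8187 * 2048 ~= 1677
 *   e^(-2/60)  * 2048 = 0.9672 * 2048 ~= 1981
 *   e^(-2/300) * 2048 = 0.9934 * 2048 ~= 2034
 */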

/* Sampling frequency in nanoseconds */
static u64 psi_period __read_mostly;

/* System-level pressure and stall tracking */
static DEFINE_PER_CPU(struct psi_group_cpu, system_group_pcpu);
static struct psi_group psi_system = {
        .pcpu = &system_group_pcpu,
};

static void psi_update_work(struct work_struct *work);

static void group_init(struct psi_group *group)
{
        int cpu;

        for_each_possible_cpu(cpu)
                seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
        group->next_update = sched_clock() + psi_period;
        INIT_DELAYED_WORK(&group->clock_work, psi_update_work);
        mutex_init(&group->stat_lock);
}

void __init psi_init(void)
{
        if (psi_disabled)
                return;

        psi_period = jiffies_to_nsecs(PSI_FREQ);
        group_init(&psi_system);
}
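
/*
 * Map a CPU's task counts (running, iowait, memstall) to whether the
 * given pressure state currently applies on that CPU.
 */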
static bool test_state(unsigned int *tasks, enum psi_states state)
{
        switch (state) {
        case PSI_IO_SOME:
                return tasks[NR_IOWAIT];
        case PSI_IO_FULL:
                return tasks[NR_IOWAIT] && !tasks[NR_RUNNING];
        case PSI_MEM_SOME:
                return tasks[NR_MEMSTALL];
        case PSI_MEM_FULL:
                return tasks[NR_MEMSTALL] && !tasks[NR_RUNNING];
        case PSI_CPU_SOME:
                return tasks[NR_RUNNING] > 1;
        case PSI_NONIDLE:
                return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
                        tasks[NR_RUNNING];
        default:
                return false;
        }
}

static void get_recent_times(struct psi_group *group, int cpu, u32 *times)
{
        struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
        unsigned int tasks[NR_PSI_TASK_COUNTS];
        u64 now, state_start;
        unsigned int seq;
        int s;

        /* Snapshot a coherent view of the CPU state */
        do {
                seq = read_seqcount_begin(&groupc->seq);
                now = cpu_clock(cpu);
                memcpy(times, groupc->times, sizeof(groupc->times));
                memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
                state_start = groupc->state_start;
        } while (read_seqcount_retry(&groupc->seq, seq));

        /* Calculate state time deltas against the previous snapshot */
        for (s = 0; s < NR_PSI_STATES; s++) {
                u32 delta;
                /*
                 * In addition to already concluded states, we also
                 * incorporate currently active states on the CPU,
                 * since states may last for many sampling periods.
                 *
                 * This way we keep our delta sampling buckets small
                 * (u32) and our reported pressure close to what's
                 * actually happening.
                 */
                if (test_state(tasks, s))
                        times[s] += now - state_start;

                delta = times[s] - groupc->times_prev[s];
                groupc->times_prev[s] = times[s];

                times[s] = delta;
        }
}
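
/*
 * Fold one pressure sample into the 10s/60s/300s running averages,
 * after decaying them toward zero for any periods with no activity.
 */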
static void calc_avgs(unsigned long avg[3], int missed_periods,
                      u64 time, u64 period)
{
        unsigned long pct;

        /* Fill in zeroes for periods of no activity */
        if (missed_periods) {
                avg[0] = calc_load_n(avg[0], EXP_10s, 0, missed_periods);
                avg[1] = calc_load_n(avg[1], EXP_60s, 0, missed_periods);
                avg[2] = calc_load_n(avg[2], EXP_300s, 0, missed_periods);
        }

        /* Sample the most recent active period */
        pct = div_u64(time * 100, period);
        pct *= FIXED_1;
        avg[0] = calc_load(avg[0], EXP_10s, pct);
        avg[1] = calc_load(avg[1], EXP_60s, pct);
        avg[2] = calc_load(avg[2], EXP_300s, pct);
}
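
/*
 * Fold the per-cpu time buckets into the group-wide cumulative stall
 * times and, once per psi_period, into the decaying averages. Returns
 * true if there was any non-idle activity in the sampling window.
 */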
static bool update_stats(struct psi_group *group)
{
        u64 deltas[NR_PSI_STATES - 1] = { 0, };
        unsigned long missed_periods = 0;
        unsigned long nonidle_total = 0;
        u64 now, expires, period;
        int cpu;
        int s;

        mutex_lock(&group->stat_lock);

        /*
         * Collect the per-cpu time buckets and average them into a
         * single time sample that is normalized to wallclock time.
         *
         * For averaging, each CPU is weighted by its non-idle time in
         * the sampling period. This eliminates artifacts from uneven
         * loading, or even entirely idle CPUs.
         */
        for_each_possible_cpu(cpu) {
                u32 times[NR_PSI_STATES];
                u32 nonidle;

                get_recent_times(group, cpu, times);

                nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]);
                nonidle_total += nonidle;

                for (s = 0; s < PSI_NONIDLE; s++)
                        deltas[s] += (u64)times[s] * nonidle;
        }

        /*
         * Integrate the sample into the running statistics that are
         * reported to userspace: the cumulative stall times and the
         * decaying averages.
         *
         * Pressure percentages are sampled at PSI_FREQ. We might be
         * called more often when the user polls more frequently than
         * that; we might be called less often when there is no task
         * activity, thus no data, and clock ticks are sporadic. The
         * below handles both.
         */

        /* total= */
        for (s = 0; s < NR_PSI_STATES - 1; s++)
                group->total[s] += div_u64(deltas[s], max(nonidle_total, 1UL));

        /* avgX= */
        now = sched_clock();
        expires = group->next_update;
        if (now < expires)
                goto out;
        if (now - expires > psi_period)
                missed_periods = div_u64(now - expires, psi_period);

        /*
         * The periodic clock tick can get delayed for various
         * reasons, especially on loaded systems. To avoid clock
         * drift, we schedule the clock in fixed psi_period intervals.
         * But the deltas we sample out of the per-cpu buckets above
         * are based on the actual time elapsing between clock ticks.
         */
        group->next_update = expires + ((1 + missed_periods) * psi_period);
        period = now - (group->last_update + (missed_periods * psi_period));
        group->last_update = now;

        for (s = 0; s < NR_PSI_STATES - 1; s++) {
                u32 sample;

                sample = group->total[s] - group->total_prev[s];
                /*
                 * Due to the lockless sampling of the time buckets,
                 * recorded time deltas can slip into the next period,
                 * which under full pressure can result in samples in
                 * excess of the period length.
                 *
                 * We don't want to report non-sensical pressures in
                 * excess of 100%, nor do we want to drop such events
                 * on the floor. Instead we punt any overage into the
                 * future until pressure subsides. By doing this we
                 * don't underreport the occurring pressure curve, we
                 * just report it delayed by one period length.
                 *
                 * The error isn't cumulative. As soon as another
                 * delta slips from a period P to P+1, by definition
                 * it frees up its time T in P.
                 */
                if (sample > period)
                        sample = period;
                group->total_prev[s] += sample;
                calc_avgs(group->avg[s], missed_periods, sample, period);
        }
out:
        mutex_unlock(&group->stat_lock);
        return nonidle_total;
}

static void psi_update_work(struct work_struct *work)
{
        struct delayed_work *dwork;
        struct psi_group *group;
        bool nonidle;

        dwork = to_delayed_work(work);
        group = container_of(dwork, struct psi_group, clock_work);

        /*
         * If there is task activity, periodically fold the per-cpu
         * times and feed samples into the running averages. If things
         * are idle and there is no data to process, stop the clock.
         * Once restarted, we'll catch up the running averages in one
         * go - see calc_avgs() and missed_periods.
         */

        nonidle = update_stats(group);

        if (nonidle) {
                unsigned long delay = 0;
                u64 now;

                now = sched_clock();
                if (group->next_update > now)
                        delay = nsecs_to_jiffies(group->next_update - now) + 1;
                schedule_delayed_work(dwork, delay);
        }
}
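
/*
 * Charge the time since the last state change to every pressure state
 * that currently applies on this CPU, then restart the state clock.
 */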
static void record_times(struct psi_group_cpu *groupc, int cpu,
                         bool memstall_tick)
{
        u32 delta;
        u64 now;

        now = cpu_clock(cpu);
        delta = now - groupc->state_start;
        groupc->state_start = now;

        if (test_state(groupc->tasks, PSI_IO_SOME)) {
                groupc->times[PSI_IO_SOME] += delta;
                if (test_state(groupc->tasks, PSI_IO_FULL))
                        groupc->times[PSI_IO_FULL] += delta;
        }

        if (test_state(groupc->tasks, PSI_MEM_SOME)) {
                groupc->times[PSI_MEM_SOME] += delta;
                if (test_state(groupc->tasks, PSI_MEM_FULL))
                        groupc->times[PSI_MEM_FULL] += delta;
                else if (memstall_tick) {
                        u32 sample;
                        /*
                         * Since we care about lost potential, a
                         * memstall is FULL when there are no other
                         * working tasks, but also when the CPU is
                         * actively reclaiming and nothing productive
                         * could run even if it were runnable.
                         *
                         * When the timer tick sees a reclaiming CPU,
                         * regardless of runnable tasks, sample a FULL
                         * tick (or less if it hasn't been a full tick
                         * since the last state change).
                         */
                        sample = min(delta, (u32)jiffies_to_nsecs(1));
                        groupc->times[PSI_MEM_FULL] += sample;
                }
        }

        if (test_state(groupc->tasks, PSI_CPU_SOME))
                groupc->times[PSI_CPU_SOME] += delta;

        if (test_state(groupc->tasks, PSI_NONIDLE))
                groupc->times[PSI_NONIDLE] += delta;
}

static void psi_group_change(struct psi_group *group, int cpu,
                             unsigned int clear, unsigned int set)
{
        struct psi_group_cpu *groupc;
        unsigned int t, m;

        groupc = per_cpu_ptr(group->pcpu, cpu);

        /*
         * First we assess the aggregate resource states this CPU's
         * tasks have been in since the last change, and account any
         * SOME and FULL time these may have resulted in.
         *
         * Then we update the task counts according to the state
         * change requested through the @clear and @set bits.
         */
        write_seqcount_begin(&groupc->seq);

        record_times(groupc, cpu, false);

        for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
                if (!(m & (1 << t)))
                        continue;
                if (groupc->tasks[t] == 0 && !psi_bug) {
                        printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u] clear=%x set=%x\n",
                                        cpu, t, groupc->tasks[0],
                                        groupc->tasks[1], groupc->tasks[2],
                                        clear, set);
                        psi_bug = 1;
                }
                groupc->tasks[t]--;
        }

        for (t = 0; set; set &= ~(1 << t), t++)
                if (set & (1 << t))
                        groupc->tasks[t]++;

        write_seqcount_end(&groupc->seq);

        if (!delayed_work_pending(&group->clock_work))
                schedule_delayed_work(&group->clock_work, PSI_FREQ);
}
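
/*
 * Update a task's PSI state bits (@clear/@set are TSK_* flags) and
 * account the change to the system-wide group on the task's CPU.
 */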
void psi_task_change(struct task_struct *task, int clear, int set)
{
        int cpu = task_cpu(task);

        if (!task->pid)
                return;

        if (((task->psi_flags & set) ||
             (task->psi_flags & clear) != clear) &&
            !psi_bug) {
                printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n",
                                task->pid, task->comm, cpu,
                                task->psi_flags, clear, set);
                psi_bug = 1;
        }

        task->psi_flags &= ~clear;
        task->psi_flags |= set;

        psi_group_change(&psi_system, cpu, clear, set);
}
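
/*
 * Per-tick hook for a task in a memory stall section: fold the time
 * since the last state change so that reclaim time on this CPU can be
 * charged as memory FULL even while other tasks are runnable (see the
 * memstall_tick handling in record_times()).
 */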
void psi_memstall_tick(struct task_struct *task, int cpu)
{
        struct psi_group_cpu *groupc;

        groupc = per_cpu_ptr(psi_system.pcpu, cpu);
        write_seqcount_begin(&groupc->seq);
        record_times(groupc, cpu, true);
        write_seqcount_end(&groupc->seq);
}

/**
 * psi_memstall_enter - mark the beginning of a memory stall section
 * @flags: flags to handle nested sections
 *
 * Marks the calling task as being stalled due to a lack of memory,
 * such as waiting for a refault or performing reclaim.
 */
void psi_memstall_enter(unsigned long *flags)
{
        struct rq_flags rf;
        struct rq *rq;

        if (psi_disabled)
                return;

        *flags = current->flags & PF_MEMSTALL;
        if (*flags)
                return;
        /*
         * PF_MEMSTALL setting & accounting needs to be atomic wrt
         * changes to the task's scheduling state, otherwise we can
         * race with CPU migration.
         */
        rq = this_rq_lock_irq(&rf);

        current->flags |= PF_MEMSTALL;
        psi_task_change(current, 0, TSK_MEMSTALL);

        rq_unlock_irq(rq, &rf);
}

/**
 * psi_memstall_leave - mark the end of a memory stall section
 * @flags: flags to handle nested memory stall sections
 *
 * Marks the calling task as no longer stalled due to lack of memory.
 */
void psi_memstall_leave(unsigned long *flags)
{
        struct rq_flags rf;
        struct rq *rq;

        if (psi_disabled)
                return;

        if (*flags)
                return;
        /*
         * PF_MEMSTALL clearing & accounting needs to be atomic wrt
         * changes to the task's scheduling state, otherwise we could
         * race with CPU migration.
         */
        rq = this_rq_lock_irq(&rf);

        current->flags &= ~PF_MEMSTALL;
        psi_task_change(current, TSK_MEMSTALL, 0);

        rq_unlock_irq(rq, &rf);
}
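
/*
 * Typical use of the annotations above, as a minimal illustrative
 * sketch (the surrounding reclaim/refault code is hypothetical):
 *
 *   unsigned long pflags;
 *
 *   psi_memstall_enter(&pflags);
 *   ... block on a refault or perform reclaim ...
 *   psi_memstall_leave(&pflags);
 *
 * The flags word lets sections nest: an inner pair observes that
 * PF_MEMSTALL is already set and leaves the task state untouched, so
 * only the outermost pair starts and stops memory stall accounting.
 */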
static int psi_show(struct seq_file *m, struct psi_group *group,
                    enum psi_res res)
{
        int full;

        if (psi_disabled)
                return -EOPNOTSUPP;

        update_stats(group);

        for (full = 0; full < 2 - (res == PSI_CPU); full++) {
                unsigned long avg[3];
                u64 total;
                int w;

                for (w = 0; w < 3; w++)
                        avg[w] = group->avg[res * 2 + full][w];
                total = div_u64(group->total[res * 2 + full], NSEC_PER_USEC);

                seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
                           full ? "full" : "some",
                           LOAD_INT(avg[0]), LOAD_FRAC(avg[0]),
                           LOAD_INT(avg[1]), LOAD_FRAC(avg[1]),
                           LOAD_INT(avg[2]), LOAD_FRAC(avg[2]),
                           total);
        }

        return 0;
}

static int psi_io_show(struct seq_file *m, void *v)
{
        return psi_show(m, &psi_system, PSI_IO);
}

static int psi_memory_show(struct seq_file *m, void *v)
{
        return psi_show(m, &psi_system, PSI_MEM);
}

static int psi_cpu_show(struct seq_file *m, void *v)
{
        return psi_show(m, &psi_system, PSI_CPU);
}

static int psi_io_open(struct inode *inode, struct file *file)
{
        return single_open(file, psi_io_show, NULL);
}

static int psi_memory_open(struct inode *inode, struct file *file)
{
        return single_open(file, psi_memory_show, NULL);
}

static int psi_cpu_open(struct inode *inode, struct file *file)
{
        return single_open(file, psi_cpu_show, NULL);
}

static const struct file_operations psi_io_fops = {
        .open           = psi_io_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static const struct file_operations psi_memory_fops = {
        .open           = psi_memory_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static const struct file_operations psi_cpu_fops = {
        .open           = psi_cpu_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init psi_proc_init(void)
{
        proc_mkdir("pressure", NULL);
        proc_create("pressure/io", 0, NULL, &psi_io_fops);
        proc_create("pressure/memory", 0, NULL, &psi_memory_fops);
        proc_create("pressure/cpu", 0, NULL, &psi_cpu_fops);
        return 0;
}
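
/*
 * Reading the files created above yields one line per state, e.g.
 * (values illustrative only):
 *
 *   # cat /proc/pressure/memory
 *   some avg10=0.12 avg60=0.08 avg300=0.02 total=423942
 *   full avg10=0.00 avg60=0.01 avg300=0.00 total=81220
 *
 * with the averages in percent and total in microseconds, as printed
 * by psi_show(). The cpu file reports only the "some" line.
 */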
module_init(psi_proc_init);