/* memcontrol.h - Memory Controller
 *
 * Copyright IBM Corporation, 2007
 * Author Balbir Singh <balbir@linux.vnet.ibm.com>
 *
 * Copyright 2007 OpenVZ SWsoft Inc
 * Author: Pavel Emelianov <xemul@openvz.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#ifndef _LINUX_MEMCONTROL_H
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>
#include <linux/page_counter.h>
#include <linux/vmpressure.h>
#include <linux/eventfd.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>

struct mem_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;

/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
        MEMCG_CACHE = NR_VM_NODE_STAT_ITEMS,
        MEMCG_RSS,
        MEMCG_RSS_HUGE,
        MEMCG_SWAP,
        MEMCG_SOCK,
        /* XXX: why are these zone and not node counters? */
        MEMCG_KERNEL_STACK_KB,
        MEMCG_NR_STAT,
};

/* Cgroup-specific events, on top of universal VM events */
enum memcg_event_item {
        MEMCG_LOW = NR_VM_EVENT_ITEMS,
        MEMCG_HIGH,
        MEMCG_MAX,
        MEMCG_OOM,
        MEMCG_NR_EVENTS,
};

struct mem_cgroup_reclaim_cookie {
        pg_data_t *pgdat;
        int priority;
        unsigned int generation;
};

#ifdef CONFIG_MEMCG

#define MEM_CGROUP_ID_SHIFT 16
#define MEM_CGROUP_ID_MAX USHRT_MAX

struct mem_cgroup_id {
        int id;
        atomic_t ref;
};

/*
 * The per-memcg event counter is incremented at every pagein/pageout. With
 * THP, it will be incremented by the number of pages. This counter is used
 * to trigger some periodic events. This is straightforward and better
 * than using jiffies etc. to handle periodic memcg events.
 */
enum mem_cgroup_events_target {
        MEM_CGROUP_TARGET_THRESH,
        MEM_CGROUP_TARGET_SOFTLIMIT,
        MEM_CGROUP_TARGET_NUMAINFO,
        MEM_CGROUP_NTARGETS,
};

struct mem_cgroup_stat_cpu {
        long count[MEMCG_NR_STAT];
        unsigned long events[MEMCG_NR_EVENTS];
        unsigned long nr_page_events;
        unsigned long targets[MEM_CGROUP_NTARGETS];
};

struct mem_cgroup_reclaim_iter {
        struct mem_cgroup *position;
        /* scan generation, increased every round-trip */
        unsigned int generation;
};

struct lruvec_stat {
        long count[NR_VM_NODE_STAT_ITEMS];
};

/*
 * per-node information in the memory controller.
 */
struct mem_cgroup_per_node {
        struct lruvec lruvec;
        struct lruvec_stat __percpu *lruvec_stat_cpu;
        atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
        unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
        struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
        struct rb_node tree_node;       /* RB tree node */
        unsigned long usage_in_excess;  /* Set to the value by which */
                                        /* the soft limit is exceeded */
        bool on_tree;
        struct mem_cgroup *memcg;       /* Back pointer, we cannot */
                                        /* use container_of */
};

struct mem_cgroup_threshold {
        struct eventfd_ctx *eventfd;
        unsigned long threshold;
};

/* For threshold */
struct mem_cgroup_threshold_ary {
        /* An array index points to threshold just below or equal to usage. */
        int current_threshold;
        /* Size of entries[] */
        unsigned int size;
        /* Array of thresholds */
        struct mem_cgroup_threshold entries[0];
};

struct mem_cgroup_thresholds {
        /* Primary thresholds array */
        struct mem_cgroup_threshold_ary *primary;
        /*
         * Spare threshold array.
         * This is needed to make mem_cgroup_unregister_event() "never fail".
         * It must be able to store at least primary->size - 1 entries.
         */
        struct mem_cgroup_threshold_ary *spare;
};

enum memcg_kmem_state {
        KMEM_NONE,
        KMEM_ALLOCATED,
        KMEM_ONLINE,
};

/*
 * The memory controller data structure. The memory controller controls both
 * page cache and RSS per cgroup. We would eventually like to provide
 * statistics based on the statistics developed by Rik van Riel for clock-pro,
 * to help the administrator determine what knobs to tune.
 */
struct mem_cgroup {
        struct cgroup_subsys_state css;

        /* Private memcg ID. Used to ID objects that outlive the cgroup */
        struct mem_cgroup_id id;

        /* Accounted resources */
        struct page_counter memory;
        struct page_counter swap;

        /* Legacy consumer-oriented counters */
        struct page_counter memsw;
        struct page_counter kmem;
        struct page_counter tcpmem;

        /* Normal memory consumption range */
        unsigned long low;
        unsigned long high;

        /* Range enforcement for interrupt charges */
        struct work_struct high_work;

        unsigned long soft_limit;

        /* vmpressure notifications */
        struct vmpressure vmpressure;

        /*
         * Should the accounting and control be hierarchical, per subtree?
         */
        bool use_hierarchy;

        /* protected by memcg_oom_lock */
        bool oom_lock;
        int under_oom;

        int swappiness;
        /* OOM-Killer disable */
        int oom_kill_disable;

        /* handle for "memory.events" */
        struct cgroup_file events_file;

        /* protect arrays of thresholds */
        struct mutex thresholds_lock;

        /* thresholds for memory usage. RCU-protected */
        struct mem_cgroup_thresholds thresholds;

        /* thresholds for mem+swap usage. RCU-protected */
        struct mem_cgroup_thresholds memsw_thresholds;

        /* For oom notifier event fd */
        struct list_head oom_notify;

        /*
         * Should we move charges of a task when a task is moved into this
         * mem_cgroup? And what type of charges should we move?
         */
        unsigned long move_charge_at_immigrate;
        /*
         * set > 0 if pages under this cgroup are moving to other cgroups.
         */
        atomic_t moving_account;
        /* taken only while moving_account > 0 */
        spinlock_t move_lock;
        struct task_struct *move_lock_task;
        unsigned long move_lock_flags;

        struct mem_cgroup_stat_cpu __percpu *stat_cpu;
        atomic_long_t stat[MEMCG_NR_STAT];
        atomic_long_t events[MEMCG_NR_EVENTS];

        unsigned long socket_pressure;

        /* Legacy tcp memory accounting */
        bool tcpmem_active;
        int tcpmem_pressure;

#ifndef CONFIG_SLOB
        /* Index in the kmem_cache->memcg_params.memcg_caches array */
        int kmemcg_id;
        enum memcg_kmem_state kmem_state;
        struct list_head kmem_caches;
#endif

        int last_scanned_node;
#if MAX_NUMNODES > 1
        nodemask_t scan_nodes;
        atomic_t numainfo_events;
        atomic_t numainfo_updating;
#endif

#ifdef CONFIG_CGROUP_WRITEBACK
        struct list_head cgwb_list;
        struct wb_domain cgwb_domain;
#endif

        /* List of events which userspace wants to receive */
        struct list_head event_list;
        spinlock_t event_list_lock;

        struct mem_cgroup_per_node *nodeinfo[0];
        /* WARNING: nodeinfo must be the last member here */
};

/*
 * size of first charge trial. "32" comes from vmscan.c's magic value.
 * TODO: larger batches may be necessary on big-iron machines.
 */
#define MEMCG_CHARGE_BATCH 32U

extern struct mem_cgroup *root_mem_cgroup;

static inline bool mem_cgroup_disabled(void)
{
        return !cgroup_subsys_enabled(memory_cgrp_subsys);
}

bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);

int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                          gfp_t gfp_mask, struct mem_cgroup **memcgp,
                          bool compound);
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
                              bool lrucare, bool compound);
void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
                              bool compound);
void mem_cgroup_uncharge(struct page *page);
void mem_cgroup_uncharge_list(struct list_head *page_list);

void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
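
/*
 * Illustrative sketch of the charge protocol (not part of this header;
 * the locals and the "installed" condition are hypothetical): callers
 * such as the fault and page cache paths try to charge first, then
 * commit once the page is fully set up, or cancel if installation fails:
 *
 *	if (mem_cgroup_try_charge(page, mm, gfp_mask, &memcg, false))
 *		return -ENOMEM;
 *	... install the page (page tables or page cache, LRU) ...
 *	if (installed)
 *		mem_cgroup_commit_charge(page, memcg, false, false);
 *	else
 *		mem_cgroup_cancel_charge(page, memcg, false);
 */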

static struct mem_cgroup_per_node *
mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
{
        return memcg->nodeinfo[nid];
}

/**
 * mem_cgroup_lruvec - get the lru list vector for a pgdat and a memcg
 * @pgdat: pglist_data of the wanted lruvec
 * @memcg: memcg of the wanted lruvec
 *
 * Returns the lru list vector holding pages for a given @pgdat and
 * @memcg. This can be the node lruvec, if the memory controller
 * is disabled.
 */
static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
                                struct mem_cgroup *memcg)
{
        struct mem_cgroup_per_node *mz;
        struct lruvec *lruvec;

        if (mem_cgroup_disabled()) {
                lruvec = node_lruvec(pgdat);
                goto out;
        }

        mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
        lruvec = &mz->lruvec;
out:
        /*
         * Since a node can be onlined after the mem_cgroup was created,
         * we have to be prepared to initialize lruvec->pgdat here;
         * and if offlined then reonlined, we need to reinitialize it.
         */
        if (unlikely(lruvec->pgdat != pgdat))
                lruvec->pgdat = pgdat;
        return lruvec;
}
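
/*
 * Illustrative lookup (a sketch, not quoted from a real caller): reclaim
 * resolves the lruvec for a memcg on a given node before touching its
 * LRU lists:
 *
 *	struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 *	unsigned long nr = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE);
 */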

struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);

bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);

static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct mem_cgroup, css) : NULL;
}

#define mem_cgroup_from_counter(counter, member)        \
        container_of(counter, struct mem_cgroup, member)

struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
                                   struct mem_cgroup *,
                                   struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
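
/*
 * Illustrative iteration pattern (a sketch; "done" is a hypothetical
 * condition): walk @root's subtree including @root itself, and drop the
 * reference held on the current position with mem_cgroup_iter_break()
 * when bailing out early:
 *
 *	for (memcg = mem_cgroup_iter(root, NULL, NULL); memcg;
 *	     memcg = mem_cgroup_iter(root, memcg, NULL)) {
 *		if (done) {
 *			mem_cgroup_iter_break(root, memcg);
 *			break;
 *		}
 *	}
 */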

int mem_cgroup_scan_tasks(struct mem_cgroup *,
                          int (*)(struct task_struct *, void *), void *);

static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
        if (mem_cgroup_disabled())
                return 0;

        return memcg->id.id;
}
struct mem_cgroup *mem_cgroup_from_id(unsigned short id);

static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
{
        struct mem_cgroup_per_node *mz;

        if (mem_cgroup_disabled())
                return NULL;

        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        return mz->memcg;
}

/**
 * parent_mem_cgroup - find the accounting parent of a memcg
 * @memcg: memcg whose parent to find
 *
 * Returns the parent memcg, or NULL if this is the root or the memory
 * controller is in legacy no-hierarchy mode.
 */
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
        if (!memcg->memory.parent)
                return NULL;
        return mem_cgroup_from_counter(memcg->memory.parent, memory);
}

static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
                                            struct mem_cgroup *root)
{
        if (root == memcg)
                return true;
        if (!root->use_hierarchy)
                return false;
        return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
}

static inline bool mm_match_cgroup(struct mm_struct *mm,
                                   struct mem_cgroup *memcg)
{
        struct mem_cgroup *task_memcg;
        bool match = false;

        rcu_read_lock();
        task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
        if (task_memcg)
                match = mem_cgroup_is_descendant(task_memcg, memcg);
        rcu_read_unlock();
        return match;
}

struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
ino_t page_cgroup_ino(struct page *page);

static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
        if (mem_cgroup_disabled())
                return true;
        return !!(memcg->css.flags & CSS_ONLINE);
}

/*
 * For memory reclaim.
 */
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);

void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
                                int zid, int nr_pages);

unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                           int nid, unsigned int lru_mask);

static inline
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
        struct mem_cgroup_per_node *mz;
        unsigned long nr_pages = 0;
        int zid;

        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        for (zid = 0; zid < MAX_NR_ZONES; zid++)
                nr_pages += mz->lru_zone_size[zid][lru];
        return nr_pages;
}

static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
                enum lru_list lru, int zone_idx)
{
        struct mem_cgroup_per_node *mz;

        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        return mz->lru_zone_size[zone_idx][lru];
}

void mem_cgroup_handle_over_high(void);

unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg);

void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                               struct task_struct *p);

static inline void mem_cgroup_oom_enable(void)
{
        WARN_ON(current->memcg_may_oom);
        current->memcg_may_oom = 1;
}

static inline void mem_cgroup_oom_disable(void)
{
        WARN_ON(!current->memcg_may_oom);
        current->memcg_may_oom = 0;
}
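
/*
 * Illustrative pairing (a sketch loosely following the fault path;
 * handle_the_fault() stands in for the real fault handler): userspace
 * faults may invoke the memcg OOM killer directly, so such callers
 * bracket the fault with enable/disable:
 *
 *	mem_cgroup_oom_enable();
 *	ret = handle_the_fault(vma, address, flags);
 *	mem_cgroup_oom_disable();
 */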

static inline bool task_in_memcg_oom(struct task_struct *p)
{
        return p->memcg_in_oom;
}

bool mem_cgroup_oom_synchronize(bool wait);

#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
#endif

struct mem_cgroup *lock_page_memcg(struct page *page);
void __unlock_page_memcg(struct mem_cgroup *memcg);
void unlock_page_memcg(struct page *page);

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
                                             int idx)
{
        long x = atomic_long_read(&memcg->stat[idx]);
#ifdef CONFIG_SMP
        /* Per-cpu batching can leave the global sum transiently negative */
        if (x < 0)
                x = 0;
#endif
        return x;
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
                                     int idx, int val)
{
        long x;

        if (mem_cgroup_disabled())
                return;

        /* Accumulate per-cpu; flush to the atomic counter in batches */
        x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
                atomic_long_add(x, &memcg->stat[idx]);
                x = 0;
        }
        __this_cpu_write(memcg->stat_cpu->count[idx], x);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                   int idx, int val)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_memcg_state(memcg, idx, val);
        local_irq_restore(flags);
}

/**
 * mod_memcg_page_state - update page state statistics
 * @page: the page
 * @idx: page state item to account
 * @val: number of pages (positive or negative)
 *
 * The @page must be locked or the caller must use lock_page_memcg()
 * to prevent double accounting when the page is concurrently being
 * moved to another memcg:
 *
 *   lock_page(page) or lock_page_memcg(page)
 *   if (TestClearPageState(page))
 *     mod_memcg_page_state(page, state, -1);
 *   unlock_page(page) or unlock_page_memcg(page)
 *
 * Kernel pages are an exception to this, since they'll never move.
 */
static inline void __mod_memcg_page_state(struct page *page,
                                          int idx, int val)
{
        if (page->mem_cgroup)
                __mod_memcg_state(page->mem_cgroup, idx, val);
}

static inline void mod_memcg_page_state(struct page *page,
                                        int idx, int val)
{
        if (page->mem_cgroup)
                mod_memcg_state(page->mem_cgroup, idx, val);
}

static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
{
        struct mem_cgroup_per_node *pn;
        long x;

        if (mem_cgroup_disabled())
                return node_page_state(lruvec_pgdat(lruvec), idx);

        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        x = atomic_long_read(&pn->lruvec_stat[idx]);
#ifdef CONFIG_SMP
        if (x < 0)
                x = 0;
#endif
        return x;
}

static inline void __mod_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx, int val)
{
        struct mem_cgroup_per_node *pn;
        long x;

        /* Update node */
        __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);

        if (mem_cgroup_disabled())
                return;

        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);

        /* Update memcg */
        __mod_memcg_state(pn->memcg, idx, val);

        /* Update lruvec */
        x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
                atomic_long_add(x, &pn->lruvec_stat[idx]);
                x = 0;
        }
        __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
}

static inline void mod_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx, int val)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_lruvec_state(lruvec, idx, val);
        local_irq_restore(flags);
}

static inline void __mod_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx, int val)
{
        pg_data_t *pgdat = page_pgdat(page);
        struct lruvec *lruvec;

        /* Untracked pages have no memcg, no lruvec. Update only the node */
        if (!page->mem_cgroup) {
                __mod_node_page_state(pgdat, idx, val);
                return;
        }

        lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
        __mod_lruvec_state(lruvec, idx, val);
}

static inline void mod_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx, int val)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_lruvec_page_state(page, idx, val);
        local_irq_restore(flags);
}

unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                            gfp_t gfp_mask,
                                            unsigned long *total_scanned);

/* idx can be of type enum memcg_event_item or vm_event_item */
static inline void __count_memcg_events(struct mem_cgroup *memcg,
                                        int idx, unsigned long count)
{
        unsigned long x;

        if (mem_cgroup_disabled())
                return;

        x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
        if (unlikely(x > MEMCG_CHARGE_BATCH)) {
                atomic_long_add(x, &memcg->events[idx]);
                x = 0;
        }
        __this_cpu_write(memcg->stat_cpu->events[idx], x);
}

static inline void count_memcg_events(struct mem_cgroup *memcg,
                                      int idx, unsigned long count)
{
        unsigned long flags;

        local_irq_save(flags);
        __count_memcg_events(memcg, idx, count);
        local_irq_restore(flags);
}

/* idx can be of type enum memcg_event_item or vm_event_item */
static inline void count_memcg_page_event(struct page *page,
                                          int idx)
{
        if (page->mem_cgroup)
                count_memcg_events(page->mem_cgroup, idx, 1);
}

static inline void count_memcg_event_mm(struct mm_struct *mm,
                                        enum vm_event_item idx)
{
        struct mem_cgroup *memcg;

        if (mem_cgroup_disabled())
                return;

        rcu_read_lock();
        memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
        if (likely(memcg)) {
                count_memcg_events(memcg, idx, 1);
                if (idx == OOM_KILL)
                        cgroup_file_notify(&memcg->events_file);
        }
        rcu_read_unlock();
}

static inline void mem_cgroup_event(struct mem_cgroup *memcg,
                                    enum memcg_event_item event)
{
        count_memcg_events(memcg, event, 1);
        cgroup_file_notify(&memcg->events_file);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void mem_cgroup_split_huge_fixup(struct page *head);
#endif

#else /* CONFIG_MEMCG */

#define MEM_CGROUP_ID_SHIFT 0
#define MEM_CGROUP_ID_MAX 0

struct mem_cgroup;

static inline bool mem_cgroup_disabled(void)
{
        return true;
}

static inline void mem_cgroup_event(struct mem_cgroup *memcg,
                                    enum memcg_event_item event)
{
}

static inline bool mem_cgroup_low(struct mem_cgroup *root,
                                  struct mem_cgroup *memcg)
{
        return false;
}

static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                                        gfp_t gfp_mask,
                                        struct mem_cgroup **memcgp,
                                        bool compound)
{
        *memcgp = NULL;
        return 0;
}

static inline void mem_cgroup_commit_charge(struct page *page,
                                            struct mem_cgroup *memcg,
                                            bool lrucare, bool compound)
{
}

static inline void mem_cgroup_cancel_charge(struct page *page,
                                            struct mem_cgroup *memcg,
                                            bool compound)
{
}

static inline void mem_cgroup_uncharge(struct page *page)
{
}

static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}

static inline void mem_cgroup_migrate(struct page *old, struct page *new)
{
}

static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
                                struct mem_cgroup *memcg)
{
        return node_lruvec(pgdat);
}

static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
                                                    struct pglist_data *pgdat)
{
        return &pgdat->lruvec;
}

static inline bool mm_match_cgroup(struct mm_struct *mm,
                                   struct mem_cgroup *memcg)
{
        return true;
}

static inline bool task_in_mem_cgroup(struct task_struct *task,
                                      const struct mem_cgroup *memcg)
{
        return true;
}

static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
                struct mem_cgroup *prev,
                struct mem_cgroup_reclaim_cookie *reclaim)
{
        return NULL;
}

static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
                                         struct mem_cgroup *prev)
{
}

static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
                int (*fn)(struct task_struct *, void *), void *arg)
{
        return 0;
}

static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
        return 0;
}

static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
        WARN_ON_ONCE(id);
        /* XXX: This should always return root_mem_cgroup */
        return NULL;
}

static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
{
        return NULL;
}

static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
        return true;
}

static inline unsigned long
mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
        return 0;
}

static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
                enum lru_list lru, int zone_idx)
{
        return 0;
}

static inline unsigned long
mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                             int nid, unsigned int lru_mask)
{
        return 0;
}

static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
        return 0;
}

static inline void
mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}

static inline struct mem_cgroup *lock_page_memcg(struct page *page)
{
        return NULL;
}

static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
{
}

static inline void unlock_page_memcg(struct page *page)
{
}

static inline void mem_cgroup_handle_over_high(void)
{
}

static inline void mem_cgroup_oom_enable(void)
{
}

static inline void mem_cgroup_oom_disable(void)
{
}

static inline bool task_in_memcg_oom(struct task_struct *p)
{
        return false;
}

static inline bool mem_cgroup_oom_synchronize(bool wait)
{
        return false;
}

static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
                                             int idx)
{
        return 0;
}

static inline void __mod_memcg_state(struct mem_cgroup *memcg,
                                     int idx,
                                     int nr)
{
}

static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                   int idx,
                                   int nr)
{
}

static inline void __mod_memcg_page_state(struct page *page,
                                          int idx,
                                          int nr)
{
}

static inline void mod_memcg_page_state(struct page *page,
                                        int idx,
                                        int nr)
{
}

static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
{
        return node_page_state(lruvec_pgdat(lruvec), idx);
}

static inline void __mod_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx, int val)
{
        __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}

static inline void mod_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx, int val)
{
        mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}

static inline void __mod_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx, int val)
{
        __mod_node_page_state(page_pgdat(page), idx, val);
}

static inline void mod_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx, int val)
{
        mod_node_page_state(page_pgdat(page), idx, val);
}

static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                            gfp_t gfp_mask,
                                            unsigned long *total_scanned)
{
        return 0;
}

static inline void mem_cgroup_split_huge_fixup(struct page *head)
{
}

static inline void count_memcg_events(struct mem_cgroup *memcg,
                                      enum vm_event_item idx,
                                      unsigned long count)
{
}

static inline void count_memcg_page_event(struct page *page,
                                          int idx)
{
}

static inline
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}

#endif /* CONFIG_MEMCG */

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __inc_memcg_state(struct mem_cgroup *memcg,
                                     int idx)
{
        __mod_memcg_state(memcg, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __dec_memcg_state(struct mem_cgroup *memcg,
                                     int idx)
{
        __mod_memcg_state(memcg, idx, -1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __inc_memcg_page_state(struct page *page,
                                          int idx)
{
        __mod_memcg_page_state(page, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __dec_memcg_page_state(struct page *page,
                                          int idx)
{
        __mod_memcg_page_state(page, idx, -1);
}

static inline void __inc_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx)
{
        __mod_lruvec_state(lruvec, idx, 1);
}

static inline void __dec_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx)
{
        __mod_lruvec_state(lruvec, idx, -1);
}

static inline void __inc_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx)
{
        __mod_lruvec_page_state(page, idx, 1);
}

static inline void __dec_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx)
{
        __mod_lruvec_page_state(page, idx, -1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void inc_memcg_state(struct mem_cgroup *memcg,
                                   int idx)
{
        mod_memcg_state(memcg, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void dec_memcg_state(struct mem_cgroup *memcg,
                                   int idx)
{
        mod_memcg_state(memcg, idx, -1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void inc_memcg_page_state(struct page *page,
                                        int idx)
{
        mod_memcg_page_state(page, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void dec_memcg_page_state(struct page *page,
                                        int idx)
{
        mod_memcg_page_state(page, idx, -1);
}

static inline void inc_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx)
{
        mod_lruvec_state(lruvec, idx, 1);
}

static inline void dec_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx)
{
        mod_lruvec_state(lruvec, idx, -1);
}

static inline void inc_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx)
{
        mod_lruvec_page_state(page, idx, 1);
}

static inline void dec_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx)
{
        mod_lruvec_page_state(page, idx, -1);
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
                         unsigned long *pheadroom, unsigned long *pdirty,
                         unsigned long *pwriteback);

#else /* CONFIG_CGROUP_WRITEBACK */

static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
{
        return NULL;
}

static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
                                       unsigned long *pfilepages,
                                       unsigned long *pheadroom,
                                       unsigned long *pdirty,
                                       unsigned long *pwriteback)
{
}

#endif /* CONFIG_CGROUP_WRITEBACK */

struct sock;
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_MEMCG
extern struct static_key_false memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
                return true;
        do {
                if (time_before(jiffies, memcg->socket_pressure))
                        return true;
        } while ((memcg = parent_mem_cgroup(memcg)));
        return false;
}
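
/*
 * Illustrative caller (a sketch; enter_memory_pressure() is a
 * hypothetical protocol hook): network protocols consult the socket's
 * memcg before deciding whether to throttle:
 *
 *	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
 *	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
 *		enter_memory_pressure(sk);
 */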
#else
#define mem_cgroup_sockets_enabled 0
static inline void mem_cgroup_sk_alloc(struct sock *sk) { }
static inline void mem_cgroup_sk_free(struct sock *sk) { }
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
        return false;
}
#endif

struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
void memcg_kmem_put_cache(struct kmem_cache *cachep);
int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
                            struct mem_cgroup *memcg);
int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
void memcg_kmem_uncharge(struct page *page, int order);
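
/*
 * Illustrative sketch (a hypothetical caller, not part of this header):
 * allocator paths charge opt-in kernel allocations to the current memcg:
 *
 *	if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT)) {
 *		if (memcg_kmem_charge(page, gfp, order))
 *			goto failed;
 *	}
 *	...and memcg_kmem_uncharge(page, order) when the page is freed.
 */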

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
extern struct static_key_false memcg_kmem_enabled_key;
extern struct workqueue_struct *memcg_kmem_cache_wq;

extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
void memcg_put_cache_ids(void);

/*
 * Helper macro to loop through all memcg-specific caches. Callers must still
 * check if the cache is valid (it is either valid or NULL).
 * The slab_mutex must be held when looping through those caches.
 */
#define for_each_memcg_cache_index(_idx)        \
        for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++)

static inline bool memcg_kmem_enabled(void)
{
        return static_branch_unlikely(&memcg_kmem_enabled_key);
}

/*
 * Helper for accessing a memcg's index. It will be used as an index in the
 * child cache array in kmem_cache, and also to derive its name. This function
 * will return -1 when this is not a kmem-limited memcg.
 */
static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
        return memcg ? memcg->kmemcg_id : -1;
}

#else
#define for_each_memcg_cache_index(_idx)        \
        for (; NULL; )

static inline bool memcg_kmem_enabled(void)
{
        return false;
}

static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
        return -1;
}

static inline void memcg_get_cache_ids(void)
{
}

static inline void memcg_put_cache_ids(void)
{
}

#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

#endif /* _LINUX_MEMCONTROL_H */