// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Better ipc lock (kern_ipc_perm.lock) handling
 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
 */

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>

#include <linux/uaccess.h>

#include "util.h"

struct shmid_kernel /* private to the kernel */
{
	struct kern_ipc_perm	shm_perm;
	struct file		*shm_file;
	unsigned long		shm_nattch;
	unsigned long		shm_segsz;
	time64_t		shm_atim;
	time64_t		shm_dtim;
	time64_t		shm_ctim;
	struct pid		*shm_cprid;
	struct pid		*shm_lprid;
	struct user_struct	*mlock_user;
	/* The task that created the shm object; NULL if the task is dead. */
	struct task_struct	*shm_creator;
	struct list_head	shm_clist;	/* list by creator */
} __randomize_layout;

/* shm_mode upper byte flags */
#define SHM_DEST	01000	/* segment will be destroyed on last detach */
#define SHM_LOCKED	02000	/* segment will not be swapped */

struct shm_file_data {
	int id;
	struct ipc_namespace *ns;
	struct file *file;
	const struct vm_operations_struct *vm_ops;
};

#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])

#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif

void shm_init_ns(struct ipc_namespace *ns)
{
	ns->shm_ctlmax = SHMMAX;
	ns->shm_ctlall = SHMALL;
	ns->shm_ctlmni = SHMMNI;
	ns->shm_rmid_forced = 0;
	ns->shm_tot = 0;
	ipc_init_ids(&shm_ids(ns));
}

/*
 * Called with shm_ids.rwsem (writer) and the shp structure locked.
 * Only shm_ids.rwsem remains locked on exit.
 */
static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	if (shp->shm_nattch) {
		shp->shm_perm.mode |= SHM_DEST;
		/* Do not find it any more */
		ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
		shm_unlock(shp);
	} else
		shm_destroy(ns, shp);
}

#ifdef CONFIG_IPC_NS
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
}
#endif

static int __init ipc_ns_init(void)
{
	shm_init_ns(&init_ipc_ns);
	return 0;
}

pure_initcall(ipc_ns_init);

void __init shm_init(void)
{
	ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
#else
				"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
#endif
				IPC_SHM_IDS, sysvipc_shm_proc_show);
}

static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct shmid_kernel, shm_perm);
}

/*
 * shm_lock_(check_) routines are called in the paths where the rwsem
 * is not necessarily held.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp;

	rcu_read_lock();
	ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
	if (IS_ERR(ipcp))
		goto err;

	ipc_lock_object(ipcp);
	/*
	 * ipc_rmid() may have already freed the ID while ipc_lock_object()
	 * was spinning: here verify that the structure is still valid.
	 * Upon races with RMID, return -EIDRM, thus indicating that
	 * the ID points to a removed identifier.
	 */
	if (ipc_valid_object(ipcp)) {
		/* return a locked ipc object upon success */
		return container_of(ipcp, struct shmid_kernel, shm_perm);
	}

	ipc_unlock_object(ipcp);
err:
	rcu_read_unlock();
	/*
	 * Callers of shm_lock() must validate the status of the returned ipc
	 * object pointer and error out as appropriate.
	 */
	return (void *)ipcp;
}

static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
	rcu_read_lock();
	ipc_lock_object(&ipcp->shm_perm);
}

static void shm_rcu_free(struct rcu_head *head)
{
	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
							rcu);
	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
							shm_perm);

	security_shm_free(&shp->shm_perm);
	kvfree(shp);
}

static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	list_del(&s->shm_clist);
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}

static int __shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;

	shp = shm_lock(sfd->ns, sfd->id);
	if (IS_ERR(shp))
		return PTR_ERR(shp);

	if (shp->shm_file != sfd->file) {
		/* ID was reused */
		shm_unlock(shp);
		return -EINVAL;
	}

	shp->shm_atim = ktime_get_real_seconds();
	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
	shp->shm_nattch++;
	shm_unlock(shp);
	return 0;
}

/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	int err = __shm_open(vma);
	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	WARN_ON_ONCE(err);
}

/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	struct file *shm_file;

	shm_file = shp->shm_file;
	shp->shm_file = NULL;
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	shm_unlock(shp);
	if (!is_file_hugepages(shm_file))
		shmem_lock(shm_file, 0, shp->mlock_user);
	else if (shp->mlock_user)
		user_shm_unlock(i_size_read(file_inode(shm_file)),
				shp->mlock_user);
	fput(shm_file);
	ipc_update_pid(&shp->shm_cprid, NULL);
	ipc_update_pid(&shp->shm_lprid, NULL);
	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
}

/*
 * shm_may_destroy - identifies whether shm segment should be destroyed now
 *
 * Returns true if and only if there are no active users of the segment and
 * one of the following is true:
 *
 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
 *
 * 2) sysctl kernel.shm_rmid_forced is set to 1.
 */
static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	return (shp->shm_nattch == 0) &&
	       (ns->shm_rmid_forced ||
		(shp->shm_perm.mode & SHM_DEST));
}
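
/*
 * Illustrative userspace sketch (not upstream code): the SHM_DEST path
 * above is what makes the common "create, attach, mark for removal"
 * pattern safe. The segment outlives the IPC_RMID call until the last
 * detach drops shm_nattch to zero:
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	void *p = shmat(id, NULL, 0);
 *	shmctl(id, IPC_RMID, NULL);	// sets SHM_DEST, hides the key
 *	// ... use p ...
 *	shmdt(p);			// nattch hits 0 -> shm_destroy()
 *
 * Alternatively, setting the sysctl kernel.shm_rmid_forced to 1 makes
 * every segment eligible for destruction on its last detach.
 */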
/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	down_write(&shm_ids(ns).rwsem);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);

	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	if (WARN_ON_ONCE(IS_ERR(shp)))
		goto done; /* no-op */

	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
	shp->shm_dtim = ktime_get_real_seconds();
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
done:
	up_write(&shm_ids(ns).rwsem);
}

/* Called with shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
	struct ipc_namespace *ns = data;
	struct kern_ipc_perm *ipcp = p;
	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	/*
	 * We want to destroy segments without users and with already
	 * exit'ed originating process.
	 *
	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
	 */
	if (shp->shm_creator != NULL)
		return 0;

	if (shm_may_destroy(ns, shp)) {
		shm_lock_by_ptr(shp);
		shm_destroy(ns, shp);
	}
	return 0;
}

void shm_destroy_orphaned(struct ipc_namespace *ns)
{
	down_write(&shm_ids(ns).rwsem);
	if (shm_ids(ns).in_use)
		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
	up_write(&shm_ids(ns).rwsem);
}

/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
	struct shmid_kernel *shp, *n;

	if (list_empty(&task->sysvshm.shm_clist))
		return;

	/*
	 * If kernel.shm_rmid_forced is not set then only keep track of
	 * which shmids are orphaned, so that a later set of the sysctl
	 * can clean them up.
	 */
	if (!ns->shm_rmid_forced) {
		down_read(&shm_ids(ns).rwsem);
		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
			shp->shm_creator = NULL;
		/*
		 * Only under read lock but we are only called on current
		 * so no entry on the list will be shared.
		 */
		list_del(&task->sysvshm.shm_clist);
		up_read(&shm_ids(ns).rwsem);
		return;
	}

	/*
	 * Destroy all already created segments, that were not yet mapped,
	 * and mark any mapped as orphan to cover the sysctl toggling.
	 * Destroy is skipped if shm_may_destroy() returns false.
	 */
	down_write(&shm_ids(ns).rwsem);
	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
		shp->shm_creator = NULL;

		if (shm_may_destroy(ns, shp)) {
			shm_lock_by_ptr(shp);
			shm_destroy(ns, shp);
		}
	}

	/* Remove the list head from any segments still attached. */
	list_del(&task->sysvshm.shm_clist);
	up_write(&shm_ids(ns).rwsem);
}

static vm_fault_t shm_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->vm_ops->fault(vmf);
}

static int shm_split(struct vm_area_struct *vma, unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);

	if (sfd->vm_ops->split)
		return sfd->vm_ops->split(vma, addr);

	return 0;
}

static unsigned long shm_pagesize(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);

	if (sfd->vm_ops->pagesize)
		return sfd->vm_ops->pagesize(vma);

	return PAGE_SIZE;
}

#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	int err = 0;

	if (sfd->vm_ops->set_policy)
		err = sfd->vm_ops->set_policy(vma, new);
	return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
					unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct mempolicy *pol = NULL;

	if (sfd->vm_ops->get_policy)
		pol = sfd->vm_ops->get_policy(vma, addr);
	else if (vma->vm_policy)
		pol = vma->vm_policy;

	return pol;
}
#endif

static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	/*
	 * In case of remap_file_pages() emulation, the file can represent an
	 * IPC ID that was removed, and possibly even reused by another shm
	 * segment already. Propagate this case as an error to caller.
	 */
	ret = __shm_open(vma);
	if (ret)
		return ret;

	ret = call_mmap(sfd->file, vma);
	if (ret) {
		shm_close(vma);
		return ret;
	}
	sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
	WARN_ON(!sfd->vm_ops->fault);
#endif
	vma->vm_ops = &shm_vm_ops;
	return 0;
}

static int shm_release(struct inode *ino, struct file *file)
{
	struct shm_file_data *sfd = shm_file_data(file);

	put_ipc_ns(sfd->ns);
	fput(sfd->file);
	shm_file_data(file) = NULL;
	kfree(sfd);
	return 0;
}

static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fsync)
		return -EINVAL;
	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

static long shm_fallocate(struct file *file, int mode, loff_t offset,
			  loff_t len)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fallocate)
		return -EOPNOTSUPP;
	return sfd->file->f_op->fallocate(file, mode, offset, len);
}

static unsigned long shm_get_unmapped_area(struct file *file,
	unsigned long addr, unsigned long len, unsigned long pgoff,
	unsigned long flags)
{
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
						pgoff, flags);
}

static const struct file_operations shm_file_operations = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
	.llseek		= noop_llseek,
	.fallocate	= shm_fallocate,
};

/*
 * shm_file_operations_huge is now identical to shm_file_operations,
 * but we keep it distinct for the sake of is_file_shm_hugepages().
 */
static const struct file_operations shm_file_operations_huge = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
	.llseek		= noop_llseek,
	.fallocate	= shm_fallocate,
};

bool is_file_shm_hugepages(struct file *file)
{
	return file->f_op == &shm_file_operations_huge;
}
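
/*
 * shm_vm_ops is a thin wrapper layer: shm_mmap() saves the backing
 * file's (shmem or hugetlbfs) vm_ops in sfd->vm_ops and installs
 * shm_vm_ops on the vma, so that .open/.close can maintain shm_nattch
 * and the attach/detach timestamps while every other operation is
 * delegated to the backing implementation.
 */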
static const struct vm_operations_struct shm_vm_ops = {
	.open	= shm_open,	/* callback for a new vm-area open */
	.close	= shm_close,	/* callback for when the vm-area is released */
	.fault	= shm_fault,
	.split	= shm_split,
	.pagesize = shm_pagesize,
#if defined(CONFIG_NUMA)
	.set_policy = shm_set_policy,
	.get_policy = shm_get_policy,
#endif
};

/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rwsem held as a writer.
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
	key_t key = params->key;
	int shmflg = params->flg;
	size_t size = params->u.size;
	int error;
	struct shmid_kernel *shp;
	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct file *file;
	char name[13];
	vm_flags_t acctflag = 0;

	if (size < SHMMIN || size > ns->shm_ctlmax)
		return -EINVAL;

	if (numpages << PAGE_SHIFT < size)
		return -ENOSPC;

	if (ns->shm_tot + numpages < ns->shm_tot ||
			ns->shm_tot + numpages > ns->shm_ctlall)
		return -ENOSPC;

	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
	if (unlikely(!shp))
		return -ENOMEM;

	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->mlock_user = NULL;

	shp->shm_perm.security = NULL;
	error = security_shm_alloc(&shp->shm_perm);
	if (error) {
		kvfree(shp);
		return error;
	}

	sprintf(name, "SYSV%08x", key);
	if (shmflg & SHM_HUGETLB) {
		struct hstate *hs;
		size_t hugesize;

		hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
		if (!hs) {
			error = -EINVAL;
			goto no_file;
		}
		hugesize = ALIGN(size, huge_page_size(hs));

		/* hugetlb_file_setup applies strict accounting */
		if (shmflg & SHM_NORESERVE)
			acctflag = VM_NORESERVE;
		file = hugetlb_file_setup(name, hugesize, acctflag,
				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
	} else {
		/*
		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
		 * if it's asked for.
		 */
		if ((shmflg & SHM_NORESERVE) &&
				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			acctflag = VM_NORESERVE;
		file = shmem_kernel_file_setup(name, size, acctflag);
	}
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto no_file;

	shp->shm_cprid = get_pid(task_tgid(current));
	shp->shm_lprid = NULL;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = ktime_get_real_seconds();
	shp->shm_segsz = size;
	shp->shm_nattch = 0;
	shp->shm_file = file;
	shp->shm_creator = current;

	/* ipc_addid() locks shp upon success. */
	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
	if (error < 0)
		goto no_id;

	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);

	/*
	 * shmid gets reported as "inode#" in /proc/pid/maps.
	 * proc-ps tools use this. Changing this will break them.
	 */
	file_inode(file)->i_ino = shp->shm_perm.id;

	ns->shm_tot += numpages;
	error = shp->shm_perm.id;

	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	return error;

no_id:
	ipc_update_pid(&shp->shm_cprid, NULL);
	ipc_update_pid(&shp->shm_lprid, NULL);
	if (is_file_hugepages(file) && shp->mlock_user)
		user_shm_unlock(size, shp->mlock_user);
	fput(file);
	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
	return error;
no_file:
	call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
	return error;
}
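
/*
 * Illustrative userspace sketch (not upstream code): the huge page size
 * handed to hstate_sizelog() above is encoded into shmflg as log2(size)
 * in the bits starting at SHM_HUGE_SHIFT. The uapi headers provide
 * ready-made values, e.g.:
 *
 *	int id = shmget(IPC_PRIVATE, 4UL << 20,
 *			IPC_CREAT | SHM_HUGETLB | SHM_HUGE_2MB | 0600);
 *	// equivalently: SHM_HUGETLB | (21 << SHM_HUGE_SHIFT)
 *
 * Leaving those bits zero selects the default huge page size.
 */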
/*
 * Called with shm_ids.rwsem and ipcp locked.
 */
static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
				struct ipc_params *params)
{
	struct shmid_kernel *shp;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	if (shp->shm_segsz < params->u.size)
		return -EINVAL;

	return 0;
}

long ksys_shmget(key_t key, size_t size, int shmflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops shm_ops = {
		.getnew = newseg,
		.associate = security_shm_associate,
		.more_checks = shm_more_checks,
	};
	struct ipc_params shm_params;

	ns = current->nsproxy->ipc_ns;

	shm_params.key = key;
	shm_params.flg = shmflg;
	shm_params.u.size = size;

	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
}

SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
	return ksys_shmget(key, size, shmflg);
}

static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		memset(&out, 0, sizeof(out));
		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->shm_perm.uid	= tbuf_old.shm_perm.uid;
		out->shm_perm.gid	= tbuf_old.shm_perm.gid;
		out->shm_perm.mode	= tbuf_old.shm_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if (in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
	unsigned long *rss_add, unsigned long *swp_add)
{
	struct inode *inode;

	inode = file_inode(shp->shm_file);

	if (is_file_hugepages(shp->shm_file)) {
		struct address_space *mapping = inode->i_mapping;
		struct hstate *h = hstate_file(shp->shm_file);
		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
	} else {
#ifdef CONFIG_SHMEM
		struct shmem_inode_info *info = SHMEM_I(inode);

		spin_lock_irq(&info->lock);
		*rss_add += inode->i_mapping->nrpages;
		*swp_add += info->swapped;
		spin_unlock_irq(&info->lock);
#else
		*rss_add += inode->i_mapping->nrpages;
#endif
	}
}

/*
 * Called with shm_ids.rwsem held as a reader
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
		unsigned long *swp)
{
	int next_id;
	int total, in_use;

	*rss = 0;
	*swp = 0;

	in_use = shm_ids(ns).in_use;

	for (total = 0, next_id = 0; total < in_use; next_id++) {
		struct kern_ipc_perm *ipc;
		struct shmid_kernel *shp;

		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (ipc == NULL)
			continue;
		shp = container_of(ipc, struct shmid_kernel, shm_perm);

		shm_add_rss_swap(shp, rss, swp);

		total++;
	}
}

/*
 * This function handles some shmctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
		       struct shmid64_ds *shmid64)
{
	struct kern_ipc_perm *ipcp;
	struct shmid_kernel *shp;
	int err;

	down_write(&shm_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
				      &shmid64->shm_perm, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);

	err = security_shm_shmctl(&shp->shm_perm, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&shp->shm_perm);
		/* do_shm_rmid unlocks the ipc object and rcu */
		do_shm_rmid(ns, ipcp);
		goto out_up;
	case IPC_SET:
		ipc_lock_object(&shp->shm_perm);
		err = ipc_update_perm(&shmid64->shm_perm, ipcp);
		if (err)
			goto out_unlock0;
		shp->shm_ctim = ktime_get_real_seconds();
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&shm_ids(ns).rwsem);
	return err;
}

static int shmctl_ipc_info(struct ipc_namespace *ns,
			   struct shminfo64 *shminfo)
{
	int err = security_shm_shmctl(NULL, IPC_INFO);
	if (!err) {
		memset(shminfo, 0, sizeof(*shminfo));
		shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
		shminfo->shmmax = ns->shm_ctlmax;
		shminfo->shmall = ns->shm_ctlall;
		shminfo->shmmin = SHMMIN;
		down_read(&shm_ids(ns).rwsem);
		err = ipc_get_maxidx(&shm_ids(ns));
		up_read(&shm_ids(ns).rwsem);
		if (err < 0)
			err = 0;
	}
	return err;
}

static int shmctl_shm_info(struct ipc_namespace *ns,
			   struct shm_info *shm_info)
{
	int err = security_shm_shmctl(NULL, SHM_INFO);
	if (!err) {
		memset(shm_info, 0, sizeof(*shm_info));
		down_read(&shm_ids(ns).rwsem);
		shm_info->used_ids = shm_ids(ns).in_use;
		shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
		shm_info->shm_tot = ns->shm_tot;
		shm_info->swap_attempts = 0;
		shm_info->swap_successes = 0;
		err = ipc_get_maxidx(&shm_ids(ns));
		up_read(&shm_ids(ns).rwsem);
		if (err < 0)
			err = 0;
	}
	return err;
}

static int shmctl_stat(struct ipc_namespace *ns, int shmid,
			int cmd, struct shmid64_ds *tbuf)
{
	struct shmid_kernel *shp;
	int err;

	memset(tbuf, 0, sizeof(*tbuf));

	rcu_read_lock();
	if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
		shp = shm_obtain_object(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_unlock;
		}
	} else { /* IPC_STAT */
		shp = shm_obtain_object_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_unlock;
		}
	}

	/*
	 * Semantically SHM_STAT_ANY ought to be identical to
	 * that functionality provided by the /proc/sysvipc/
	 * interface. As such, only audit these calls and
	 * do not do traditional S_IRUGO permission checks on
	 * the ipc object.
	 */
	if (cmd == SHM_STAT_ANY)
		audit_ipc_obj(&shp->shm_perm);
	else {
		err = -EACCES;
		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
			goto out_unlock;
	}

	err = security_shm_shmctl(&shp->shm_perm, cmd);
	if (err)
		goto out_unlock;

	ipc_lock_object(&shp->shm_perm);

	if (!ipc_valid_object(&shp->shm_perm)) {
		ipc_unlock_object(&shp->shm_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
	tbuf->shm_segsz	= shp->shm_segsz;
	tbuf->shm_atime	= shp->shm_atim;
	tbuf->shm_dtime	= shp->shm_dtim;
	tbuf->shm_ctime	= shp->shm_ctim;
#ifndef CONFIG_64BIT
	tbuf->shm_atime_high = shp->shm_atim >> 32;
	tbuf->shm_dtime_high = shp->shm_dtim >> 32;
	tbuf->shm_ctime_high = shp->shm_ctim >> 32;
#endif
	tbuf->shm_cpid	= pid_vnr(shp->shm_cprid);
	tbuf->shm_lpid	= pid_vnr(shp->shm_lprid);
	tbuf->shm_nattch = shp->shm_nattch;

	if (cmd == IPC_STAT) {
		/*
		 * As defined in SUS:
		 * Return 0 on success
		 */
		err = 0;
	} else {
		/*
		 * SHM_STAT and SHM_STAT_ANY (both Linux specific)
		 * Return the full id, including the sequence number
		 */
		err = shp->shm_perm.id;
	}

	ipc_unlock_object(&shp->shm_perm);
out_unlock:
	rcu_read_unlock();
	return err;
}

static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
{
	struct shmid_kernel *shp;
	struct file *shm_file;
	int err;

	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock1;
	}

	audit_ipc_obj(&(shp->shm_perm));
	err = security_shm_shmctl(&shp->shm_perm, cmd);
	if (err)
		goto out_unlock1;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		err = -EIDRM;
		goto out_unlock0;
	}

	if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
		kuid_t euid = current_euid();

		if (!uid_eq(euid, shp->shm_perm.uid) &&
		    !uid_eq(euid, shp->shm_perm.cuid)) {
			err = -EPERM;
			goto out_unlock0;
		}
		if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
			err = -EPERM;
			goto out_unlock0;
		}
	}

	shm_file = shp->shm_file;
	if (is_file_hugepages(shm_file))
		goto out_unlock0;

	if (cmd == SHM_LOCK) {
		struct user_struct *user = current_user();

		err = shmem_lock(shm_file, 1, user);
		if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
			shp->shm_perm.mode |= SHM_LOCKED;
			shp->mlock_user = user;
		}
		goto out_unlock0;
	}

	/* SHM_UNLOCK */
	if (!(shp->shm_perm.mode & SHM_LOCKED))
		goto out_unlock0;
	shmem_lock(shm_file, 0, shp->mlock_user);
	shp->shm_perm.mode &= ~SHM_LOCKED;
	shp->mlock_user = NULL;
	get_file(shm_file);
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	shmem_unlock_mapping(shm_file->f_mapping);

	fput(shm_file);
	return err;

out_unlock0:
	ipc_unlock_object(&shp->shm_perm);
out_unlock1:
	rcu_read_unlock();
	return err;
}

long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
{
	int err, version;
	struct ipc_namespace *ns;
	struct shmid64_ds sem64;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO: {
		struct shminfo64 shminfo;
		err = shmctl_ipc_info(ns, &shminfo);
		if (err < 0)
			return err;
		if (copy_shminfo_to_user(buf, &shminfo, version))
			err = -EFAULT;
		return err;
	}
	case SHM_INFO: {
		struct shm_info shm_info;
		err = shmctl_shm_info(ns, &shm_info);
		if (err < 0)
			return err;
		if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
			err = -EFAULT;
		return err;
	}
	case SHM_STAT:
	case SHM_STAT_ANY:
	case IPC_STAT: {
		err = shmctl_stat(ns, shmid, cmd, &sem64);
		if (err < 0)
			return err;
		if (copy_shmid_to_user(buf, &sem64, version))
			err = -EFAULT;
		return err;
	}
	case IPC_SET:
		if (copy_shmid_from_user(&sem64, buf, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return shmctl_down(ns, shmid, cmd, &sem64);
	case SHM_LOCK:
	case SHM_UNLOCK:
		return shmctl_do_lock(ns, shmid, cmd);
	default:
		return -EINVAL;
	}
}

SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
	return ksys_shmctl(shmid, cmd, buf);
}
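
/*
 * Illustrative userspace sketch (not upstream code): per the dispatch
 * above, IPC_STAT returns 0 and fills the caller's buffer, while the
 * Linux-specific SHM_STAT/SHM_STAT_ANY return the full segment id
 * including the sequence number:
 *
 *	struct shmid_ds ds;
 *	if (shmctl(id, IPC_STAT, &ds) == 0)
 *		printf("size=%zu nattch=%lu\n",
 *		       ds.shm_segsz, (unsigned long)ds.shm_nattch);
 */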
#ifdef CONFIG_COMPAT

struct compat_shmid_ds {
	struct compat_ipc_perm shm_perm;
	int shm_segsz;
	compat_time_t shm_atime;
	compat_time_t shm_dtime;
	compat_time_t shm_ctime;
	compat_ipc_pid_t shm_cpid;
	compat_ipc_pid_t shm_lpid;
	unsigned short shm_nattch;
	unsigned short shm_unused;
	compat_uptr_t shm_unused2;
	compat_uptr_t shm_unused3;
};

struct compat_shminfo64 {
	compat_ulong_t shmmax;
	compat_ulong_t shmmin;
	compat_ulong_t shmmni;
	compat_ulong_t shmseg;
	compat_ulong_t shmall;
	compat_ulong_t __unused1;
	compat_ulong_t __unused2;
	compat_ulong_t __unused3;
	compat_ulong_t __unused4;
};

struct compat_shm_info {
	compat_int_t used_ids;
	compat_ulong_t shm_tot, shm_rss, shm_swp;
	compat_ulong_t swap_attempts, swap_successes;
};

static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
					int version)
{
	if (in->shmmax > INT_MAX)
		in->shmmax = INT_MAX;
	if (version == IPC_64) {
		struct compat_shminfo64 info;
		memset(&info, 0, sizeof(info));
		info.shmmax = in->shmmax;
		info.shmmin = in->shmmin;
		info.shmmni = in->shmmni;
		info.shmseg = in->shmseg;
		info.shmall = in->shmall;
		return copy_to_user(buf, &info, sizeof(info));
	} else {
		struct shminfo info;
		memset(&info, 0, sizeof(info));
		info.shmmax = in->shmmax;
		info.shmmin = in->shmmin;
		info.shmmni = in->shmmni;
		info.shmseg = in->shmseg;
		info.shmall = in->shmall;
		return copy_to_user(buf, &info, sizeof(info));
	}
}

static int put_compat_shm_info(struct shm_info *ip,
				struct compat_shm_info __user *uip)
{
	struct compat_shm_info info;

	memset(&info, 0, sizeof(info));
	info.used_ids = ip->used_ids;
	info.shm_tot = ip->shm_tot;
	info.shm_rss = ip->shm_rss;
	info.shm_swp = ip->shm_swp;
	info.swap_attempts = ip->swap_attempts;
	info.swap_successes = ip->swap_successes;
	return copy_to_user(uip, &info, sizeof(info));
}

static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_shmid64_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
		v.shm_atime	 = lower_32_bits(in->shm_atime);
		v.shm_atime_high = upper_32_bits(in->shm_atime);
		v.shm_dtime	 = lower_32_bits(in->shm_dtime);
		v.shm_dtime_high = upper_32_bits(in->shm_dtime);
		v.shm_ctime	 = lower_32_bits(in->shm_ctime);
		v.shm_ctime_high = upper_32_bits(in->shm_ctime);
		v.shm_segsz = in->shm_segsz;
		v.shm_nattch = in->shm_nattch;
		v.shm_cpid = in->shm_cpid;
		v.shm_lpid = in->shm_lpid;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_shmid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
		v.shm_perm.key = in->shm_perm.key;
		v.shm_atime = in->shm_atime;
		v.shm_dtime = in->shm_dtime;
		v.shm_ctime = in->shm_ctime;
		v.shm_segsz = in->shm_segsz;
		v.shm_nattch = in->shm_nattch;
		v.shm_cpid = in->shm_cpid;
		v.shm_lpid = in->shm_lpid;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
					int version)
{
	memset(out, 0, sizeof(*out));
	if (version == IPC_64) {
		struct compat_shmid64_ds __user *p = buf;
		return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
	} else {
		struct compat_shmid_ds __user *p = buf;
		return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
	}
}

long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
{
	struct ipc_namespace *ns;
	struct shmid64_ds sem64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	switch (cmd) {
	case IPC_INFO: {
		struct shminfo64 shminfo;
		err = shmctl_ipc_info(ns, &shminfo);
		if (err < 0)
			return err;
		if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
			err = -EFAULT;
		return err;
	}
	case SHM_INFO: {
		struct shm_info shm_info;
		err = shmctl_shm_info(ns, &shm_info);
		if (err < 0)
			return err;
		if (put_compat_shm_info(&shm_info, uptr))
			err = -EFAULT;
		return err;
	}
	case IPC_STAT:
	case SHM_STAT_ANY:
	case SHM_STAT:
		err = shmctl_stat(ns, shmid, cmd, &sem64);
		if (err < 0)
			return err;
		if (copy_compat_shmid_to_user(uptr, &sem64, version))
			err = -EFAULT;
		return err;

	case IPC_SET:
		if (copy_compat_shmid_from_user(&sem64, uptr, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return shmctl_down(ns, shmid, cmd, &sem64);
	case SHM_LOCK:
	case SHM_UNLOCK:
		return shmctl_do_lock(ns, shmid, cmd);
	default:
		return -EINVAL;
	}
	return err;
}

COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
{
	return compat_ksys_shmctl(shmid, cmd, uptr);
}
#endif
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg,
	      ulong *raddr, unsigned long shmlba)
{
	struct shmid_kernel *shp;
	unsigned long addr = (unsigned long)shmaddr;
	unsigned long size;
	struct file *file, *base;
	int    err;
	unsigned long flags = MAP_SHARED;
	unsigned long prot;
	int acc_mode;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	int f_flags;
	unsigned long populate = 0;

	err = -EINVAL;
	if (shmid < 0)
		goto out;

	if (addr) {
		if (addr & (shmlba - 1)) {
			if (shmflg & SHM_RND) {
				addr &= ~(shmlba - 1);  /* round down */

				/*
				 * Ensure that the round-down is non-nil
				 * when remapping. This can happen for
				 * cases when addr < shmlba.
				 */
				if (!addr && (shmflg & SHM_REMAP))
					goto out;
			} else
#ifndef __ARCH_FORCE_SHMLBA
				if (addr & ~PAGE_MASK)
#endif
					goto out;
		}

		flags |= MAP_FIXED;
	} else if ((shmflg & SHM_REMAP))
		goto out;

	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_flags = O_RDONLY;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_flags = O_RDWR;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	rcu_read_lock();
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock;
	}

	err = -EACCES;
	if (ipcperms(ns, &shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	ipc_lock_object(&shp->shm_perm);

	/* check if shm_destroy() is tearing down shp */
	if (!ipc_valid_object(&shp->shm_perm)) {
		ipc_unlock_object(&shp->shm_perm);
		err = -EIDRM;
		goto out_unlock;
	}

	/*
	 * We need to take a reference to the real shm file to prevent the
	 * pointer from becoming stale in cases where the lifetime of the outer
	 * file extends beyond that of the shm segment. It's not usually
	 * possible, but it can happen during remap_file_pages() emulation as
	 * that unmaps the memory, then does ->mmap() via file reference only.
	 * We'll deny the ->mmap() if the shm segment was since removed, but to
	 * detect shm ID reuse we need to compare the file pointers.
	 */
	base = get_file(shp->shm_file);
	shp->shm_nattch++;
	size = i_size_read(file_inode(base));
	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();

	err = -ENOMEM;
	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
	if (!sfd) {
		fput(base);
		goto out_nattch;
	}

	file = alloc_file_clone(base, f_flags,
			  is_file_hugepages(base) ?
				&shm_file_operations_huge :
				&shm_file_operations);
	err = PTR_ERR(file);
	if (IS_ERR(file)) {
		kfree(sfd);
		fput(base);
		goto out_nattch;
	}

	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);
	sfd->file = base;
	sfd->vm_ops = NULL;
	file->private_data = sfd;

	err = security_mmap_file(file, prot, flags);
	if (err)
		goto out_fput;

	if (down_write_killable(&current->mm->mmap_sem)) {
		err = -EINTR;
		goto out_fput;
	}

	if (addr && !(shmflg & SHM_REMAP)) {
		err = -EINVAL;
		if (addr + size < addr)
			goto invalid;

		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
	}

	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
	*raddr = addr;
	err = 0;
	if (IS_ERR_VALUE(addr))
		err = (long)addr;
invalid:
	up_write(&current->mm->mmap_sem);
	if (populate)
		mm_populate(addr, populate);

out_fput:
	fput(file);

out_nattch:
	down_write(&shm_ids(ns).rwsem);
	shp = shm_lock(ns, shmid);
	shp->shm_nattch--;
	if (shm_may_destroy(ns, shp))
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rwsem);
	return err;

out_unlock:
	rcu_read_unlock();
out:
	return err;
}

SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}
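
/*
 * Illustrative userspace sketch (not upstream code): a segment can be
 * attached several times with different access modes; each attach takes
 * the do_shmat() path above and bumps shm_nattch. shmat() returns
 * (void *)-1 on error:
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *rw = shmat(id, NULL, 0);		// kernel-chosen address
 *	char *ro = shmat(id, NULL, SHM_RDONLY);	// second, read-only view
 *	if (rw != (void *)-1 && ro != (void *)-1)
 *		rw[0] = 'x';			// observable via ro[0]
 */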
#ifdef CONFIG_COMPAT

#ifndef COMPAT_SHMLBA
#define COMPAT_SHMLBA	SHMLBA
#endif

COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}
#endif

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
long ksys_shmdt(char __user *shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr = (unsigned long)shmaddr;
	int retval = -EINVAL;
#ifdef CONFIG_MMU
	loff_t size = 0;
	struct file *file;
	struct vm_area_struct *next;
#endif

	if (addr & ~PAGE_MASK)
		return retval;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/*
	 * This function tries to be smart and unmap shm segments that
	 * were modified by partial mlock or munmap calls:
	 * - It first determines the size of the shm segment that should be
	 *   unmapped: It searches for a vma that is backed by shm and that
	 *   started at address shmaddr. It records its size and then unmaps
	 *   it.
	 * - Then it unmaps all shm vmas that started at shmaddr and that
	 *   are within the initially determined size and that are from the
	 *   same shm segment from which we determined the size.
	 * Errors from do_munmap are ignored: the function only fails if
	 * it's called with invalid parameters or if it's called to unmap
	 * a part of a vma. Both calls in this function are for full vmas,
	 * the parameters are directly copied from the vma itself and always
	 * valid - therefore do_munmap cannot fail. (famous last words?)
	 */
	/*
	 * If it had been mremap()'d, the starting address would not
	 * match the usual checks anyway. So assume all vma's are
	 * above the starting address given.
	 */
	vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
	while (vma) {
		next = vma->vm_next;

		/*
		 * Check if the starting address would match, i.e. it's
		 * a fragment created by mprotect() and/or munmap(), or it
		 * otherwise starts at this address with no hassles.
		 */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

			/*
			 * Record the file of the shm segment being
			 * unmapped. With mremap(), someone could place
			 * pages from another segment but with equal offsets
			 * in the range we are unmapping.
			 */
			file = vma->vm_file;
			size = i_size_read(file_inode(vma->vm_file));
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
			/*
			 * We discovered the size of the shm segment, so
			 * break out of here and fall through to the next
			 * loop that uses the size information to stop
			 * searching for matching vma's.
			 */
			retval = 0;
			vma = next;
			break;
		}
		vma = next;
	}

	/*
	 * We need look no further than the maximum address a fragment
	 * could possibly have landed at. Also cast things to loff_t to
	 * prevent overflows and make comparisons vs. equal-width types.
	 */
	size = PAGE_ALIGN(size);
	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
		next = vma->vm_next;

		/* finding a matching vma now does not alter retval */
		if ((vma->vm_ops == &shm_vm_ops) &&
		    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
		    (vma->vm_file == file))
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
		vma = next;
	}

#else	/* CONFIG_MMU */
	/* under NOMMU conditions, the exact address to be destroyed must be
	 * given
	 */
	if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
		retval = 0;
	}

#endif

	up_write(&mm->mmap_sem);
	return retval;
}

SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
	return ksys_shmdt(shmaddr);
}
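
/*
 * Illustrative userspace sketch (not upstream code): the two-pass scan
 * in ksys_shmdt() exists so a detach still works after the attachment
 * has been split into several vmas, e.g. by punching a hole with
 * munmap():
 *
 *	int id = shmget(IPC_PRIVATE, 3 * 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	// one vma covering 3 pages
 *	munmap(p + 4096, 4096);		// splits it into two fragments
 *	shmdt(p);			// both fragments are unmapped
 *
 * Only fragments whose vm_pgoff matches their distance from shmaddr and
 * whose vm_file matches the first fragment are torn down.
 */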
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
{
	struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct shmid_kernel *shp;
	unsigned long rss = 0, swp = 0;

	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
	shm_add_rss_swap(shp, &rss, &swp);

#if BITS_PER_LONG <= 32
#define SIZE_SPEC "%10lu"
#else
#define SIZE_SPEC "%21lu"
#endif

	seq_printf(s,
		   "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
		   "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
		   SIZE_SPEC " " SIZE_SPEC "\n",
		   shp->shm_perm.key,
		   shp->shm_perm.id,
		   shp->shm_perm.mode,
		   shp->shm_segsz,
		   pid_nr_ns(shp->shm_cprid, pid_ns),
		   pid_nr_ns(shp->shm_lprid, pid_ns),
		   shp->shm_nattch,
		   from_kuid_munged(user_ns, shp->shm_perm.uid),
		   from_kgid_munged(user_ns, shp->shm_perm.gid),
		   from_kuid_munged(user_ns, shp->shm_perm.cuid),
		   from_kgid_munged(user_ns, shp->shm_perm.cgid),
		   shp->shm_atim,
		   shp->shm_dtim,
		   shp->shm_ctim,
		   rss * PAGE_SIZE,
		   swp * PAGE_SIZE);

	return 0;
}
#endif