inode.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. /*
  2. * linux/fs/proc/inode.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. */
  6. #include <linux/time.h>
  7. #include <linux/proc_fs.h>
  8. #include <linux/kernel.h>
  9. #include <linux/pid_namespace.h>
  10. #include <linux/mm.h>
  11. #include <linux/string.h>
  12. #include <linux/stat.h>
  13. #include <linux/completion.h>
  14. #include <linux/poll.h>
  15. #include <linux/printk.h>
  16. #include <linux/file.h>
  17. #include <linux/limits.h>
  18. #include <linux/init.h>
  19. #include <linux/module.h>
  20. #include <linux/sysctl.h>
  21. #include <linux/seq_file.h>
  22. #include <linux/slab.h>
  23. #include <linux/mount.h>
  24. #include <linux/magic.h>
  25. #include <linux/namei.h>
  26. #include <asm/uaccess.h>
  27. #include "internal.h"
  28. static void proc_evict_inode(struct inode *inode)
  29. {
  30. struct proc_dir_entry *de;
  31. struct ctl_table_header *head;
  32. truncate_inode_pages_final(&inode->i_data);
  33. clear_inode(inode);
  34. /* Stop tracking associated processes */
  35. put_pid(PROC_I(inode)->pid);
  36. /* Let go of any associated proc directory entry */
  37. de = PDE(inode);
  38. if (de)
  39. pde_put(de);
  40. head = PROC_I(inode)->sysctl;
  41. if (head) {
  42. RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
  43. sysctl_head_put(head);
  44. }
  45. }
  /*
   * Slab cache used for struct proc_inode objects in this file; created by
   * proc_init_inodecache().
   */
  static struct kmem_cache *proc_inode_cachep;
  47. static struct inode *proc_alloc_inode(struct super_block *sb)
  48. {
  49. struct proc_inode *ei;
  50. struct inode *inode;
  51. ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
  52. if (!ei)
  53. return NULL;
  54. ei->pid = NULL;
  55. ei->fd = 0;
  56. ei->op.proc_get_link = NULL;
  57. ei->pde = NULL;
  58. ei->sysctl = NULL;
  59. ei->sysctl_entry = NULL;
  60. ei->ns_ops = NULL;
  61. inode = &ei->vfs_inode;
  62. inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  63. return inode;
  64. }
  65. static void proc_i_callback(struct rcu_head *head)
  66. {
  67. struct inode *inode = container_of(head, struct inode, i_rcu);
  68. kmem_cache_free(proc_inode_cachep, PROC_I(inode));
  69. }
  70. static void proc_destroy_inode(struct inode *inode)
  71. {
  72. call_rcu(&inode->i_rcu, proc_i_callback);
  73. }
  74. static void init_once(void *foo)
  75. {
  76. struct proc_inode *ei = (struct proc_inode *) foo;
  77. inode_init_once(&ei->vfs_inode);
  78. }
  79. void __init proc_init_inodecache(void)
  80. {
  81. proc_inode_cachep = kmem_cache_create("proc_inode_cache",
  82. sizeof(struct proc_inode),
  83. 0, (SLAB_RECLAIM_ACCOUNT|
  84. SLAB_MEM_SPREAD|SLAB_PANIC),
  85. init_once);
  86. }
  87. static int proc_show_options(struct seq_file *seq, struct dentry *root)
  88. {
  89. struct super_block *sb = root->d_sb;
  90. struct pid_namespace *pid = sb->s_fs_info;
  91. if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
  92. seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
  93. if (pid->hide_pid != 0)
  94. seq_printf(seq, ",hidepid=%u", pid->hide_pid);
  95. return 0;
  96. }
  97. static const struct super_operations proc_sops = {
  98. .alloc_inode = proc_alloc_inode,
  99. .destroy_inode = proc_destroy_inode,
  100. .drop_inode = generic_delete_inode,
  101. .evict_inode = proc_evict_inode,
  102. .statfs = simple_statfs,
  103. .remount_fs = proc_remount,
  104. .show_options = proc_show_options,
  105. };
  /*
   * Value added to ->in_use by proc_entry_rundown(); unuse_pde() compares
   * the decremented counter against it to detect the last user leaving.
   */
  enum {
  	BIAS = (-1U) << 31
  };
  107. static inline int use_pde(struct proc_dir_entry *pde)
  108. {
  109. return atomic_inc_unless_negative(&pde->in_use);
  110. }
  111. static void unuse_pde(struct proc_dir_entry *pde)
  112. {
  113. if (atomic_dec_return(&pde->in_use) == BIAS)
  114. complete(pde->pde_unload_completion);
  115. }
  116. /* pde is locked */
  117. static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
  118. {
  119. if (pdeo->closing) {
  120. /* somebody else is doing that, just wait */
  121. DECLARE_COMPLETION_ONSTACK(c);
  122. pdeo->c = &c;
  123. spin_unlock(&pde->pde_unload_lock);
  124. wait_for_completion(&c);
  125. spin_lock(&pde->pde_unload_lock);
  126. } else {
  127. struct file *file;
  128. pdeo->closing = 1;
  129. spin_unlock(&pde->pde_unload_lock);
  130. file = pdeo->file;
  131. pde->proc_fops->release(file_inode(file), file);
  132. spin_lock(&pde->pde_unload_lock);
  133. list_del_init(&pdeo->lh);
  134. if (pdeo->c)
  135. complete(pdeo->c);
  136. kfree(pdeo);
  137. }
  138. }
  139. void proc_entry_rundown(struct proc_dir_entry *de)
  140. {
  141. DECLARE_COMPLETION_ONSTACK(c);
  142. /* Wait until all existing callers into module are done. */
  143. de->pde_unload_completion = &c;
  144. if (atomic_add_return(BIAS, &de->in_use) != BIAS)
  145. wait_for_completion(&c);
  146. spin_lock(&de->pde_unload_lock);
  147. while (!list_empty(&de->pde_openers)) {
  148. struct pde_opener *pdeo;
  149. pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
  150. close_pdeo(de, pdeo);
  151. }
  152. spin_unlock(&de->pde_unload_lock);
  153. }
  154. static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
  155. {
  156. struct proc_dir_entry *pde = PDE(file_inode(file));
  157. loff_t rv = -EINVAL;
  158. if (use_pde(pde)) {
  159. loff_t (*llseek)(struct file *, loff_t, int);
  160. llseek = pde->proc_fops->llseek;
  161. if (!llseek)
  162. llseek = default_llseek;
  163. rv = llseek(file, offset, whence);
  164. unuse_pde(pde);
  165. }
  166. return rv;
  167. }
  168. static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
  169. {
  170. ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
  171. struct proc_dir_entry *pde = PDE(file_inode(file));
  172. ssize_t rv = -EIO;
  173. if (use_pde(pde)) {
  174. read = pde->proc_fops->read;
  175. if (read)
  176. rv = read(file, buf, count, ppos);
  177. unuse_pde(pde);
  178. }
  179. return rv;
  180. }
  181. static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
  182. {
  183. ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
  184. struct proc_dir_entry *pde = PDE(file_inode(file));
  185. ssize_t rv = -EIO;
  186. if (use_pde(pde)) {
  187. write = pde->proc_fops->write;
  188. if (write)
  189. rv = write(file, buf, count, ppos);
  190. unuse_pde(pde);
  191. }
  192. return rv;
  193. }
  194. static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
  195. {
  196. struct proc_dir_entry *pde = PDE(file_inode(file));
  197. unsigned int rv = DEFAULT_POLLMASK;
  198. unsigned int (*poll)(struct file *, struct poll_table_struct *);
  199. if (use_pde(pde)) {
  200. poll = pde->proc_fops->poll;
  201. if (poll)
  202. rv = poll(file, pts);
  203. unuse_pde(pde);
  204. }
  205. return rv;
  206. }
  207. static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  208. {
  209. struct proc_dir_entry *pde = PDE(file_inode(file));
  210. long rv = -ENOTTY;
  211. long (*ioctl)(struct file *, unsigned int, unsigned long);
  212. if (use_pde(pde)) {
  213. ioctl = pde->proc_fops->unlocked_ioctl;
  214. if (ioctl)
  215. rv = ioctl(file, cmd, arg);
  216. unuse_pde(pde);
  217. }
  218. return rv;
  219. }
  #ifdef CONFIG_COMPAT
  /*
   * ->compat_ioctl wrapper for regular proc files (CONFIG_COMPAT only).
   *
   * Returns the entry's ->compat_ioctl result, or -ENOTTY when use_pde()
   * fails or the entry has no such hook.
   */
  static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  {
  	struct proc_dir_entry *pde = PDE(file_inode(file));
  	long (*ioctl_fn)(struct file *, unsigned int, unsigned long);
  	long ret = -ENOTTY;

  	if (!use_pde(pde))
  		return ret;

  	ioctl_fn = pde->proc_fops->compat_ioctl;
  	if (ioctl_fn)
  		ret = ioctl_fn(file, cmd, arg);

  	unuse_pde(pde);
  	return ret;
  }
  #endif
  235. static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
  236. {
  237. struct proc_dir_entry *pde = PDE(file_inode(file));
  238. int rv = -EIO;
  239. int (*mmap)(struct file *, struct vm_area_struct *);
  240. if (use_pde(pde)) {
  241. mmap = pde->proc_fops->mmap;
  242. if (mmap)
  243. rv = mmap(file, vma);
  244. unuse_pde(pde);
  245. }
  246. return rv;
  247. }
  248. static unsigned long
  249. proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
  250. unsigned long len, unsigned long pgoff,
  251. unsigned long flags)
  252. {
  253. struct proc_dir_entry *pde = PDE(file_inode(file));
  254. unsigned long rv = -EIO;
  255. if (use_pde(pde)) {
  256. typeof(proc_reg_get_unmapped_area) *get_area;
  257. get_area = pde->proc_fops->get_unmapped_area;
  258. #ifdef CONFIG_MMU
  259. if (!get_area)
  260. get_area = current->mm->get_unmapped_area;
  261. #endif
  262. if (get_area)
  263. rv = get_area(file, orig_addr, len, pgoff, flags);
  264. else
  265. rv = orig_addr;
  266. unuse_pde(pde);
  267. }
  268. return rv;
  269. }
  270. static int proc_reg_open(struct inode *inode, struct file *file)
  271. {
  272. struct proc_dir_entry *pde = PDE(inode);
  273. int rv = 0;
  274. int (*open)(struct inode *, struct file *);
  275. int (*release)(struct inode *, struct file *);
  276. struct pde_opener *pdeo;
  277. /*
  278. * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
  279. * sequence. ->release won't be called because ->proc_fops will be
  280. * cleared. Depending on complexity of ->release, consequences vary.
  281. *
  282. * We can't wait for mercy when close will be done for real, it's
  283. * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
  284. * by hand in remove_proc_entry(). For this, save opener's credentials
  285. * for later.
  286. */
  287. pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
  288. if (!pdeo)
  289. return -ENOMEM;
  290. if (!use_pde(pde)) {
  291. kfree(pdeo);
  292. return -ENOENT;
  293. }
  294. open = pde->proc_fops->open;
  295. release = pde->proc_fops->release;
  296. if (open)
  297. rv = open(inode, file);
  298. if (rv == 0 && release) {
  299. /* To know what to release. */
  300. pdeo->file = file;
  301. /* Strictly for "too late" ->release in proc_reg_release(). */
  302. spin_lock(&pde->pde_unload_lock);
  303. list_add(&pdeo->lh, &pde->pde_openers);
  304. spin_unlock(&pde->pde_unload_lock);
  305. } else
  306. kfree(pdeo);
  307. unuse_pde(pde);
  308. return rv;
  309. }
  310. static int proc_reg_release(struct inode *inode, struct file *file)
  311. {
  312. struct proc_dir_entry *pde = PDE(inode);
  313. struct pde_opener *pdeo;
  314. spin_lock(&pde->pde_unload_lock);
  315. list_for_each_entry(pdeo, &pde->pde_openers, lh) {
  316. if (pdeo->file == file) {
  317. close_pdeo(pde, pdeo);
  318. break;
  319. }
  320. }
  321. spin_unlock(&pde->pde_unload_lock);
  322. return 0;
  323. }
  324. static const struct file_operations proc_reg_file_ops = {
  325. .llseek = proc_reg_llseek,
  326. .read = proc_reg_read,
  327. .write = proc_reg_write,
  328. .poll = proc_reg_poll,
  329. .unlocked_ioctl = proc_reg_unlocked_ioctl,
  330. #ifdef CONFIG_COMPAT
  331. .compat_ioctl = proc_reg_compat_ioctl,
  332. #endif
  333. .mmap = proc_reg_mmap,
  334. .get_unmapped_area = proc_reg_get_unmapped_area,
  335. .open = proc_reg_open,
  336. .release = proc_reg_release,
  337. };
  #ifdef CONFIG_COMPAT
  /*
   * Same table as proc_reg_file_ops but without ->compat_ioctl;
   * proc_get_inode() picks it for entries whose proc_fops has no
   * ->compat_ioctl hook.
   */
  static const struct file_operations proc_reg_file_ops_no_compat = {
  	.open			= proc_reg_open,
  	.release		= proc_reg_release,
  	.llseek			= proc_reg_llseek,
  	.read			= proc_reg_read,
  	.write			= proc_reg_write,
  	.poll			= proc_reg_poll,
  	.mmap			= proc_reg_mmap,
  	.get_unmapped_area	= proc_reg_get_unmapped_area,
  	.unlocked_ioctl		= proc_reg_unlocked_ioctl,
  };
  #endif
  351. static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
  352. {
  353. struct proc_dir_entry *pde = PDE(dentry->d_inode);
  354. if (unlikely(!use_pde(pde)))
  355. return ERR_PTR(-EINVAL);
  356. nd_set_link(nd, pde->data);
  357. return pde;
  358. }
  /*
   * ->put_link for proc symlinks: @p is the entry returned by
   * proc_follow_link(); drop the usage count taken there.
   */
  static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
  {
  	struct proc_dir_entry *entry = p;

  	unuse_pde(entry);
  }
  363. const struct inode_operations proc_link_inode_operations = {
  364. .readlink = generic_readlink,
  365. .follow_link = proc_follow_link,
  366. .put_link = proc_put_link,
  367. };
  368. struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
  369. {
  370. struct inode *inode = new_inode_pseudo(sb);
  371. if (inode) {
  372. inode->i_ino = de->low_ino;
  373. inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  374. PROC_I(inode)->pde = de;
  375. if (de->mode) {
  376. inode->i_mode = de->mode;
  377. inode->i_uid = de->uid;
  378. inode->i_gid = de->gid;
  379. }
  380. if (de->size)
  381. inode->i_size = de->size;
  382. if (de->nlink)
  383. set_nlink(inode, de->nlink);
  384. WARN_ON(!de->proc_iops);
  385. inode->i_op = de->proc_iops;
  386. if (de->proc_fops) {
  387. if (S_ISREG(inode->i_mode)) {
  388. #ifdef CONFIG_COMPAT
  389. if (!de->proc_fops->compat_ioctl)
  390. inode->i_fop =
  391. &proc_reg_file_ops_no_compat;
  392. else
  393. #endif
  394. inode->i_fop = &proc_reg_file_ops;
  395. } else {
  396. inode->i_fop = de->proc_fops;
  397. }
  398. }
  399. } else
  400. pde_put(de);
  401. return inode;
  402. }
  403. int proc_fill_super(struct super_block *s)
  404. {
  405. struct inode *root_inode;
  406. int ret;
  407. s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
  408. s->s_blocksize = 1024;
  409. s->s_blocksize_bits = 10;
  410. s->s_magic = PROC_SUPER_MAGIC;
  411. s->s_op = &proc_sops;
  412. s->s_time_gran = 1;
  413. pde_get(&proc_root);
  414. root_inode = proc_get_inode(s, &proc_root);
  415. if (!root_inode) {
  416. pr_err("proc_fill_super: get root inode failed\n");
  417. return -ENOMEM;
  418. }
  419. s->s_root = d_make_root(root_inode);
  420. if (!s->s_root) {
  421. pr_err("proc_fill_super: allocate dentry failed\n");
  422. return -ENOMEM;
  423. }
  424. ret = proc_setup_self(s);
  425. if (ret) {
  426. return ret;
  427. }
  428. return proc_setup_thread_self(s);
  429. }