inode.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. /*
  2. * linux/fs/proc/inode.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. */
  6. #include <linux/time.h>
  7. #include <linux/proc_fs.h>
  8. #include <linux/kernel.h>
  9. #include <linux/pid_namespace.h>
  10. #include <linux/mm.h>
  11. #include <linux/string.h>
  12. #include <linux/stat.h>
  13. #include <linux/completion.h>
  14. #include <linux/poll.h>
  15. #include <linux/printk.h>
  16. #include <linux/file.h>
  17. #include <linux/limits.h>
  18. #include <linux/init.h>
  19. #include <linux/module.h>
  20. #include <linux/sysctl.h>
  21. #include <linux/seq_file.h>
  22. #include <linux/slab.h>
  23. #include <linux/mount.h>
  24. #include <linux/magic.h>
  25. #include <asm/uaccess.h>
  26. #include "internal.h"
  27. static void proc_evict_inode(struct inode *inode)
  28. {
  29. struct proc_dir_entry *de;
  30. struct ctl_table_header *head;
  31. truncate_inode_pages_final(&inode->i_data);
  32. clear_inode(inode);
  33. /* Stop tracking associated processes */
  34. put_pid(PROC_I(inode)->pid);
  35. /* Let go of any associated proc directory entry */
  36. de = PDE(inode);
  37. if (de)
  38. pde_put(de);
  39. head = PROC_I(inode)->sysctl;
  40. if (head) {
  41. RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
  42. sysctl_head_put(head);
  43. }
  44. }
  /* Slab cache that backs every struct proc_inode; set up in proc_init_inodecache(). */
  static struct kmem_cache *proc_inode_cachep;
  46. static struct inode *proc_alloc_inode(struct super_block *sb)
  47. {
  48. struct proc_inode *ei;
  49. struct inode *inode;
  50. ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
  51. if (!ei)
  52. return NULL;
  53. ei->pid = NULL;
  54. ei->fd = 0;
  55. ei->op.proc_get_link = NULL;
  56. ei->pde = NULL;
  57. ei->sysctl = NULL;
  58. ei->sysctl_entry = NULL;
  59. ei->ns_ops = NULL;
  60. inode = &ei->vfs_inode;
  61. inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  62. return inode;
  63. }
  64. static void proc_i_callback(struct rcu_head *head)
  65. {
  66. struct inode *inode = container_of(head, struct inode, i_rcu);
  67. kmem_cache_free(proc_inode_cachep, PROC_I(inode));
  68. }
  69. static void proc_destroy_inode(struct inode *inode)
  70. {
  71. call_rcu(&inode->i_rcu, proc_i_callback);
  72. }
  73. static void init_once(void *foo)
  74. {
  75. struct proc_inode *ei = (struct proc_inode *) foo;
  76. inode_init_once(&ei->vfs_inode);
  77. }
  78. void __init proc_init_inodecache(void)
  79. {
  80. proc_inode_cachep = kmem_cache_create("proc_inode_cache",
  81. sizeof(struct proc_inode),
  82. 0, (SLAB_RECLAIM_ACCOUNT|
  83. SLAB_MEM_SPREAD|SLAB_ACCOUNT|
  84. SLAB_PANIC),
  85. init_once);
  86. }
  87. static int proc_show_options(struct seq_file *seq, struct dentry *root)
  88. {
  89. struct super_block *sb = root->d_sb;
  90. struct pid_namespace *pid = sb->s_fs_info;
  91. if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
  92. seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
  93. if (pid->hide_pid != 0)
  94. seq_printf(seq, ",hidepid=%u", pid->hide_pid);
  95. return 0;
  96. }
  97. static const struct super_operations proc_sops = {
  98. .alloc_inode = proc_alloc_inode,
  99. .destroy_inode = proc_destroy_inode,
  100. .drop_inode = generic_delete_inode,
  101. .evict_inode = proc_evict_inode,
  102. .statfs = simple_statfs,
  103. .remount_fs = proc_remount,
  104. .show_options = proc_show_options,
  105. };
  /*
   * Value added to pde->in_use by proc_entry_rundown().  unuse_pde() compares
   * the decremented counter against it to detect the last user leaving.
   */
  enum { BIAS = -1U << 31 };
  107. static inline int use_pde(struct proc_dir_entry *pde)
  108. {
  109. return atomic_inc_unless_negative(&pde->in_use);
  110. }
  111. static void unuse_pde(struct proc_dir_entry *pde)
  112. {
  113. if (atomic_dec_return(&pde->in_use) == BIAS)
  114. complete(pde->pde_unload_completion);
  115. }
  116. /* pde is locked */
  117. static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
  118. {
  119. if (pdeo->closing) {
  120. /* somebody else is doing that, just wait */
  121. DECLARE_COMPLETION_ONSTACK(c);
  122. pdeo->c = &c;
  123. spin_unlock(&pde->pde_unload_lock);
  124. wait_for_completion(&c);
  125. spin_lock(&pde->pde_unload_lock);
  126. } else {
  127. struct file *file;
  128. pdeo->closing = 1;
  129. spin_unlock(&pde->pde_unload_lock);
  130. file = pdeo->file;
  131. pde->proc_fops->release(file_inode(file), file);
  132. spin_lock(&pde->pde_unload_lock);
  133. list_del_init(&pdeo->lh);
  134. if (pdeo->c)
  135. complete(pdeo->c);
  136. kfree(pdeo);
  137. }
  138. }
  139. void proc_entry_rundown(struct proc_dir_entry *de)
  140. {
  141. DECLARE_COMPLETION_ONSTACK(c);
  142. /* Wait until all existing callers into module are done. */
  143. de->pde_unload_completion = &c;
  144. if (atomic_add_return(BIAS, &de->in_use) != BIAS)
  145. wait_for_completion(&c);
  146. spin_lock(&de->pde_unload_lock);
  147. while (!list_empty(&de->pde_openers)) {
  148. struct pde_opener *pdeo;
  149. pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
  150. close_pdeo(de, pdeo);
  151. }
  152. spin_unlock(&de->pde_unload_lock);
  153. }
  154. static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
  155. {
  156. struct proc_dir_entry *pde = PDE(file_inode(file));
  157. loff_t rv = -EINVAL;
  158. if (use_pde(pde)) {
  159. loff_t (*llseek)(struct file *, loff_t, int);
  160. llseek = pde->proc_fops->llseek;
  161. if (!llseek)
  162. llseek = default_llseek;
  163. rv = llseek(file, offset, whence);
  164. unuse_pde(pde);
  165. }
  166. return rv;
  167. }
  168. static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
  169. {
  170. ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
  171. struct proc_dir_entry *pde = PDE(file_inode(file));
  172. ssize_t rv = -EIO;
  173. if (use_pde(pde)) {
  174. read = pde->proc_fops->read;
  175. if (read)
  176. rv = read(file, buf, count, ppos);
  177. unuse_pde(pde);
  178. }
  179. return rv;
  180. }
  181. static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
  182. {
  183. ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
  184. struct proc_dir_entry *pde = PDE(file_inode(file));
  185. ssize_t rv = -EIO;
  186. if (use_pde(pde)) {
  187. write = pde->proc_fops->write;
  188. if (write)
  189. rv = write(file, buf, count, ppos);
  190. unuse_pde(pde);
  191. }
  192. return rv;
  193. }
  194. static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
  195. {
  196. struct proc_dir_entry *pde = PDE(file_inode(file));
  197. unsigned int rv = DEFAULT_POLLMASK;
  198. unsigned int (*poll)(struct file *, struct poll_table_struct *);
  199. if (use_pde(pde)) {
  200. poll = pde->proc_fops->poll;
  201. if (poll)
  202. rv = poll(file, pts);
  203. unuse_pde(pde);
  204. }
  205. return rv;
  206. }
  207. static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  208. {
  209. struct proc_dir_entry *pde = PDE(file_inode(file));
  210. long rv = -ENOTTY;
  211. long (*ioctl)(struct file *, unsigned int, unsigned long);
  212. if (use_pde(pde)) {
  213. ioctl = pde->proc_fops->unlocked_ioctl;
  214. if (ioctl)
  215. rv = ioctl(file, cmd, arg);
  216. unuse_pde(pde);
  217. }
  218. return rv;
  219. }
  #ifdef CONFIG_COMPAT
  /*
   * compat_ioctl wrapper for regular proc files (CONFIG_COMPAT builds only).
   *
   * Holds a use_pde() reference around the entry's ->compat_ioctl.  Returns
   * -ENOTTY if the entry can no longer be used or provides no handler.
   */
  static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  {
      struct proc_dir_entry *pde = PDE(file_inode(file));
      long (*ioctl_fn)(struct file *, unsigned int, unsigned long);
      long ret = -ENOTTY;

      if (!use_pde(pde))
          return ret;

      ioctl_fn = pde->proc_fops->compat_ioctl;
      if (ioctl_fn)
          ret = ioctl_fn(file, cmd, arg);

      unuse_pde(pde);
      return ret;
  }
  #endif
  235. static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
  236. {
  237. struct proc_dir_entry *pde = PDE(file_inode(file));
  238. int rv = -EIO;
  239. int (*mmap)(struct file *, struct vm_area_struct *);
  240. if (use_pde(pde)) {
  241. mmap = pde->proc_fops->mmap;
  242. if (mmap)
  243. rv = mmap(file, vma);
  244. unuse_pde(pde);
  245. }
  246. return rv;
  247. }
  248. static unsigned long
  249. proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
  250. unsigned long len, unsigned long pgoff,
  251. unsigned long flags)
  252. {
  253. struct proc_dir_entry *pde = PDE(file_inode(file));
  254. unsigned long rv = -EIO;
  255. if (use_pde(pde)) {
  256. typeof(proc_reg_get_unmapped_area) *get_area;
  257. get_area = pde->proc_fops->get_unmapped_area;
  258. #ifdef CONFIG_MMU
  259. if (!get_area)
  260. get_area = current->mm->get_unmapped_area;
  261. #endif
  262. if (get_area)
  263. rv = get_area(file, orig_addr, len, pgoff, flags);
  264. else
  265. rv = orig_addr;
  266. unuse_pde(pde);
  267. }
  268. return rv;
  269. }
  270. static int proc_reg_open(struct inode *inode, struct file *file)
  271. {
  272. struct proc_dir_entry *pde = PDE(inode);
  273. int rv = 0;
  274. int (*open)(struct inode *, struct file *);
  275. int (*release)(struct inode *, struct file *);
  276. struct pde_opener *pdeo;
  277. /*
  278. * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
  279. * sequence. ->release won't be called because ->proc_fops will be
  280. * cleared. Depending on complexity of ->release, consequences vary.
  281. *
  282. * We can't wait for mercy when close will be done for real, it's
  283. * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
  284. * by hand in remove_proc_entry(). For this, save opener's credentials
  285. * for later.
  286. */
  287. pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
  288. if (!pdeo)
  289. return -ENOMEM;
  290. if (!use_pde(pde)) {
  291. kfree(pdeo);
  292. return -ENOENT;
  293. }
  294. open = pde->proc_fops->open;
  295. release = pde->proc_fops->release;
  296. if (open)
  297. rv = open(inode, file);
  298. if (rv == 0 && release) {
  299. /* To know what to release. */
  300. pdeo->file = file;
  301. /* Strictly for "too late" ->release in proc_reg_release(). */
  302. spin_lock(&pde->pde_unload_lock);
  303. list_add(&pdeo->lh, &pde->pde_openers);
  304. spin_unlock(&pde->pde_unload_lock);
  305. } else
  306. kfree(pdeo);
  307. unuse_pde(pde);
  308. return rv;
  309. }
  310. static int proc_reg_release(struct inode *inode, struct file *file)
  311. {
  312. struct proc_dir_entry *pde = PDE(inode);
  313. struct pde_opener *pdeo;
  314. spin_lock(&pde->pde_unload_lock);
  315. list_for_each_entry(pdeo, &pde->pde_openers, lh) {
  316. if (pdeo->file == file) {
  317. close_pdeo(pde, pdeo);
  318. break;
  319. }
  320. }
  321. spin_unlock(&pde->pde_unload_lock);
  322. return 0;
  323. }
  324. static const struct file_operations proc_reg_file_ops = {
  325. .llseek = proc_reg_llseek,
  326. .read = proc_reg_read,
  327. .write = proc_reg_write,
  328. .poll = proc_reg_poll,
  329. .unlocked_ioctl = proc_reg_unlocked_ioctl,
  330. #ifdef CONFIG_COMPAT
  331. .compat_ioctl = proc_reg_compat_ioctl,
  332. #endif
  333. .mmap = proc_reg_mmap,
  334. .get_unmapped_area = proc_reg_get_unmapped_area,
  335. .open = proc_reg_open,
  336. .release = proc_reg_release,
  337. };
  #ifdef CONFIG_COMPAT
  /*
   * Same table as proc_reg_file_ops but without .compat_ioctl.
   * proc_get_inode() picks it when the entry's proc_fops has no
   * ->compat_ioctl.
   */
  static const struct file_operations proc_reg_file_ops_no_compat = {
      .llseek             = proc_reg_llseek,
      .read               = proc_reg_read,
      .write              = proc_reg_write,
      .poll               = proc_reg_poll,
      .unlocked_ioctl     = proc_reg_unlocked_ioctl,
      .mmap               = proc_reg_mmap,
      .get_unmapped_area  = proc_reg_get_unmapped_area,
      .open               = proc_reg_open,
      .release            = proc_reg_release,
  };
  #endif
  /*
   * Delayed-call handler registered by proc_get_link(): @p is the
   * proc_dir_entry whose use_pde() reference is dropped here.
   */
  static void proc_put_link(void *p)
  {
      struct proc_dir_entry *pde = p;

      unuse_pde(pde);
  }
  355. static const char *proc_get_link(struct dentry *dentry,
  356. struct inode *inode,
  357. struct delayed_call *done)
  358. {
  359. struct proc_dir_entry *pde = PDE(inode);
  360. if (unlikely(!use_pde(pde)))
  361. return ERR_PTR(-EINVAL);
  362. set_delayed_call(done, proc_put_link, pde);
  363. return pde->data;
  364. }
  365. const struct inode_operations proc_link_inode_operations = {
  366. .readlink = generic_readlink,
  367. .get_link = proc_get_link,
  368. };
  369. struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
  370. {
  371. struct inode *inode = new_inode_pseudo(sb);
  372. if (inode) {
  373. inode->i_ino = de->low_ino;
  374. inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  375. PROC_I(inode)->pde = de;
  376. if (is_empty_pde(de)) {
  377. make_empty_dir_inode(inode);
  378. return inode;
  379. }
  380. if (de->mode) {
  381. inode->i_mode = de->mode;
  382. inode->i_uid = de->uid;
  383. inode->i_gid = de->gid;
  384. }
  385. if (de->size)
  386. inode->i_size = de->size;
  387. if (de->nlink)
  388. set_nlink(inode, de->nlink);
  389. WARN_ON(!de->proc_iops);
  390. inode->i_op = de->proc_iops;
  391. if (de->proc_fops) {
  392. if (S_ISREG(inode->i_mode)) {
  393. #ifdef CONFIG_COMPAT
  394. if (!de->proc_fops->compat_ioctl)
  395. inode->i_fop =
  396. &proc_reg_file_ops_no_compat;
  397. else
  398. #endif
  399. inode->i_fop = &proc_reg_file_ops;
  400. } else {
  401. inode->i_fop = de->proc_fops;
  402. }
  403. }
  404. } else
  405. pde_put(de);
  406. return inode;
  407. }
  408. int proc_fill_super(struct super_block *s)
  409. {
  410. struct inode *root_inode;
  411. int ret;
  412. s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
  413. s->s_blocksize = 1024;
  414. s->s_blocksize_bits = 10;
  415. s->s_magic = PROC_SUPER_MAGIC;
  416. s->s_op = &proc_sops;
  417. s->s_time_gran = 1;
  418. pde_get(&proc_root);
  419. root_inode = proc_get_inode(s, &proc_root);
  420. if (!root_inode) {
  421. pr_err("proc_fill_super: get root inode failed\n");
  422. return -ENOMEM;
  423. }
  424. s->s_root = d_make_root(root_inode);
  425. if (!s->s_root) {
  426. pr_err("proc_fill_super: allocate dentry failed\n");
  427. return -ENOMEM;
  428. }
  429. ret = proc_setup_self(s);
  430. if (ret) {
  431. return ret;
  432. }
  433. return proc_setup_thread_self(s);
  434. }