inode.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /*
  2. *
  3. * Copyright (C) 2011 Novell Inc.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 as published by
  7. * the Free Software Foundation.
  8. */
  9. #include <linux/fs.h>
  10. #include <linux/slab.h>
  11. #include <linux/cred.h>
  12. #include <linux/xattr.h>
  13. #include <linux/posix_acl.h>
  14. #include "overlayfs.h"
  15. int ovl_setattr(struct dentry *dentry, struct iattr *attr)
  16. {
  17. int err;
  18. struct dentry *upperdentry;
  19. const struct cred *old_cred;
  20. /*
  21. * Check for permissions before trying to copy-up. This is redundant
  22. * since it will be rechecked later by ->setattr() on upper dentry. But
  23. * without this, copy-up can be triggered by just about anybody.
  24. *
  25. * We don't initialize inode->size, which just means that
  26. * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
  27. * check for a swapfile (which this won't be anyway).
  28. */
  29. err = setattr_prepare(dentry, attr);
  30. if (err)
  31. return err;
  32. err = ovl_want_write(dentry);
  33. if (err)
  34. goto out;
  35. err = ovl_copy_up(dentry);
  36. if (!err) {
  37. upperdentry = ovl_dentry_upper(dentry);
  38. if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
  39. attr->ia_valid &= ~ATTR_MODE;
  40. inode_lock(upperdentry->d_inode);
  41. old_cred = ovl_override_creds(dentry->d_sb);
  42. err = notify_change(upperdentry, attr, NULL);
  43. revert_creds(old_cred);
  44. if (!err)
  45. ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
  46. inode_unlock(upperdentry->d_inode);
  47. }
  48. ovl_drop_write(dentry);
  49. out:
  50. return err;
  51. }
  52. int ovl_getattr(const struct path *path, struct kstat *stat,
  53. u32 request_mask, unsigned int flags)
  54. {
  55. struct dentry *dentry = path->dentry;
  56. enum ovl_path_type type;
  57. struct path realpath;
  58. const struct cred *old_cred;
  59. bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
  60. int err;
  61. type = ovl_path_real(dentry, &realpath);
  62. old_cred = ovl_override_creds(dentry->d_sb);
  63. err = vfs_getattr(&realpath, stat, request_mask, flags);
  64. if (err)
  65. goto out;
  66. /*
  67. * When all layers are on the same fs, all real inode number are
  68. * unique, so we use the overlay st_dev, which is friendly to du -x.
  69. *
  70. * We also use st_ino of the copy up origin, if we know it.
  71. * This guaranties constant st_dev/st_ino across copy up.
  72. *
  73. * If filesystem supports NFS export ops, this also guaranties
  74. * persistent st_ino across mount cycle.
  75. */
  76. if (ovl_same_sb(dentry->d_sb)) {
  77. if (OVL_TYPE_ORIGIN(type)) {
  78. struct kstat lowerstat;
  79. u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
  80. ovl_path_lower(dentry, &realpath);
  81. err = vfs_getattr(&realpath, &lowerstat,
  82. lowermask, flags);
  83. if (err)
  84. goto out;
  85. WARN_ON_ONCE(stat->dev != lowerstat.dev);
  86. /*
  87. * Lower hardlinks are broken on copy up to different
  88. * upper files, so we cannot use the lower origin st_ino
  89. * for those different files, even for the same fs case.
  90. */
  91. if (is_dir || lowerstat.nlink == 1)
  92. stat->ino = lowerstat.ino;
  93. }
  94. stat->dev = dentry->d_sb->s_dev;
  95. } else if (is_dir) {
  96. /*
  97. * If not all layers are on the same fs the pair {real st_ino;
  98. * overlay st_dev} is not unique, so use the non persistent
  99. * overlay st_ino.
  100. *
  101. * Always use the overlay st_dev for directories, so 'find
  102. * -xdev' will scan the entire overlay mount and won't cross the
  103. * overlay mount boundaries.
  104. */
  105. stat->dev = dentry->d_sb->s_dev;
  106. stat->ino = dentry->d_inode->i_ino;
  107. }
  108. /*
  109. * It's probably not worth it to count subdirs to get the
  110. * correct link count. nlink=1 seems to pacify 'find' and
  111. * other utilities.
  112. */
  113. if (is_dir && OVL_TYPE_MERGE(type))
  114. stat->nlink = 1;
  115. out:
  116. revert_creds(old_cred);
  117. return err;
  118. }
  119. int ovl_permission(struct inode *inode, int mask)
  120. {
  121. bool is_upper;
  122. struct inode *realinode = ovl_inode_real(inode, &is_upper);
  123. const struct cred *old_cred;
  124. int err;
  125. /* Careful in RCU walk mode */
  126. if (!realinode) {
  127. WARN_ON(!(mask & MAY_NOT_BLOCK));
  128. return -ECHILD;
  129. }
  130. /*
  131. * Check overlay inode with the creds of task and underlying inode
  132. * with creds of mounter
  133. */
  134. err = generic_permission(inode, mask);
  135. if (err)
  136. return err;
  137. old_cred = ovl_override_creds(inode->i_sb);
  138. if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
  139. mask &= ~(MAY_WRITE | MAY_APPEND);
  140. /* Make sure mounter can read file for copy up later */
  141. mask |= MAY_READ;
  142. }
  143. err = inode_permission(realinode, mask);
  144. revert_creds(old_cred);
  145. return err;
  146. }
  147. static const char *ovl_get_link(struct dentry *dentry,
  148. struct inode *inode,
  149. struct delayed_call *done)
  150. {
  151. const struct cred *old_cred;
  152. const char *p;
  153. if (!dentry)
  154. return ERR_PTR(-ECHILD);
  155. old_cred = ovl_override_creds(dentry->d_sb);
  156. p = vfs_get_link(ovl_dentry_real(dentry), done);
  157. revert_creds(old_cred);
  158. return p;
  159. }
  160. bool ovl_is_private_xattr(const char *name)
  161. {
  162. return strncmp(name, OVL_XATTR_PREFIX,
  163. sizeof(OVL_XATTR_PREFIX) - 1) == 0;
  164. }
  165. int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
  166. size_t size, int flags)
  167. {
  168. int err;
  169. struct path realpath;
  170. enum ovl_path_type type = ovl_path_real(dentry, &realpath);
  171. const struct cred *old_cred;
  172. err = ovl_want_write(dentry);
  173. if (err)
  174. goto out;
  175. if (!value && !OVL_TYPE_UPPER(type)) {
  176. err = vfs_getxattr(realpath.dentry, name, NULL, 0);
  177. if (err < 0)
  178. goto out_drop_write;
  179. }
  180. err = ovl_copy_up(dentry);
  181. if (err)
  182. goto out_drop_write;
  183. if (!OVL_TYPE_UPPER(type))
  184. ovl_path_upper(dentry, &realpath);
  185. old_cred = ovl_override_creds(dentry->d_sb);
  186. if (value)
  187. err = vfs_setxattr(realpath.dentry, name, value, size, flags);
  188. else {
  189. WARN_ON(flags != XATTR_REPLACE);
  190. err = vfs_removexattr(realpath.dentry, name);
  191. }
  192. revert_creds(old_cred);
  193. out_drop_write:
  194. ovl_drop_write(dentry);
  195. out:
  196. return err;
  197. }
  198. int ovl_xattr_get(struct dentry *dentry, const char *name,
  199. void *value, size_t size)
  200. {
  201. struct dentry *realdentry = ovl_dentry_real(dentry);
  202. ssize_t res;
  203. const struct cred *old_cred;
  204. old_cred = ovl_override_creds(dentry->d_sb);
  205. res = vfs_getxattr(realdentry, name, value, size);
  206. revert_creds(old_cred);
  207. return res;
  208. }
  209. static bool ovl_can_list(const char *s)
  210. {
  211. /* List all non-trusted xatts */
  212. if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
  213. return true;
  214. /* Never list trusted.overlay, list other trusted for superuser only */
  215. return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
  216. }
  217. ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
  218. {
  219. struct dentry *realdentry = ovl_dentry_real(dentry);
  220. ssize_t res;
  221. size_t len;
  222. char *s;
  223. const struct cred *old_cred;
  224. old_cred = ovl_override_creds(dentry->d_sb);
  225. res = vfs_listxattr(realdentry, list, size);
  226. revert_creds(old_cred);
  227. if (res <= 0 || size == 0)
  228. return res;
  229. /* filter out private xattrs */
  230. for (s = list, len = res; len;) {
  231. size_t slen = strnlen(s, len) + 1;
  232. /* underlying fs providing us with an broken xattr list? */
  233. if (WARN_ON(slen > len))
  234. return -EIO;
  235. len -= slen;
  236. if (!ovl_can_list(s)) {
  237. res -= slen;
  238. memmove(s, s + slen, len);
  239. } else {
  240. s += slen;
  241. }
  242. }
  243. return res;
  244. }
  245. struct posix_acl *ovl_get_acl(struct inode *inode, int type)
  246. {
  247. struct inode *realinode = ovl_inode_real(inode, NULL);
  248. const struct cred *old_cred;
  249. struct posix_acl *acl;
  250. if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
  251. return NULL;
  252. old_cred = ovl_override_creds(inode->i_sb);
  253. acl = get_acl(realinode, type);
  254. revert_creds(old_cred);
  255. return acl;
  256. }
  257. static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
  258. struct dentry *realdentry)
  259. {
  260. if (OVL_TYPE_UPPER(type))
  261. return false;
  262. if (special_file(realdentry->d_inode->i_mode))
  263. return false;
  264. if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
  265. return false;
  266. return true;
  267. }
  268. int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
  269. {
  270. int err = 0;
  271. struct path realpath;
  272. enum ovl_path_type type;
  273. type = ovl_path_real(dentry, &realpath);
  274. if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
  275. err = ovl_want_write(dentry);
  276. if (!err) {
  277. err = ovl_copy_up_flags(dentry, file_flags);
  278. ovl_drop_write(dentry);
  279. }
  280. }
  281. return err;
  282. }
  283. int ovl_update_time(struct inode *inode, struct timespec *ts, int flags)
  284. {
  285. struct dentry *alias;
  286. struct path upperpath;
  287. if (!(flags & S_ATIME))
  288. return 0;
  289. alias = d_find_any_alias(inode);
  290. if (!alias)
  291. return 0;
  292. ovl_path_upper(alias, &upperpath);
  293. if (upperpath.dentry) {
  294. touch_atime(&upperpath);
  295. inode->i_atime = d_inode(upperpath.dentry)->i_atime;
  296. }
  297. dput(alias);
  298. return 0;
  299. }
  300. static const struct inode_operations ovl_file_inode_operations = {
  301. .setattr = ovl_setattr,
  302. .permission = ovl_permission,
  303. .getattr = ovl_getattr,
  304. .listxattr = ovl_listxattr,
  305. .get_acl = ovl_get_acl,
  306. .update_time = ovl_update_time,
  307. };
  308. static const struct inode_operations ovl_symlink_inode_operations = {
  309. .setattr = ovl_setattr,
  310. .get_link = ovl_get_link,
  311. .getattr = ovl_getattr,
  312. .listxattr = ovl_listxattr,
  313. .update_time = ovl_update_time,
  314. };
  315. /*
  316. * It is possible to stack overlayfs instance on top of another
  317. * overlayfs instance as lower layer. We need to annonate the
  318. * stackable i_mutex locks according to stack level of the super
  319. * block instance. An overlayfs instance can never be in stack
  320. * depth 0 (there is always a real fs below it). An overlayfs
  321. * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
  322. *
  323. * For example, here is a snip from /proc/lockdep_chains after
  324. * dir_iterate of nested overlayfs:
  325. *
  326. * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
  327. * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
  328. * [...] &type->i_mutex_dir_key (stack_depth=0)
  329. */
  330. #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
  331. static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
  332. {
  333. #ifdef CONFIG_LOCKDEP
  334. static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
  335. static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
  336. int depth = inode->i_sb->s_stack_depth - 1;
  337. if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
  338. depth = 0;
  339. if (S_ISDIR(inode->i_mode))
  340. lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
  341. else
  342. lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
  343. #endif
  344. }
  345. static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
  346. {
  347. inode->i_ino = get_next_ino();
  348. inode->i_mode = mode;
  349. inode->i_flags |= S_NOCMTIME;
  350. #ifdef CONFIG_FS_POSIX_ACL
  351. inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
  352. #endif
  353. ovl_lockdep_annotate_inode_mutex_key(inode);
  354. switch (mode & S_IFMT) {
  355. case S_IFREG:
  356. inode->i_op = &ovl_file_inode_operations;
  357. break;
  358. case S_IFDIR:
  359. inode->i_op = &ovl_dir_inode_operations;
  360. inode->i_fop = &ovl_dir_operations;
  361. break;
  362. case S_IFLNK:
  363. inode->i_op = &ovl_symlink_inode_operations;
  364. break;
  365. default:
  366. inode->i_op = &ovl_file_inode_operations;
  367. init_special_inode(inode, mode, rdev);
  368. break;
  369. }
  370. }
  371. struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
  372. {
  373. struct inode *inode;
  374. inode = new_inode(sb);
  375. if (inode)
  376. ovl_fill_inode(inode, mode, rdev);
  377. return inode;
  378. }
  379. static int ovl_inode_test(struct inode *inode, void *data)
  380. {
  381. return ovl_inode_real(inode, NULL) == data;
  382. }
  383. static int ovl_inode_set(struct inode *inode, void *data)
  384. {
  385. inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK);
  386. return 0;
  387. }
  388. struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode)
  389. {
  390. struct inode *inode;
  391. inode = iget5_locked(sb, (unsigned long) realinode,
  392. ovl_inode_test, ovl_inode_set, realinode);
  393. if (inode && inode->i_state & I_NEW) {
  394. ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
  395. set_nlink(inode, realinode->i_nlink);
  396. unlock_new_inode(inode);
  397. }
  398. return inode;
  399. }