super.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319
  1. /*
  2. *
  3. * Copyright (C) 2011 Novell Inc.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 as published by
  7. * the Free Software Foundation.
  8. */
  9. #include <uapi/linux/magic.h>
  10. #include <linux/fs.h>
  11. #include <linux/namei.h>
  12. #include <linux/xattr.h>
  13. #include <linux/mount.h>
  14. #include <linux/parser.h>
  15. #include <linux/module.h>
  16. #include <linux/statfs.h>
  17. #include <linux/seq_file.h>
  18. #include <linux/posix_acl_xattr.h>
  19. #include "overlayfs.h"
  20. MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  21. MODULE_DESCRIPTION("Overlay filesystem");
  22. MODULE_LICENSE("GPL");
  23. struct ovl_dir_cache;
  24. #define OVL_MAX_STACK 500
  25. static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
  26. module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
  27. MODULE_PARM_DESC(ovl_redirect_dir_def,
  28. "Default to on or off for the redirect_dir feature");
  29. static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
  30. module_param_named(index, ovl_index_def, bool, 0644);
  31. MODULE_PARM_DESC(ovl_index_def,
  32. "Default to on or off for the inodes index feature");
  33. static void ovl_entry_stack_free(struct ovl_entry *oe)
  34. {
  35. unsigned int i;
  36. for (i = 0; i < oe->numlower; i++)
  37. dput(oe->lowerstack[i].dentry);
  38. }
  39. static void ovl_dentry_release(struct dentry *dentry)
  40. {
  41. struct ovl_entry *oe = dentry->d_fsdata;
  42. if (oe) {
  43. ovl_entry_stack_free(oe);
  44. kfree_rcu(oe, rcu);
  45. }
  46. }
  47. static int ovl_check_append_only(struct inode *inode, int flag)
  48. {
  49. /*
  50. * This test was moot in vfs may_open() because overlay inode does
  51. * not have the S_APPEND flag, so re-check on real upper inode
  52. */
  53. if (IS_APPEND(inode)) {
  54. if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
  55. return -EPERM;
  56. if (flag & O_TRUNC)
  57. return -EPERM;
  58. }
  59. return 0;
  60. }
  61. static struct dentry *ovl_d_real(struct dentry *dentry,
  62. const struct inode *inode,
  63. unsigned int open_flags, unsigned int flags)
  64. {
  65. struct dentry *real;
  66. int err;
  67. if (flags & D_REAL_UPPER)
  68. return ovl_dentry_upper(dentry);
  69. if (!d_is_reg(dentry)) {
  70. if (!inode || inode == d_inode(dentry))
  71. return dentry;
  72. goto bug;
  73. }
  74. if (open_flags) {
  75. err = ovl_open_maybe_copy_up(dentry, open_flags);
  76. if (err)
  77. return ERR_PTR(err);
  78. }
  79. real = ovl_dentry_upper(dentry);
  80. if (real && (!inode || inode == d_inode(real))) {
  81. if (!inode) {
  82. err = ovl_check_append_only(d_inode(real), open_flags);
  83. if (err)
  84. return ERR_PTR(err);
  85. }
  86. return real;
  87. }
  88. real = ovl_dentry_lower(dentry);
  89. if (!real)
  90. goto bug;
  91. /* Handle recursion */
  92. real = d_real(real, inode, open_flags, 0);
  93. if (!inode || inode == d_inode(real))
  94. return real;
  95. bug:
  96. WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
  97. inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
  98. return dentry;
  99. }
  100. static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
  101. {
  102. struct ovl_entry *oe = dentry->d_fsdata;
  103. unsigned int i;
  104. int ret = 1;
  105. for (i = 0; i < oe->numlower; i++) {
  106. struct dentry *d = oe->lowerstack[i].dentry;
  107. if (d->d_flags & DCACHE_OP_REVALIDATE) {
  108. ret = d->d_op->d_revalidate(d, flags);
  109. if (ret < 0)
  110. return ret;
  111. if (!ret) {
  112. if (!(flags & LOOKUP_RCU))
  113. d_invalidate(d);
  114. return -ESTALE;
  115. }
  116. }
  117. }
  118. return 1;
  119. }
  120. static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
  121. {
  122. struct ovl_entry *oe = dentry->d_fsdata;
  123. unsigned int i;
  124. int ret = 1;
  125. for (i = 0; i < oe->numlower; i++) {
  126. struct dentry *d = oe->lowerstack[i].dentry;
  127. if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
  128. ret = d->d_op->d_weak_revalidate(d, flags);
  129. if (ret <= 0)
  130. break;
  131. }
  132. }
  133. return ret;
  134. }
  135. static const struct dentry_operations ovl_dentry_operations = {
  136. .d_release = ovl_dentry_release,
  137. .d_real = ovl_d_real,
  138. };
  139. static const struct dentry_operations ovl_reval_dentry_operations = {
  140. .d_release = ovl_dentry_release,
  141. .d_real = ovl_d_real,
  142. .d_revalidate = ovl_dentry_revalidate,
  143. .d_weak_revalidate = ovl_dentry_weak_revalidate,
  144. };
  145. static struct kmem_cache *ovl_inode_cachep;
  146. static struct inode *ovl_alloc_inode(struct super_block *sb)
  147. {
  148. struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
  149. if (!oi)
  150. return NULL;
  151. oi->cache = NULL;
  152. oi->redirect = NULL;
  153. oi->version = 0;
  154. oi->flags = 0;
  155. oi->__upperdentry = NULL;
  156. oi->lower = NULL;
  157. mutex_init(&oi->lock);
  158. return &oi->vfs_inode;
  159. }
  160. static void ovl_i_callback(struct rcu_head *head)
  161. {
  162. struct inode *inode = container_of(head, struct inode, i_rcu);
  163. kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
  164. }
  165. static void ovl_destroy_inode(struct inode *inode)
  166. {
  167. struct ovl_inode *oi = OVL_I(inode);
  168. dput(oi->__upperdentry);
  169. kfree(oi->redirect);
  170. ovl_dir_cache_free(inode);
  171. mutex_destroy(&oi->lock);
  172. call_rcu(&inode->i_rcu, ovl_i_callback);
  173. }
  174. static void ovl_free_fs(struct ovl_fs *ofs)
  175. {
  176. unsigned i;
  177. dput(ofs->indexdir);
  178. dput(ofs->workdir);
  179. if (ofs->workdir_locked)
  180. ovl_inuse_unlock(ofs->workbasedir);
  181. dput(ofs->workbasedir);
  182. if (ofs->upperdir_locked)
  183. ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
  184. mntput(ofs->upper_mnt);
  185. for (i = 0; i < ofs->numlower; i++) {
  186. mntput(ofs->lower_layers[i].mnt);
  187. free_anon_bdev(ofs->lower_layers[i].pseudo_dev);
  188. }
  189. kfree(ofs->lower_layers);
  190. kfree(ofs->config.lowerdir);
  191. kfree(ofs->config.upperdir);
  192. kfree(ofs->config.workdir);
  193. if (ofs->creator_cred)
  194. put_cred(ofs->creator_cred);
  195. kfree(ofs);
  196. }
  197. static void ovl_put_super(struct super_block *sb)
  198. {
  199. struct ovl_fs *ofs = sb->s_fs_info;
  200. ovl_free_fs(ofs);
  201. }
  202. static int ovl_sync_fs(struct super_block *sb, int wait)
  203. {
  204. struct ovl_fs *ofs = sb->s_fs_info;
  205. struct super_block *upper_sb;
  206. int ret;
  207. if (!ofs->upper_mnt)
  208. return 0;
  209. upper_sb = ofs->upper_mnt->mnt_sb;
  210. if (!upper_sb->s_op->sync_fs)
  211. return 0;
  212. /* real inodes have already been synced by sync_filesystem(ovl_sb) */
  213. down_read(&upper_sb->s_umount);
  214. ret = upper_sb->s_op->sync_fs(upper_sb, wait);
  215. up_read(&upper_sb->s_umount);
  216. return ret;
  217. }
  218. /**
  219. * ovl_statfs
  220. * @sb: The overlayfs super block
  221. * @buf: The struct kstatfs to fill in with stats
  222. *
  223. * Get the filesystem statistics. As writes always target the upper layer
  224. * filesystem pass the statfs to the upper filesystem (if it exists)
  225. */
  226. static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
  227. {
  228. struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
  229. struct dentry *root_dentry = dentry->d_sb->s_root;
  230. struct path path;
  231. int err;
  232. ovl_path_real(root_dentry, &path);
  233. err = vfs_statfs(&path, buf);
  234. if (!err) {
  235. buf->f_namelen = ofs->namelen;
  236. buf->f_type = OVERLAYFS_SUPER_MAGIC;
  237. }
  238. return err;
  239. }
  240. /* Will this overlay be forced to mount/remount ro? */
  241. static bool ovl_force_readonly(struct ovl_fs *ofs)
  242. {
  243. return (!ofs->upper_mnt || !ofs->workdir);
  244. }
  245. /**
  246. * ovl_show_options
  247. *
  248. * Prints the mount options for a given superblock.
  249. * Returns zero; does not fail.
  250. */
  251. static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
  252. {
  253. struct super_block *sb = dentry->d_sb;
  254. struct ovl_fs *ofs = sb->s_fs_info;
  255. seq_show_option(m, "lowerdir", ofs->config.lowerdir);
  256. if (ofs->config.upperdir) {
  257. seq_show_option(m, "upperdir", ofs->config.upperdir);
  258. seq_show_option(m, "workdir", ofs->config.workdir);
  259. }
  260. if (ofs->config.default_permissions)
  261. seq_puts(m, ",default_permissions");
  262. if (ofs->config.redirect_dir != ovl_redirect_dir_def)
  263. seq_printf(m, ",redirect_dir=%s",
  264. ofs->config.redirect_dir ? "on" : "off");
  265. if (ofs->config.index != ovl_index_def)
  266. seq_printf(m, ",index=%s",
  267. ofs->config.index ? "on" : "off");
  268. return 0;
  269. }
  270. static int ovl_remount(struct super_block *sb, int *flags, char *data)
  271. {
  272. struct ovl_fs *ofs = sb->s_fs_info;
  273. if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
  274. return -EROFS;
  275. return 0;
  276. }
  277. static const struct super_operations ovl_super_operations = {
  278. .alloc_inode = ovl_alloc_inode,
  279. .destroy_inode = ovl_destroy_inode,
  280. .drop_inode = generic_delete_inode,
  281. .put_super = ovl_put_super,
  282. .sync_fs = ovl_sync_fs,
  283. .statfs = ovl_statfs,
  284. .show_options = ovl_show_options,
  285. .remount_fs = ovl_remount,
  286. };
  287. enum {
  288. OPT_LOWERDIR,
  289. OPT_UPPERDIR,
  290. OPT_WORKDIR,
  291. OPT_DEFAULT_PERMISSIONS,
  292. OPT_REDIRECT_DIR_ON,
  293. OPT_REDIRECT_DIR_OFF,
  294. OPT_INDEX_ON,
  295. OPT_INDEX_OFF,
  296. OPT_ERR,
  297. };
  298. static const match_table_t ovl_tokens = {
  299. {OPT_LOWERDIR, "lowerdir=%s"},
  300. {OPT_UPPERDIR, "upperdir=%s"},
  301. {OPT_WORKDIR, "workdir=%s"},
  302. {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
  303. {OPT_REDIRECT_DIR_ON, "redirect_dir=on"},
  304. {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"},
  305. {OPT_INDEX_ON, "index=on"},
  306. {OPT_INDEX_OFF, "index=off"},
  307. {OPT_ERR, NULL}
  308. };
  309. static char *ovl_next_opt(char **s)
  310. {
  311. char *sbegin = *s;
  312. char *p;
  313. if (sbegin == NULL)
  314. return NULL;
  315. for (p = sbegin; *p; p++) {
  316. if (*p == '\\') {
  317. p++;
  318. if (!*p)
  319. break;
  320. } else if (*p == ',') {
  321. *p = '\0';
  322. *s = p + 1;
  323. return sbegin;
  324. }
  325. }
  326. *s = NULL;
  327. return sbegin;
  328. }
  329. static int ovl_parse_opt(char *opt, struct ovl_config *config)
  330. {
  331. char *p;
  332. while ((p = ovl_next_opt(&opt)) != NULL) {
  333. int token;
  334. substring_t args[MAX_OPT_ARGS];
  335. if (!*p)
  336. continue;
  337. token = match_token(p, ovl_tokens, args);
  338. switch (token) {
  339. case OPT_UPPERDIR:
  340. kfree(config->upperdir);
  341. config->upperdir = match_strdup(&args[0]);
  342. if (!config->upperdir)
  343. return -ENOMEM;
  344. break;
  345. case OPT_LOWERDIR:
  346. kfree(config->lowerdir);
  347. config->lowerdir = match_strdup(&args[0]);
  348. if (!config->lowerdir)
  349. return -ENOMEM;
  350. break;
  351. case OPT_WORKDIR:
  352. kfree(config->workdir);
  353. config->workdir = match_strdup(&args[0]);
  354. if (!config->workdir)
  355. return -ENOMEM;
  356. break;
  357. case OPT_DEFAULT_PERMISSIONS:
  358. config->default_permissions = true;
  359. break;
  360. case OPT_REDIRECT_DIR_ON:
  361. config->redirect_dir = true;
  362. break;
  363. case OPT_REDIRECT_DIR_OFF:
  364. config->redirect_dir = false;
  365. break;
  366. case OPT_INDEX_ON:
  367. config->index = true;
  368. break;
  369. case OPT_INDEX_OFF:
  370. config->index = false;
  371. break;
  372. default:
  373. pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
  374. return -EINVAL;
  375. }
  376. }
  377. /* Workdir is useless in non-upper mount */
  378. if (!config->upperdir && config->workdir) {
  379. pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
  380. config->workdir);
  381. kfree(config->workdir);
  382. config->workdir = NULL;
  383. }
  384. return 0;
  385. }
  386. #define OVL_WORKDIR_NAME "work"
  387. #define OVL_INDEXDIR_NAME "index"
  388. static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
  389. const char *name, bool persist)
  390. {
  391. struct inode *dir = ofs->workbasedir->d_inode;
  392. struct vfsmount *mnt = ofs->upper_mnt;
  393. struct dentry *work;
  394. int err;
  395. bool retried = false;
  396. bool locked = false;
  397. err = mnt_want_write(mnt);
  398. if (err)
  399. goto out_err;
  400. inode_lock_nested(dir, I_MUTEX_PARENT);
  401. locked = true;
  402. retry:
  403. work = lookup_one_len(name, ofs->workbasedir, strlen(name));
  404. if (!IS_ERR(work)) {
  405. struct iattr attr = {
  406. .ia_valid = ATTR_MODE,
  407. .ia_mode = S_IFDIR | 0,
  408. };
  409. if (work->d_inode) {
  410. err = -EEXIST;
  411. if (retried)
  412. goto out_dput;
  413. if (persist)
  414. goto out_unlock;
  415. retried = true;
  416. ovl_workdir_cleanup(dir, mnt, work, 0);
  417. dput(work);
  418. goto retry;
  419. }
  420. err = ovl_create_real(dir, work,
  421. &(struct cattr){.mode = S_IFDIR | 0},
  422. NULL, true);
  423. if (err)
  424. goto out_dput;
  425. /*
  426. * Try to remove POSIX ACL xattrs from workdir. We are good if:
  427. *
  428. * a) success (there was a POSIX ACL xattr and was removed)
  429. * b) -ENODATA (there was no POSIX ACL xattr)
  430. * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
  431. *
  432. * There are various other error values that could effectively
  433. * mean that the xattr doesn't exist (e.g. -ERANGE is returned
  434. * if the xattr name is too long), but the set of filesystems
  435. * allowed as upper are limited to "normal" ones, where checking
  436. * for the above two errors is sufficient.
  437. */
  438. err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
  439. if (err && err != -ENODATA && err != -EOPNOTSUPP)
  440. goto out_dput;
  441. err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
  442. if (err && err != -ENODATA && err != -EOPNOTSUPP)
  443. goto out_dput;
  444. /* Clear any inherited mode bits */
  445. inode_lock(work->d_inode);
  446. err = notify_change(work, &attr, NULL);
  447. inode_unlock(work->d_inode);
  448. if (err)
  449. goto out_dput;
  450. } else {
  451. err = PTR_ERR(work);
  452. goto out_err;
  453. }
  454. out_unlock:
  455. mnt_drop_write(mnt);
  456. if (locked)
  457. inode_unlock(dir);
  458. return work;
  459. out_dput:
  460. dput(work);
  461. out_err:
  462. pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
  463. ofs->config.workdir, name, -err);
  464. work = NULL;
  465. goto out_unlock;
  466. }
  467. static void ovl_unescape(char *s)
  468. {
  469. char *d = s;
  470. for (;; s++, d++) {
  471. if (*s == '\\')
  472. s++;
  473. *d = *s;
  474. if (!*s)
  475. break;
  476. }
  477. }
  478. static int ovl_mount_dir_noesc(const char *name, struct path *path)
  479. {
  480. int err = -EINVAL;
  481. if (!*name) {
  482. pr_err("overlayfs: empty lowerdir\n");
  483. goto out;
  484. }
  485. err = kern_path(name, LOOKUP_FOLLOW, path);
  486. if (err) {
  487. pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
  488. goto out;
  489. }
  490. err = -EINVAL;
  491. if (ovl_dentry_weird(path->dentry)) {
  492. pr_err("overlayfs: filesystem on '%s' not supported\n", name);
  493. goto out_put;
  494. }
  495. if (!d_is_dir(path->dentry)) {
  496. pr_err("overlayfs: '%s' not a directory\n", name);
  497. goto out_put;
  498. }
  499. return 0;
  500. out_put:
  501. path_put_init(path);
  502. out:
  503. return err;
  504. }
  505. static int ovl_mount_dir(const char *name, struct path *path)
  506. {
  507. int err = -ENOMEM;
  508. char *tmp = kstrdup(name, GFP_KERNEL);
  509. if (tmp) {
  510. ovl_unescape(tmp);
  511. err = ovl_mount_dir_noesc(tmp, path);
  512. if (!err)
  513. if (ovl_dentry_remote(path->dentry)) {
  514. pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
  515. tmp);
  516. path_put_init(path);
  517. err = -EINVAL;
  518. }
  519. kfree(tmp);
  520. }
  521. return err;
  522. }
  523. static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
  524. const char *name)
  525. {
  526. struct kstatfs statfs;
  527. int err = vfs_statfs(path, &statfs);
  528. if (err)
  529. pr_err("overlayfs: statfs failed on '%s'\n", name);
  530. else
  531. ofs->namelen = max(ofs->namelen, statfs.f_namelen);
  532. return err;
  533. }
  534. static int ovl_lower_dir(const char *name, struct path *path,
  535. struct ovl_fs *ofs, int *stack_depth, bool *remote)
  536. {
  537. int err;
  538. err = ovl_mount_dir_noesc(name, path);
  539. if (err)
  540. goto out;
  541. err = ovl_check_namelen(path, ofs, name);
  542. if (err)
  543. goto out_put;
  544. *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
  545. if (ovl_dentry_remote(path->dentry))
  546. *remote = true;
  547. /*
  548. * The inodes index feature needs to encode and decode file
  549. * handles, so it requires that all layers support them.
  550. */
  551. if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
  552. ofs->config.index = false;
  553. pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
  554. }
  555. return 0;
  556. out_put:
  557. path_put_init(path);
  558. out:
  559. return err;
  560. }
  561. /* Workdir should not be subdir of upperdir and vice versa */
  562. static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
  563. {
  564. bool ok = false;
  565. if (workdir != upperdir) {
  566. ok = (lock_rename(workdir, upperdir) == NULL);
  567. unlock_rename(workdir, upperdir);
  568. }
  569. return ok;
  570. }
  571. static unsigned int ovl_split_lowerdirs(char *str)
  572. {
  573. unsigned int ctr = 1;
  574. char *s, *d;
  575. for (s = d = str;; s++, d++) {
  576. if (*s == '\\') {
  577. s++;
  578. } else if (*s == ':') {
  579. *d = '\0';
  580. ctr++;
  581. continue;
  582. }
  583. *d = *s;
  584. if (!*s)
  585. break;
  586. }
  587. return ctr;
  588. }
  589. static int __maybe_unused
  590. ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
  591. struct dentry *dentry, struct inode *inode,
  592. const char *name, void *buffer, size_t size)
  593. {
  594. return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
  595. }
  596. static int __maybe_unused
  597. ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
  598. struct dentry *dentry, struct inode *inode,
  599. const char *name, const void *value,
  600. size_t size, int flags)
  601. {
  602. struct dentry *workdir = ovl_workdir(dentry);
  603. struct inode *realinode = ovl_inode_real(inode);
  604. struct posix_acl *acl = NULL;
  605. int err;
  606. /* Check that everything is OK before copy-up */
  607. if (value) {
  608. acl = posix_acl_from_xattr(&init_user_ns, value, size);
  609. if (IS_ERR(acl))
  610. return PTR_ERR(acl);
  611. }
  612. err = -EOPNOTSUPP;
  613. if (!IS_POSIXACL(d_inode(workdir)))
  614. goto out_acl_release;
  615. if (!realinode->i_op->set_acl)
  616. goto out_acl_release;
  617. if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
  618. err = acl ? -EACCES : 0;
  619. goto out_acl_release;
  620. }
  621. err = -EPERM;
  622. if (!inode_owner_or_capable(inode))
  623. goto out_acl_release;
  624. posix_acl_release(acl);
  625. /*
  626. * Check if sgid bit needs to be cleared (actual setacl operation will
  627. * be done with mounter's capabilities and so that won't do it for us).
  628. */
  629. if (unlikely(inode->i_mode & S_ISGID) &&
  630. handler->flags == ACL_TYPE_ACCESS &&
  631. !in_group_p(inode->i_gid) &&
  632. !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
  633. struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
  634. err = ovl_setattr(dentry, &iattr);
  635. if (err)
  636. return err;
  637. }
  638. err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
  639. if (!err)
  640. ovl_copyattr(ovl_inode_real(inode), inode);
  641. return err;
  642. out_acl_release:
  643. posix_acl_release(acl);
  644. return err;
  645. }
  646. static int ovl_own_xattr_get(const struct xattr_handler *handler,
  647. struct dentry *dentry, struct inode *inode,
  648. const char *name, void *buffer, size_t size)
  649. {
  650. return -EOPNOTSUPP;
  651. }
  652. static int ovl_own_xattr_set(const struct xattr_handler *handler,
  653. struct dentry *dentry, struct inode *inode,
  654. const char *name, const void *value,
  655. size_t size, int flags)
  656. {
  657. return -EOPNOTSUPP;
  658. }
  659. static int ovl_other_xattr_get(const struct xattr_handler *handler,
  660. struct dentry *dentry, struct inode *inode,
  661. const char *name, void *buffer, size_t size)
  662. {
  663. return ovl_xattr_get(dentry, inode, name, buffer, size);
  664. }
  665. static int ovl_other_xattr_set(const struct xattr_handler *handler,
  666. struct dentry *dentry, struct inode *inode,
  667. const char *name, const void *value,
  668. size_t size, int flags)
  669. {
  670. return ovl_xattr_set(dentry, inode, name, value, size, flags);
  671. }
  672. static const struct xattr_handler __maybe_unused
  673. ovl_posix_acl_access_xattr_handler = {
  674. .name = XATTR_NAME_POSIX_ACL_ACCESS,
  675. .flags = ACL_TYPE_ACCESS,
  676. .get = ovl_posix_acl_xattr_get,
  677. .set = ovl_posix_acl_xattr_set,
  678. };
  679. static const struct xattr_handler __maybe_unused
  680. ovl_posix_acl_default_xattr_handler = {
  681. .name = XATTR_NAME_POSIX_ACL_DEFAULT,
  682. .flags = ACL_TYPE_DEFAULT,
  683. .get = ovl_posix_acl_xattr_get,
  684. .set = ovl_posix_acl_xattr_set,
  685. };
  686. static const struct xattr_handler ovl_own_xattr_handler = {
  687. .prefix = OVL_XATTR_PREFIX,
  688. .get = ovl_own_xattr_get,
  689. .set = ovl_own_xattr_set,
  690. };
  691. static const struct xattr_handler ovl_other_xattr_handler = {
  692. .prefix = "", /* catch all */
  693. .get = ovl_other_xattr_get,
  694. .set = ovl_other_xattr_set,
  695. };
  696. static const struct xattr_handler *ovl_xattr_handlers[] = {
  697. #ifdef CONFIG_FS_POSIX_ACL
  698. &ovl_posix_acl_access_xattr_handler,
  699. &ovl_posix_acl_default_xattr_handler,
  700. #endif
  701. &ovl_own_xattr_handler,
  702. &ovl_other_xattr_handler,
  703. NULL
  704. };
  705. static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
  706. {
  707. struct vfsmount *upper_mnt;
  708. int err;
  709. err = ovl_mount_dir(ofs->config.upperdir, upperpath);
  710. if (err)
  711. goto out;
  712. /* Upper fs should not be r/o */
  713. if (sb_rdonly(upperpath->mnt->mnt_sb)) {
  714. pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
  715. err = -EINVAL;
  716. goto out;
  717. }
  718. err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
  719. if (err)
  720. goto out;
  721. err = -EBUSY;
  722. if (ovl_inuse_trylock(upperpath->dentry)) {
  723. ofs->upperdir_locked = true;
  724. } else if (ofs->config.index) {
  725. pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
  726. goto out;
  727. } else {
  728. pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
  729. }
  730. upper_mnt = clone_private_mount(upperpath);
  731. err = PTR_ERR(upper_mnt);
  732. if (IS_ERR(upper_mnt)) {
  733. pr_err("overlayfs: failed to clone upperpath\n");
  734. goto out;
  735. }
  736. /* Don't inherit atime flags */
  737. upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
  738. ofs->upper_mnt = upper_mnt;
  739. err = 0;
  740. out:
  741. return err;
  742. }
  743. static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
  744. {
  745. struct dentry *temp;
  746. int err;
  747. ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
  748. if (!ofs->workdir)
  749. return 0;
  750. /*
  751. * Upper should support d_type, else whiteouts are visible. Given
  752. * workdir and upper are on same fs, we can do iterate_dir() on
  753. * workdir. This check requires successful creation of workdir in
  754. * previous step.
  755. */
  756. err = ovl_check_d_type_supported(workpath);
  757. if (err < 0)
  758. return err;
  759. /*
  760. * We allowed this configuration and don't want to break users over
  761. * kernel upgrade. So warn instead of erroring out.
  762. */
  763. if (!err)
  764. pr_warn("overlayfs: upper fs needs to support d_type.\n");
  765. /* Check if upper/work fs supports O_TMPFILE */
  766. temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
  767. ofs->tmpfile = !IS_ERR(temp);
  768. if (ofs->tmpfile)
  769. dput(temp);
  770. else
  771. pr_warn("overlayfs: upper fs does not support tmpfile.\n");
  772. /*
  773. * Check if upper/work fs supports trusted.overlay.* xattr
  774. */
  775. err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
  776. if (err) {
  777. ofs->noxattr = true;
  778. pr_warn("overlayfs: upper fs does not support xattr.\n");
  779. } else {
  780. vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
  781. }
  782. /* Check if upper/work fs supports file handles */
  783. if (ofs->config.index &&
  784. !ovl_can_decode_fh(ofs->workdir->d_sb)) {
  785. ofs->config.index = false;
  786. pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
  787. }
  788. return 0;
  789. }
  790. static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
  791. {
  792. int err;
  793. struct path workpath = { };
  794. err = ovl_mount_dir(ofs->config.workdir, &workpath);
  795. if (err)
  796. goto out;
  797. err = -EINVAL;
  798. if (upperpath->mnt != workpath.mnt) {
  799. pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
  800. goto out;
  801. }
  802. if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
  803. pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
  804. goto out;
  805. }
  806. err = -EBUSY;
  807. if (ovl_inuse_trylock(workpath.dentry)) {
  808. ofs->workdir_locked = true;
  809. } else if (ofs->config.index) {
  810. pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
  811. goto out;
  812. } else {
  813. pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
  814. }
  815. ofs->workbasedir = dget(workpath.dentry);
  816. err = ovl_make_workdir(ofs, &workpath);
  817. if (err)
  818. goto out;
  819. err = 0;
  820. out:
  821. path_put(&workpath);
  822. return err;
  823. }
  824. static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
  825. struct path *upperpath)
  826. {
  827. int err;
  828. /* Verify lower root is upper root origin */
  829. err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
  830. false, true);
  831. if (err) {
  832. pr_err("overlayfs: failed to verify upper root origin\n");
  833. goto out;
  834. }
  835. ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
  836. if (ofs->indexdir) {
  837. /* Verify upper root is index dir origin */
  838. err = ovl_verify_origin(ofs->indexdir, upperpath->dentry,
  839. true, true);
  840. if (err)
  841. pr_err("overlayfs: failed to verify index dir origin\n");
  842. /* Cleanup bad/stale/orphan index entries */
  843. if (!err)
  844. err = ovl_indexdir_cleanup(ofs->indexdir,
  845. ofs->upper_mnt,
  846. oe->lowerstack,
  847. oe->numlower);
  848. }
  849. if (err || !ofs->indexdir)
  850. pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
  851. out:
  852. return err;
  853. }
  854. static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
  855. unsigned int numlower)
  856. {
  857. int err;
  858. unsigned int i;
  859. err = -ENOMEM;
  860. ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
  861. GFP_KERNEL);
  862. if (ofs->lower_layers == NULL)
  863. goto out;
  864. for (i = 0; i < numlower; i++) {
  865. struct vfsmount *mnt;
  866. dev_t dev;
  867. err = get_anon_bdev(&dev);
  868. if (err) {
  869. pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
  870. goto out;
  871. }
  872. mnt = clone_private_mount(&stack[i]);
  873. err = PTR_ERR(mnt);
  874. if (IS_ERR(mnt)) {
  875. pr_err("overlayfs: failed to clone lowerpath\n");
  876. free_anon_bdev(dev);
  877. goto out;
  878. }
  879. /*
  880. * Make lower layers R/O. That way fchmod/fchown on lower file
  881. * will fail instead of modifying lower fs.
  882. */
  883. mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
  884. ofs->lower_layers[ofs->numlower].mnt = mnt;
  885. ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
  886. ofs->numlower++;
  887. /* Check if all lower layers are on same sb */
  888. if (i == 0)
  889. ofs->same_sb = mnt->mnt_sb;
  890. else if (ofs->same_sb != mnt->mnt_sb)
  891. ofs->same_sb = NULL;
  892. }
  893. err = 0;
  894. out:
  895. return err;
  896. }
  897. static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
  898. struct ovl_fs *ofs)
  899. {
  900. int err;
  901. char *lowertmp, *lower;
  902. struct path *stack = NULL;
  903. unsigned int stacklen, numlower = 0, i;
  904. bool remote = false;
  905. struct ovl_entry *oe;
  906. err = -ENOMEM;
  907. lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
  908. if (!lowertmp)
  909. goto out_err;
  910. err = -EINVAL;
  911. stacklen = ovl_split_lowerdirs(lowertmp);
  912. if (stacklen > OVL_MAX_STACK) {
  913. pr_err("overlayfs: too many lower directories, limit is %d\n",
  914. OVL_MAX_STACK);
  915. goto out_err;
  916. } else if (!ofs->config.upperdir && stacklen == 1) {
  917. pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
  918. goto out_err;
  919. }
  920. err = -ENOMEM;
  921. stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
  922. if (!stack)
  923. goto out_err;
  924. err = -EINVAL;
  925. lower = lowertmp;
  926. for (numlower = 0; numlower < stacklen; numlower++) {
  927. err = ovl_lower_dir(lower, &stack[numlower], ofs,
  928. &sb->s_stack_depth, &remote);
  929. if (err)
  930. goto out_err;
  931. lower = strchr(lower, '\0') + 1;
  932. }
  933. err = -EINVAL;
  934. sb->s_stack_depth++;
  935. if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
  936. pr_err("overlayfs: maximum fs stacking depth exceeded\n");
  937. goto out_err;
  938. }
  939. err = ovl_get_lower_layers(ofs, stack, numlower);
  940. if (err)
  941. goto out_err;
  942. err = -ENOMEM;
  943. oe = ovl_alloc_entry(numlower);
  944. if (!oe)
  945. goto out_err;
  946. for (i = 0; i < numlower; i++) {
  947. oe->lowerstack[i].dentry = dget(stack[i].dentry);
  948. oe->lowerstack[i].layer = &ofs->lower_layers[i];
  949. }
  950. if (remote)
  951. sb->s_d_op = &ovl_reval_dentry_operations;
  952. else
  953. sb->s_d_op = &ovl_dentry_operations;
  954. out:
  955. for (i = 0; i < numlower; i++)
  956. path_put(&stack[i]);
  957. kfree(stack);
  958. kfree(lowertmp);
  959. return oe;
  960. out_err:
  961. oe = ERR_PTR(err);
  962. goto out;
  963. }
  964. static int ovl_fill_super(struct super_block *sb, void *data, int silent)
  965. {
  966. struct path upperpath = { };
  967. struct dentry *root_dentry;
  968. struct ovl_entry *oe;
  969. struct ovl_fs *ofs;
  970. struct cred *cred;
  971. int err;
  972. err = -ENOMEM;
  973. ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
  974. if (!ofs)
  975. goto out;
  976. ofs->creator_cred = cred = prepare_creds();
  977. if (!cred)
  978. goto out_err;
  979. ofs->config.redirect_dir = ovl_redirect_dir_def;
  980. ofs->config.index = ovl_index_def;
  981. err = ovl_parse_opt((char *) data, &ofs->config);
  982. if (err)
  983. goto out_err;
  984. err = -EINVAL;
  985. if (!ofs->config.lowerdir) {
  986. if (!silent)
  987. pr_err("overlayfs: missing 'lowerdir'\n");
  988. goto out_err;
  989. }
  990. sb->s_stack_depth = 0;
  991. sb->s_maxbytes = MAX_LFS_FILESIZE;
  992. if (ofs->config.upperdir) {
  993. if (!ofs->config.workdir) {
  994. pr_err("overlayfs: missing 'workdir'\n");
  995. goto out_err;
  996. }
  997. err = ovl_get_upper(ofs, &upperpath);
  998. if (err)
  999. goto out_err;
  1000. err = ovl_get_workdir(ofs, &upperpath);
  1001. if (err)
  1002. goto out_err;
  1003. if (!ofs->workdir)
  1004. sb->s_flags |= SB_RDONLY;
  1005. sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
  1006. sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
  1007. }
  1008. oe = ovl_get_lowerstack(sb, ofs);
  1009. err = PTR_ERR(oe);
  1010. if (IS_ERR(oe))
  1011. goto out_err;
  1012. /* If the upper fs is nonexistent, we mark overlayfs r/o too */
  1013. if (!ofs->upper_mnt)
  1014. sb->s_flags |= SB_RDONLY;
  1015. else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)
  1016. ofs->same_sb = NULL;
  1017. if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
  1018. err = ovl_get_indexdir(ofs, oe, &upperpath);
  1019. if (err)
  1020. goto out_free_oe;
  1021. if (!ofs->indexdir)
  1022. sb->s_flags |= SB_RDONLY;
  1023. }
  1024. /* Show index=off/on in /proc/mounts for any of the reasons above */
  1025. if (!ofs->indexdir)
  1026. ofs->config.index = false;
  1027. /* Never override disk quota limits or use reserved space */
  1028. cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
  1029. sb->s_magic = OVERLAYFS_SUPER_MAGIC;
  1030. sb->s_op = &ovl_super_operations;
  1031. sb->s_xattr = ovl_xattr_handlers;
  1032. sb->s_fs_info = ofs;
  1033. sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
  1034. err = -ENOMEM;
  1035. root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
  1036. if (!root_dentry)
  1037. goto out_free_oe;
  1038. mntput(upperpath.mnt);
  1039. if (upperpath.dentry) {
  1040. oe->has_upper = true;
  1041. if (ovl_is_impuredir(upperpath.dentry))
  1042. ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
  1043. }
  1044. root_dentry->d_fsdata = oe;
  1045. /* Root is always merge -> can have whiteouts */
  1046. ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
  1047. ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
  1048. ovl_dentry_lower(root_dentry));
  1049. sb->s_root = root_dentry;
  1050. return 0;
  1051. out_free_oe:
  1052. ovl_entry_stack_free(oe);
  1053. kfree(oe);
  1054. out_err:
  1055. path_put(&upperpath);
  1056. ovl_free_fs(ofs);
  1057. out:
  1058. return err;
  1059. }
  1060. static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
  1061. const char *dev_name, void *raw_data)
  1062. {
  1063. return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
  1064. }
  1065. static struct file_system_type ovl_fs_type = {
  1066. .owner = THIS_MODULE,
  1067. .name = "overlay",
  1068. .mount = ovl_mount,
  1069. .kill_sb = kill_anon_super,
  1070. };
  1071. MODULE_ALIAS_FS("overlay");
  1072. static void ovl_inode_init_once(void *foo)
  1073. {
  1074. struct ovl_inode *oi = foo;
  1075. inode_init_once(&oi->vfs_inode);
  1076. }
  1077. static int __init ovl_init(void)
  1078. {
  1079. int err;
  1080. ovl_inode_cachep = kmem_cache_create("ovl_inode",
  1081. sizeof(struct ovl_inode), 0,
  1082. (SLAB_RECLAIM_ACCOUNT|
  1083. SLAB_MEM_SPREAD|SLAB_ACCOUNT),
  1084. ovl_inode_init_once);
  1085. if (ovl_inode_cachep == NULL)
  1086. return -ENOMEM;
  1087. err = register_filesystem(&ovl_fs_type);
  1088. if (err)
  1089. kmem_cache_destroy(ovl_inode_cachep);
  1090. return err;
  1091. }
  1092. static void __exit ovl_exit(void)
  1093. {
  1094. unregister_filesystem(&ovl_fs_type);
  1095. /*
  1096. * Make sure all delayed rcu free inodes are flushed before we
  1097. * destroy cache.
  1098. */
  1099. rcu_barrier();
  1100. kmem_cache_destroy(ovl_inode_cachep);
  1101. }
  /* Hook ovl_init()/ovl_exit() up as the module's load and unload handlers. */
  module_init(ovl_init);
  module_exit(ovl_exit);