copy_up.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700
  1. /*
  2. *
  3. * Copyright (C) 2011 Novell Inc.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 as published by
  7. * the Free Software Foundation.
  8. */
  9. #include <linux/module.h>
  10. #include <linux/fs.h>
  11. #include <linux/slab.h>
  12. #include <linux/file.h>
  13. #include <linux/splice.h>
  14. #include <linux/xattr.h>
  15. #include <linux/security.h>
  16. #include <linux/uaccess.h>
  17. #include <linux/sched/signal.h>
  18. #include <linux/cred.h>
  19. #include <linux/namei.h>
  20. #include <linux/fdtable.h>
  21. #include <linux/ratelimit.h>
  22. #include <linux/exportfs.h>
  23. #include "overlayfs.h"
  24. #include "ovl_entry.h"
  25. #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  26. static bool __read_mostly ovl_check_copy_up;
  27. module_param_named(check_copy_up, ovl_check_copy_up, bool,
  28. S_IWUSR | S_IRUGO);
  29. MODULE_PARM_DESC(ovl_check_copy_up,
  30. "Warn on copy-up when causing process also has a R/O fd open");
  31. static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
  32. {
  33. const struct dentry *dentry = data;
  34. if (file_inode(f) == d_inode(dentry))
  35. pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
  36. f, fd, current->pid, current->comm);
  37. return 0;
  38. }
  39. /*
  40. * Check the fds open by this process and warn if something like the following
  41. * scenario is about to occur:
  42. *
  43. * fd1 = open("foo", O_RDONLY);
  44. * fd2 = open("foo", O_RDWR);
  45. */
  46. static void ovl_do_check_copy_up(struct dentry *dentry)
  47. {
  48. if (ovl_check_copy_up)
  49. iterate_fd(current->files, 0, ovl_check_fd, dentry);
  50. }
  51. int ovl_copy_xattr(struct dentry *old, struct dentry *new)
  52. {
  53. ssize_t list_size, size, value_size = 0;
  54. char *buf, *name, *value = NULL;
  55. int uninitialized_var(error);
  56. size_t slen;
  57. if (!(old->d_inode->i_opflags & IOP_XATTR) ||
  58. !(new->d_inode->i_opflags & IOP_XATTR))
  59. return 0;
  60. list_size = vfs_listxattr(old, NULL, 0);
  61. if (list_size <= 0) {
  62. if (list_size == -EOPNOTSUPP)
  63. return 0;
  64. return list_size;
  65. }
  66. buf = kzalloc(list_size, GFP_KERNEL);
  67. if (!buf)
  68. return -ENOMEM;
  69. list_size = vfs_listxattr(old, buf, list_size);
  70. if (list_size <= 0) {
  71. error = list_size;
  72. goto out;
  73. }
  74. for (name = buf; list_size; name += slen) {
  75. slen = strnlen(name, list_size) + 1;
  76. /* underlying fs providing us with an broken xattr list? */
  77. if (WARN_ON(slen > list_size)) {
  78. error = -EIO;
  79. break;
  80. }
  81. list_size -= slen;
  82. if (ovl_is_private_xattr(name))
  83. continue;
  84. retry:
  85. size = vfs_getxattr(old, name, value, value_size);
  86. if (size == -ERANGE)
  87. size = vfs_getxattr(old, name, NULL, 0);
  88. if (size < 0) {
  89. error = size;
  90. break;
  91. }
  92. if (size > value_size) {
  93. void *new;
  94. new = krealloc(value, size, GFP_KERNEL);
  95. if (!new) {
  96. error = -ENOMEM;
  97. break;
  98. }
  99. value = new;
  100. value_size = size;
  101. goto retry;
  102. }
  103. error = security_inode_copy_up_xattr(name);
  104. if (error < 0 && error != -EOPNOTSUPP)
  105. break;
  106. if (error == 1) {
  107. error = 0;
  108. continue; /* Discard */
  109. }
  110. error = vfs_setxattr(new, name, value, size, 0);
  111. if (error)
  112. break;
  113. }
  114. kfree(value);
  115. out:
  116. kfree(buf);
  117. return error;
  118. }
  119. static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
  120. {
  121. struct file *old_file;
  122. struct file *new_file;
  123. loff_t old_pos = 0;
  124. loff_t new_pos = 0;
  125. int error = 0;
  126. if (len == 0)
  127. return 0;
  128. old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
  129. if (IS_ERR(old_file))
  130. return PTR_ERR(old_file);
  131. new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
  132. if (IS_ERR(new_file)) {
  133. error = PTR_ERR(new_file);
  134. goto out_fput;
  135. }
  136. /* Try to use clone_file_range to clone up within the same fs */
  137. error = vfs_clone_file_range(old_file, 0, new_file, 0, len);
  138. if (!error)
  139. goto out;
  140. /* Couldn't clone, so now we try to copy the data */
  141. error = 0;
  142. /* FIXME: copy up sparse files efficiently */
  143. while (len) {
  144. size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
  145. long bytes;
  146. if (len < this_len)
  147. this_len = len;
  148. if (signal_pending_state(TASK_KILLABLE, current)) {
  149. error = -EINTR;
  150. break;
  151. }
  152. bytes = do_splice_direct(old_file, &old_pos,
  153. new_file, &new_pos,
  154. this_len, SPLICE_F_MOVE);
  155. if (bytes <= 0) {
  156. error = bytes;
  157. break;
  158. }
  159. WARN_ON(old_pos != new_pos);
  160. len -= bytes;
  161. }
  162. out:
  163. if (!error)
  164. error = vfs_fsync(new_file, 0);
  165. fput(new_file);
  166. out_fput:
  167. fput(old_file);
  168. return error;
  169. }
  170. static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
  171. {
  172. struct iattr attr = {
  173. .ia_valid =
  174. ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
  175. .ia_atime = stat->atime,
  176. .ia_mtime = stat->mtime,
  177. };
  178. return notify_change(upperdentry, &attr, NULL);
  179. }
  180. int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
  181. {
  182. int err = 0;
  183. if (!S_ISLNK(stat->mode)) {
  184. struct iattr attr = {
  185. .ia_valid = ATTR_MODE,
  186. .ia_mode = stat->mode,
  187. };
  188. err = notify_change(upperdentry, &attr, NULL);
  189. }
  190. if (!err) {
  191. struct iattr attr = {
  192. .ia_valid = ATTR_UID | ATTR_GID,
  193. .ia_uid = stat->uid,
  194. .ia_gid = stat->gid,
  195. };
  196. err = notify_change(upperdentry, &attr, NULL);
  197. }
  198. if (!err)
  199. ovl_set_timestamps(upperdentry, stat);
  200. return err;
  201. }
  202. struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
  203. {
  204. struct ovl_fh *fh;
  205. int fh_type, fh_len, dwords;
  206. void *buf;
  207. int buflen = MAX_HANDLE_SZ;
  208. uuid_t *uuid = &lower->d_sb->s_uuid;
  209. buf = kmalloc(buflen, GFP_KERNEL);
  210. if (!buf)
  211. return ERR_PTR(-ENOMEM);
  212. /*
  213. * We encode a non-connectable file handle for non-dir, because we
  214. * only need to find the lower inode number and we don't want to pay
  215. * the price or reconnecting the dentry.
  216. */
  217. dwords = buflen >> 2;
  218. fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
  219. buflen = (dwords << 2);
  220. fh = ERR_PTR(-EIO);
  221. if (WARN_ON(fh_type < 0) ||
  222. WARN_ON(buflen > MAX_HANDLE_SZ) ||
  223. WARN_ON(fh_type == FILEID_INVALID))
  224. goto out;
  225. BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
  226. fh_len = offsetof(struct ovl_fh, fid) + buflen;
  227. fh = kmalloc(fh_len, GFP_KERNEL);
  228. if (!fh) {
  229. fh = ERR_PTR(-ENOMEM);
  230. goto out;
  231. }
  232. fh->version = OVL_FH_VERSION;
  233. fh->magic = OVL_FH_MAGIC;
  234. fh->type = fh_type;
  235. fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
  236. /*
  237. * When we will want to decode an overlay dentry from this handle
  238. * and all layers are on the same fs, if we get a disconncted real
  239. * dentry when we decode fid, the only way to tell if we should assign
  240. * it to upperdentry or to lowerstack is by checking this flag.
  241. */
  242. if (is_upper)
  243. fh->flags |= OVL_FH_FLAG_PATH_UPPER;
  244. fh->len = fh_len;
  245. fh->uuid = *uuid;
  246. memcpy(fh->fid, buf, buflen);
  247. out:
  248. kfree(buf);
  249. return fh;
  250. }
  251. static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
  252. struct dentry *upper)
  253. {
  254. const struct ovl_fh *fh = NULL;
  255. int err;
  256. /*
  257. * When lower layer doesn't support export operations store a 'null' fh,
  258. * so we can use the overlay.origin xattr to distignuish between a copy
  259. * up and a pure upper inode.
  260. */
  261. if (ovl_can_decode_fh(lower->d_sb)) {
  262. fh = ovl_encode_fh(lower, false);
  263. if (IS_ERR(fh))
  264. return PTR_ERR(fh);
  265. }
  266. /*
  267. * Do not fail when upper doesn't support xattrs.
  268. */
  269. err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
  270. fh ? fh->len : 0, 0);
  271. kfree(fh);
  272. return err;
  273. }
  274. struct ovl_copy_up_ctx {
  275. struct dentry *parent;
  276. struct dentry *dentry;
  277. struct path lowerpath;
  278. struct kstat stat;
  279. struct kstat pstat;
  280. const char *link;
  281. struct dentry *destdir;
  282. struct qstr destname;
  283. struct dentry *workdir;
  284. bool tmpfile;
  285. bool origin;
  286. };
  287. static int ovl_link_up(struct ovl_copy_up_ctx *c)
  288. {
  289. int err;
  290. struct dentry *upper;
  291. struct dentry *upperdir = ovl_dentry_upper(c->parent);
  292. struct inode *udir = d_inode(upperdir);
  293. /* Mark parent "impure" because it may now contain non-pure upper */
  294. err = ovl_set_impure(c->parent, upperdir);
  295. if (err)
  296. return err;
  297. err = ovl_set_nlink_lower(c->dentry);
  298. if (err)
  299. return err;
  300. inode_lock_nested(udir, I_MUTEX_PARENT);
  301. upper = lookup_one_len(c->dentry->d_name.name, upperdir,
  302. c->dentry->d_name.len);
  303. err = PTR_ERR(upper);
  304. if (!IS_ERR(upper)) {
  305. err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
  306. true);
  307. dput(upper);
  308. if (!err) {
  309. /* Restore timestamps on parent (best effort) */
  310. ovl_set_timestamps(upperdir, &c->pstat);
  311. ovl_dentry_set_upper_alias(c->dentry);
  312. }
  313. }
  314. inode_unlock(udir);
  315. ovl_set_nlink_upper(c->dentry);
  316. return err;
  317. }
  318. static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
  319. struct dentry **newdentry)
  320. {
  321. int err;
  322. struct dentry *upper;
  323. struct inode *udir = d_inode(c->destdir);
  324. upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
  325. if (IS_ERR(upper))
  326. return PTR_ERR(upper);
  327. if (c->tmpfile)
  328. err = ovl_do_link(temp, udir, upper, true);
  329. else
  330. err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
  331. if (!err)
  332. *newdentry = dget(c->tmpfile ? upper : temp);
  333. dput(upper);
  334. return err;
  335. }
  336. static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
  337. {
  338. int err;
  339. struct dentry *temp;
  340. const struct cred *old_creds = NULL;
  341. struct cred *new_creds = NULL;
  342. struct cattr cattr = {
  343. /* Can't properly set mode on creation because of the umask */
  344. .mode = c->stat.mode & S_IFMT,
  345. .rdev = c->stat.rdev,
  346. .link = c->link
  347. };
  348. err = security_inode_copy_up(c->dentry, &new_creds);
  349. if (err < 0)
  350. goto out;
  351. if (new_creds)
  352. old_creds = override_creds(new_creds);
  353. if (c->tmpfile) {
  354. temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
  355. if (IS_ERR(temp))
  356. goto temp_err;
  357. } else {
  358. temp = ovl_lookup_temp(c->workdir);
  359. if (IS_ERR(temp))
  360. goto temp_err;
  361. err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
  362. NULL, true);
  363. if (err) {
  364. dput(temp);
  365. goto out;
  366. }
  367. }
  368. err = 0;
  369. *tempp = temp;
  370. out:
  371. if (new_creds) {
  372. revert_creds(old_creds);
  373. put_cred(new_creds);
  374. }
  375. return err;
  376. temp_err:
  377. err = PTR_ERR(temp);
  378. goto out;
  379. }
  380. static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
  381. {
  382. int err;
  383. if (S_ISREG(c->stat.mode)) {
  384. struct path upperpath;
  385. ovl_path_upper(c->dentry, &upperpath);
  386. BUG_ON(upperpath.dentry != NULL);
  387. upperpath.dentry = temp;
  388. err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
  389. if (err)
  390. return err;
  391. }
  392. err = ovl_copy_xattr(c->lowerpath.dentry, temp);
  393. if (err)
  394. return err;
  395. inode_lock(temp->d_inode);
  396. err = ovl_set_attr(temp, &c->stat);
  397. inode_unlock(temp->d_inode);
  398. if (err)
  399. return err;
  400. /*
  401. * Store identifier of lower inode in upper inode xattr to
  402. * allow lookup of the copy up origin inode.
  403. *
  404. * Don't set origin when we are breaking the association with a lower
  405. * hard link.
  406. */
  407. if (c->origin) {
  408. err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
  409. if (err)
  410. return err;
  411. }
  412. return 0;
  413. }
  414. static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
  415. {
  416. struct inode *udir = c->destdir->d_inode;
  417. struct dentry *newdentry = NULL;
  418. struct dentry *temp = NULL;
  419. int err;
  420. err = ovl_get_tmpfile(c, &temp);
  421. if (err)
  422. goto out;
  423. err = ovl_copy_up_inode(c, temp);
  424. if (err)
  425. goto out_cleanup;
  426. if (c->tmpfile) {
  427. inode_lock_nested(udir, I_MUTEX_PARENT);
  428. err = ovl_install_temp(c, temp, &newdentry);
  429. inode_unlock(udir);
  430. } else {
  431. err = ovl_install_temp(c, temp, &newdentry);
  432. }
  433. if (err)
  434. goto out_cleanup;
  435. ovl_inode_update(d_inode(c->dentry), newdentry);
  436. out:
  437. dput(temp);
  438. return err;
  439. out_cleanup:
  440. if (!c->tmpfile)
  441. ovl_cleanup(d_inode(c->workdir), temp);
  442. goto out;
  443. }
  444. /*
  445. * Copy up a single dentry
  446. *
  447. * All renames start with copy up of source if necessary. The actual
  448. * rename will only proceed once the copy up was successful. Copy up uses
  449. * upper parent i_mutex for exclusion. Since rename can change d_parent it
  450. * is possible that the copy up will lock the old parent. At that point
  451. * the file will have already been copied up anyway.
  452. */
  453. static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
  454. {
  455. int err;
  456. struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
  457. bool indexed = false;
  458. if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
  459. c->stat.nlink > 1)
  460. indexed = true;
  461. if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
  462. c->origin = true;
  463. if (indexed) {
  464. c->destdir = ovl_indexdir(c->dentry->d_sb);
  465. err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
  466. if (err)
  467. return err;
  468. } else {
  469. /*
  470. * Mark parent "impure" because it may now contain non-pure
  471. * upper
  472. */
  473. err = ovl_set_impure(c->parent, c->destdir);
  474. if (err)
  475. return err;
  476. }
  477. /* Should we copyup with O_TMPFILE or with workdir? */
  478. if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
  479. c->tmpfile = true;
  480. err = ovl_copy_up_locked(c);
  481. } else {
  482. err = -EIO;
  483. if (lock_rename(c->workdir, c->destdir) != NULL) {
  484. pr_err("overlayfs: failed to lock workdir+upperdir\n");
  485. } else {
  486. err = ovl_copy_up_locked(c);
  487. unlock_rename(c->workdir, c->destdir);
  488. }
  489. }
  490. if (indexed) {
  491. if (!err)
  492. ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
  493. kfree(c->destname.name);
  494. } else if (!err) {
  495. struct inode *udir = d_inode(c->destdir);
  496. /* Restore timestamps on parent (best effort) */
  497. inode_lock(udir);
  498. ovl_set_timestamps(c->destdir, &c->pstat);
  499. inode_unlock(udir);
  500. ovl_dentry_set_upper_alias(c->dentry);
  501. }
  502. return err;
  503. }
  504. static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
  505. int flags)
  506. {
  507. int err;
  508. DEFINE_DELAYED_CALL(done);
  509. struct path parentpath;
  510. struct ovl_copy_up_ctx ctx = {
  511. .parent = parent,
  512. .dentry = dentry,
  513. .workdir = ovl_workdir(dentry),
  514. };
  515. if (WARN_ON(!ctx.workdir))
  516. return -EROFS;
  517. ovl_path_lower(dentry, &ctx.lowerpath);
  518. err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
  519. STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
  520. if (err)
  521. return err;
  522. ovl_path_upper(parent, &parentpath);
  523. ctx.destdir = parentpath.dentry;
  524. ctx.destname = dentry->d_name;
  525. err = vfs_getattr(&parentpath, &ctx.pstat,
  526. STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
  527. if (err)
  528. return err;
  529. /* maybe truncate regular file. this has no effect on dirs */
  530. if (flags & O_TRUNC)
  531. ctx.stat.size = 0;
  532. if (S_ISLNK(ctx.stat.mode)) {
  533. ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
  534. if (IS_ERR(ctx.link))
  535. return PTR_ERR(ctx.link);
  536. }
  537. ovl_do_check_copy_up(ctx.lowerpath.dentry);
  538. err = ovl_copy_up_start(dentry);
  539. /* err < 0: interrupted, err > 0: raced with another copy-up */
  540. if (unlikely(err)) {
  541. if (err > 0)
  542. err = 0;
  543. } else {
  544. if (!ovl_dentry_upper(dentry))
  545. err = ovl_do_copy_up(&ctx);
  546. if (!err && !ovl_dentry_has_upper_alias(dentry))
  547. err = ovl_link_up(&ctx);
  548. ovl_copy_up_end(dentry);
  549. }
  550. do_delayed_call(&done);
  551. return err;
  552. }
  553. int ovl_copy_up_flags(struct dentry *dentry, int flags)
  554. {
  555. int err = 0;
  556. const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
  557. while (!err) {
  558. struct dentry *next;
  559. struct dentry *parent;
  560. /*
  561. * Check if copy-up has happened as well as for upper alias (in
  562. * case of hard links) is there.
  563. *
  564. * Both checks are lockless:
  565. * - false negatives: will recheck under oi->lock
  566. * - false positives:
  567. * + ovl_dentry_upper() uses memory barriers to ensure the
  568. * upper dentry is up-to-date
  569. * + ovl_dentry_has_upper_alias() relies on locking of
  570. * upper parent i_rwsem to prevent reordering copy-up
  571. * with rename.
  572. */
  573. if (ovl_dentry_upper(dentry) &&
  574. ovl_dentry_has_upper_alias(dentry))
  575. break;
  576. next = dget(dentry);
  577. /* find the topmost dentry not yet copied up */
  578. for (;;) {
  579. parent = dget_parent(next);
  580. if (ovl_dentry_upper(parent))
  581. break;
  582. dput(next);
  583. next = parent;
  584. }
  585. err = ovl_copy_up_one(parent, next, flags);
  586. dput(parent);
  587. dput(next);
  588. }
  589. revert_creds(old_cred);
  590. return err;
  591. }
  /*
   * Copy up @dentry with no open flags, i.e. without truncating the data.
   *
   * Returns whatever ovl_copy_up_flags() returns.
   */
int ovl_copy_up(struct dentry *dentry)
{
	const int no_flags = 0;

	return ovl_copy_up_flags(dentry, no_flags);
}