file.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. /*
  2. * Copyright (C) 2017 Red Hat, Inc.
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU General Public License version 2 as published by
  6. * the Free Software Foundation.
  7. */
  8. #include <linux/cred.h>
  9. #include <linux/file.h>
  10. #include <linux/mount.h>
  11. #include <linux/xattr.h>
  12. #include <linux/uio.h>
  13. #include "overlayfs.h"
  14. static char ovl_whatisit(struct inode *inode, struct inode *realinode)
  15. {
  16. if (realinode != ovl_inode_upper(inode))
  17. return 'l';
  18. if (ovl_has_upperdata(inode))
  19. return 'u';
  20. else
  21. return 'm';
  22. }
  23. static struct file *ovl_open_realfile(const struct file *file,
  24. struct inode *realinode)
  25. {
  26. struct inode *inode = file_inode(file);
  27. struct file *realfile;
  28. const struct cred *old_cred;
  29. old_cred = ovl_override_creds(inode->i_sb);
  30. realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
  31. realinode, current_cred());
  32. revert_creds(old_cred);
  33. pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
  34. file, file, ovl_whatisit(inode, realinode), file->f_flags,
  35. realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
  36. return realfile;
  37. }
  38. #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
  39. static int ovl_change_flags(struct file *file, unsigned int flags)
  40. {
  41. struct inode *inode = file_inode(file);
  42. int err;
  43. /* No atime modificaton on underlying */
  44. flags |= O_NOATIME;
  45. /* If some flag changed that cannot be changed then something's amiss */
  46. if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
  47. return -EIO;
  48. flags &= OVL_SETFL_MASK;
  49. if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
  50. return -EPERM;
  51. if (flags & O_DIRECT) {
  52. if (!file->f_mapping->a_ops ||
  53. !file->f_mapping->a_ops->direct_IO)
  54. return -EINVAL;
  55. }
  56. if (file->f_op->check_flags) {
  57. err = file->f_op->check_flags(flags);
  58. if (err)
  59. return err;
  60. }
  61. spin_lock(&file->f_lock);
  62. file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
  63. spin_unlock(&file->f_lock);
  64. return 0;
  65. }
  66. static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
  67. bool allow_meta)
  68. {
  69. struct inode *inode = file_inode(file);
  70. struct inode *realinode;
  71. real->flags = 0;
  72. real->file = file->private_data;
  73. if (allow_meta)
  74. realinode = ovl_inode_real(inode);
  75. else
  76. realinode = ovl_inode_realdata(inode);
  77. /* Has it been copied up since we'd opened it? */
  78. if (unlikely(file_inode(real->file) != realinode)) {
  79. real->flags = FDPUT_FPUT;
  80. real->file = ovl_open_realfile(file, realinode);
  81. return PTR_ERR_OR_ZERO(real->file);
  82. }
  83. /* Did the flags change since open? */
  84. if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
  85. return ovl_change_flags(real->file, file->f_flags);
  86. return 0;
  87. }
  88. static int ovl_real_fdget(const struct file *file, struct fd *real)
  89. {
  90. return ovl_real_fdget_meta(file, real, false);
  91. }
  92. static int ovl_open(struct inode *inode, struct file *file)
  93. {
  94. struct dentry *dentry = file_dentry(file);
  95. struct file *realfile;
  96. int err;
  97. err = ovl_open_maybe_copy_up(dentry, file->f_flags);
  98. if (err)
  99. return err;
  100. /* No longer need these flags, so don't pass them on to underlying fs */
  101. file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  102. realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
  103. if (IS_ERR(realfile))
  104. return PTR_ERR(realfile);
  105. /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
  106. file->f_mapping = realfile->f_mapping;
  107. file->private_data = realfile;
  108. return 0;
  109. }
  110. static int ovl_release(struct inode *inode, struct file *file)
  111. {
  112. fput(file->private_data);
  113. return 0;
  114. }
  115. static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
  116. {
  117. struct inode *realinode = ovl_inode_real(file_inode(file));
  118. return generic_file_llseek_size(file, offset, whence,
  119. realinode->i_sb->s_maxbytes,
  120. i_size_read(realinode));
  121. }
  122. static void ovl_file_accessed(struct file *file)
  123. {
  124. struct inode *inode, *upperinode;
  125. if (file->f_flags & O_NOATIME)
  126. return;
  127. inode = file_inode(file);
  128. upperinode = ovl_inode_upper(inode);
  129. if (!upperinode)
  130. return;
  131. if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
  132. !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
  133. inode->i_mtime = upperinode->i_mtime;
  134. inode->i_ctime = upperinode->i_ctime;
  135. }
  136. touch_atime(&file->f_path);
  137. }
  138. static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
  139. {
  140. int ifl = iocb->ki_flags;
  141. rwf_t flags = 0;
  142. if (ifl & IOCB_NOWAIT)
  143. flags |= RWF_NOWAIT;
  144. if (ifl & IOCB_HIPRI)
  145. flags |= RWF_HIPRI;
  146. if (ifl & IOCB_DSYNC)
  147. flags |= RWF_DSYNC;
  148. if (ifl & IOCB_SYNC)
  149. flags |= RWF_SYNC;
  150. return flags;
  151. }
  152. static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  153. {
  154. struct file *file = iocb->ki_filp;
  155. struct fd real;
  156. const struct cred *old_cred;
  157. ssize_t ret;
  158. if (!iov_iter_count(iter))
  159. return 0;
  160. ret = ovl_real_fdget(file, &real);
  161. if (ret)
  162. return ret;
  163. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  164. ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
  165. ovl_iocb_to_rwf(iocb));
  166. revert_creds(old_cred);
  167. ovl_file_accessed(file);
  168. fdput(real);
  169. return ret;
  170. }
  171. static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
  172. {
  173. struct file *file = iocb->ki_filp;
  174. struct inode *inode = file_inode(file);
  175. struct fd real;
  176. const struct cred *old_cred;
  177. ssize_t ret;
  178. if (!iov_iter_count(iter))
  179. return 0;
  180. inode_lock(inode);
  181. /* Update mode */
  182. ovl_copyattr(ovl_inode_real(inode), inode);
  183. ret = file_remove_privs(file);
  184. if (ret)
  185. goto out_unlock;
  186. ret = ovl_real_fdget(file, &real);
  187. if (ret)
  188. goto out_unlock;
  189. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  190. ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
  191. ovl_iocb_to_rwf(iocb));
  192. revert_creds(old_cred);
  193. /* Update size */
  194. ovl_copyattr(ovl_inode_real(inode), inode);
  195. fdput(real);
  196. out_unlock:
  197. inode_unlock(inode);
  198. return ret;
  199. }
  200. static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  201. {
  202. struct fd real;
  203. const struct cred *old_cred;
  204. int ret;
  205. ret = ovl_real_fdget_meta(file, &real, !datasync);
  206. if (ret)
  207. return ret;
  208. /* Don't sync lower file for fear of receiving EROFS error */
  209. if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
  210. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  211. ret = vfs_fsync_range(real.file, start, end, datasync);
  212. revert_creds(old_cred);
  213. }
  214. fdput(real);
  215. return ret;
  216. }
  217. static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
  218. {
  219. struct file *realfile = file->private_data;
  220. const struct cred *old_cred;
  221. int ret;
  222. if (!realfile->f_op->mmap)
  223. return -ENODEV;
  224. if (WARN_ON(file != vma->vm_file))
  225. return -EIO;
  226. vma->vm_file = get_file(realfile);
  227. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  228. ret = call_mmap(vma->vm_file, vma);
  229. revert_creds(old_cred);
  230. if (ret) {
  231. /* Drop reference count from new vm_file value */
  232. fput(realfile);
  233. } else {
  234. /* Drop reference count from previous vm_file value */
  235. fput(file);
  236. }
  237. ovl_file_accessed(file);
  238. return ret;
  239. }
  240. static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
  241. {
  242. struct inode *inode = file_inode(file);
  243. struct fd real;
  244. const struct cred *old_cred;
  245. int ret;
  246. ret = ovl_real_fdget(file, &real);
  247. if (ret)
  248. return ret;
  249. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  250. ret = vfs_fallocate(real.file, mode, offset, len);
  251. revert_creds(old_cred);
  252. /* Update size */
  253. ovl_copyattr(ovl_inode_real(inode), inode);
  254. fdput(real);
  255. return ret;
  256. }
  257. static long ovl_real_ioctl(struct file *file, unsigned int cmd,
  258. unsigned long arg)
  259. {
  260. struct fd real;
  261. const struct cred *old_cred;
  262. long ret;
  263. ret = ovl_real_fdget(file, &real);
  264. if (ret)
  265. return ret;
  266. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  267. ret = vfs_ioctl(real.file, cmd, arg);
  268. revert_creds(old_cred);
  269. fdput(real);
  270. return ret;
  271. }
  272. static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  273. {
  274. long ret;
  275. struct inode *inode = file_inode(file);
  276. switch (cmd) {
  277. case FS_IOC_GETFLAGS:
  278. ret = ovl_real_ioctl(file, cmd, arg);
  279. break;
  280. case FS_IOC_SETFLAGS:
  281. if (!inode_owner_or_capable(inode))
  282. return -EACCES;
  283. ret = mnt_want_write_file(file);
  284. if (ret)
  285. return ret;
  286. ret = ovl_copy_up_with_data(file_dentry(file));
  287. if (!ret) {
  288. ret = ovl_real_ioctl(file, cmd, arg);
  289. inode_lock(inode);
  290. ovl_copyflags(ovl_inode_real(inode), inode);
  291. inode_unlock(inode);
  292. }
  293. mnt_drop_write_file(file);
  294. break;
  295. default:
  296. ret = -ENOTTY;
  297. }
  298. return ret;
  299. }
  300. static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
  301. unsigned long arg)
  302. {
  303. switch (cmd) {
  304. case FS_IOC32_GETFLAGS:
  305. cmd = FS_IOC_GETFLAGS;
  306. break;
  307. case FS_IOC32_SETFLAGS:
  308. cmd = FS_IOC_SETFLAGS;
  309. break;
  310. default:
  311. return -ENOIOCTLCMD;
  312. }
  313. return ovl_ioctl(file, cmd, arg);
  314. }
  315. enum ovl_copyop {
  316. OVL_COPY,
  317. OVL_CLONE,
  318. OVL_DEDUPE,
  319. };
  320. static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
  321. struct file *file_out, loff_t pos_out,
  322. u64 len, unsigned int flags, enum ovl_copyop op)
  323. {
  324. struct inode *inode_out = file_inode(file_out);
  325. struct fd real_in, real_out;
  326. const struct cred *old_cred;
  327. ssize_t ret;
  328. ret = ovl_real_fdget(file_out, &real_out);
  329. if (ret)
  330. return ret;
  331. ret = ovl_real_fdget(file_in, &real_in);
  332. if (ret) {
  333. fdput(real_out);
  334. return ret;
  335. }
  336. old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
  337. switch (op) {
  338. case OVL_COPY:
  339. ret = vfs_copy_file_range(real_in.file, pos_in,
  340. real_out.file, pos_out, len, flags);
  341. break;
  342. case OVL_CLONE:
  343. ret = vfs_clone_file_range(real_in.file, pos_in,
  344. real_out.file, pos_out, len);
  345. break;
  346. case OVL_DEDUPE:
  347. ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
  348. real_out.file, pos_out, len);
  349. break;
  350. }
  351. revert_creds(old_cred);
  352. /* Update size */
  353. ovl_copyattr(ovl_inode_real(inode_out), inode_out);
  354. fdput(real_in);
  355. fdput(real_out);
  356. return ret;
  357. }
  358. static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
  359. struct file *file_out, loff_t pos_out,
  360. size_t len, unsigned int flags)
  361. {
  362. return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
  363. OVL_COPY);
  364. }
  365. static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
  366. struct file *file_out, loff_t pos_out, u64 len)
  367. {
  368. return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
  369. OVL_CLONE);
  370. }
  371. static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
  372. struct file *file_out, loff_t pos_out, u64 len)
  373. {
  374. /*
  375. * Don't copy up because of a dedupe request, this wouldn't make sense
  376. * most of the time (data would be duplicated instead of deduplicated).
  377. */
  378. if (!ovl_inode_upper(file_inode(file_in)) ||
  379. !ovl_inode_upper(file_inode(file_out)))
  380. return -EPERM;
  381. return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
  382. OVL_DEDUPE);
  383. }
  384. const struct file_operations ovl_file_operations = {
  385. .open = ovl_open,
  386. .release = ovl_release,
  387. .llseek = ovl_llseek,
  388. .read_iter = ovl_read_iter,
  389. .write_iter = ovl_write_iter,
  390. .fsync = ovl_fsync,
  391. .mmap = ovl_mmap,
  392. .fallocate = ovl_fallocate,
  393. .unlocked_ioctl = ovl_ioctl,
  394. .compat_ioctl = ovl_compat_ioctl,
  395. .copy_file_range = ovl_copy_file_range,
  396. .clone_file_range = ovl_clone_file_range,
  397. .dedupe_file_range = ovl_dedupe_file_range,
  398. };