|
@@ -16,6 +16,7 @@
|
|
|
#include <linux/pagemap.h>
|
|
|
#include <linux/splice.h>
|
|
|
#include <linux/compat.h>
|
|
|
+#include <linux/mount.h>
|
|
|
#include "internal.h"
|
|
|
|
|
|
#include <asm/uaccess.h>
|
|
@@ -395,9 +396,8 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
|
|
|
}
|
|
|
|
|
|
if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
|
|
|
- retval = locks_mandatory_area(
|
|
|
- read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
|
|
|
- inode, file, pos, count);
|
|
|
+ retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
|
|
|
+ read_write == READ ? F_RDLCK : F_WRLCK);
|
|
|
if (retval < 0)
|
|
|
return retval;
|
|
|
}
|
|
@@ -1327,3 +1327,299 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
|
|
|
return do_sendfile(out_fd, in_fd, NULL, count, 0);
|
|
|
}
|
|
|
#endif
|
|
|
+
|
|
|
+/*
|
|
|
+ * copy_file_range() differs from regular file read and write in that it
|
|
|
+ * specifically allows return partial success. When it does so is up to
|
|
|
+ * the copy_file_range method.
|
|
|
+ */
|
|
|
+ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
|
|
|
+ struct file *file_out, loff_t pos_out,
|
|
|
+ size_t len, unsigned int flags)
|
|
|
+{
|
|
|
+ struct inode *inode_in = file_inode(file_in);
|
|
|
+ struct inode *inode_out = file_inode(file_out);
|
|
|
+ ssize_t ret;
|
|
|
+
|
|
|
+ if (flags != 0)
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ /* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */
|
|
|
+ ret = rw_verify_area(READ, file_in, &pos_in, len);
|
|
|
+ if (ret >= 0)
|
|
|
+ ret = rw_verify_area(WRITE, file_out, &pos_out, len);
|
|
|
+ if (ret < 0)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ if (!(file_in->f_mode & FMODE_READ) ||
|
|
|
+ !(file_out->f_mode & FMODE_WRITE) ||
|
|
|
+ (file_out->f_flags & O_APPEND))
|
|
|
+ return -EBADF;
|
|
|
+
|
|
|
+ /* this could be relaxed once a method supports cross-fs copies */
|
|
|
+ if (inode_in->i_sb != inode_out->i_sb)
|
|
|
+ return -EXDEV;
|
|
|
+
|
|
|
+ if (len == 0)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ ret = mnt_want_write_file(file_out);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ ret = -EOPNOTSUPP;
|
|
|
+ if (file_out->f_op->copy_file_range)
|
|
|
+ ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
|
|
|
+ pos_out, len, flags);
|
|
|
+ if (ret == -EOPNOTSUPP)
|
|
|
+ ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
|
|
|
+ len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
|
|
|
+
|
|
|
+ if (ret > 0) {
|
|
|
+ fsnotify_access(file_in);
|
|
|
+ add_rchar(current, ret);
|
|
|
+ fsnotify_modify(file_out);
|
|
|
+ add_wchar(current, ret);
|
|
|
+ }
|
|
|
+ inc_syscr(current);
|
|
|
+ inc_syscw(current);
|
|
|
+
|
|
|
+ mnt_drop_write_file(file_out);
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+EXPORT_SYMBOL(vfs_copy_file_range);
|
|
|
+
|
|
|
+SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
|
|
|
+ int, fd_out, loff_t __user *, off_out,
|
|
|
+ size_t, len, unsigned int, flags)
|
|
|
+{
|
|
|
+ loff_t pos_in;
|
|
|
+ loff_t pos_out;
|
|
|
+ struct fd f_in;
|
|
|
+ struct fd f_out;
|
|
|
+ ssize_t ret = -EBADF;
|
|
|
+
|
|
|
+ f_in = fdget(fd_in);
|
|
|
+ if (!f_in.file)
|
|
|
+ goto out2;
|
|
|
+
|
|
|
+ f_out = fdget(fd_out);
|
|
|
+ if (!f_out.file)
|
|
|
+ goto out1;
|
|
|
+
|
|
|
+ ret = -EFAULT;
|
|
|
+ if (off_in) {
|
|
|
+ if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
|
|
|
+ goto out;
|
|
|
+ } else {
|
|
|
+ pos_in = f_in.file->f_pos;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (off_out) {
|
|
|
+ if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
|
|
|
+ goto out;
|
|
|
+ } else {
|
|
|
+ pos_out = f_out.file->f_pos;
|
|
|
+ }
|
|
|
+
|
|
|
+ ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
|
|
|
+ flags);
|
|
|
+ if (ret > 0) {
|
|
|
+ pos_in += ret;
|
|
|
+ pos_out += ret;
|
|
|
+
|
|
|
+ if (off_in) {
|
|
|
+ if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
|
|
|
+ ret = -EFAULT;
|
|
|
+ } else {
|
|
|
+ f_in.file->f_pos = pos_in;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (off_out) {
|
|
|
+ if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
|
|
|
+ ret = -EFAULT;
|
|
|
+ } else {
|
|
|
+ f_out.file->f_pos = pos_out;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+out:
|
|
|
+ fdput(f_out);
|
|
|
+out1:
|
|
|
+ fdput(f_in);
|
|
|
+out2:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
|
|
|
+{
|
|
|
+ struct inode *inode = file_inode(file);
|
|
|
+
|
|
|
+ if (unlikely(pos < 0))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ if (unlikely((loff_t) (pos + len) < 0))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
|
|
|
+ loff_t end = len ? pos + len - 1 : OFFSET_MAX;
|
|
|
+ int retval;
|
|
|
+
|
|
|
+ retval = locks_mandatory_area(inode, file, pos, end,
|
|
|
+ write ? F_WRLCK : F_RDLCK);
|
|
|
+ if (retval < 0)
|
|
|
+ return retval;
|
|
|
+ }
|
|
|
+
|
|
|
+ return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
|
|
|
+}
|
|
|
+
|
|
|
+int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
|
|
|
+ struct file *file_out, loff_t pos_out, u64 len)
|
|
|
+{
|
|
|
+ struct inode *inode_in = file_inode(file_in);
|
|
|
+ struct inode *inode_out = file_inode(file_out);
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ if (inode_in->i_sb != inode_out->i_sb ||
|
|
|
+ file_in->f_path.mnt != file_out->f_path.mnt)
|
|
|
+ return -EXDEV;
|
|
|
+
|
|
|
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
|
|
|
+ return -EISDIR;
|
|
|
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ if (!(file_in->f_mode & FMODE_READ) ||
|
|
|
+ !(file_out->f_mode & FMODE_WRITE) ||
|
|
|
+ (file_out->f_flags & O_APPEND) ||
|
|
|
+ !file_in->f_op->clone_file_range)
|
|
|
+ return -EBADF;
|
|
|
+
|
|
|
+ ret = clone_verify_area(file_in, pos_in, len, false);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ ret = clone_verify_area(file_out, pos_out, len, true);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ if (pos_in + len > i_size_read(inode_in))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ ret = mnt_want_write_file(file_out);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ ret = file_in->f_op->clone_file_range(file_in, pos_in,
|
|
|
+ file_out, pos_out, len);
|
|
|
+ if (!ret) {
|
|
|
+ fsnotify_access(file_in);
|
|
|
+ fsnotify_modify(file_out);
|
|
|
+ }
|
|
|
+
|
|
|
+ mnt_drop_write_file(file_out);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+EXPORT_SYMBOL(vfs_clone_file_range);
|
|
|
+
|
|
|
+int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
|
|
|
+{
|
|
|
+ struct file_dedupe_range_info *info;
|
|
|
+ struct inode *src = file_inode(file);
|
|
|
+ u64 off;
|
|
|
+ u64 len;
|
|
|
+ int i;
|
|
|
+ int ret;
|
|
|
+ bool is_admin = capable(CAP_SYS_ADMIN);
|
|
|
+ u16 count = same->dest_count;
|
|
|
+ struct file *dst_file;
|
|
|
+ loff_t dst_off;
|
|
|
+ ssize_t deduped;
|
|
|
+
|
|
|
+ if (!(file->f_mode & FMODE_READ))
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ if (same->reserved1 || same->reserved2)
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ off = same->src_offset;
|
|
|
+ len = same->src_length;
|
|
|
+
|
|
|
+ ret = -EISDIR;
|
|
|
+ if (S_ISDIR(src->i_mode))
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ ret = -EINVAL;
|
|
|
+ if (!S_ISREG(src->i_mode))
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ ret = clone_verify_area(file, off, len, false);
|
|
|
+ if (ret < 0)
|
|
|
+ goto out;
|
|
|
+ ret = 0;
|
|
|
+
|
|
|
+ /* pre-format output fields to sane values */
|
|
|
+ for (i = 0; i < count; i++) {
|
|
|
+ same->info[i].bytes_deduped = 0ULL;
|
|
|
+ same->info[i].status = FILE_DEDUPE_RANGE_SAME;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (i = 0, info = same->info; i < count; i++, info++) {
|
|
|
+ struct inode *dst;
|
|
|
+ struct fd dst_fd = fdget(info->dest_fd);
|
|
|
+
|
|
|
+ dst_file = dst_fd.file;
|
|
|
+ if (!dst_file) {
|
|
|
+ info->status = -EBADF;
|
|
|
+ goto next_loop;
|
|
|
+ }
|
|
|
+ dst = file_inode(dst_file);
|
|
|
+
|
|
|
+ ret = mnt_want_write_file(dst_file);
|
|
|
+ if (ret) {
|
|
|
+ info->status = ret;
|
|
|
+ goto next_loop;
|
|
|
+ }
|
|
|
+
|
|
|
+ dst_off = info->dest_offset;
|
|
|
+ ret = clone_verify_area(dst_file, dst_off, len, true);
|
|
|
+ if (ret < 0) {
|
|
|
+ info->status = ret;
|
|
|
+ goto next_file;
|
|
|
+ }
|
|
|
+ ret = 0;
|
|
|
+
|
|
|
+ if (info->reserved) {
|
|
|
+ info->status = -EINVAL;
|
|
|
+ } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
|
|
|
+ info->status = -EINVAL;
|
|
|
+ } else if (file->f_path.mnt != dst_file->f_path.mnt) {
|
|
|
+ info->status = -EXDEV;
|
|
|
+ } else if (S_ISDIR(dst->i_mode)) {
|
|
|
+ info->status = -EISDIR;
|
|
|
+ } else if (dst_file->f_op->dedupe_file_range == NULL) {
|
|
|
+ info->status = -EINVAL;
|
|
|
+ } else {
|
|
|
+ deduped = dst_file->f_op->dedupe_file_range(file, off,
|
|
|
+ len, dst_file,
|
|
|
+ info->dest_offset);
|
|
|
+ if (deduped == -EBADE)
|
|
|
+ info->status = FILE_DEDUPE_RANGE_DIFFERS;
|
|
|
+ else if (deduped < 0)
|
|
|
+ info->status = deduped;
|
|
|
+ else
|
|
|
+ info->bytes_deduped += deduped;
|
|
|
+ }
|
|
|
+
|
|
|
+next_file:
|
|
|
+ mnt_drop_write_file(dst_file);
|
|
|
+next_loop:
|
|
|
+ fdput(dst_fd);
|
|
|
+ }
|
|
|
+
|
|
|
+out:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+EXPORT_SYMBOL(vfs_dedupe_file_range);
|