Эх сурвалжийг харах

nfs: per-name sillyunlink exclusion

use d_alloc_parallel() for sillyunlink/lookup exclusion and
explicit rwsem (nfs_rmdir() being a writer and nfs_call_unlink() -
a reader) for rmdir/sillyunlink one.

That ought to make lookup/readdir/!O_CREAT atomic_open really
parallel on NFS.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Al Viro 9 жил өмнө
parent
commit
884be17535

+ 2 - 7
fs/nfs/dir.c

@@ -909,7 +909,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
 
-	nfs_block_sillyrename(dentry);
 	if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
 	if (res < 0)
@@ -945,7 +944,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			break;
 	} while (!desc->eof);
 out:
-	nfs_unblock_sillyrename(dentry);
 	if (res > 0)
 		res = 0;
 	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
@@ -1398,7 +1396,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	trace_nfs_lookup_enter(dir, dentry, flags);
-	nfs_block_sillyrename(parent);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
 	if (error == -ENOENT)
 		goto no_entry;
@@ -1423,7 +1420,6 @@ no_entry:
 	}
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unblock_sillyrename:
-	nfs_unblock_sillyrename(parent);
 	trace_nfs_lookup_exit(dir, dentry, flags, error);
 	nfs4_label_free(label);
 out:
@@ -1535,9 +1531,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
-	nfs_block_sillyrename(dentry->d_parent);
 	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
-	nfs_unblock_sillyrename(dentry->d_parent);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
@@ -1781,7 +1775,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 
 	trace_nfs_rmdir_enter(dir, dentry);
 	if (d_really_is_positive(dentry)) {
-		nfs_wait_on_sillyrename(dentry);
+		down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
 		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
 		/* Ensure the VFS deletes this inode */
 		switch (error) {
@@ -1791,6 +1785,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 		case -ENOENT:
 			nfs_dentry_handle_enoent(dentry);
 		}
+		up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
 	} else
 		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
 	trace_nfs_rmdir_exit(dir, dentry, error);

+ 1 - 3
fs/nfs/inode.c

@@ -1958,9 +1958,7 @@ static void init_once(void *foo)
 	nfsi->nrequests = 0;
 	nfsi->commit_info.ncommit = 0;
 	atomic_set(&nfsi->commit_info.rpcs_out, 0);
-	atomic_set(&nfsi->silly_count, 1);
-	INIT_HLIST_HEAD(&nfsi->silly_list);
-	init_waitqueue_head(&nfsi->waitqueue);
+	init_rwsem(&nfsi->rmdir_sem);
 	nfs4_init_once(nfsi);
 }
 

+ 1 - 1
fs/nfs/nfs4proc.c

@@ -3777,7 +3777,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
 
 static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
 {
-	nfs4_setup_sequence(NFS_SERVER(data->dir),
+	nfs4_setup_sequence(NFS_SB(data->dentry->d_sb),
 			&data->args.seq_args,
 			&data->res.seq_res,
 			task);

+ 1 - 1
fs/nfs/nfstrace.h

@@ -702,7 +702,7 @@ TRACE_EVENT(nfs_sillyrename_unlink,
 		),
 
 		TP_fast_assign(
-			struct inode *dir = data->dir;
+			struct inode *dir = d_inode(data->dentry->d_parent);
 			size_t len = data->args.name.len;
 			__entry->dev = dir->i_sb->s_dev;
 			__entry->dir = NFS_FILEID(dir);

+ 51 - 142
fs/nfs/unlink.c

@@ -30,45 +30,11 @@
 static void
 nfs_free_unlinkdata(struct nfs_unlinkdata *data)
 {
-	iput(data->dir);
 	put_rpccred(data->cred);
 	kfree(data->args.name.name);
 	kfree(data);
 }
 
-#define NAME_ALLOC_LEN(len)	((len+16) & ~15)
-/**
- * nfs_copy_dname - copy dentry name to data structure
- * @dentry: pointer to dentry
- * @data: nfs_unlinkdata
- */
-static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
-{
-	char		*str;
-	int		len = dentry->d_name.len;
-
-	str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL);
-	if (!str)
-		return -ENOMEM;
-	data->args.name.len = len;
-	data->args.name.name = str;
-	return 0;
-}
-
-static void nfs_free_dname(struct nfs_unlinkdata *data)
-{
-	kfree(data->args.name.name);
-	data->args.name.name = NULL;
-	data->args.name.len = 0;
-}
-
-static void nfs_dec_sillycount(struct inode *dir)
-{
-	struct nfs_inode *nfsi = NFS_I(dir);
-	if (atomic_dec_return(&nfsi->silly_count) == 1)
-		wake_up(&nfsi->waitqueue);
-}
-
 /**
  * nfs_async_unlink_done - Sillydelete post-processing
  * @task: rpc_task of the sillydelete
@@ -78,7 +44,7 @@ static void nfs_dec_sillycount(struct inode *dir)
 static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_unlinkdata *data = calldata;
-	struct inode *dir = data->dir;
+	struct inode *dir = d_inode(data->dentry->d_parent);
 
 	trace_nfs_sillyrename_unlink(data, task->tk_status);
 	if (!NFS_PROTO(dir)->unlink_done(task, dir))
@@ -95,17 +61,21 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 static void nfs_async_unlink_release(void *calldata)
 {
 	struct nfs_unlinkdata	*data = calldata;
-	struct super_block *sb = data->dir->i_sb;
+	struct dentry *dentry = data->dentry;
+	struct super_block *sb = dentry->d_sb;
 
-	nfs_dec_sillycount(data->dir);
+	up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
+	d_lookup_done(dentry);
 	nfs_free_unlinkdata(data);
+	dput(dentry);
 	nfs_sb_deactive(sb);
 }
 
 static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_unlinkdata *data = calldata;
-	NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data);
+	struct inode *dir = d_inode(data->dentry->d_parent);
+	NFS_PROTO(dir)->unlink_rpc_prepare(task, data);
 }
 
 static const struct rpc_call_ops nfs_unlink_ops = {
@@ -114,7 +84,7 @@ static const struct rpc_call_ops nfs_unlink_ops = {
 	.rpc_call_prepare = nfs_unlink_prepare,
 };
 
-static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
+static void nfs_do_call_unlink(struct nfs_unlinkdata *data)
 {
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
@@ -129,10 +99,31 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		.flags = RPC_TASK_ASYNC,
 	};
 	struct rpc_task *task;
+	struct inode *dir = d_inode(data->dentry->d_parent);
+	nfs_sb_active(dir->i_sb);
+	data->args.fh = NFS_FH(dir);
+	nfs_fattr_init(data->res.dir_attr);
+
+	NFS_PROTO(dir)->unlink_setup(&msg, dir);
+
+	task_setup_data.rpc_client = NFS_CLIENT(dir);
+	task = rpc_run_task(&task_setup_data);
+	if (!IS_ERR(task))
+		rpc_put_task_async(task);
+}
+
+static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
+	struct inode *dir = d_inode(dentry->d_parent);
 	struct dentry *alias;
 
-	alias = d_lookup(parent, &data->args.name);
-	if (alias != NULL) {
+	down_read_non_owner(&NFS_I(dir)->rmdir_sem);
+	alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
+	if (IS_ERR(alias)) {
+		up_read_non_owner(&NFS_I(dir)->rmdir_sem);
+		return 0;
+	}
+	if (!d_in_lookup(alias)) {
 		int ret;
 		void *devname_garbage = NULL;
 
@@ -140,10 +131,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		 * Hey, we raced with lookup... See if we need to transfer
 		 * the sillyrename information to the aliased dentry.
 		 */
-		nfs_free_dname(data);
-		ret = nfs_copy_dname(alias, data);
 		spin_lock(&alias->d_lock);
-		if (ret == 0 && d_really_is_positive(alias) &&
+		if (d_really_is_positive(alias) &&
 		    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
 			devname_garbage = alias->d_fsdata;
 			alias->d_fsdata = data;
@@ -152,8 +141,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		} else
 			ret = 0;
 		spin_unlock(&alias->d_lock);
-		nfs_dec_sillycount(dir);
 		dput(alias);
+		up_read_non_owner(&NFS_I(dir)->rmdir_sem);
 		/*
 		 * If we'd displaced old cached devname, free it.  At that
 		 * point dentry is definitely not a root, so we won't need
@@ -162,95 +151,18 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		kfree(devname_garbage);
 		return ret;
 	}
-	data->dir = igrab(dir);
-	if (!data->dir) {
-		nfs_dec_sillycount(dir);
-		return 0;
-	}
-	nfs_sb_active(dir->i_sb);
-	data->args.fh = NFS_FH(dir);
-	nfs_fattr_init(data->res.dir_attr);
-
-	NFS_PROTO(dir)->unlink_setup(&msg, dir);
-
-	task_setup_data.rpc_client = NFS_CLIENT(dir);
-	task = rpc_run_task(&task_setup_data);
-	if (!IS_ERR(task))
-		rpc_put_task_async(task);
+	data->dentry = alias;
+	nfs_do_call_unlink(data);
 	return 1;
 }
 
-static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
-{
-	struct dentry *parent;
-	struct inode *dir;
-	int ret = 0;
-
-
-	parent = dget_parent(dentry);
-	if (parent == NULL)
-		goto out_free;
-	dir = d_inode(parent);
-	/* Non-exclusive lock protects against concurrent lookup() calls */
-	spin_lock(&dir->i_lock);
-	if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
-		/* Deferred delete */
-		hlist_add_head(&data->list, &NFS_I(dir)->silly_list);
-		spin_unlock(&dir->i_lock);
-		ret = 1;
-		goto out_dput;
-	}
-	spin_unlock(&dir->i_lock);
-	ret = nfs_do_call_unlink(parent, dir, data);
-out_dput:
-	dput(parent);
-out_free:
-	return ret;
-}
-
-void nfs_wait_on_sillyrename(struct dentry *dentry)
-{
-	struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
-
-	wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
-}
-
-void nfs_block_sillyrename(struct dentry *dentry)
-{
-	struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
-
-	wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
-}
-
-void nfs_unblock_sillyrename(struct dentry *dentry)
-{
-	struct inode *dir = d_inode(dentry);
-	struct nfs_inode *nfsi = NFS_I(dir);
-	struct nfs_unlinkdata *data;
-
-	atomic_inc(&nfsi->silly_count);
-	wake_up(&nfsi->waitqueue);
-	spin_lock(&dir->i_lock);
-	while (!hlist_empty(&nfsi->silly_list)) {
-		if (!atomic_inc_not_zero(&nfsi->silly_count))
-			break;
-		data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list);
-		hlist_del(&data->list);
-		spin_unlock(&dir->i_lock);
-		if (nfs_do_call_unlink(dentry, dir, data) == 0)
-			nfs_free_unlinkdata(data);
-		spin_lock(&dir->i_lock);
-	}
-	spin_unlock(&dir->i_lock);
-}
-
 /**
  * nfs_async_unlink - asynchronous unlinking of a file
  * @dir: parent directory of dentry
  * @dentry: dentry to unlink
  */
 static int
-nfs_async_unlink(struct inode *dir, struct dentry *dentry)
+nfs_async_unlink(struct dentry *dentry, struct qstr *name)
 {
 	struct nfs_unlinkdata *data;
 	int status = -ENOMEM;
@@ -259,13 +171,18 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (data == NULL)
 		goto out;
+	data->args.name.name = kstrdup(name->name, GFP_KERNEL);
+	if (!data->args.name.name)
+		goto out_free;
+	data->args.name.len = name->len;
 
 	data->cred = rpc_lookup_cred();
 	if (IS_ERR(data->cred)) {
 		status = PTR_ERR(data->cred);
-		goto out_free;
+		goto out_free_name;
 	}
 	data->res.dir_attr = &data->dir_attr;
+	init_waitqueue_head(&data->wq);
 
 	status = -EBUSY;
 	spin_lock(&dentry->d_lock);
@@ -285,6 +202,8 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 out_unlock:
 	spin_unlock(&dentry->d_lock);
 	put_rpccred(data->cred);
+out_free_name:
+	kfree(data->args.name.name);
 out_free:
 	kfree(data);
 out:
@@ -303,17 +222,15 @@ out:
 void
 nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
 {
-	struct nfs_unlinkdata	*data = NULL;
+	struct nfs_unlinkdata	*data;
 
 	spin_lock(&dentry->d_lock);
-	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
-		dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
-		data = dentry->d_fsdata;
-		dentry->d_fsdata = NULL;
-	}
+	dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+	data = dentry->d_fsdata;
+	dentry->d_fsdata = NULL;
 	spin_unlock(&dentry->d_lock);
 
-	if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
+	if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))
 		nfs_free_unlinkdata(data);
 }
 
@@ -560,18 +477,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	/* queue unlink first. Can't do this from rpc_release as it
 	 * has to allocate memory
 	 */
-	error = nfs_async_unlink(dir, dentry);
+	error = nfs_async_unlink(dentry, &sdentry->d_name);
 	if (error)
 		goto out_dput;
 
-	/* populate unlinkdata with the right dname */
-	error = nfs_copy_dname(sdentry,
-				(struct nfs_unlinkdata *)dentry->d_fsdata);
-	if (error) {
-		nfs_cancel_async_unlink(dentry);
-		goto out_dput;
-	}
-
 	/* run the rename task, undo unlink if it fails */
 	task = nfs_async_rename(dir, dir, dentry, sdentry,
 					nfs_complete_sillyrename);

+ 3 - 8
include/linux/nfs_fs.h

@@ -163,11 +163,9 @@ struct nfs_inode {
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
 
-	/* Number of in-flight sillydelete RPC calls */
-	atomic_t		silly_count;
-	/* List of deferred sillydelete requests */
-	struct hlist_head	silly_list;
-	wait_queue_head_t	waitqueue;
+	/* Readers: in-flight sillydelete RPC calls */
+	/* Writers: rmdir */
+	struct rw_semaphore	rmdir_sem;
 
 #if IS_ENABLED(CONFIG_NFS_V4)
 	struct nfs4_cached_acl	*nfs4_acl;
@@ -492,9 +490,6 @@ extern void nfs_release_automount_timer(void);
  * linux/fs/nfs/unlink.c
  */
 extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
-extern void nfs_wait_on_sillyrename(struct dentry *dentry);
-extern void nfs_block_sillyrename(struct dentry *dentry);
-extern void nfs_unblock_sillyrename(struct dentry *dentry);
 
 /*
  * linux/fs/nfs/write.c

+ 2 - 2
include/linux/nfs_xdr.h

@@ -1468,10 +1468,10 @@ struct nfs_pgio_completion_ops {
 };
 
 struct nfs_unlinkdata {
-	struct hlist_node list;
 	struct nfs_removeargs args;
 	struct nfs_removeres res;
-	struct inode *dir;
+	struct dentry *dentry;
+	wait_queue_head_t wq;
 	struct rpc_cred	*cred;
 	struct nfs_fattr dir_attr;
 	long timeout;