Browse Source

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull namespace fixes from Eric Biederman:
 "This tree contains 4 fixes.

  The first is a fix for a race that can cause oopses under the right
  circumstances, and that someone just recently encountered.

  Past that are several small, trivially correct fixes: a real issue
  that was blocking development of an out-of-tree driver but does not
  appear to have caused any actual problems for in-tree code; a
  potential deadlock that was reported by lockdep; and a deadlock that
  people have experienced and taken the time to track down, caused by a
  cleanup that removed the code to drop a reference count"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  sysctl: Drop reference added by grab_header in proc_sys_readdir
  pid: fix lockdep deadlock warning due to ucount_lock
  libfs: Modify mount_pseudo_xattr to be clear it is not a userspace mount
  mnt: Protect the mountpoint hashtable with mount_lock
Linus Torvalds 8 years ago
parent
commit
99421c1cb2
5 changed files with 60 additions and 27 deletions
  1. 5 2
      fs/dcache.c
  2. 2 1
      fs/libfs.c
  3. 45 19
      fs/namespace.c
  4. 2 1
      fs/proc/proc_sysctl.c
  5. 6 4
      kernel/pid_namespace.c

+ 5 - 2
fs/dcache.c

@@ -1336,8 +1336,11 @@ int d_set_mounted(struct dentry *dentry)
 	}
 	}
 	spin_lock(&dentry->d_lock);
 	spin_lock(&dentry->d_lock);
 	if (!d_unlinked(dentry)) {
 	if (!d_unlinked(dentry)) {
-		dentry->d_flags |= DCACHE_MOUNTED;
-		ret = 0;
+		ret = -EBUSY;
+		if (!d_mountpoint(dentry)) {
+			dentry->d_flags |= DCACHE_MOUNTED;
+			ret = 0;
+		}
 	}
 	}
  	spin_unlock(&dentry->d_lock);
  	spin_unlock(&dentry->d_lock);
 out:
 out:

+ 2 - 1
fs/libfs.c

@@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
 	struct inode *root;
 	struct inode *root;
 	struct qstr d_name = QSTR_INIT(name, strlen(name));
 	struct qstr d_name = QSTR_INIT(name, strlen(name));
 
 
-	s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
+	s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER,
+			&init_user_ns, NULL);
 	if (IS_ERR(s))
 	if (IS_ERR(s))
 		return ERR_CAST(s);
 		return ERR_CAST(s);
 
 

+ 45 - 19
fs/namespace.c

@@ -742,26 +742,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 	return NULL;
 	return NULL;
 }
 }
 
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
 {
 {
-	struct hlist_head *chain = mp_hash(dentry);
-	struct mountpoint *mp;
+	struct mountpoint *mp, *new = NULL;
 	int ret;
 	int ret;
 
 
-	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-	if (!mp)
+	if (d_mountpoint(dentry)) {
+mountpoint:
+		read_seqlock_excl(&mount_lock);
+		mp = lookup_mountpoint(dentry);
+		read_sequnlock_excl(&mount_lock);
+		if (mp)
+			goto done;
+	}
+
+	if (!new)
+		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+	if (!new)
 		return ERR_PTR(-ENOMEM);
 		return ERR_PTR(-ENOMEM);
 
 
+
+	/* Exactly one processes may set d_mounted */
 	ret = d_set_mounted(dentry);
 	ret = d_set_mounted(dentry);
-	if (ret) {
-		kfree(mp);
-		return ERR_PTR(ret);
-	}
 
 
-	mp->m_dentry = dentry;
-	mp->m_count = 1;
-	hlist_add_head(&mp->m_hash, chain);
-	INIT_HLIST_HEAD(&mp->m_list);
+	/* Someone else set d_mounted? */
+	if (ret == -EBUSY)
+		goto mountpoint;
+
+	/* The dentry is not available as a mountpoint? */
+	mp = ERR_PTR(ret);
+	if (ret)
+		goto done;
+
+	/* Add the new mountpoint to the hash table */
+	read_seqlock_excl(&mount_lock);
+	new->m_dentry = dentry;
+	new->m_count = 1;
+	hlist_add_head(&new->m_hash, mp_hash(dentry));
+	INIT_HLIST_HEAD(&new->m_list);
+	read_sequnlock_excl(&mount_lock);
+
+	mp = new;
+	new = NULL;
+done:
+	kfree(new);
 	return mp;
 	return mp;
 }
 }
 
 
@@ -1595,11 +1619,11 @@ void __detach_mounts(struct dentry *dentry)
 	struct mount *mnt;
 	struct mount *mnt;
 
 
 	namespace_lock();
 	namespace_lock();
+	lock_mount_hash();
 	mp = lookup_mountpoint(dentry);
 	mp = lookup_mountpoint(dentry);
 	if (IS_ERR_OR_NULL(mp))
 	if (IS_ERR_OR_NULL(mp))
 		goto out_unlock;
 		goto out_unlock;
 
 
-	lock_mount_hash();
 	event++;
 	event++;
 	while (!hlist_empty(&mp->m_list)) {
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
@@ -1609,9 +1633,9 @@ void __detach_mounts(struct dentry *dentry)
 		}
 		}
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
 	}
-	unlock_mount_hash();
 	put_mountpoint(mp);
 	put_mountpoint(mp);
 out_unlock:
 out_unlock:
+	unlock_mount_hash();
 	namespace_unlock();
 	namespace_unlock();
 }
 }
 
 
@@ -2038,9 +2062,7 @@ retry:
 	namespace_lock();
 	namespace_lock();
 	mnt = lookup_mnt(path);
 	mnt = lookup_mnt(path);
 	if (likely(!mnt)) {
 	if (likely(!mnt)) {
-		struct mountpoint *mp = lookup_mountpoint(dentry);
-		if (!mp)
-			mp = new_mountpoint(dentry);
+		struct mountpoint *mp = get_mountpoint(dentry);
 		if (IS_ERR(mp)) {
 		if (IS_ERR(mp)) {
 			namespace_unlock();
 			namespace_unlock();
 			inode_unlock(dentry->d_inode);
 			inode_unlock(dentry->d_inode);
@@ -2059,7 +2081,11 @@ retry:
 static void unlock_mount(struct mountpoint *where)
 static void unlock_mount(struct mountpoint *where)
 {
 {
 	struct dentry *dentry = where->m_dentry;
 	struct dentry *dentry = where->m_dentry;
+
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(where);
 	put_mountpoint(where);
+	read_sequnlock_excl(&mount_lock);
+
 	namespace_unlock();
 	namespace_unlock();
 	inode_unlock(dentry->d_inode);
 	inode_unlock(dentry->d_inode);
 }
 }
@@ -3135,9 +3161,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	/* A moved mount should not expire automatically */
 	/* A moved mount should not expire automatically */
 	list_del_init(&new_mnt->mnt_expire);
 	list_del_init(&new_mnt->mnt_expire);
+	put_mountpoint(root_mp);
 	unlock_mount_hash();
 	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
 	chroot_fs_refs(&root, &new);
-	put_mountpoint(root_mp);
 	error = 0;
 	error = 0;
 out4:
 out4:
 	unlock_mount(old_mp);
 	unlock_mount(old_mp);

+ 2 - 1
fs/proc/proc_sysctl.c

@@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 	ctl_dir = container_of(head, struct ctl_dir, header);
 	ctl_dir = container_of(head, struct ctl_dir, header);
 
 
 	if (!dir_emit_dots(file, ctx))
 	if (!dir_emit_dots(file, ctx))
-		return 0;
+		goto out;
 
 
 	pos = 2;
 	pos = 2;
 
 
@@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 			break;
 			break;
 		}
 		}
 	}
 	}
+out:
 	sysctl_head_finish(head);
 	sysctl_head_finish(head);
 	return 0;
 	return 0;
 }
 }

+ 6 - 4
kernel/pid_namespace.c

@@ -151,8 +151,12 @@ out:
 
 
 static void delayed_free_pidns(struct rcu_head *p)
 static void delayed_free_pidns(struct rcu_head *p)
 {
 {
-	kmem_cache_free(pid_ns_cachep,
-			container_of(p, struct pid_namespace, rcu));
+	struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu);
+
+	dec_pid_namespaces(ns->ucounts);
+	put_user_ns(ns->user_ns);
+
+	kmem_cache_free(pid_ns_cachep, ns);
 }
 }
 
 
 static void destroy_pid_namespace(struct pid_namespace *ns)
 static void destroy_pid_namespace(struct pid_namespace *ns)
@@ -162,8 +166,6 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	ns_free_inum(&ns->ns);
 	ns_free_inum(&ns->ns);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
 		kfree(ns->pidmap[i].page);
-	dec_pid_namespaces(ns->ucounts);
-	put_user_ns(ns->user_ns);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 }
 }