|
@@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
|
|
|
return total_objects;
|
|
|
}
|
|
|
|
|
|
+static void destroy_super_work(struct work_struct *work)
|
|
|
+{
|
|
|
+ struct super_block *s = container_of(work, struct super_block,
|
|
|
+ destroy_work);
|
|
|
+ int i;
|
|
|
+
|
|
|
+ for (i = 0; i < SB_FREEZE_LEVELS; i++)
|
|
|
+ percpu_free_rwsem(&s->s_writers.rw_sem[i]);
|
|
|
+ kfree(s);
|
|
|
+}
|
|
|
+
|
|
|
+static void destroy_super_rcu(struct rcu_head *head)
|
|
|
+{
|
|
|
+ struct super_block *s = container_of(head, struct super_block, rcu);
|
|
|
+ INIT_WORK(&s->destroy_work, destroy_super_work);
|
|
|
+ schedule_work(&s->destroy_work);
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* destroy_super - frees a superblock
|
|
|
* @s: superblock to free
|
|
@@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink,
|
|
|
*/
|
|
|
static void destroy_super(struct super_block *s)
|
|
|
{
|
|
|
- int i;
|
|
|
list_lru_destroy(&s->s_dentry_lru);
|
|
|
list_lru_destroy(&s->s_inode_lru);
|
|
|
- for (i = 0; i < SB_FREEZE_LEVELS; i++)
|
|
|
- percpu_counter_destroy(&s->s_writers.counter[i]);
|
|
|
security_sb_free(s);
|
|
|
WARN_ON(!list_empty(&s->s_mounts));
|
|
|
kfree(s->s_subtype);
|
|
|
kfree(s->s_options);
|
|
|
- kfree_rcu(s, rcu);
|
|
|
+ call_rcu(&s->rcu, destroy_super_rcu);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -178,13 +193,11 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
|
|
|
goto fail;
|
|
|
|
|
|
for (i = 0; i < SB_FREEZE_LEVELS; i++) {
|
|
|
- if (percpu_counter_init(&s->s_writers.counter[i], 0,
|
|
|
- GFP_KERNEL) < 0)
|
|
|
+ if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
|
|
|
+ sb_writers_name[i],
|
|
|
+ &type->s_writers_key[i]))
|
|
|
goto fail;
|
|
|
- lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
|
|
|
- &type->s_writers_key[i], 0);
|
|
|
}
|
|
|
- init_waitqueue_head(&s->s_writers.wait);
|
|
|
init_waitqueue_head(&s->s_writers.wait_unfrozen);
|
|
|
s->s_bdi = &noop_backing_dev_info;
|
|
|
s->s_flags = flags;
|
|
@@ -1146,72 +1159,46 @@ out:
|
|
|
*/
|
|
|
void __sb_end_write(struct super_block *sb, int level)
|
|
|
{
|
|
|
- percpu_counter_dec(&sb->s_writers.counter[level-1]);
|
|
|
- /*
|
|
|
- * Make sure s_writers are updated before we wake up waiters in
|
|
|
- * freeze_super().
|
|
|
- */
|
|
|
- smp_mb();
|
|
|
- if (waitqueue_active(&sb->s_writers.wait))
|
|
|
- wake_up(&sb->s_writers.wait);
|
|
|
- rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
|
|
|
+ percpu_up_read(sb->s_writers.rw_sem + level-1);
|
|
|
}
|
|
|
EXPORT_SYMBOL(__sb_end_write);
|
|
|
|
|
|
-#ifdef CONFIG_LOCKDEP
|
|
|
-/*
|
|
|
- * We want lockdep to tell us about possible deadlocks with freezing but
|
|
|
- * it's it bit tricky to properly instrument it. Getting a freeze protection
|
|
|
- * works as getting a read lock but there are subtle problems. XFS for example
|
|
|
- * gets freeze protection on internal level twice in some cases, which is OK
|
|
|
- * only because we already hold a freeze protection also on higher level. Due
|
|
|
- * to these cases we have to tell lockdep we are doing trylock when we
|
|
|
- * already hold a freeze protection for a higher freeze level.
|
|
|
- */
|
|
|
-static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
|
|
|
- unsigned long ip)
|
|
|
-{
|
|
|
- int i;
|
|
|
-
|
|
|
- if (!trylock) {
|
|
|
- for (i = 0; i < level - 1; i++)
|
|
|
- if (lock_is_held(&sb->s_writers.lock_map[i])) {
|
|
|
- trylock = true;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
|
|
|
-}
|
|
|
-#endif
|
|
|
-
|
|
|
/*
|
|
|
* This is an internal function, please use sb_start_{write,pagefault,intwrite}
|
|
|
* instead.
|
|
|
*/
|
|
|
int __sb_start_write(struct super_block *sb, int level, bool wait)
|
|
|
{
|
|
|
-retry:
|
|
|
- if (unlikely(sb->s_writers.frozen >= level)) {
|
|
|
- if (!wait)
|
|
|
- return 0;
|
|
|
- wait_event(sb->s_writers.wait_unfrozen,
|
|
|
- sb->s_writers.frozen < level);
|
|
|
- }
|
|
|
+ bool force_trylock = false;
|
|
|
+ int ret = 1;
|
|
|
|
|
|
#ifdef CONFIG_LOCKDEP
|
|
|
- acquire_freeze_lock(sb, level, !wait, _RET_IP_);
|
|
|
-#endif
|
|
|
- percpu_counter_inc(&sb->s_writers.counter[level-1]);
|
|
|
/*
|
|
|
- * Make sure counter is updated before we check for frozen.
|
|
|
- * freeze_super() first sets frozen and then checks the counter.
|
|
|
+ * We want lockdep to tell us about possible deadlocks with freezing
|
|
|
+ * but it's a bit tricky to properly instrument it. Getting a freeze
|
|
|
+ * protection works as getting a read lock but there are subtle
|
|
|
+ * problems. XFS for example gets freeze protection on internal level
|
|
|
+ * twice in some cases, which is OK only because we already hold a
|
|
|
+ * freeze protection also on higher level. Due to these cases we have
|
|
|
+ * to use wait == F (trylock mode) which must not fail.
|
|
|
*/
|
|
|
- smp_mb();
|
|
|
- if (unlikely(sb->s_writers.frozen >= level)) {
|
|
|
- __sb_end_write(sb, level);
|
|
|
- goto retry;
|
|
|
+ if (wait) {
|
|
|
+ int i;
|
|
|
+
|
|
|
+ for (i = 0; i < level - 1; i++)
|
|
|
+ if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
|
|
|
+ force_trylock = true;
|
|
|
+ break;
|
|
|
+ }
|
|
|
}
|
|
|
- return 1;
|
|
|
+#endif
|
|
|
+ if (wait && !force_trylock)
|
|
|
+ percpu_down_read(sb->s_writers.rw_sem + level-1);
|
|
|
+ else
|
|
|
+ ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
|
|
|
+
|
|
|
+ WARN_ON(force_trylock & !ret);
|
|
|
+ return ret;
|
|
|
}
|
|
|
EXPORT_SYMBOL(__sb_start_write);
|
|
|
|
|
@@ -1221,37 +1208,33 @@ EXPORT_SYMBOL(__sb_start_write);
|
|
|
* @level: type of writers we wait for (normal vs page fault)
|
|
|
*
|
|
|
* This function waits until there are no writers of given type to given file
|
|
|
- * system. Caller of this function should make sure there can be no new writers
|
|
|
- * of type @level before calling this function. Otherwise this function can
|
|
|
- * livelock.
|
|
|
+ * system.
|
|
|
*/
|
|
|
static void sb_wait_write(struct super_block *sb, int level)
|
|
|
{
|
|
|
- s64 writers;
|
|
|
-
|
|
|
+ percpu_down_write(sb->s_writers.rw_sem + level-1);
|
|
|
/*
|
|
|
- * We just cycle-through lockdep here so that it does not complain
|
|
|
- * about returning with lock to userspace
|
|
|
+ * We are going to return to userspace and forget about this lock, the
|
|
|
+ * ownership goes to the caller of thaw_super() which does unlock.
|
|
|
+ *
|
|
|
+ * FIXME: we should do this before return from freeze_super() after we
|
|
|
+ * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
|
|
|
+ * should re-acquire these locks before s_op->unfreeze_fs(sb). However
|
|
|
+ * this leads to lockdep false-positives, so currently we do the early
|
|
|
+ * release right after acquire.
|
|
|
*/
|
|
|
- rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
|
|
|
- rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
|
|
|
-
|
|
|
- do {
|
|
|
- DEFINE_WAIT(wait);
|
|
|
+ percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
|
|
|
+}
|
|
|
|
|
|
- /*
|
|
|
- * We use a barrier in prepare_to_wait() to separate setting
|
|
|
- * of frozen and checking of the counter
|
|
|
- */
|
|
|
- prepare_to_wait(&sb->s_writers.wait, &wait,
|
|
|
- TASK_UNINTERRUPTIBLE);
|
|
|
+static void sb_freeze_unlock(struct super_block *sb)
|
|
|
+{
|
|
|
+ int level;
|
|
|
|
|
|
- writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
|
|
|
- if (writers)
|
|
|
- schedule();
|
|
|
+ for (level = 0; level < SB_FREEZE_LEVELS; ++level)
|
|
|
+ percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
|
|
|
|
|
|
- finish_wait(&sb->s_writers.wait, &wait);
|
|
|
- } while (writers);
|
|
|
+ for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
|
|
|
+ percpu_up_write(sb->s_writers.rw_sem + level);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -1310,20 +1293,14 @@ int freeze_super(struct super_block *sb)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
- /* From now on, no new normal writers can start */
|
|
|
sb->s_writers.frozen = SB_FREEZE_WRITE;
|
|
|
- smp_wmb();
|
|
|
-
|
|
|
/* Release s_umount to preserve sb_start_write -> s_umount ordering */
|
|
|
up_write(&sb->s_umount);
|
|
|
-
|
|
|
sb_wait_write(sb, SB_FREEZE_WRITE);
|
|
|
+ down_write(&sb->s_umount);
|
|
|
|
|
|
/* Now we go and block page faults... */
|
|
|
- down_write(&sb->s_umount);
|
|
|
sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
|
|
|
- smp_wmb();
|
|
|
-
|
|
|
sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
|
|
|
|
|
|
/* All writers are done so after syncing there won't be dirty data */
|
|
@@ -1331,7 +1308,6 @@ int freeze_super(struct super_block *sb)
|
|
|
|
|
|
/* Now wait for internal filesystem counter */
|
|
|
sb->s_writers.frozen = SB_FREEZE_FS;
|
|
|
- smp_wmb();
|
|
|
sb_wait_write(sb, SB_FREEZE_FS);
|
|
|
|
|
|
if (sb->s_op->freeze_fs) {
|
|
@@ -1340,7 +1316,7 @@ int freeze_super(struct super_block *sb)
|
|
|
printk(KERN_ERR
|
|
|
"VFS:Filesystem freeze failed\n");
|
|
|
sb->s_writers.frozen = SB_UNFROZEN;
|
|
|
- smp_wmb();
|
|
|
+ sb_freeze_unlock(sb);
|
|
|
wake_up(&sb->s_writers.wait_unfrozen);
|
|
|
deactivate_locked_super(sb);
|
|
|
return ret;
|
|
@@ -1372,8 +1348,10 @@ int thaw_super(struct super_block *sb)
|
|
|
return -EINVAL;
|
|
|
}
|
|
|
|
|
|
- if (sb->s_flags & MS_RDONLY)
|
|
|
+ if (sb->s_flags & MS_RDONLY) {
|
|
|
+ sb->s_writers.frozen = SB_UNFROZEN;
|
|
|
goto out;
|
|
|
+ }
|
|
|
|
|
|
if (sb->s_op->unfreeze_fs) {
|
|
|
error = sb->s_op->unfreeze_fs(sb);
|
|
@@ -1385,12 +1363,11 @@ int thaw_super(struct super_block *sb)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-out:
|
|
|
sb->s_writers.frozen = SB_UNFROZEN;
|
|
|
- smp_wmb();
|
|
|
+ sb_freeze_unlock(sb);
|
|
|
+out:
|
|
|
wake_up(&sb->s_writers.wait_unfrozen);
|
|
|
deactivate_locked_super(sb);
|
|
|
-
|
|
|
return 0;
|
|
|
}
|
|
|
EXPORT_SYMBOL(thaw_super);
|