|
@@ -34,6 +34,7 @@
|
|
#include "extent_io.h"
|
|
#include "extent_io.h"
|
|
#include "qgroup.h"
|
|
#include "qgroup.h"
|
|
|
|
|
|
|
|
+
|
|
/* TODO XXX FIXME
|
|
/* TODO XXX FIXME
|
|
* - subvol delete -> delete when ref goes to 0? delete limits also?
|
|
* - subvol delete -> delete when ref goes to 0? delete limits also?
|
|
* - reorganize keys
|
|
* - reorganize keys
|
|
@@ -1387,172 +1388,6 @@ out:
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
-static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
|
|
|
|
- struct btrfs_qgroup_operation *oper2)
|
|
|
|
-{
|
|
|
|
- /*
|
|
|
|
- * Ignore seq and type here, we're looking for any operation
|
|
|
|
- * at all related to this extent on that root.
|
|
|
|
- */
|
|
|
|
- if (oper1->bytenr < oper2->bytenr)
|
|
|
|
- return -1;
|
|
|
|
- if (oper1->bytenr > oper2->bytenr)
|
|
|
|
- return 1;
|
|
|
|
- if (oper1->ref_root < oper2->ref_root)
|
|
|
|
- return -1;
|
|
|
|
- if (oper1->ref_root > oper2->ref_root)
|
|
|
|
- return 1;
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper)
|
|
|
|
-{
|
|
|
|
- struct rb_node *n;
|
|
|
|
- struct btrfs_qgroup_operation *cur;
|
|
|
|
- int cmp;
|
|
|
|
-
|
|
|
|
- spin_lock(&fs_info->qgroup_op_lock);
|
|
|
|
- n = fs_info->qgroup_op_tree.rb_node;
|
|
|
|
- while (n) {
|
|
|
|
- cur = rb_entry(n, struct btrfs_qgroup_operation, n);
|
|
|
|
- cmp = comp_oper_exist(cur, oper);
|
|
|
|
- if (cmp < 0) {
|
|
|
|
- n = n->rb_right;
|
|
|
|
- } else if (cmp) {
|
|
|
|
- n = n->rb_left;
|
|
|
|
- } else {
|
|
|
|
- spin_unlock(&fs_info->qgroup_op_lock);
|
|
|
|
- return -EEXIST;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- spin_unlock(&fs_info->qgroup_op_lock);
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static int comp_oper(struct btrfs_qgroup_operation *oper1,
|
|
|
|
- struct btrfs_qgroup_operation *oper2)
|
|
|
|
-{
|
|
|
|
- if (oper1->bytenr < oper2->bytenr)
|
|
|
|
- return -1;
|
|
|
|
- if (oper1->bytenr > oper2->bytenr)
|
|
|
|
- return 1;
|
|
|
|
- if (oper1->ref_root < oper2->ref_root)
|
|
|
|
- return -1;
|
|
|
|
- if (oper1->ref_root > oper2->ref_root)
|
|
|
|
- return 1;
|
|
|
|
- if (oper1->seq < oper2->seq)
|
|
|
|
- return -1;
|
|
|
|
- if (oper1->seq > oper2->seq)
|
|
|
|
- return 1;
|
|
|
|
- if (oper1->type < oper2->type)
|
|
|
|
- return -1;
|
|
|
|
- if (oper1->type > oper2->type)
|
|
|
|
- return 1;
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper)
|
|
|
|
-{
|
|
|
|
- struct rb_node **p;
|
|
|
|
- struct rb_node *parent = NULL;
|
|
|
|
- struct btrfs_qgroup_operation *cur;
|
|
|
|
- int cmp;
|
|
|
|
-
|
|
|
|
- spin_lock(&fs_info->qgroup_op_lock);
|
|
|
|
- p = &fs_info->qgroup_op_tree.rb_node;
|
|
|
|
- while (*p) {
|
|
|
|
- parent = *p;
|
|
|
|
- cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
|
|
|
|
- cmp = comp_oper(cur, oper);
|
|
|
|
- if (cmp < 0) {
|
|
|
|
- p = &(*p)->rb_right;
|
|
|
|
- } else if (cmp) {
|
|
|
|
- p = &(*p)->rb_left;
|
|
|
|
- } else {
|
|
|
|
- spin_unlock(&fs_info->qgroup_op_lock);
|
|
|
|
- return -EEXIST;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- rb_link_node(&oper->n, parent, p);
|
|
|
|
- rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
|
|
|
|
- spin_unlock(&fs_info->qgroup_op_lock);
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * Record a quota operation for processing later on.
|
|
|
|
- * @trans: the transaction we are adding the delayed op to.
|
|
|
|
- * @fs_info: the fs_info for this fs.
|
|
|
|
- * @ref_root: the root of the reference we are acting on,
|
|
|
|
- * @bytenr: the bytenr we are acting on.
|
|
|
|
- * @num_bytes: the number of bytes in the reference.
|
|
|
|
- * @type: the type of operation this is.
|
|
|
|
- * @mod_seq: do we need to get a sequence number for looking up roots.
|
|
|
|
- *
|
|
|
|
- * We just add it to our trans qgroup_ref_list and carry on and process these
|
|
|
|
- * operations in order at some later point. If the reference root isn't a fs
|
|
|
|
- * root then we don't bother with doing anything.
|
|
|
|
- *
|
|
|
|
- * MUST BE HOLDING THE REF LOCK.
|
|
|
|
- */
|
|
|
|
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
|
|
|
|
- struct btrfs_fs_info *fs_info, u64 ref_root,
|
|
|
|
- u64 bytenr, u64 num_bytes,
|
|
|
|
- enum btrfs_qgroup_operation_type type, int mod_seq)
|
|
|
|
-{
|
|
|
|
- struct btrfs_qgroup_operation *oper;
|
|
|
|
- int ret;
|
|
|
|
-
|
|
|
|
- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
|
|
|
|
- return 0;
|
|
|
|
-
|
|
|
|
- oper = kmalloc(sizeof(*oper), GFP_NOFS);
|
|
|
|
- if (!oper)
|
|
|
|
- return -ENOMEM;
|
|
|
|
-
|
|
|
|
- oper->ref_root = ref_root;
|
|
|
|
- oper->bytenr = bytenr;
|
|
|
|
- oper->num_bytes = num_bytes;
|
|
|
|
- oper->type = type;
|
|
|
|
- oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
|
|
|
|
- INIT_LIST_HEAD(&oper->elem.list);
|
|
|
|
- oper->elem.seq = 0;
|
|
|
|
-
|
|
|
|
- trace_btrfs_qgroup_record_ref(oper);
|
|
|
|
-
|
|
|
|
- if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
|
|
|
|
- /*
|
|
|
|
- * If any operation for this bytenr/ref_root combo
|
|
|
|
- * exists, then we know it's not exclusively owned and
|
|
|
|
- * shouldn't be queued up.
|
|
|
|
- *
|
|
|
|
- * This also catches the case where we have a cloned
|
|
|
|
- * extent that gets queued up multiple times during
|
|
|
|
- * drop snapshot.
|
|
|
|
- */
|
|
|
|
- if (qgroup_oper_exists(fs_info, oper)) {
|
|
|
|
- kfree(oper);
|
|
|
|
- return 0;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- ret = insert_qgroup_oper(fs_info, oper);
|
|
|
|
- if (ret) {
|
|
|
|
- /* Shouldn't happen so have an assert for developers */
|
|
|
|
- ASSERT(0);
|
|
|
|
- kfree(oper);
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
- list_add_tail(&oper->list, &trans->qgroup_ref_list);
|
|
|
|
-
|
|
|
|
- if (mod_seq)
|
|
|
|
- btrfs_get_tree_mod_seq(fs_info, &oper->elem);
|
|
|
|
-
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
|
|
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info)
|
|
struct btrfs_fs_info *fs_info)
|
|
{
|
|
{
|
|
@@ -1606,264 +1441,6 @@ struct btrfs_qgroup_extent_record
|
|
return NULL;
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * The easy accounting, if we are adding/removing the only ref for an extent
|
|
|
|
- * then this qgroup and all of the parent qgroups get their refrence and
|
|
|
|
- * exclusive counts adjusted.
|
|
|
|
- */
|
|
|
|
-static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper)
|
|
|
|
-{
|
|
|
|
- struct ulist *tmp;
|
|
|
|
- int sign = 0;
|
|
|
|
- int ret = 0;
|
|
|
|
-
|
|
|
|
- tmp = ulist_alloc(GFP_NOFS);
|
|
|
|
- if (!tmp)
|
|
|
|
- return -ENOMEM;
|
|
|
|
-
|
|
|
|
- spin_lock(&fs_info->qgroup_lock);
|
|
|
|
- if (!fs_info->quota_root)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- switch (oper->type) {
|
|
|
|
- case BTRFS_QGROUP_OPER_ADD_EXCL:
|
|
|
|
- sign = 1;
|
|
|
|
- break;
|
|
|
|
- case BTRFS_QGROUP_OPER_SUB_EXCL:
|
|
|
|
- sign = -1;
|
|
|
|
- break;
|
|
|
|
- default:
|
|
|
|
- ASSERT(0);
|
|
|
|
- }
|
|
|
|
- ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
|
|
|
|
- oper->num_bytes, sign);
|
|
|
|
-out:
|
|
|
|
- spin_unlock(&fs_info->qgroup_lock);
|
|
|
|
- ulist_free(tmp);
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
|
|
|
|
- * properly.
|
|
|
|
- */
|
|
|
|
-static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
|
|
|
|
- u64 root_to_skip, struct ulist *tmp,
|
|
|
|
- struct ulist *roots, struct ulist *qgroups,
|
|
|
|
- u64 seq, int *old_roots, int rescan)
|
|
|
|
-{
|
|
|
|
- struct ulist_node *unode;
|
|
|
|
- struct ulist_iterator uiter;
|
|
|
|
- struct ulist_node *tmp_unode;
|
|
|
|
- struct ulist_iterator tmp_uiter;
|
|
|
|
- struct btrfs_qgroup *qg;
|
|
|
|
- int ret;
|
|
|
|
-
|
|
|
|
- ULIST_ITER_INIT(&uiter);
|
|
|
|
- while ((unode = ulist_next(roots, &uiter))) {
|
|
|
|
- /* We don't count our current root here */
|
|
|
|
- if (unode->val == root_to_skip)
|
|
|
|
- continue;
|
|
|
|
- qg = find_qgroup_rb(fs_info, unode->val);
|
|
|
|
- if (!qg)
|
|
|
|
- continue;
|
|
|
|
- /*
|
|
|
|
- * We could have a pending removal of this same ref so we may
|
|
|
|
- * not have actually found our ref root when doing
|
|
|
|
- * btrfs_find_all_roots, so we need to keep track of how many
|
|
|
|
- * old roots we find in case we removed ours and added a
|
|
|
|
- * different one at the same time. I don't think this could
|
|
|
|
- * happen in practice but that sort of thinking leads to pain
|
|
|
|
- * and suffering and to the dark side.
|
|
|
|
- */
|
|
|
|
- (*old_roots)++;
|
|
|
|
-
|
|
|
|
- ulist_reinit(tmp);
|
|
|
|
- ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
|
|
|
|
- GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ULIST_ITER_INIT(&tmp_uiter);
|
|
|
|
- while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
|
|
|
|
- struct btrfs_qgroup_list *glist;
|
|
|
|
- int mod;
|
|
|
|
-
|
|
|
|
- qg = u64_to_ptr(tmp_unode->aux);
|
|
|
|
- /*
|
|
|
|
- * We use this sequence number to keep from having to
|
|
|
|
- * run the whole list and 0 out the refcnt every time.
|
|
|
|
- * We basically use sequnce as the known 0 count and
|
|
|
|
- * then add 1 everytime we see a qgroup. This is how we
|
|
|
|
- * get how many of the roots actually point up to the
|
|
|
|
- * upper level qgroups in order to determine exclusive
|
|
|
|
- * counts.
|
|
|
|
- *
|
|
|
|
- * For rescan none of the extent is recorded before so
|
|
|
|
- * we just don't add old_refcnt.
|
|
|
|
- */
|
|
|
|
- if (rescan)
|
|
|
|
- mod = 0;
|
|
|
|
- else
|
|
|
|
- mod = 1;
|
|
|
|
- btrfs_qgroup_update_old_refcnt(qg, seq, mod);
|
|
|
|
- btrfs_qgroup_update_new_refcnt(qg, seq, 1);
|
|
|
|
- list_for_each_entry(glist, &qg->groups, next_group) {
|
|
|
|
- ret = ulist_add(qgroups, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group),
|
|
|
|
- GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ret = ulist_add(tmp, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group),
|
|
|
|
- GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * We need to walk forward in our operation tree and account for any roots that
|
|
|
|
- * were deleted after we made this operation.
|
|
|
|
- */
|
|
|
|
-static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper,
|
|
|
|
- struct ulist *tmp,
|
|
|
|
- struct ulist *qgroups, u64 seq,
|
|
|
|
- int *old_roots)
|
|
|
|
-{
|
|
|
|
- struct ulist_node *unode;
|
|
|
|
- struct ulist_iterator uiter;
|
|
|
|
- struct btrfs_qgroup *qg;
|
|
|
|
- struct btrfs_qgroup_operation *tmp_oper;
|
|
|
|
- struct rb_node *n;
|
|
|
|
- int ret;
|
|
|
|
-
|
|
|
|
- ulist_reinit(tmp);
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * We only walk forward in the tree since we're only interested in
|
|
|
|
- * removals that happened _after_ our operation.
|
|
|
|
- */
|
|
|
|
- spin_lock(&fs_info->qgroup_op_lock);
|
|
|
|
- n = rb_next(&oper->n);
|
|
|
|
- spin_unlock(&fs_info->qgroup_op_lock);
|
|
|
|
- if (!n)
|
|
|
|
- return 0;
|
|
|
|
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
|
|
|
|
- while (tmp_oper->bytenr == oper->bytenr) {
|
|
|
|
- /*
|
|
|
|
- * If it's not a removal we don't care, additions work out
|
|
|
|
- * properly with our refcnt tracking.
|
|
|
|
- */
|
|
|
|
- if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
|
|
|
|
- tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
|
|
|
|
- goto next;
|
|
|
|
- qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
|
|
|
|
- if (!qg)
|
|
|
|
- goto next;
|
|
|
|
- ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
|
|
|
|
- GFP_ATOMIC);
|
|
|
|
- if (ret) {
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- /*
|
|
|
|
- * We only want to increase old_roots if this qgroup is
|
|
|
|
- * not already in the list of qgroups. If it is already
|
|
|
|
- * there then that means it must have been re-added or
|
|
|
|
- * the delete will be discarded because we had an
|
|
|
|
- * existing ref that we haven't looked up yet. In this
|
|
|
|
- * case we don't want to increase old_roots. So if ret
|
|
|
|
- * == 1 then we know that this is the first time we've
|
|
|
|
- * seen this qgroup and we can bump the old_roots.
|
|
|
|
- */
|
|
|
|
- (*old_roots)++;
|
|
|
|
- ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
|
|
|
|
- GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
-next:
|
|
|
|
- spin_lock(&fs_info->qgroup_op_lock);
|
|
|
|
- n = rb_next(&tmp_oper->n);
|
|
|
|
- spin_unlock(&fs_info->qgroup_op_lock);
|
|
|
|
- if (!n)
|
|
|
|
- break;
|
|
|
|
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /* Ok now process the qgroups we found */
|
|
|
|
- ULIST_ITER_INIT(&uiter);
|
|
|
|
- while ((unode = ulist_next(tmp, &uiter))) {
|
|
|
|
- struct btrfs_qgroup_list *glist;
|
|
|
|
-
|
|
|
|
- qg = u64_to_ptr(unode->aux);
|
|
|
|
- btrfs_qgroup_update_old_refcnt(qg, seq, 1);
|
|
|
|
- btrfs_qgroup_update_new_refcnt(qg, seq, 1);
|
|
|
|
- list_for_each_entry(glist, &qg->groups, next_group) {
|
|
|
|
- ret = ulist_add(qgroups, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group), GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ret = ulist_add(tmp, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group), GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/* Add refcnt for the newly added reference. */
|
|
|
|
-static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper,
|
|
|
|
- struct btrfs_qgroup *qgroup,
|
|
|
|
- struct ulist *tmp, struct ulist *qgroups,
|
|
|
|
- u64 seq)
|
|
|
|
-{
|
|
|
|
- struct ulist_node *unode;
|
|
|
|
- struct ulist_iterator uiter;
|
|
|
|
- struct btrfs_qgroup *qg;
|
|
|
|
- int ret;
|
|
|
|
-
|
|
|
|
- ulist_reinit(tmp);
|
|
|
|
- ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
|
|
|
|
- GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
|
|
|
|
- GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ULIST_ITER_INIT(&uiter);
|
|
|
|
- while ((unode = ulist_next(tmp, &uiter))) {
|
|
|
|
- struct btrfs_qgroup_list *glist;
|
|
|
|
-
|
|
|
|
- qg = u64_to_ptr(unode->aux);
|
|
|
|
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED)
|
|
|
|
- btrfs_qgroup_update_new_refcnt(qg, seq, 1);
|
|
|
|
- else
|
|
|
|
- btrfs_qgroup_update_old_refcnt(qg, seq, 1);
|
|
|
|
- list_for_each_entry(glist, &qg->groups, next_group) {
|
|
|
|
- ret = ulist_add(tmp, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group), GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ret = ulist_add(qgroups, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group), GFP_ATOMIC);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
#define UPDATE_NEW 0
|
|
#define UPDATE_NEW 0
|
|
#define UPDATE_OLD 1
|
|
#define UPDATE_OLD 1
|
|
/*
|
|
/*
|
|
@@ -1925,6 +1502,7 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
|
|
/*
|
|
/*
|
|
* Update qgroup rfer/excl counters.
|
|
* Update qgroup rfer/excl counters.
|
|
* Rfer update is easy, codes can explain themselves.
|
|
* Rfer update is easy, codes can explain themselves.
|
|
|
|
+ *
|
|
* Excl update is tricky, the update is split into 2 part.
|
|
* Excl update is tricky, the update is split into 2 part.
|
|
* Part 1: Possible exclusive <-> sharing detect:
|
|
* Part 1: Possible exclusive <-> sharing detect:
|
|
* | A | !A |
|
|
* | A | !A |
|
|
@@ -2042,419 +1620,6 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * This adjusts the counters for all referenced qgroups if need be.
|
|
|
|
- */
|
|
|
|
-static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
|
|
|
|
- u64 root_to_skip, u64 num_bytes,
|
|
|
|
- struct ulist *qgroups, u64 seq,
|
|
|
|
- int old_roots, int new_roots, int rescan)
|
|
|
|
-{
|
|
|
|
- struct ulist_node *unode;
|
|
|
|
- struct ulist_iterator uiter;
|
|
|
|
- struct btrfs_qgroup *qg;
|
|
|
|
- u64 cur_new_count, cur_old_count;
|
|
|
|
-
|
|
|
|
- ULIST_ITER_INIT(&uiter);
|
|
|
|
- while ((unode = ulist_next(qgroups, &uiter))) {
|
|
|
|
- bool dirty = false;
|
|
|
|
-
|
|
|
|
- qg = u64_to_ptr(unode->aux);
|
|
|
|
- cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
|
|
|
|
- cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Wasn't referenced before but is now, add to the reference
|
|
|
|
- * counters.
|
|
|
|
- */
|
|
|
|
- if (cur_old_count == 0 && cur_new_count > 0) {
|
|
|
|
- qg->rfer += num_bytes;
|
|
|
|
- qg->rfer_cmpr += num_bytes;
|
|
|
|
- dirty = true;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Was referenced before but isn't now, subtract from the
|
|
|
|
- * reference counters.
|
|
|
|
- */
|
|
|
|
- if (cur_old_count > 0 && cur_new_count == 0) {
|
|
|
|
- qg->rfer -= num_bytes;
|
|
|
|
- qg->rfer_cmpr -= num_bytes;
|
|
|
|
- dirty = true;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * If our refcount was the same as the roots previously but our
|
|
|
|
- * new count isn't the same as the number of roots now then we
|
|
|
|
- * went from having a exclusive reference on this range to not.
|
|
|
|
- */
|
|
|
|
- if (old_roots && cur_old_count == old_roots &&
|
|
|
|
- (cur_new_count != new_roots || new_roots == 0)) {
|
|
|
|
- WARN_ON(cur_new_count != new_roots && new_roots == 0);
|
|
|
|
- qg->excl -= num_bytes;
|
|
|
|
- qg->excl_cmpr -= num_bytes;
|
|
|
|
- dirty = true;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * If we didn't reference all the roots before but now we do we
|
|
|
|
- * have an exclusive reference to this range.
|
|
|
|
- */
|
|
|
|
- if ((!old_roots || (old_roots && cur_old_count != old_roots))
|
|
|
|
- && cur_new_count == new_roots) {
|
|
|
|
- qg->excl += num_bytes;
|
|
|
|
- qg->excl_cmpr += num_bytes;
|
|
|
|
- dirty = true;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (dirty)
|
|
|
|
- qgroup_dirty(fs_info, qg);
|
|
|
|
- }
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * If we removed a data extent and there were other references for that bytenr
|
|
|
|
- * then we need to lookup all referenced roots to make sure we still don't
|
|
|
|
- * reference this bytenr. If we do then we can just discard this operation.
|
|
|
|
- */
|
|
|
|
-static int check_existing_refs(struct btrfs_trans_handle *trans,
|
|
|
|
- struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper)
|
|
|
|
-{
|
|
|
|
- struct ulist *roots = NULL;
|
|
|
|
- struct ulist_node *unode;
|
|
|
|
- struct ulist_iterator uiter;
|
|
|
|
- int ret = 0;
|
|
|
|
-
|
|
|
|
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
|
|
|
|
- oper->elem.seq, &roots);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- ret = 0;
|
|
|
|
-
|
|
|
|
- ULIST_ITER_INIT(&uiter);
|
|
|
|
- while ((unode = ulist_next(roots, &uiter))) {
|
|
|
|
- if (unode->val == oper->ref_root) {
|
|
|
|
- ret = 1;
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- ulist_free(roots);
|
|
|
|
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
|
|
|
|
-
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * If we share a reference across multiple roots then we may need to adjust
|
|
|
|
- * various qgroups referenced and exclusive counters. The basic premise is this
|
|
|
|
- *
|
|
|
|
- * 1) We have seq to represent a 0 count. Instead of looping through all of the
|
|
|
|
- * qgroups and resetting their refcount to 0 we just constantly bump this
|
|
|
|
- * sequence number to act as the base reference count. This means that if
|
|
|
|
- * anybody is equal to or below this sequence they were never referenced. We
|
|
|
|
- * jack this sequence up by the number of roots we found each time in order to
|
|
|
|
- * make sure we don't have any overlap.
|
|
|
|
- *
|
|
|
|
- * 2) We first search all the roots that reference the area _except_ the root
|
|
|
|
- * we're acting on currently. This makes up the old_refcnt of all the qgroups
|
|
|
|
- * before.
|
|
|
|
- *
|
|
|
|
- * 3) We walk all of the qgroups referenced by the root we are currently acting
|
|
|
|
- * on, and will either adjust old_refcnt in the case of a removal or the
|
|
|
|
- * new_refcnt in the case of an addition.
|
|
|
|
- *
|
|
|
|
- * 4) Finally we walk all the qgroups that are referenced by this range
|
|
|
|
- * including the root we are acting on currently. We will adjust the counters
|
|
|
|
- * based on the number of roots we had and will have after this operation.
|
|
|
|
- *
|
|
|
|
- * Take this example as an illustration
|
|
|
|
- *
|
|
|
|
- * [qgroup 1/0]
|
|
|
|
- * / | \
|
|
|
|
- * [qg 0/0] [qg 0/1] [qg 0/2]
|
|
|
|
- * \ | /
|
|
|
|
- * [ extent ]
|
|
|
|
- *
|
|
|
|
- * Say we are adding a reference that is covered by qg 0/0. The first step
|
|
|
|
- * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
|
|
|
|
- * old_roots being 2. Because it is adding new_roots will be 1. We then go
|
|
|
|
- * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
|
|
|
|
- * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
|
|
|
|
- * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
|
|
|
|
- * reference and thus must add the size to the referenced bytes. Everything
|
|
|
|
- * else is the same so nothing else changes.
|
|
|
|
- */
|
|
|
|
-static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
|
|
|
|
- struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper)
|
|
|
|
-{
|
|
|
|
- struct ulist *roots = NULL;
|
|
|
|
- struct ulist *qgroups, *tmp;
|
|
|
|
- struct btrfs_qgroup *qgroup;
|
|
|
|
- struct seq_list elem = SEQ_LIST_INIT(elem);
|
|
|
|
- u64 seq;
|
|
|
|
- int old_roots = 0;
|
|
|
|
- int new_roots = 0;
|
|
|
|
- int ret = 0;
|
|
|
|
-
|
|
|
|
- if (oper->elem.seq) {
|
|
|
|
- ret = check_existing_refs(trans, fs_info, oper);
|
|
|
|
- if (ret < 0)
|
|
|
|
- return ret;
|
|
|
|
- if (ret)
|
|
|
|
- return 0;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- qgroups = ulist_alloc(GFP_NOFS);
|
|
|
|
- if (!qgroups)
|
|
|
|
- return -ENOMEM;
|
|
|
|
-
|
|
|
|
- tmp = ulist_alloc(GFP_NOFS);
|
|
|
|
- if (!tmp) {
|
|
|
|
- ulist_free(qgroups);
|
|
|
|
- return -ENOMEM;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- btrfs_get_tree_mod_seq(fs_info, &elem);
|
|
|
|
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
|
|
|
|
- &roots);
|
|
|
|
- btrfs_put_tree_mod_seq(fs_info, &elem);
|
|
|
|
- if (ret < 0) {
|
|
|
|
- ulist_free(qgroups);
|
|
|
|
- ulist_free(tmp);
|
|
|
|
- return ret;
|
|
|
|
- }
|
|
|
|
- spin_lock(&fs_info->qgroup_lock);
|
|
|
|
- qgroup = find_qgroup_rb(fs_info, oper->ref_root);
|
|
|
|
- if (!qgroup)
|
|
|
|
- goto out;
|
|
|
|
- seq = fs_info->qgroup_seq;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * So roots is the list of all the roots currently pointing at the
|
|
|
|
- * bytenr, including the ref we are adding if we are adding, or not if
|
|
|
|
- * we are removing a ref. So we pass in the ref_root to skip that root
|
|
|
|
- * in our calculations. We set old_refnct and new_refcnt cause who the
|
|
|
|
- * hell knows what everything looked like before, and it doesn't matter
|
|
|
|
- * except...
|
|
|
|
- */
|
|
|
|
- ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
|
|
|
|
- seq, &old_roots, 0);
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Now adjust the refcounts of the qgroups that care about this
|
|
|
|
- * reference, either the old_count in the case of removal or new_count
|
|
|
|
- * in the case of an addition.
|
|
|
|
- */
|
|
|
|
- ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
|
|
|
|
- seq);
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * ...in the case of removals. If we had a removal before we got around
|
|
|
|
- * to processing this operation then we need to find that guy and count
|
|
|
|
- * his references as if they really existed so we don't end up screwing
|
|
|
|
- * up the exclusive counts. Then whenever we go to process the delete
|
|
|
|
- * everything will be grand and we can account for whatever exclusive
|
|
|
|
- * changes need to be made there. We also have to pass in old_roots so
|
|
|
|
- * we have an accurate count of the roots as it pertains to this
|
|
|
|
- * operations view of the world.
|
|
|
|
- */
|
|
|
|
- ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
|
|
|
|
- &old_roots);
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * We are adding our root, need to adjust up the number of roots,
|
|
|
|
- * otherwise old_roots is the number of roots we want.
|
|
|
|
- */
|
|
|
|
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
|
|
|
|
- new_roots = old_roots + 1;
|
|
|
|
- } else {
|
|
|
|
- new_roots = old_roots;
|
|
|
|
- old_roots++;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Bump qgroup_seq to avoid seq overlap
|
|
|
|
- * XXX: This makes qgroup_seq mismatch with oper->seq.
|
|
|
|
- */
|
|
|
|
- fs_info->qgroup_seq += old_roots + 1;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * And now the magic happens, bless Arne for having a pretty elegant
|
|
|
|
- * solution for this.
|
|
|
|
- */
|
|
|
|
- qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
|
|
|
|
- qgroups, seq, old_roots, new_roots, 0);
|
|
|
|
-out:
|
|
|
|
- spin_unlock(&fs_info->qgroup_lock);
|
|
|
|
- ulist_free(qgroups);
|
|
|
|
- ulist_free(roots);
|
|
|
|
- ulist_free(tmp);
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * Process a reference to a shared subtree. This type of operation is
|
|
|
|
- * queued during snapshot removal when we encounter extents which are
|
|
|
|
- * shared between more than one root.
|
|
|
|
- */
|
|
|
|
-static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
|
|
|
|
- struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper)
|
|
|
|
-{
|
|
|
|
- struct ulist *roots = NULL;
|
|
|
|
- struct ulist_node *unode;
|
|
|
|
- struct ulist_iterator uiter;
|
|
|
|
- struct btrfs_qgroup_list *glist;
|
|
|
|
- struct ulist *parents;
|
|
|
|
- int ret = 0;
|
|
|
|
- int err;
|
|
|
|
- struct btrfs_qgroup *qg;
|
|
|
|
- u64 root_obj = 0;
|
|
|
|
- struct seq_list elem = SEQ_LIST_INIT(elem);
|
|
|
|
-
|
|
|
|
- parents = ulist_alloc(GFP_NOFS);
|
|
|
|
- if (!parents)
|
|
|
|
- return -ENOMEM;
|
|
|
|
-
|
|
|
|
- btrfs_get_tree_mod_seq(fs_info, &elem);
|
|
|
|
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
|
|
|
|
- elem.seq, &roots);
|
|
|
|
- btrfs_put_tree_mod_seq(fs_info, &elem);
|
|
|
|
- if (ret < 0)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- if (roots->nnodes != 1)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- ULIST_ITER_INIT(&uiter);
|
|
|
|
- unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
|
|
|
|
- /*
|
|
|
|
- * If we find our ref root then that means all refs
|
|
|
|
- * this extent has to the root have not yet been
|
|
|
|
- * deleted. In that case, we do nothing and let the
|
|
|
|
- * last ref for this bytenr drive our update.
|
|
|
|
- *
|
|
|
|
- * This can happen for example if an extent is
|
|
|
|
- * referenced multiple times in a snapshot (clone,
|
|
|
|
- * etc). If we are in the middle of snapshot removal,
|
|
|
|
- * queued updates for such an extent will find the
|
|
|
|
- * root if we have not yet finished removing the
|
|
|
|
- * snapshot.
|
|
|
|
- */
|
|
|
|
- if (unode->val == oper->ref_root)
|
|
|
|
- goto out;
|
|
|
|
-
|
|
|
|
- root_obj = unode->val;
|
|
|
|
- BUG_ON(!root_obj);
|
|
|
|
-
|
|
|
|
- spin_lock(&fs_info->qgroup_lock);
|
|
|
|
- qg = find_qgroup_rb(fs_info, root_obj);
|
|
|
|
- if (!qg)
|
|
|
|
- goto out_unlock;
|
|
|
|
-
|
|
|
|
- qg->excl += oper->num_bytes;
|
|
|
|
- qg->excl_cmpr += oper->num_bytes;
|
|
|
|
- qgroup_dirty(fs_info, qg);
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Adjust counts for parent groups. First we find all
|
|
|
|
- * parents, then in the 2nd loop we do the adjustment
|
|
|
|
- * while adding parents of the parents to our ulist.
|
|
|
|
- */
|
|
|
|
- list_for_each_entry(glist, &qg->groups, next_group) {
|
|
|
|
- err = ulist_add(parents, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group), GFP_ATOMIC);
|
|
|
|
- if (err < 0) {
|
|
|
|
- ret = err;
|
|
|
|
- goto out_unlock;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- ULIST_ITER_INIT(&uiter);
|
|
|
|
- while ((unode = ulist_next(parents, &uiter))) {
|
|
|
|
- qg = u64_to_ptr(unode->aux);
|
|
|
|
- qg->excl += oper->num_bytes;
|
|
|
|
- qg->excl_cmpr += oper->num_bytes;
|
|
|
|
- qgroup_dirty(fs_info, qg);
|
|
|
|
-
|
|
|
|
- /* Add any parents of the parents */
|
|
|
|
- list_for_each_entry(glist, &qg->groups, next_group) {
|
|
|
|
- err = ulist_add(parents, glist->group->qgroupid,
|
|
|
|
- ptr_to_u64(glist->group), GFP_ATOMIC);
|
|
|
|
- if (err < 0) {
|
|
|
|
- ret = err;
|
|
|
|
- goto out_unlock;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-out_unlock:
|
|
|
|
- spin_unlock(&fs_info->qgroup_lock);
|
|
|
|
-
|
|
|
|
-out:
|
|
|
|
- ulist_free(roots);
|
|
|
|
- ulist_free(parents);
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
|
|
|
|
- * from the fs. First, all roots referencing the extent are searched, and
|
|
|
|
- * then the space is accounted accordingly to the different roots. The
|
|
|
|
- * accounting algorithm works in 3 steps documented inline.
|
|
|
|
- */
|
|
|
|
-static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
|
|
|
|
- struct btrfs_fs_info *fs_info,
|
|
|
|
- struct btrfs_qgroup_operation *oper)
|
|
|
|
-{
|
|
|
|
- int ret = 0;
|
|
|
|
-
|
|
|
|
- if (!fs_info->quota_enabled)
|
|
|
|
- return 0;
|
|
|
|
-
|
|
|
|
- BUG_ON(!fs_info->quota_root);
|
|
|
|
-
|
|
|
|
- mutex_lock(&fs_info->qgroup_rescan_lock);
|
|
|
|
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
|
|
|
|
- if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
|
|
|
|
- mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
|
- return 0;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
|
-
|
|
|
|
- ASSERT(is_fstree(oper->ref_root));
|
|
|
|
-
|
|
|
|
- trace_btrfs_qgroup_account(oper);
|
|
|
|
-
|
|
|
|
- switch (oper->type) {
|
|
|
|
- case BTRFS_QGROUP_OPER_ADD_EXCL:
|
|
|
|
- case BTRFS_QGROUP_OPER_SUB_EXCL:
|
|
|
|
- ret = qgroup_excl_accounting(fs_info, oper);
|
|
|
|
- break;
|
|
|
|
- case BTRFS_QGROUP_OPER_ADD_SHARED:
|
|
|
|
- case BTRFS_QGROUP_OPER_SUB_SHARED:
|
|
|
|
- ret = qgroup_shared_accounting(trans, fs_info, oper);
|
|
|
|
- break;
|
|
|
|
- case BTRFS_QGROUP_OPER_SUB_SUBTREE:
|
|
|
|
- ret = qgroup_subtree_accounting(trans, fs_info, oper);
|
|
|
|
- break;
|
|
|
|
- default:
|
|
|
|
- ASSERT(0);
|
|
|
|
- }
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
int
|
|
int
|
|
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
|
|
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info,
|
|
struct btrfs_fs_info *fs_info,
|
|
@@ -2571,31 +1736,6 @@ cleanup:
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * Needs to be called everytime we run delayed refs, even if there is an error
|
|
|
|
- * in order to cleanup outstanding operations.
|
|
|
|
- */
|
|
|
|
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
|
|
|
|
- struct btrfs_fs_info *fs_info)
|
|
|
|
-{
|
|
|
|
- struct btrfs_qgroup_operation *oper;
|
|
|
|
- int ret = 0;
|
|
|
|
-
|
|
|
|
- while (!list_empty(&trans->qgroup_ref_list)) {
|
|
|
|
- oper = list_first_entry(&trans->qgroup_ref_list,
|
|
|
|
- struct btrfs_qgroup_operation, list);
|
|
|
|
- list_del_init(&oper->list);
|
|
|
|
- if (!ret || !trans->aborted)
|
|
|
|
- ret = btrfs_qgroup_account(trans, fs_info, oper);
|
|
|
|
- spin_lock(&fs_info->qgroup_op_lock);
|
|
|
|
- rb_erase(&oper->n, &fs_info->qgroup_op_tree);
|
|
|
|
- spin_unlock(&fs_info->qgroup_op_lock);
|
|
|
|
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
|
|
|
|
- kfree(oper);
|
|
|
|
- }
|
|
|
|
- return ret;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
* called from commit_transaction. Writes all changed qgroups to disk.
|
|
* called from commit_transaction. Writes all changed qgroups to disk.
|
|
*/
|
|
*/
|