@@ -32,6 +32,7 @@
 #include "ulist.h"
 #include "backref.h"
 #include "extent_io.h"
+#include "qgroup.h"

 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
@@ -84,8 +85,8 @@ struct btrfs_qgroup {
 	/*
 	 * temp variables for accounting operations
 	 */
-	u64 tag;
-	u64 refcnt;
+	u64 old_refcnt;
+	u64 new_refcnt;
 };

 /*
@@ -98,6 +99,9 @@ struct btrfs_qgroup_list {
 	struct btrfs_qgroup *member;
 };

+#define ptr_to_u64(x) ((u64)(uintptr_t)(x))
+#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)(x))
+
 static int
 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
 		   int init_flags);
@@ -1174,33 +1178,198 @@ out:
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
+static int comp_oper(struct btrfs_qgroup_operation *oper1,
+		     struct btrfs_qgroup_operation *oper2)
+{
+	if (oper1->bytenr < oper2->bytenr)
+		return -1;
+	if (oper1->bytenr > oper2->bytenr)
+		return 1;
+	if (oper1->seq < oper2->seq)
+		return -1;
+	if (oper1->seq > oper2->seq)
+		return 1;
+	if (oper1->ref_root < oper2->ref_root)
+		return -1;
+	if (oper1->ref_root > oper2->ref_root)
+		return 1;
+	if (oper1->type < oper2->type)
+		return -1;
+	if (oper1->type > oper2->type)
+		return 1;
+	return 0;
+}
+
+static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
+			      struct btrfs_qgroup_operation *oper)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct btrfs_qgroup_operation *cur;
+	int cmp;
+
+	spin_lock(&fs_info->qgroup_op_lock);
+	p = &fs_info->qgroup_op_tree.rb_node;
+	while (*p) {
+		parent = *p;
+		cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
+		cmp = comp_oper(cur, oper);
+		if (cmp < 0) {
+			p = &(*p)->rb_right;
+		} else if (cmp) {
+			p = &(*p)->rb_left;
+		} else {
+			spin_unlock(&fs_info->qgroup_op_lock);
+			return -EEXIST;
+		}
+	}
+	rb_link_node(&oper->n, parent, p);
+	rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
+	spin_unlock(&fs_info->qgroup_op_lock);
+	return 0;
+}

 /*
- * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
- * the modification into a list that's later used by btrfs_end_transaction to
- * pass the recorded modifications on to btrfs_qgroup_account_ref.
+ * Record a quota operation for processing later on.
+ * @trans: the transaction we are adding the delayed op to.
+ * @fs_info: the fs_info for this fs.
+ * @ref_root: the root of the reference we are acting on.
+ * @bytenr: the bytenr we are acting on.
+ * @num_bytes: the number of bytes in the reference.
+ * @type: the type of operation this is.
+ * @mod_seq: do we need to get a sequence number for looking up roots.
+ *
+ * We just add it to our trans qgroup_ref_list and carry on and process these
+ * operations in order at some later point. If the reference root isn't a fs
+ * root then we don't bother with doing anything.
+ *
+ * MUST BE HOLDING THE REF LOCK.
 */
 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_delayed_ref_node *node,
-			    struct btrfs_delayed_extent_op *extent_op)
+			    struct btrfs_fs_info *fs_info, u64 ref_root,
+			    u64 bytenr, u64 num_bytes,
+			    enum btrfs_qgroup_operation_type type, int mod_seq)
 {
-	struct qgroup_update *u;
+	struct btrfs_qgroup_operation *oper;
+	int ret;

-	BUG_ON(!trans->delayed_ref_elem.seq);
-	u = kmalloc(sizeof(*u), GFP_NOFS);
-	if (!u)
+	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+		return 0;
+
+	oper = kmalloc(sizeof(*oper), GFP_NOFS);
+	if (!oper)
 		return -ENOMEM;

-	u->node = node;
-	u->extent_op = extent_op;
-	list_add_tail(&u->list, &trans->qgroup_ref_list);
+	oper->ref_root = ref_root;
+	oper->bytenr = bytenr;
+	oper->num_bytes = num_bytes;
+	oper->type = type;
+	oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
+	INIT_LIST_HEAD(&oper->elem.list);
+	oper->elem.seq = 0;
+	ret = insert_qgroup_oper(fs_info, oper);
+	if (ret) {
+		/* Shouldn't happen so have an assert for developers */
+		ASSERT(0);
+		kfree(oper);
+		return ret;
+	}
+	list_add_tail(&oper->list, &trans->qgroup_ref_list);
+
+	if (mod_seq)
+		btrfs_get_tree_mod_seq(fs_info, &oper->elem);

 	return 0;
 }
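For context, a call site in the ref-update paths would queue an operation roughly like this (an illustrative sketch, not part of this hunk; the actual hooks for this series live in the delayed-ref code, and root_objectid here stands for whatever ref root the caller is acting on):

	/* Record a shared ref addition; mod_seq = 1 takes a tree mod
	 * sequence so the accounting pass can still find the old roots. */
	ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
				      bytenr, num_bytes,
				      BTRFS_QGROUP_OPER_ADD_SHARED, 1);
	if (ret)
		return ret;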

-static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
-				    struct ulist *roots, struct ulist *tmp,
-				    u64 seq)
+/*
+ * The easy accounting: if we are adding/removing the only ref for an extent
+ * then this qgroup and all of the parent qgroups get their reference and
+ * exclusive counts adjusted.
+ */
+static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
+				  struct btrfs_qgroup_operation *oper)
+{
+	struct btrfs_qgroup *qgroup;
+	struct ulist *tmp;
+	struct btrfs_qgroup_list *glist;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	int sign = 0;
+	int ret = 0;
+
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		return -ENOMEM;
+
+	spin_lock(&fs_info->qgroup_lock);
+	if (!fs_info->quota_root)
+		goto out;
+	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
+	if (!qgroup)
+		goto out;
+	switch (oper->type) {
+	case BTRFS_QGROUP_OPER_ADD_EXCL:
+		sign = 1;
+		break;
+	case BTRFS_QGROUP_OPER_SUB_EXCL:
+		sign = -1;
+		break;
+	default:
+		ASSERT(0);
+	}
+	qgroup->rfer += sign * oper->num_bytes;
+	qgroup->rfer_cmpr += sign * oper->num_bytes;
+
+	WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
+	qgroup->excl += sign * oper->num_bytes;
+	qgroup->excl_cmpr += sign * oper->num_bytes;
+
+	qgroup_dirty(fs_info, qgroup);
+
+	/* Get all of the parent groups that contain this qgroup */
+	list_for_each_entry(glist, &qgroup->groups, next_group) {
+		ret = ulist_add(tmp, glist->group->qgroupid,
+				ptr_to_u64(glist->group), GFP_ATOMIC);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* Iterate all of the parents and adjust their reference counts */
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(tmp, &uiter))) {
+		qgroup = u64_to_ptr(unode->aux);
+		qgroup->rfer += sign * oper->num_bytes;
+		qgroup->rfer_cmpr += sign * oper->num_bytes;
+		qgroup->excl += sign * oper->num_bytes;
+		if (sign < 0)
+			WARN_ON(qgroup->excl < oper->num_bytes);
+		qgroup->excl_cmpr += sign * oper->num_bytes;
+		qgroup_dirty(fs_info, qgroup);
+
+		/* Add any parents of the parents */
+		list_for_each_entry(glist, &qgroup->groups, next_group) {
+			ret = ulist_add(tmp, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				goto out;
+		}
+	}
+	ret = 0;
+out:
+	spin_unlock(&fs_info->qgroup_lock);
+	ulist_free(tmp);
+	return ret;
+}
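A small worked example of the exclusive path (numbers and qgroup IDs invented for illustration): a BTRFS_QGROUP_OPER_ADD_EXCL of a 16KiB extent under qgroup 0/257, which is a member of parent qgroup 1/0:

	/*
	 * sign = 1, oper->num_bytes = 16384
	 *   0/257: rfer += 16384, excl += 16384
	 *   1/0:   rfer += 16384, excl += 16384  (via the parent walk)
	 * A SUB_EXCL of the same extent is the identical walk with sign = -1.
	 */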
+
+/*
+ * Walk all of the roots that pointed to our bytenr and adjust their refcnts
+ * properly.
+ */
+static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
+				  u64 root_to_skip, struct ulist *tmp,
+				  struct ulist *roots, struct ulist *qgroups,
+				  u64 seq, int *old_roots, int rescan)
 {
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
@@ -1211,256 +1380,549 @@ static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,

 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(roots, &uiter))) {
+		/* We don't count our current root here */
+		if (unode->val == root_to_skip)
+			continue;
 		qg = find_qgroup_rb(fs_info, unode->val);
 		if (!qg)
 			continue;
+		/*
+		 * We could have a pending removal of this same ref so we may
+		 * not have actually found our ref root when doing
+		 * btrfs_find_all_roots, so we need to keep track of how many
+		 * old roots we find in case we removed ours and added a
+		 * different one at the same time. I don't think this could
+		 * happen in practice but that sort of thinking leads to pain
+		 * and suffering and to the dark side.
+		 */
+		(*old_roots)++;

 		ulist_reinit(tmp);
-		/* XXX id not needed */
-		ret = ulist_add(tmp, qg->qgroupid,
-				(u64)(uintptr_t)qg, GFP_ATOMIC);
+		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
+				GFP_ATOMIC);
+		if (ret < 0)
+			return ret;
+		ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
 		if (ret < 0)
 			return ret;
 		ULIST_ITER_INIT(&tmp_uiter);
 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
 			struct btrfs_qgroup_list *glist;

-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->refcnt < seq)
-				qg->refcnt = seq + 1;
+			qg = u64_to_ptr(tmp_unode->aux);
+			/*
+			 * We use this sequence number to keep from having to
+			 * run the whole list and 0 out the refcnt every time.
+			 * We basically use sequence as the known 0 count and
+			 * then add 1 every time we see a qgroup. This is how
+			 * we get how many of the roots actually point up to
+			 * the upper level qgroups in order to determine
+			 * exclusive counts.
+			 *
+			 * For rescan we want to set old_refcnt to seq so our
+			 * exclusive calculations end up correct.
+			 */
+			if (rescan)
+				qg->old_refcnt = seq;
+			else if (qg->old_refcnt < seq)
+				qg->old_refcnt = seq + 1;
 			else
-				++qg->refcnt;
+				qg->old_refcnt++;
+
+			if (qg->new_refcnt < seq)
+				qg->new_refcnt = seq + 1;
+			else
+				qg->new_refcnt++;
 			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(qgroups, glist->group->qgroupid,
+						ptr_to_u64(glist->group),
+						GFP_ATOMIC);
+				if (ret < 0)
+					return ret;
 				ret = ulist_add(tmp, glist->group->qgroupid,
-						(u64)(uintptr_t)glist->group,
+						ptr_to_u64(glist->group),
 						GFP_ATOMIC);
 				if (ret < 0)
 					return ret;
 			}
 		}
 	}
+	return 0;
+}
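The sequence trick described in the comment above is easier to see with concrete numbers (invented for illustration):

	/*
	 * Suppose seq == 100 when the pass starts.  Any qgroup whose
	 * old_refcnt is <= 100 effectively has a count of 0.  The first
	 * root that reaches it sets old_refcnt to 101 (count 1), the next
	 * bumps it to 102 (count 2), and so on, so old_refcnt - seq is the
	 * number of roots pointing at that qgroup, without the counters
	 * ever being zeroed between passes.
	 */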
+
+/*
+ * We need to walk forward in our operation tree and account for any roots that
+ * were deleted after we made this operation.
+ */
+static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
+				       struct btrfs_qgroup_operation *oper,
+				       struct ulist *tmp,
+				       struct ulist *qgroups, u64 seq,
+				       int *old_roots)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct btrfs_qgroup *qg;
+	struct btrfs_qgroup_operation *tmp_oper;
+	struct rb_node *n;
+	int ret;
+
+	ulist_reinit(tmp);
+
+	/*
+	 * We only walk forward in the tree since we're only interested in
+	 * removals that happened _after_ our operation.
+	 */
+	spin_lock(&fs_info->qgroup_op_lock);
+	n = rb_next(&oper->n);
+	spin_unlock(&fs_info->qgroup_op_lock);
+	if (!n)
+		return 0;
+	tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
+	while (tmp_oper->bytenr == oper->bytenr) {
+		/*
+		 * If it's not a removal we don't care, additions work out
+		 * properly with our refcnt tracking.
+		 */
+		if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
+		    tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
+			goto next;
+		qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
+		if (!qg)
+			goto next;
+		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
+				GFP_ATOMIC);
+		if (ret) {
+			if (ret < 0)
+				return ret;
+			/*
+			 * We only want to increase old_roots if this qgroup is
+			 * not already in the list of qgroups. If it is already
+			 * there then that means it must have been re-added or
+			 * the delete will be discarded because we had an
+			 * existing ref that we haven't looked up yet. In this
+			 * case we don't want to increase old_roots. So if ret
+			 * == 1 then we know that this is the first time we've
+			 * seen this qgroup and we can bump the old_roots.
+			 */
+			(*old_roots)++;
+			ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
+					GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+		}
+next:
+		spin_lock(&fs_info->qgroup_op_lock);
+		n = rb_next(&tmp_oper->n);
+		spin_unlock(&fs_info->qgroup_op_lock);
+		if (!n)
+			break;
+		tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
+	}
+
+	/* Ok now process the qgroups we found */
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(tmp, &uiter))) {
+		struct btrfs_qgroup_list *glist;
+
+		qg = u64_to_ptr(unode->aux);
+		if (qg->old_refcnt < seq)
+			qg->old_refcnt = seq + 1;
+		else
+			qg->old_refcnt++;
+		if (qg->new_refcnt < seq)
+			qg->new_refcnt = seq + 1;
+		else
+			qg->new_refcnt++;
+		list_for_each_entry(glist, &qg->groups, next_group) {
+			ret = ulist_add(qgroups, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+			ret = ulist_add(tmp, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+		}
+	}
 	return 0;
 }
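The forward-only walk is safe because comp_oper() orders the tree by (bytenr, seq, ...) and seq increases monotonically, so every node after oper with the same bytenr was recorded later. Schematically (a sketch, not real tree contents):

	/*
	 * [bytenr A, seq 5, ADD_SHARED]  <- oper being processed
	 * [bytenr A, seq 7, SUB_SHARED]  <- later removal: count its root
	 * [bytenr A, seq 9, ADD_SHARED]  <- addition: skip, refcnts handle it
	 * [bytenr B, ...]                <- different bytenr: loop ends
	 */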

-static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
-				    struct ulist *roots, struct ulist *tmp,
-				    u64 seq, int sgn, u64 num_bytes,
-				    struct btrfs_qgroup *qgroup)
+/* Add refcnt for the newly added reference. */
+static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
+				  struct btrfs_qgroup_operation *oper,
+				  struct btrfs_qgroup *qgroup,
+				  struct ulist *tmp, struct ulist *qgroups,
+				  u64 seq)
 {
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 	struct btrfs_qgroup *qg;
-	struct btrfs_qgroup_list *glist;
 	int ret;

 	ulist_reinit(tmp);
-	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+	ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
+			GFP_ATOMIC);
+	if (ret < 0)
+		return ret;
+	ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
+			GFP_ATOMIC);
 	if (ret < 0)
 		return ret;
-
 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(tmp, &uiter))) {
-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
-		if (qg->refcnt < seq) {
-			/* not visited by step 1 */
-			qg->rfer += sgn * num_bytes;
-			qg->rfer_cmpr += sgn * num_bytes;
-			if (roots->nnodes == 0) {
-				qg->excl += sgn * num_bytes;
-				qg->excl_cmpr += sgn * num_bytes;
-			}
-			qgroup_dirty(fs_info, qg);
-		}
-		WARN_ON(qg->tag >= seq);
-		qg->tag = seq;
+		struct btrfs_qgroup_list *glist;
+
+		qg = u64_to_ptr(unode->aux);
+		if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
+			if (qg->new_refcnt < seq)
+				qg->new_refcnt = seq + 1;
+			else
+				qg->new_refcnt++;
+		} else {
+			if (qg->old_refcnt < seq)
+				qg->old_refcnt = seq + 1;
+			else
+				qg->old_refcnt++;
+		}
 		list_for_each_entry(glist, &qg->groups, next_group) {
 			ret = ulist_add(tmp, glist->group->qgroupid,
-					(uintptr_t)glist->group, GFP_ATOMIC);
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				return ret;
+			ret = ulist_add(qgroups, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
 			if (ret < 0)
 				return ret;
 		}
 	}
-
 	return 0;
 }

-static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
-				    struct ulist *roots, struct ulist *tmp,
-				    u64 seq, int sgn, u64 num_bytes)
+/*
+ * This adjusts the counters for all referenced qgroups if need be.
+ */
+static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
+				  u64 root_to_skip, u64 num_bytes,
+				  struct ulist *qgroups, u64 seq,
+				  int old_roots, int new_roots, int rescan)
 {
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 	struct btrfs_qgroup *qg;
-	struct ulist_node *tmp_unode;
-	struct ulist_iterator tmp_uiter;
-	int ret;
+	u64 cur_new_count, cur_old_count;

 	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(roots, &uiter))) {
-		qg = find_qgroup_rb(fs_info, unode->val);
-		if (!qg)
-			continue;
+	while ((unode = ulist_next(qgroups, &uiter))) {
+		bool dirty = false;

-		ulist_reinit(tmp);
-		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
-		if (ret < 0)
-			return ret;
+		qg = u64_to_ptr(unode->aux);
+		/*
+		 * Wasn't referenced before but is now, add to the reference
+		 * counters.
+		 */
+		if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
+			qg->rfer += num_bytes;
+			qg->rfer_cmpr += num_bytes;
+			dirty = true;
+		}

-		ULIST_ITER_INIT(&tmp_uiter);
-		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-			struct btrfs_qgroup_list *glist;
+		/*
+		 * Was referenced before but isn't now, subtract from the
+		 * reference counters.
+		 */
+		if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
+			qg->rfer -= num_bytes;
+			qg->rfer_cmpr -= num_bytes;
+			dirty = true;
+		}

-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->tag == seq)
-				continue;
+		if (qg->old_refcnt < seq)
+			cur_old_count = 0;
+		else
+			cur_old_count = qg->old_refcnt - seq;
+		if (qg->new_refcnt < seq)
+			cur_new_count = 0;
+		else
+			cur_new_count = qg->new_refcnt - seq;

-			if (qg->refcnt - seq == roots->nnodes) {
-				qg->excl -= sgn * num_bytes;
-				qg->excl_cmpr -= sgn * num_bytes;
-				qgroup_dirty(fs_info, qg);
-			}
+		/*
+		 * If our refcount was the same as the roots previously but our
+		 * new count isn't the same as the number of roots now then we
+		 * went from having an exclusive reference on this range to not.
+		 */
+		if (old_roots && cur_old_count == old_roots &&
+		    (cur_new_count != new_roots || new_roots == 0)) {
+			WARN_ON(cur_new_count != new_roots && new_roots == 0);
+			qg->excl -= num_bytes;
+			qg->excl_cmpr -= num_bytes;
+			dirty = true;
+		}

-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ret = ulist_add(tmp, glist->group->qgroupid,
-						(uintptr_t)glist->group,
-						GFP_ATOMIC);
-				if (ret < 0)
-					return ret;
-			}
+		/*
+		 * If we didn't reference all the roots before but now we do we
+		 * have an exclusive reference to this range.
+		 */
+		if ((!old_roots || (old_roots && cur_old_count != old_roots))
+		    && cur_new_count == new_roots) {
+			qg->excl += num_bytes;
+			qg->excl_cmpr += num_bytes;
+			dirty = true;
 		}
-	}

+		if (dirty)
+			qgroup_dirty(fs_info, qg);
+	}
 	return 0;
 }
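The four adjustments above boil down to this transition table, with counts taken relative to seq (an illustrative summary, not code from the patch):

	/*
	 * old == 0         && new > 0           -> newly referenced:  rfer += num_bytes
	 * old > 0          && new == 0          -> no longer refed:   rfer -= num_bytes
	 * old == old_roots && new != new_roots  -> lost exclusivity:  excl -= num_bytes
	 * old != old_roots && new == new_roots  -> became exclusive:  excl += num_bytes
	 */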

 /*
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
- * from the fs. First, all roots referencing the extent are searched, and
- * then the space is accounted accordingly to the different roots. The
- * accounting algorithm works in 3 steps documented inline.
+ * If we removed a data extent and there were other references for that bytenr
+ * then we need to lookup all referenced roots to make sure we still don't
+ * reference this bytenr. If we do then we can just discard this operation.
 */
-int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
-			     struct btrfs_delayed_ref_node *node,
-			     struct btrfs_delayed_extent_op *extent_op)
+static int check_existing_refs(struct btrfs_trans_handle *trans,
+			       struct btrfs_fs_info *fs_info,
+			       struct btrfs_qgroup_operation *oper)
 {
-	struct btrfs_root *quota_root;
-	u64 ref_root;
-	struct btrfs_qgroup *qgroup;
 	struct ulist *roots = NULL;
-	u64 seq;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
 	int ret = 0;
-	int sgn;

-	if (!fs_info->quota_enabled)
-		return 0;
-
-	BUG_ON(!fs_info->quota_root);
+	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+				   oper->elem.seq, &roots);
+	if (ret < 0)
+		return ret;
+	ret = 0;

-	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
-	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
-		struct btrfs_delayed_tree_ref *ref;
-		ref = btrfs_delayed_node_to_tree_ref(node);
-		ref_root = ref->root;
-	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
-		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
-		struct btrfs_delayed_data_ref *ref;
-		ref = btrfs_delayed_node_to_data_ref(node);
-		ref_root = ref->root;
-	} else {
-		BUG();
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		if (unode->val == oper->ref_root) {
+			ret = 1;
+			break;
+		}
 	}
+	ulist_free(roots);
+	btrfs_put_tree_mod_seq(fs_info, &oper->elem);

-	if (!is_fstree(ref_root)) {
-		/*
-		 * non-fs-trees are not being accounted
-		 */
-		return 0;
-	}
+	return ret;
+}

-	switch (node->action) {
-	case BTRFS_ADD_DELAYED_REF:
-	case BTRFS_ADD_DELAYED_EXTENT:
-		sgn = 1;
-		seq = btrfs_tree_mod_seq_prev(node->seq);
-		break;
-	case BTRFS_DROP_DELAYED_REF:
-		sgn = -1;
-		seq = node->seq;
-		break;
-	case BTRFS_UPDATE_DELAYED_HEAD:
-		return 0;
-	default:
-		BUG();
-	}
+/*
+ * If we share a reference across multiple roots then we may need to adjust
+ * various qgroups' referenced and exclusive counters. The basic premise is
+ * this:
+ *
+ * 1) We have seq to represent a 0 count. Instead of looping through all of the
+ * qgroups and resetting their refcount to 0 we just constantly bump this
+ * sequence number to act as the base reference count. This means that if
+ * anybody is equal to or below this sequence they were never referenced. We
+ * jack this sequence up by the number of roots we found each time in order to
+ * make sure we don't have any overlap.
+ *
+ * 2) We first search all the roots that reference the area _except_ the root
+ * we're acting on currently. This makes up the old_refcnt of all the qgroups
+ * before.
+ *
+ * 3) We walk all of the qgroups referenced by the root we are currently acting
+ * on, and will either adjust old_refcnt in the case of a removal or the
+ * new_refcnt in the case of an addition.
+ *
+ * 4) Finally we walk all the qgroups that are referenced by this range
+ * including the root we are acting on currently. We will adjust the counters
+ * based on the number of roots we had and will have after this operation.
+ *
+ * Take this example as an illustration:
+ *
+ *                  [qgroup 1/0]
+ *                 /     |      \
+ *          [qg 0/0] [qg 0/1] [qg 0/2]
+ *                 \     |      /
+ *                  [  extent  ]
+ *
+ * Say we are adding a reference that is covered by qg 0/0. The first step
+ * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
+ * old_roots being 2. Because it is adding, new_roots will be old_roots + 1,
+ * i.e. 3. We then go through qg 0/0 which will get the new_refcnt set to 1
+ * and add 1 to qg 1/0's new_refcnt, bringing it to 3. We then walk through
+ * all of the qgroups, we notice that the old refcnt for qg 0/0 < the new
+ * refcnt, so we added a reference and thus must add the size to the
+ * referenced bytes. Everything else is the same so nothing else changes.
+ */
+static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
+				    struct btrfs_fs_info *fs_info,
+				    struct btrfs_qgroup_operation *oper)
+{
+	struct ulist *roots = NULL;
+	struct ulist *qgroups, *tmp;
+	struct btrfs_qgroup *qgroup;
+	struct seq_list elem = {};
+	u64 seq;
+	int old_roots = 0;
+	int new_roots = 0;
+	int ret = 0;

-	mutex_lock(&fs_info->qgroup_rescan_lock);
-	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
-		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
-			mutex_unlock(&fs_info->qgroup_rescan_lock);
+	if (oper->elem.seq) {
+		ret = check_existing_refs(trans, fs_info, oper);
+		if (ret < 0)
+			return ret;
+		if (ret)
 			return 0;
-		}
 	}
-	mutex_unlock(&fs_info->qgroup_rescan_lock);

-	/*
-	 * the delayed ref sequence number we pass depends on the direction of
-	 * the operation. for add operations, we pass
-	 * tree_mod_log_prev_seq(node->seq) to skip
-	 * the delayed ref's current sequence number, because we need the state
-	 * of the tree before the add operation. for delete operations, we pass
-	 * (node->seq) to include the delayed ref's current sequence number,
-	 * because we need the state of the tree after the delete operation.
-	 */
-	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
-	if (ret < 0)
-		return ret;
-
-	spin_lock(&fs_info->qgroup_lock);
+	qgroups = ulist_alloc(GFP_NOFS);
+	if (!qgroups)
+		return -ENOMEM;

-	quota_root = fs_info->quota_root;
-	if (!quota_root)
-		goto unlock;
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp) {
+		ulist_free(qgroups);
+		return -ENOMEM;
+	}

-	qgroup = find_qgroup_rb(fs_info, ref_root);
+	btrfs_get_tree_mod_seq(fs_info, &elem);
+	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
+				   &roots);
+	btrfs_put_tree_mod_seq(fs_info, &elem);
+	if (ret < 0) {
+		ulist_free(qgroups);
+		ulist_free(tmp);
+		return ret;
+	}
+	spin_lock(&fs_info->qgroup_lock);
+	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
 	if (!qgroup)
-		goto unlock;
+		goto out;
+	seq = fs_info->qgroup_seq;

 	/*
-	 * step 1: for each old ref, visit all nodes once and inc refcnt
+	 * So roots is the list of all the roots currently pointing at the
+	 * bytenr, including the ref we are adding if we are adding, or not if
+	 * we are removing a ref. So we pass in the ref_root to skip that root
+	 * in our calculations. We set old_refcnt and new_refcnt because who
+	 * the hell knows what everything looked like before, and it doesn't
+	 * matter except...
 	 */
-	ulist_reinit(fs_info->qgroup_ulist);
-	seq = fs_info->qgroup_seq;
-	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+	ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots,
+				     qgroups, seq, &old_roots, 0);
+	if (ret < 0)
+		goto out;

-	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
-				       seq);
-	if (ret)
-		goto unlock;
+	/*
+	 * Now adjust the refcounts of the qgroups that care about this
+	 * reference, either the old_count in the case of removal or new_count
+	 * in the case of an addition.
+	 */
+	ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
+				     seq);
+	if (ret < 0)
+		goto out;

 	/*
-	 * step 2: walk from the new root
+	 * ...in the case of removals. If we had a removal before we got around
+	 * to processing this operation then we need to find that guy and count
+	 * his references as if they really existed so we don't end up screwing
+	 * up the exclusive counts. Then whenever we go to process the delete
+	 * everything will be grand and we can account for whatever exclusive
+	 * changes need to be made there. We also have to pass in old_roots so
+	 * we have an accurate count of the roots as it pertains to this
+	 * operation's view of the world.
 	 */
-	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
-				       seq, sgn, node->num_bytes, qgroup);
-	if (ret)
-		goto unlock;
+	ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
+					  &old_roots);
+	if (ret < 0)
+		goto out;

 	/*
-	 * step 3: walk again from old refs
+	 * We are adding our root, need to adjust up the number of roots,
+	 * otherwise old_roots is the number of roots we want.
 	 */
-	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
-				       seq, sgn, node->num_bytes);
-	if (ret)
-		goto unlock;
+	if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
+		new_roots = old_roots + 1;
+	} else {
+		new_roots = old_roots;
+		old_roots++;
+	}
+	fs_info->qgroup_seq += old_roots + 1;

-unlock:
+
+	/*
+	 * And now the magic happens, bless Arne for having a pretty elegant
+	 * solution for this.
+	 */
+	qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
+			       qgroups, seq, old_roots, new_roots, 0);
+out:
 	spin_unlock(&fs_info->qgroup_lock);
+	ulist_free(qgroups);
 	ulist_free(roots);
+	ulist_free(tmp);
+	return ret;
+}
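Plugging numbers into the sequence bookkeeping at the end (values invented for illustration): if seq was 100 and two other roots referenced the bytenr when we add ours, then old_roots == 2 and new_roots == 3, and no refcnt produced by this pass exceeds 103:

	/*
	 * seq = 100, old_roots = 2, new_roots = 3 (ADD_SHARED)
	 * highest refcnt this pass can produce: seq + 3 = 103
	 * fs_info->qgroup_seq += old_roots + 1   ->   qgroup_seq = 103
	 * so the next pass treats every stale count as its new zero.
	 */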
+
+/*
+ * btrfs_qgroup_account is called for every qgroup operation that was recorded
+ * while refs were added to or deleted from the fs. For shared operations all
+ * roots referencing the extent are searched, and then the space is accounted
+ * accordingly to the different roots; exclusive operations are accounted
+ * directly.
+ */
+static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *fs_info,
+				struct btrfs_qgroup_operation *oper)
+{
+	int ret = 0;
+
+	if (!fs_info->quota_enabled)
+		return 0;
+
+	BUG_ON(!fs_info->quota_root);
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
+			mutex_unlock(&fs_info->qgroup_rescan_lock);
+			return 0;
+		}
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	ASSERT(is_fstree(oper->ref_root));
+
+	switch (oper->type) {
+	case BTRFS_QGROUP_OPER_ADD_EXCL:
+	case BTRFS_QGROUP_OPER_SUB_EXCL:
+		ret = qgroup_excl_accounting(fs_info, oper);
+		break;
+	case BTRFS_QGROUP_OPER_ADD_SHARED:
+	case BTRFS_QGROUP_OPER_SUB_SHARED:
+		ret = qgroup_shared_accounting(trans, fs_info, oper);
+		break;
+	default:
+		ASSERT(0);
+	}
+	return ret;
+}
+
+/*
+ * Needs to be called every time we run delayed refs, even if there is an
+ * error, in order to clean up outstanding operations.
+ */
+int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
+				    struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_qgroup_operation *oper;
+	int ret = 0;
+
+	while (!list_empty(&trans->qgroup_ref_list)) {
+		oper = list_first_entry(&trans->qgroup_ref_list,
+					struct btrfs_qgroup_operation, list);
+		list_del_init(&oper->list);
+		if (!ret || !trans->aborted)
+			ret = btrfs_qgroup_account(trans, fs_info, oper);
+		spin_lock(&fs_info->qgroup_op_lock);
+		rb_erase(&oper->n, &fs_info->qgroup_op_tree);
+		spin_unlock(&fs_info->qgroup_op_lock);
+		btrfs_put_tree_mod_seq(fs_info, &oper->elem);
+		kfree(oper);
+	}
 	return ret;
 }
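A sketch of the expected call site (hypothetical; the hook for this series would sit in the delayed-ref running code, and the names below assume the signatures of this kernel era):

	ret = btrfs_run_delayed_refs(trans, root, count);
	/* always run the accounting pass so queued operations get freed */
	err = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
	if (err && !ret)
		ret = err;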

@@ -1629,8 +2091,16 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 		srcgroup = find_qgroup_rb(fs_info, srcid);
 		if (!srcgroup)
 			goto unlock;
-		dstgroup->rfer = srcgroup->rfer - level_size;
-		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
+
+		/*
+		 * We call inherit after we clone the root in order to make sure
+		 * our counts don't go crazy, so at this point the only
+		 * difference between the two roots should be the root node.
+		 */
+		dstgroup->rfer = srcgroup->rfer;
+		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
+		dstgroup->excl = level_size;
+		dstgroup->excl_cmpr = level_size;
 		srcgroup->excl = level_size;
 		srcgroup->excl_cmpr = level_size;
 		qgroup_dirty(fs_info, dstgroup);
@@ -1734,7 +2204,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 		struct btrfs_qgroup *qg;
 		struct btrfs_qgroup_list *glist;

-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		qg = u64_to_ptr(unode->aux);

 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
 		    qg->reserved + (s64)qg->rfer + num_bytes >
@@ -1766,7 +2236,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
 		struct btrfs_qgroup *qg;

-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		qg = u64_to_ptr(unode->aux);

 		qg->reserved += num_bytes;
 	}
@@ -1812,7 +2282,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
 		struct btrfs_qgroup *qg;
 		struct btrfs_qgroup_list *glist;

-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		qg = u64_to_ptr(unode->aux);

 		qg->reserved -= num_bytes;

@@ -1848,15 +2318,15 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
  */
 static int
 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-		   struct btrfs_trans_handle *trans, struct ulist *tmp,
-		   struct extent_buffer *scratch_leaf)
+		   struct btrfs_trans_handle *trans, struct ulist *qgroups,
+		   struct ulist *tmp, struct extent_buffer *scratch_leaf)
 {
 	struct btrfs_key found;
 	struct ulist *roots = NULL;
-	struct ulist_node *unode;
-	struct ulist_iterator uiter;
 	struct seq_list tree_mod_seq_elem = {};
+	u64 num_bytes;
 	u64 seq;
+	int new_roots;
 	int slot;
 	int ret;

@@ -1897,8 +2367,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 	mutex_unlock(&fs_info->qgroup_rescan_lock);

 	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
-		u64 num_bytes;
-
 		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
 		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
 		    found.type != BTRFS_METADATA_ITEM_KEY)
@@ -1908,76 +2376,34 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 		else
 			num_bytes = found.offset;

-		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
-					   tree_mod_seq_elem.seq, &roots);
+		ulist_reinit(qgroups);
+		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
+					   &roots);
 		if (ret < 0)
 			goto out;
 		spin_lock(&fs_info->qgroup_lock);
 		seq = fs_info->qgroup_seq;
 		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

-		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
-		if (ret) {
+		new_roots = 0;
+		ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
+					     seq, &new_roots, 1);
+		if (ret < 0) {
 			spin_unlock(&fs_info->qgroup_lock);
 			ulist_free(roots);
 			goto out;
 		}

-		/*
-		 * step2 of btrfs_qgroup_account_ref works from a single root,
-		 * we're doing all at once here.
-		 */
-		ulist_reinit(tmp);
-		ULIST_ITER_INIT(&uiter);
-		while ((unode = ulist_next(roots, &uiter))) {
-			struct btrfs_qgroup *qg;
-
-			qg = find_qgroup_rb(fs_info, unode->val);
-			if (!qg)
-				continue;
-
-			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
-					GFP_ATOMIC);
-			if (ret < 0) {
-				spin_unlock(&fs_info->qgroup_lock);
-				ulist_free(roots);
-				goto out;
-			}
-		}
-
-		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
-		ULIST_ITER_INIT(&uiter);
-		while ((unode = ulist_next(tmp, &uiter))) {
-			struct btrfs_qgroup *qg;
-			struct btrfs_qgroup_list *glist;
-
-			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
-			qg->rfer += num_bytes;
-			qg->rfer_cmpr += num_bytes;
-			WARN_ON(qg->tag >= seq);
-			if (qg->refcnt - seq == roots->nnodes) {
-				qg->excl += num_bytes;
-				qg->excl_cmpr += num_bytes;
-			}
-			qgroup_dirty(fs_info, qg);
-
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ret = ulist_add(tmp, glist->group->qgroupid,
-						(uintptr_t)glist->group,
-						GFP_ATOMIC);
-				if (ret < 0) {
-					spin_unlock(&fs_info->qgroup_lock);
-					ulist_free(roots);
-					goto out;
-				}
-			}
+		ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
+					     seq, 0, new_roots, 1);
+		if (ret < 0) {
+			spin_unlock(&fs_info->qgroup_lock);
+			ulist_free(roots);
+			goto out;
 		}
-
 		spin_unlock(&fs_info->qgroup_lock);
 		ulist_free(roots);
-		ret = 0;
 	}
-
 out:
 	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

@@ -1990,13 +2416,16 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 					     qgroup_rescan_work);
 	struct btrfs_path *path;
 	struct btrfs_trans_handle *trans = NULL;
-	struct ulist *tmp = NULL;
+	struct ulist *tmp = NULL, *qgroups = NULL;
 	struct extent_buffer *scratch_leaf = NULL;
 	int err = -ENOMEM;

 	path = btrfs_alloc_path();
 	if (!path)
 		goto out;
+	qgroups = ulist_alloc(GFP_NOFS);
+	if (!qgroups)
+		goto out;
 	tmp = ulist_alloc(GFP_NOFS);
 	if (!tmp)
 		goto out;
@@ -2015,7 +2444,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 			err = -EINTR;
 		} else {
 			err = qgroup_rescan_leaf(fs_info, path, trans,
-						 tmp, scratch_leaf);
+						 qgroups, tmp, scratch_leaf);
 		}
 		if (err > 0)
 			btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2025,7 +2454,8 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)

 out:
 	kfree(scratch_leaf);
-	ulist_free(tmp);
+	ulist_free(qgroups);
+	ulist_free(tmp);
 	btrfs_free_path(path);

 	mutex_lock(&fs_info->qgroup_rescan_lock);