|
@@ -111,6 +111,66 @@ xfs_inobt_get_rec(
|
|
return error;
|
|
return error;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Insert a single inobt record. Cursor must already point to desired location.
|
|
|
|
+ */
|
|
|
|
+STATIC int
|
|
|
|
+xfs_inobt_insert_rec(
|
|
|
|
+ struct xfs_btree_cur *cur,
|
|
|
|
+ __int32_t freecount,
|
|
|
|
+ xfs_inofree_t free,
|
|
|
|
+ int *stat)
|
|
|
|
+{
|
|
|
|
+ cur->bc_rec.i.ir_freecount = freecount;
|
|
|
|
+ cur->bc_rec.i.ir_free = free;
|
|
|
|
+ return xfs_btree_insert(cur, stat);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * Insert records describing a newly allocated inode chunk into the inobt.
|
|
|
|
+ */
|
|
|
|
+STATIC int
|
|
|
|
+xfs_inobt_insert(
|
|
|
|
+ struct xfs_mount *mp,
|
|
|
|
+ struct xfs_trans *tp,
|
|
|
|
+ struct xfs_buf *agbp,
|
|
|
|
+ xfs_agino_t newino,
|
|
|
|
+ xfs_agino_t newlen,
|
|
|
|
+ xfs_btnum_t btnum)
|
|
|
|
+{
|
|
|
|
+ struct xfs_btree_cur *cur;
|
|
|
|
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
|
|
|
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
|
|
|
+ xfs_agino_t thisino;
|
|
|
|
+ int i;
|
|
|
|
+ int error;
|
|
|
|
+
|
|
|
|
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
|
|
|
|
+
|
|
|
|
+ for (thisino = newino;
|
|
|
|
+ thisino < newino + newlen;
|
|
|
|
+ thisino += XFS_INODES_PER_CHUNK) {
|
|
|
|
+ error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
|
|
|
|
+ if (error) {
|
|
|
|
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
|
|
|
+ return error;
|
|
|
|
+ }
|
|
|
|
+ ASSERT(i == 0);
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
|
|
|
|
+ XFS_INOBT_ALL_FREE, &i);
|
|
|
|
+ if (error) {
|
|
|
|
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
|
|
|
+ return error;
|
|
|
|
+ }
|
|
|
|
+ ASSERT(i == 1);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Verify that the number of free inodes in the AGI is correct.
|
|
* Verify that the number of free inodes in the AGI is correct.
|
|
*/
|
|
*/
|
|
@@ -303,13 +363,10 @@ xfs_ialloc_ag_alloc(
|
|
{
|
|
{
|
|
xfs_agi_t *agi; /* allocation group header */
|
|
xfs_agi_t *agi; /* allocation group header */
|
|
xfs_alloc_arg_t args; /* allocation argument structure */
|
|
xfs_alloc_arg_t args; /* allocation argument structure */
|
|
- xfs_btree_cur_t *cur; /* inode btree cursor */
|
|
|
|
xfs_agnumber_t agno;
|
|
xfs_agnumber_t agno;
|
|
int error;
|
|
int error;
|
|
- int i;
|
|
|
|
xfs_agino_t newino; /* new first inode's number */
|
|
xfs_agino_t newino; /* new first inode's number */
|
|
xfs_agino_t newlen; /* new number of inodes */
|
|
xfs_agino_t newlen; /* new number of inodes */
|
|
- xfs_agino_t thisino; /* current inode number, for loop */
|
|
|
|
int isaligned = 0; /* inode allocation at stripe unit */
|
|
int isaligned = 0; /* inode allocation at stripe unit */
|
|
/* boundary */
|
|
/* boundary */
|
|
struct xfs_perag *pag;
|
|
struct xfs_perag *pag;
|
|
@@ -459,29 +516,19 @@ xfs_ialloc_ag_alloc(
|
|
agi->agi_newino = cpu_to_be32(newino);
|
|
agi->agi_newino = cpu_to_be32(newino);
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Insert records describing the new inode chunk into the btree.
|
|
|
|
|
|
+ * Insert records describing the new inode chunk into the btrees.
|
|
*/
|
|
*/
|
|
- cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
|
|
|
|
- for (thisino = newino;
|
|
|
|
- thisino < newino + newlen;
|
|
|
|
- thisino += XFS_INODES_PER_CHUNK) {
|
|
|
|
- cur->bc_rec.i.ir_startino = thisino;
|
|
|
|
- cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
|
|
|
|
- cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
|
|
|
|
- error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
|
|
|
|
- if (error) {
|
|
|
|
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
|
|
|
- return error;
|
|
|
|
- }
|
|
|
|
- ASSERT(i == 0);
|
|
|
|
- error = xfs_btree_insert(cur, &i);
|
|
|
|
- if (error) {
|
|
|
|
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
|
|
|
|
|
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
|
|
|
|
+ XFS_BTNUM_INO);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+
|
|
|
|
+ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
|
|
|
|
+ error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
|
|
|
|
+ XFS_BTNUM_FINO);
|
|
|
|
+ if (error)
|
|
return error;
|
|
return error;
|
|
- }
|
|
|
|
- ASSERT(i == 1);
|
|
|
|
}
|
|
}
|
|
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
|
|
|
/*
|
|
/*
|
|
* Log allocation group header fields
|
|
* Log allocation group header fields
|
|
*/
|
|
*/
|
|
@@ -675,13 +722,10 @@ xfs_ialloc_get_rec(
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Allocate an inode.
|
|
|
|
- *
|
|
|
|
- * The caller selected an AG for us, and made sure that free inodes are
|
|
|
|
- * available.
|
|
|
|
|
|
+ * Allocate an inode using the inobt-only algorithm.
|
|
*/
|
|
*/
|
|
STATIC int
|
|
STATIC int
|
|
-xfs_dialloc_ag(
|
|
|
|
|
|
+xfs_dialloc_ag_inobt(
|
|
struct xfs_trans *tp,
|
|
struct xfs_trans *tp,
|
|
struct xfs_buf *agbp,
|
|
struct xfs_buf *agbp,
|
|
xfs_ino_t parent,
|
|
xfs_ino_t parent,
|
|
@@ -707,7 +751,7 @@ xfs_dialloc_ag(
|
|
ASSERT(pag->pagi_freecount > 0);
|
|
ASSERT(pag->pagi_freecount > 0);
|
|
|
|
|
|
restart_pagno:
|
|
restart_pagno:
|
|
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
|
|
|
|
|
|
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
|
/*
|
|
/*
|
|
* If pagino is 0 (this is the root inode allocation) use newino.
|
|
* If pagino is 0 (this is the root inode allocation) use newino.
|
|
* This must work because we've just allocated some.
|
|
* This must work because we've just allocated some.
|
|
@@ -939,6 +983,294 @@ error0:
|
|
return error;
|
|
return error;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Use the free inode btree to allocate an inode based on distance from the
|
|
|
|
+ * parent. Note that the provided cursor may be deleted and replaced.
|
|
|
|
+ */
|
|
|
|
+STATIC int
|
|
|
|
+xfs_dialloc_ag_finobt_near(
|
|
|
|
+ xfs_agino_t pagino,
|
|
|
|
+ struct xfs_btree_cur **ocur,
|
|
|
|
+ struct xfs_inobt_rec_incore *rec)
|
|
|
|
+{
|
|
|
|
+ struct xfs_btree_cur *lcur = *ocur; /* left search cursor */
|
|
|
|
+ struct xfs_btree_cur *rcur; /* right search cursor */
|
|
|
|
+ struct xfs_inobt_rec_incore rrec;
|
|
|
|
+ int error;
|
|
|
|
+ int i, j;
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+
|
|
|
|
+ if (i == 1) {
|
|
|
|
+ error = xfs_inobt_get_rec(lcur, rec, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * See if we've landed in the parent inode record. The finobt
|
|
|
|
+ * only tracks chunks with at least one free inode, so record
|
|
|
|
+ * existence is enough.
|
|
|
|
+ */
|
|
|
|
+ if (pagino >= rec->ir_startino &&
|
|
|
|
+ pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ error = xfs_btree_dup_cursor(lcur, &rcur);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_rcur;
|
|
|
|
+ if (j == 1) {
|
|
|
|
+ error = xfs_inobt_get_rec(rcur, &rrec, &j);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_rcur;
|
|
|
|
+ XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
|
|
|
|
+ if (i == 1 && j == 1) {
|
|
|
|
+ /*
|
|
|
|
+ * Both the left and right records are valid. Choose the closer
|
|
|
|
+ * inode chunk to the target.
|
|
|
|
+ */
|
|
|
|
+ if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
|
|
|
|
+ (rrec.ir_startino - pagino)) {
|
|
|
|
+ *rec = rrec;
|
|
|
|
+ xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
|
|
|
|
+ *ocur = rcur;
|
|
|
|
+ } else {
|
|
|
|
+ xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
|
|
|
|
+ }
|
|
|
|
+ } else if (j == 1) {
|
|
|
|
+ /* only the right record is valid */
|
|
|
|
+ *rec = rrec;
|
|
|
|
+ xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
|
|
|
|
+ *ocur = rcur;
|
|
|
|
+ } else if (i == 1) {
|
|
|
|
+ /* only the left record is valid */
|
|
|
|
+ xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+error_rcur:
|
|
|
|
+ xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
|
|
|
|
+ return error;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * Use the free inode btree to find a free inode based on a newino hint. If
|
|
|
|
+ * the hint is NULL, find the first free inode in the AG.
|
|
|
|
+ */
|
|
|
|
+STATIC int
|
|
|
|
+xfs_dialloc_ag_finobt_newino(
|
|
|
|
+ struct xfs_agi *agi,
|
|
|
|
+ struct xfs_btree_cur *cur,
|
|
|
|
+ struct xfs_inobt_rec_incore *rec)
|
|
|
|
+{
|
|
|
|
+ int error;
|
|
|
|
+ int i;
|
|
|
|
+
|
|
|
|
+ if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
|
|
|
|
+ error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ,
|
|
|
|
+ &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+ if (i == 1) {
|
|
|
|
+ error = xfs_inobt_get_rec(cur, rec, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Find the first inode available in the AG.
|
|
|
|
+ */
|
|
|
|
+ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_get_rec(cur, rec, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * Update the inobt based on a modification made to the finobt. Also ensure that
|
|
|
|
+ * the records from both trees are equivalent post-modification.
|
|
|
|
+ */
|
|
|
|
+STATIC int
|
|
|
|
+xfs_dialloc_ag_update_inobt(
|
|
|
|
+ struct xfs_btree_cur *cur, /* inobt cursor */
|
|
|
|
+ struct xfs_inobt_rec_incore *frec, /* finobt record */
|
|
|
|
+ int offset) /* inode offset */
|
|
|
|
+{
|
|
|
|
+ struct xfs_inobt_rec_incore rec;
|
|
|
|
+ int error;
|
|
|
|
+ int i;
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_get_rec(cur, &rec, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+ XFS_WANT_CORRUPTED_RETURN(i == 1);
|
|
|
|
+ ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
|
|
|
|
+ XFS_INODES_PER_CHUNK) == 0);
|
|
|
|
+
|
|
|
|
+ rec.ir_free &= ~XFS_INOBT_MASK(offset);
|
|
|
|
+ rec.ir_freecount--;
|
|
|
|
+
|
|
|
|
+ XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
|
|
|
|
+ (rec.ir_freecount == frec->ir_freecount));
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_update(cur, &rec);
|
|
|
|
+ if (error)
|
|
|
|
+ return error;
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * Allocate an inode using the free inode btree, if available. Otherwise, fall
|
|
|
|
+ * back to the inobt search algorithm.
|
|
|
|
+ *
|
|
|
|
+ * The caller selected an AG for us, and made sure that free inodes are
|
|
|
|
+ * available.
|
|
|
|
+ */
|
|
|
|
+STATIC int
|
|
|
|
+xfs_dialloc_ag(
|
|
|
|
+ struct xfs_trans *tp,
|
|
|
|
+ struct xfs_buf *agbp,
|
|
|
|
+ xfs_ino_t parent,
|
|
|
|
+ xfs_ino_t *inop)
|
|
|
|
+{
|
|
|
|
+ struct xfs_mount *mp = tp->t_mountp;
|
|
|
|
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
|
|
|
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
|
|
|
+ xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
|
|
|
|
+ xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
|
|
|
|
+ struct xfs_perag *pag;
|
|
|
|
+ struct xfs_btree_cur *cur; /* finobt cursor */
|
|
|
|
+ struct xfs_btree_cur *icur; /* inobt cursor */
|
|
|
|
+ struct xfs_inobt_rec_incore rec;
|
|
|
|
+ xfs_ino_t ino;
|
|
|
|
+ int error;
|
|
|
|
+ int offset;
|
|
|
|
+ int i;
|
|
|
|
+
|
|
|
|
+ if (!xfs_sb_version_hasfinobt(&mp->m_sb))
|
|
|
|
+ return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
|
|
|
|
+
|
|
|
|
+ pag = xfs_perag_get(mp, agno);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * If pagino is 0 (this is the root inode allocation) use newino.
|
|
|
|
+ * This must work because we've just allocated some.
|
|
|
|
+ */
|
|
|
|
+ if (!pagino)
|
|
|
|
+ pagino = be32_to_cpu(agi->agi_newino);
|
|
|
|
+
|
|
|
|
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
|
|
|
|
+
|
|
|
|
+ error = xfs_check_agi_freecount(cur, agi);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_cur;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * The search algorithm depends on whether we're in the same AG as the
|
|
|
|
+ * parent. If so, find the closest available inode to the parent. If
|
|
|
|
+ * not, consider the agi hint or find the first free inode in the AG.
|
|
|
|
+ */
|
|
|
|
+ if (agno == pagno)
|
|
|
|
+ error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
|
|
|
|
+ else
|
|
|
|
+ error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_cur;
|
|
|
|
+
|
|
|
|
+ offset = xfs_lowbit64(rec.ir_free);
|
|
|
|
+ ASSERT(offset >= 0);
|
|
|
|
+ ASSERT(offset < XFS_INODES_PER_CHUNK);
|
|
|
|
+ ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
|
|
|
|
+ XFS_INODES_PER_CHUNK) == 0);
|
|
|
|
+ ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Modify or remove the finobt record.
|
|
|
|
+ */
|
|
|
|
+ rec.ir_free &= ~XFS_INOBT_MASK(offset);
|
|
|
|
+ rec.ir_freecount--;
|
|
|
|
+ if (rec.ir_freecount)
|
|
|
|
+ error = xfs_inobt_update(cur, &rec);
|
|
|
|
+ else
|
|
|
|
+ error = xfs_btree_delete(cur, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_cur;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * The finobt has now been updated appropriately. We haven't updated the
|
|
|
|
+ * agi and superblock yet, so we can create an inobt cursor and validate
|
|
|
|
+ * the original freecount. If all is well, make the equivalent update to
|
|
|
|
+ * the inobt using the finobt record and offset information.
|
|
|
|
+ */
|
|
|
|
+ icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
|
|
|
+
|
|
|
|
+ error = xfs_check_agi_freecount(icur, agi);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_icur;
|
|
|
|
+
|
|
|
|
+ error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_icur;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Both trees have now been updated. We must update the perag and
|
|
|
|
+ * superblock before we can check the freecount for each btree.
|
|
|
|
+ */
|
|
|
|
+ be32_add_cpu(&agi->agi_freecount, -1);
|
|
|
|
+ xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
|
|
|
|
+ pag->pagi_freecount--;
|
|
|
|
+
|
|
|
|
+ xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
|
|
|
|
+
|
|
|
|
+ error = xfs_check_agi_freecount(icur, agi);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_icur;
|
|
|
|
+ error = xfs_check_agi_freecount(cur, agi);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error_icur;
|
|
|
|
+
|
|
|
|
+ xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
|
|
|
|
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
|
|
|
+ xfs_perag_put(pag);
|
|
|
|
+ *inop = ino;
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+error_icur:
|
|
|
|
+ xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
|
|
|
|
+error_cur:
|
|
|
|
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
|
|
|
+ xfs_perag_put(pag);
|
|
|
|
+ return error;
|
|
|
|
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Allocate an inode on disk.
|
|
* Allocate an inode on disk.
|
|
*
|
|
*
|
|
@@ -1098,78 +1430,34 @@ out_error:
|
|
return XFS_ERROR(error);
|
|
return XFS_ERROR(error);
|
|
}
|
|
}
|
|
|
|
|
|
-/*
|
|
|
|
- * Free disk inode. Carefully avoids touching the incore inode, all
|
|
|
|
- * manipulations incore are the caller's responsibility.
|
|
|
|
- * The on-disk inode is not changed by this operation, only the
|
|
|
|
- * btree (free inode mask) is changed.
|
|
|
|
- */
|
|
|
|
-int
|
|
|
|
-xfs_difree(
|
|
|
|
- xfs_trans_t *tp, /* transaction pointer */
|
|
|
|
- xfs_ino_t inode, /* inode to be freed */
|
|
|
|
- xfs_bmap_free_t *flist, /* extents to free */
|
|
|
|
- int *delete, /* set if inode cluster was deleted */
|
|
|
|
- xfs_ino_t *first_ino) /* first inode in deleted cluster */
|
|
|
|
|
|
+STATIC int
|
|
|
|
+xfs_difree_inobt(
|
|
|
|
+ struct xfs_mount *mp,
|
|
|
|
+ struct xfs_trans *tp,
|
|
|
|
+ struct xfs_buf *agbp,
|
|
|
|
+ xfs_agino_t agino,
|
|
|
|
+ struct xfs_bmap_free *flist,
|
|
|
|
+ int *delete,
|
|
|
|
+ xfs_ino_t *first_ino,
|
|
|
|
+ struct xfs_inobt_rec_incore *orec)
|
|
{
|
|
{
|
|
- /* REFERENCED */
|
|
|
|
- xfs_agblock_t agbno; /* block number containing inode */
|
|
|
|
- xfs_buf_t *agbp; /* buffer containing allocation group header */
|
|
|
|
- xfs_agino_t agino; /* inode number relative to allocation group */
|
|
|
|
- xfs_agnumber_t agno; /* allocation group number */
|
|
|
|
- xfs_agi_t *agi; /* allocation group header */
|
|
|
|
- xfs_btree_cur_t *cur; /* inode btree cursor */
|
|
|
|
- int error; /* error return value */
|
|
|
|
- int i; /* result code */
|
|
|
|
- int ilen; /* inodes in an inode cluster */
|
|
|
|
- xfs_mount_t *mp; /* mount structure for filesystem */
|
|
|
|
- int off; /* offset of inode in inode chunk */
|
|
|
|
- xfs_inobt_rec_incore_t rec; /* btree record */
|
|
|
|
- struct xfs_perag *pag;
|
|
|
|
-
|
|
|
|
- mp = tp->t_mountp;
|
|
|
|
|
|
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
|
|
|
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
|
|
|
+ struct xfs_perag *pag;
|
|
|
|
+ struct xfs_btree_cur *cur;
|
|
|
|
+ struct xfs_inobt_rec_incore rec;
|
|
|
|
+ int ilen;
|
|
|
|
+ int error;
|
|
|
|
+ int i;
|
|
|
|
+ int off;
|
|
|
|
|
|
- /*
|
|
|
|
- * Break up inode number into its components.
|
|
|
|
- */
|
|
|
|
- agno = XFS_INO_TO_AGNO(mp, inode);
|
|
|
|
- if (agno >= mp->m_sb.sb_agcount) {
|
|
|
|
- xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
|
|
|
|
- __func__, agno, mp->m_sb.sb_agcount);
|
|
|
|
- ASSERT(0);
|
|
|
|
- return XFS_ERROR(EINVAL);
|
|
|
|
- }
|
|
|
|
- agino = XFS_INO_TO_AGINO(mp, inode);
|
|
|
|
- if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
|
|
|
|
- xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
|
|
|
|
- __func__, (unsigned long long)inode,
|
|
|
|
- (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
|
|
|
|
- ASSERT(0);
|
|
|
|
- return XFS_ERROR(EINVAL);
|
|
|
|
- }
|
|
|
|
- agbno = XFS_AGINO_TO_AGBNO(mp, agino);
|
|
|
|
- if (agbno >= mp->m_sb.sb_agblocks) {
|
|
|
|
- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
|
|
|
|
- __func__, agbno, mp->m_sb.sb_agblocks);
|
|
|
|
- ASSERT(0);
|
|
|
|
- return XFS_ERROR(EINVAL);
|
|
|
|
- }
|
|
|
|
- /*
|
|
|
|
- * Get the allocation group header.
|
|
|
|
- */
|
|
|
|
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
|
|
|
|
- if (error) {
|
|
|
|
- xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
|
|
|
|
- __func__, error);
|
|
|
|
- return error;
|
|
|
|
- }
|
|
|
|
- agi = XFS_BUF_TO_AGI(agbp);
|
|
|
|
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
|
|
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
|
|
- ASSERT(agbno < be32_to_cpu(agi->agi_length));
|
|
|
|
|
|
+ ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Initialize the cursor.
|
|
* Initialize the cursor.
|
|
*/
|
|
*/
|
|
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
|
|
|
|
|
|
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
|
|
|
|
|
error = xfs_check_agi_freecount(cur, agi);
|
|
error = xfs_check_agi_freecount(cur, agi);
|
|
if (error)
|
|
if (error)
|
|
@@ -1261,6 +1549,7 @@ xfs_difree(
|
|
if (error)
|
|
if (error)
|
|
goto error0;
|
|
goto error0;
|
|
|
|
|
|
|
|
+ *orec = rec;
|
|
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
|
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
@@ -1269,6 +1558,182 @@ error0:
|
|
return error;
|
|
return error;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * Free an inode in the free inode btree.
|
|
|
|
+ */
|
|
|
|
+STATIC int
|
|
|
|
+xfs_difree_finobt(
|
|
|
|
+ struct xfs_mount *mp,
|
|
|
|
+ struct xfs_trans *tp,
|
|
|
|
+ struct xfs_buf *agbp,
|
|
|
|
+ xfs_agino_t agino,
|
|
|
|
+ struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
|
|
|
|
+{
|
|
|
|
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
|
|
|
|
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
|
|
|
|
+ struct xfs_btree_cur *cur;
|
|
|
|
+ struct xfs_inobt_rec_incore rec;
|
|
|
|
+ int offset = agino - ibtrec->ir_startino;
|
|
|
|
+ int error;
|
|
|
|
+ int i;
|
|
|
|
+
|
|
|
|
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error;
|
|
|
|
+ if (i == 0) {
|
|
|
|
+ /*
|
|
|
|
+ * If the record does not exist in the finobt, we must have just
|
|
|
|
+ * freed an inode in a previously fully allocated chunk. If not,
|
|
|
|
+ * something is out of sync.
|
|
|
|
+ */
|
|
|
|
+ XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
|
|
|
|
+
|
|
|
|
+ error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
|
|
|
|
+ ibtrec->ir_free, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error;
|
|
|
|
+ ASSERT(i == 1);
|
|
|
|
+
|
|
|
|
+ goto out;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Read and update the existing record. We could just copy the ibtrec
|
|
|
|
+ * across here, but that would defeat the purpose of having redundant
|
|
|
|
+ * metadata. By making the modifications independently, we can catch
|
|
|
|
+ * corruptions that we wouldn't see if we just copied from one record
|
|
|
|
+ * to another.
|
|
|
|
+ */
|
|
|
|
+ error = xfs_inobt_get_rec(cur, &rec, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error;
|
|
|
|
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error);
|
|
|
|
+
|
|
|
|
+ rec.ir_free |= XFS_INOBT_MASK(offset);
|
|
|
|
+ rec.ir_freecount++;
|
|
|
|
+
|
|
|
|
+ XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
|
|
|
|
+ (rec.ir_freecount == ibtrec->ir_freecount),
|
|
|
|
+ error);
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * The content of inobt records should always match between the inobt
|
|
|
|
+ * and finobt. The lifecycle of records in the finobt is different from
|
|
|
|
+ * the inobt in that the finobt only tracks records with at least one
|
|
|
|
+ * free inode. Hence, if all of the inodes are free and we aren't
|
|
|
|
+ * keeping inode chunks permanently on disk, remove the record.
|
|
|
|
+ * Otherwise, update the record with the new information.
|
|
|
|
+ */
|
|
|
|
+ if (rec.ir_freecount == mp->m_ialloc_inos &&
|
|
|
|
+ !(mp->m_flags & XFS_MOUNT_IKEEP)) {
|
|
|
|
+ error = xfs_btree_delete(cur, &i);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error;
|
|
|
|
+ ASSERT(i == 1);
|
|
|
|
+ } else {
|
|
|
|
+ error = xfs_inobt_update(cur, &rec);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+out:
|
|
|
|
+ error = xfs_check_agi_freecount(cur, agi);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error;
|
|
|
|
+
|
|
|
|
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+error:
|
|
|
|
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
|
|
|
|
+ return error;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * Free disk inode. Carefully avoids touching the incore inode, all
|
|
|
|
+ * manipulations incore are the caller's responsibility.
|
|
|
|
+ * The on-disk inode is not changed by this operation, only the
|
|
|
|
+ * btree (free inode mask) is changed.
|
|
|
|
+ */
|
|
|
|
+int
|
|
|
|
+xfs_difree(
|
|
|
|
+ struct xfs_trans *tp, /* transaction pointer */
|
|
|
|
+ xfs_ino_t inode, /* inode to be freed */
|
|
|
|
+ struct xfs_bmap_free *flist, /* extents to free */
|
|
|
|
+ int *delete,/* set if inode cluster was deleted */
|
|
|
|
+ xfs_ino_t *first_ino)/* first inode in deleted cluster */
|
|
|
|
+{
|
|
|
|
+ /* REFERENCED */
|
|
|
|
+ xfs_agblock_t agbno; /* block number containing inode */
|
|
|
|
+ struct xfs_buf *agbp; /* buffer for allocation group header */
|
|
|
|
+ xfs_agino_t agino; /* allocation group inode number */
|
|
|
|
+ xfs_agnumber_t agno; /* allocation group number */
|
|
|
|
+ int error; /* error return value */
|
|
|
|
+ struct xfs_mount *mp; /* mount structure for filesystem */
|
|
|
|
+ struct xfs_inobt_rec_incore rec;/* btree record */
|
|
|
|
+
|
|
|
|
+ mp = tp->t_mountp;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Break up inode number into its components.
|
|
|
|
+ */
|
|
|
|
+ agno = XFS_INO_TO_AGNO(mp, inode);
|
|
|
|
+ if (agno >= mp->m_sb.sb_agcount) {
|
|
|
|
+ xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
|
|
|
|
+ __func__, agno, mp->m_sb.sb_agcount);
|
|
|
|
+ ASSERT(0);
|
|
|
|
+ return XFS_ERROR(EINVAL);
|
|
|
|
+ }
|
|
|
|
+ agino = XFS_INO_TO_AGINO(mp, inode);
|
|
|
|
+ if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
|
|
|
|
+ xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
|
|
|
|
+ __func__, (unsigned long long)inode,
|
|
|
|
+ (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
|
|
|
|
+ ASSERT(0);
|
|
|
|
+ return XFS_ERROR(EINVAL);
|
|
|
|
+ }
|
|
|
|
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
|
|
|
|
+ if (agbno >= mp->m_sb.sb_agblocks) {
|
|
|
|
+ xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
|
|
|
|
+ __func__, agbno, mp->m_sb.sb_agblocks);
|
|
|
|
+ ASSERT(0);
|
|
|
|
+ return XFS_ERROR(EINVAL);
|
|
|
|
+ }
|
|
|
|
+ /*
|
|
|
|
+ * Get the allocation group header.
|
|
|
|
+ */
|
|
|
|
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
|
|
|
|
+ if (error) {
|
|
|
|
+ xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
|
|
|
|
+ __func__, error);
|
|
|
|
+ return error;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Fix up the inode allocation btree.
|
|
|
|
+ */
|
|
|
|
+ error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino,
|
|
|
|
+ &rec);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error0;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Fix up the free inode btree.
|
|
|
|
+ */
|
|
|
|
+ if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
|
|
|
|
+ error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
|
|
|
|
+ if (error)
|
|
|
|
+ goto error0;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+error0:
|
|
|
|
+ return error;
|
|
|
|
+}
|
|
|
|
+
|
|
STATIC int
|
|
STATIC int
|
|
xfs_imap_lookup(
|
|
xfs_imap_lookup(
|
|
struct xfs_mount *mp,
|
|
struct xfs_mount *mp,
|
|
@@ -1300,7 +1765,7 @@ xfs_imap_lookup(
|
|
* we have a record, we need to ensure it contains the inode number
|
|
* we have a record, we need to ensure it contains the inode number
|
|
* we are looking up.
|
|
* we are looking up.
|
|
*/
|
|
*/
|
|
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
|
|
|
|
|
|
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
|
|
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
|
|
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
|
|
if (!error) {
|
|
if (!error) {
|
|
if (i)
|
|
if (i)
|
|
@@ -1488,7 +1953,16 @@ xfs_ialloc_compute_maxlevels(
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Log specified fields for the ag hdr (inode section)
|
|
|
|
|
|
+ * Log specified fields for the ag hdr (inode section). The growth of the agi
|
|
|
|
+ * structure over time requires that we interpret the buffer as two logical
|
|
|
|
+ * regions delineated by the end of the unlinked list. This is due to the size
|
|
|
|
+ * of the hash table and its location in the middle of the agi.
|
|
|
|
+ *
|
|
|
|
+ * For example, a request to log a field before agi_unlinked and a field after
|
|
|
|
+ * agi_unlinked could cause us to log the entire hash table and use an excessive
|
|
|
|
+ * amount of log space. To avoid this behavior, log the region up through
|
|
|
|
+ * agi_unlinked in one call and the region after agi_unlinked through the end of
|
|
|
|
+ * the structure in another.
|
|
*/
|
|
*/
|
|
void
|
|
void
|
|
xfs_ialloc_log_agi(
|
|
xfs_ialloc_log_agi(
|
|
@@ -1511,6 +1985,8 @@ xfs_ialloc_log_agi(
|
|
offsetof(xfs_agi_t, agi_newino),
|
|
offsetof(xfs_agi_t, agi_newino),
|
|
offsetof(xfs_agi_t, agi_dirino),
|
|
offsetof(xfs_agi_t, agi_dirino),
|
|
offsetof(xfs_agi_t, agi_unlinked),
|
|
offsetof(xfs_agi_t, agi_unlinked),
|
|
|
|
+ offsetof(xfs_agi_t, agi_free_root),
|
|
|
|
+ offsetof(xfs_agi_t, agi_free_level),
|
|
sizeof(xfs_agi_t)
|
|
sizeof(xfs_agi_t)
|
|
};
|
|
};
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG
|
|
@@ -1519,15 +1995,30 @@ xfs_ialloc_log_agi(
|
|
agi = XFS_BUF_TO_AGI(bp);
|
|
agi = XFS_BUF_TO_AGI(bp);
|
|
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
|
|
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
|
|
#endif
|
|
#endif
|
|
|
|
+
|
|
|
|
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
|
|
|
|
+
|
|
/*
|
|
/*
|
|
- * Compute byte offsets for the first and last fields.
|
|
|
|
|
|
+ * Compute byte offsets for the first and last fields in the first
|
|
|
|
+ * region and log the agi buffer. This only logs up through
|
|
|
|
+ * agi_unlinked.
|
|
*/
|
|
*/
|
|
- xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
|
|
|
|
|
|
+ if (fields & XFS_AGI_ALL_BITS_R1) {
|
|
|
|
+ xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
|
|
|
|
+ &first, &last);
|
|
|
|
+ xfs_trans_log_buf(tp, bp, first, last);
|
|
|
|
+ }
|
|
|
|
+
|
|
/*
|
|
/*
|
|
- * Log the allocation group inode header buffer.
|
|
|
|
|
|
+ * Mask off the bits in the first region and calculate the first and
|
|
|
|
+ * last field offsets for any bits in the second region.
|
|
*/
|
|
*/
|
|
- xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
|
|
|
|
- xfs_trans_log_buf(tp, bp, first, last);
|
|
|
|
|
|
+ fields &= ~XFS_AGI_ALL_BITS_R1;
|
|
|
|
+ if (fields) {
|
|
|
|
+ xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
|
|
|
|
+ &first, &last);
|
|
|
|
+ xfs_trans_log_buf(tp, bp, first, last);
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG
|