@@ -61,6 +61,9 @@ xlog_recover_check_summary(
 #else
 #define xlog_recover_check_summary(log)
 #endif
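+/*
+ * Forward declaration: xlog_verify_tail() and xlog_verify_head() below run
+ * CRC-only verification passes through xlog_do_recovery_pass() before it is
+ * defined.
+ */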
+STATIC int
+xlog_do_recovery_pass(
+	struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
 
 /*
  * This structure is used during recovery to record the buf log items which
@@ -867,6 +870,351 @@ validate_head:
 	return error;
 }
 
+/*
+ * Seek backwards in the log for log record headers.
+ *
+ * Given a starting log block, walk backwards until we find the provided number
+ * of records or hit the provided tail block. The return value is the number of
+ * records encountered or a negative error code. The log block and buffer
+ * pointer of the last record seen are returned in rblk and rhead respectively.
+ */
+STATIC int
+xlog_rseek_logrec_hdr(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	xfs_daddr_t		tail_blk,
+	int			count,
+	struct xfs_buf		*bp,
+	xfs_daddr_t		*rblk,
+	struct xlog_rec_header	**rhead,
+	bool			*wrapped)
+{
+	int			i;
+	int			error;
+	int			found = 0;
+	char			*offset = NULL;
+	xfs_daddr_t		end_blk;
+
+	*wrapped = false;
+
+	/*
+	 * Walk backwards from the head block until we hit the tail or the first
+	 * block in the log.
+	 */
+	end_blk = head_blk > tail_blk ? tail_blk : 0;
+	for (i = (int) head_blk - 1; i >= end_blk; i--) {
+		error = xlog_bread(log, i, 1, bp, &offset);
+		if (error)
+			goto out_error;
+
+		if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+			*rblk = i;
+			*rhead = (struct xlog_rec_header *) offset;
+			if (++found == count)
+				break;
+		}
+	}
+
+	/*
+	 * If we haven't hit the tail block or the log record header count,
+	 * start looking again from the end of the physical log. Note that
+	 * callers can pass head == tail if the tail is not yet known.
+	 */
+	if (tail_blk >= head_blk && found != count) {
+		for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
+			error = xlog_bread(log, i, 1, bp, &offset);
+			if (error)
+				goto out_error;
+
+			if (*(__be32 *)offset ==
+			    cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+				*wrapped = true;
+				*rblk = i;
+				*rhead = (struct xlog_rec_header *) offset;
+				if (++found == count)
+					break;
+			}
+		}
+	}
+
+	return found;
+
+out_error:
+	return error;
+}
+
+/*
+ * Seek forward in the log for log record headers.
+ *
+ * Given head and tail blocks, walk forward from the tail block until we find
+ * the provided number of records or hit the head block. The return value is the
+ * number of records encountered or a negative error code. The log block and
+ * buffer pointer of the last record seen are returned in rblk and rhead
+ * respectively.
+ */
+STATIC int
+xlog_seek_logrec_hdr(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	xfs_daddr_t		tail_blk,
+	int			count,
+	struct xfs_buf		*bp,
+	xfs_daddr_t		*rblk,
+	struct xlog_rec_header	**rhead,
+	bool			*wrapped)
+{
+	int			i;
+	int			error;
+	int			found = 0;
+	char			*offset = NULL;
+	xfs_daddr_t		end_blk;
+
+	*wrapped = false;
+
+	/*
+	 * Walk forward from the tail block until we hit the head or the last
+	 * block in the log.
+	 */
+	end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
+	for (i = (int) tail_blk; i <= end_blk; i++) {
+		error = xlog_bread(log, i, 1, bp, &offset);
+		if (error)
+			goto out_error;
+
+		if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+			*rblk = i;
+			*rhead = (struct xlog_rec_header *) offset;
+			if (++found == count)
+				break;
+		}
+	}
+
+	/*
+	 * If we haven't hit the head block or the log record header count,
+	 * start looking again from the start of the physical log.
+	 */
+	if (tail_blk > head_blk && found != count) {
+		for (i = 0; i < (int) head_blk; i++) {
+			error = xlog_bread(log, i, 1, bp, &offset);
+			if (error)
+				goto out_error;
+
+			if (*(__be32 *)offset ==
+			    cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+				*wrapped = true;
+				*rblk = i;
+				*rhead = (struct xlog_rec_header *) offset;
+				if (++found == count)
+					break;
+			}
+		}
+	}
+
+	return found;
+
+out_error:
+	return error;
+}
+
+/*
+ * Check the log tail for torn writes. This is required when torn writes are
+ * detected at the head and the head had to be walked back to a previous record.
+ * The tail of the previous record must now be verified to ensure the torn
+ * writes didn't corrupt the previous tail.
+ *
+ * Return an error if CRC verification fails as recovery cannot proceed.
+ */
+STATIC int
+xlog_verify_tail(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	xfs_daddr_t		tail_blk)
+{
+	struct xlog_rec_header	*thead;
+	struct xfs_buf		*bp;
+	xfs_daddr_t		first_bad;
+	int			count;
+	int			error = 0;
+	bool			wrapped;
+	xfs_daddr_t		tmp_head;
+
+	bp = xlog_get_bp(log, 1);
+	if (!bp)
+		return -ENOMEM;
+
+	/*
+	 * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
+	 * a temporary head block that points after the last possible
+	 * concurrently written record of the tail.
+	 */
+	count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
+				     XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
+				     &wrapped);
+	if (count < 0) {
+		error = count;
+		goto out;
+	}
+
+	/*
+	 * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
+	 * into the actual log head. tmp_head points to the start of the record
+	 * so update it to the actual head block.
+	 */
+	if (count < XLOG_MAX_ICLOGS + 1)
+		tmp_head = head_blk;
+
+	/*
+	 * We now have a tail and temporary head block that covers at least
+	 * XLOG_MAX_ICLOGS records from the tail. We need to verify that these
+	 * records were completely written. Run a CRC verification pass from
+	 * tail to head and return the result.
+	 */
+	error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
+				      XLOG_RECOVER_CRCPASS, &first_bad);
+
+out:
+	xlog_put_bp(bp);
+	return error;
+}
+
+/*
+ * Detect and trim torn writes from the head of the log.
+ *
+ * Storage without sector atomicity guarantees can result in torn writes in the
+ * log in the event of a crash. Our only means to detect this scenario is via
+ * CRC verification. While we can't always be certain that CRC verification
+ * failure is due to a torn write vs. an unrelated corruption, we do know that
+ * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
+ * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
+ * the log and treat failures in this range as torn writes as a matter of
+ * policy. In the event of CRC failure, the head is walked back to the last good
+ * record in the log and the tail is updated from that record and verified.
+ */
+STATIC int
+xlog_verify_head(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,	/* in/out: unverified head */
+	xfs_daddr_t		*tail_blk,	/* out: tail block */
+	struct xfs_buf		*bp,
+	xfs_daddr_t		*rhead_blk,	/* start blk of last record */
+	struct xlog_rec_header	**rhead,	/* ptr to last record */
+	bool			*wrapped)	/* last rec. wraps phys. log */
+{
+	struct xlog_rec_header	*tmp_rhead;
+	struct xfs_buf		*tmp_bp;
+	xfs_daddr_t		first_bad;
+	xfs_daddr_t		tmp_rhead_blk;
+	int			found;
+	int			error;
+	bool			tmp_wrapped;
+
+	/*
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
+	 */
+	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
+				      rhead, wrapped);
+	if (found < 0)
+		return found;
+	if (!found) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+
+	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
+
+	/*
+	 * Now that we have a tail block, check the head of the log for torn
+	 * writes. Search again until we hit the tail or the maximum number of
+	 * log record I/Os that could have been in flight at one time. Use a
+	 * temporary buffer so we don't trash the rhead/bp pointer from the
+	 * call above.
+	 */
+	tmp_bp = xlog_get_bp(log, 1);
+	if (!tmp_bp)
+		return -ENOMEM;
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
+				      XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
+				      &tmp_rhead, &tmp_wrapped);
+	xlog_put_bp(tmp_bp);
+	if (error < 0)
+		return error;
+
+	/*
+	 * Now run a CRC verification pass over the records starting at the
+	 * block found above to the current head. If a CRC failure occurs, the
+	 * log block of the first bad record is saved in first_bad.
+	 */
+	error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
+				      XLOG_RECOVER_CRCPASS, &first_bad);
+	if (error == -EFSBADCRC) {
+		/*
+		 * We've hit a potential torn write. Reset the error and warn
+		 * about it.
+		 */
+		error = 0;
+		xfs_warn(log->l_mp,
+"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.",
+			 first_bad, *head_blk);
+
+		/*
+		 * Get the header block and buffer pointer for the last good
+		 * record before the bad record.
+		 *
+		 * Note that xlog_find_tail() clears the blocks at the new head
+		 * (i.e., the records with invalid CRC) if the cycle number
+		 * matches the current cycle.
+		 */
+		found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
+					      rhead_blk, rhead, wrapped);
+		if (found < 0)
+			return found;
+		if (found == 0)		/* XXX: right thing to do here? */
+			return -EIO;
+
+		/*
+		 * Reset the head block to the starting block of the first bad
+		 * log record and set the tail block based on the last good
+		 * record.
+		 *
+		 * Bail out if the updated head/tail match as this indicates
+		 * possible corruption outside of the acceptable
+		 * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair...
+		 */
+		*head_blk = first_bad;
+		*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
+		if (*head_blk == *tail_blk) {
+			ASSERT(0);
+			return 0;
+		}
+
+		/*
+		 * Now verify the tail based on the updated head. This is
+		 * required because the torn writes trimmed from the head could
+		 * have been written over the tail of a previous record. Return
+		 * any errors since recovery cannot proceed if the tail is
+		 * corrupt.
+		 *
+		 * XXX: This leaves a gap in truly robust protection from torn
+		 * writes in the log. If the head is behind the tail, the tail
+		 * pushes forward to create some space, and a crash then tears
+		 * the writes into the previous record's tail region, log
+		 * recovery isn't able to recover.
+		 *
+		 * How likely is this to occur? If possible, can we do something
+		 * more intelligent here? Is it safe to push the tail forward if
+		 * we can determine that the tail is within the range of the
+		 * torn write (e.g., the kernel can only overwrite the tail if
+		 * it has actually been pushed forward)? Alternatively, could we
+		 * somehow prevent this condition at runtime?
+		 */
+		error = xlog_verify_tail(log, *head_blk, *tail_blk);
+	}
+
+	return error;
+}
+
 /*
  * Find the sync block number or the tail of the log.
  *
@@ -893,13 +1241,13 @@ xlog_find_tail(
 	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
-	int			error, i, found;
+	int			error;
 	xfs_daddr_t		umount_data_blk;
 	xfs_daddr_t		after_umount_blk;
+	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
 	int			hblks;
-
-	found = 0;
+	bool			wrapped = false;
 
 	/*
 	 * Find previous log record
@@ -923,48 +1271,16 @@ xlog_find_tail(
 	}
 
 	/*
-	 * Search backwards looking for log record header block
+	 * Trim the head block back to skip over torn records. We can have
+	 * multiple log I/Os in flight at any time, so we assume CRC failures
+	 * back through the previous several records are torn writes and skip
+	 * them.
 	 */
 	ASSERT(*head_blk < INT_MAX);
-	for (i = (int)(*head_blk) - 1; i >= 0; i--) {
-		error = xlog_bread(log, i, 1, bp, &offset);
-		if (error)
-			goto done;
-
-		if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
-			found = 1;
-			break;
-		}
-	}
-	/*
-	 * If we haven't found the log record header block, start looking
-	 * again from the end of the physical log. XXXmiken: There should be
-	 * a check here to make sure we didn't search more than N blocks in
-	 * the previous code.
-	 */
-	if (!found) {
-		for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
-			error = xlog_bread(log, i, 1, bp, &offset);
-			if (error)
-				goto done;
-
-			if (*(__be32 *)offset ==
-				cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
-				found = 2;
-				break;
-			}
-		}
-	}
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		xlog_put_bp(bp);
-		ASSERT(0);
-		return -EIO;
-	}
-
-	/* find blk_no of tail of log */
-	rhead = (xlog_rec_header_t *)offset;
-	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
+	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
+				 &rhead, &wrapped);
+	if (error)
+		goto done;
 
 	/*
 	 * Reset log values according to the state of the log when we
@@ -976,10 +1292,10 @@ xlog_find_tail(
 	 * written was complete and ended exactly on the end boundary
 	 * of the physical log.
 	 */
-	log->l_prev_block = i;
+	log->l_prev_block = rhead_blk;
 	log->l_curr_block = (int)*head_blk;
 	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (found == 2)
+	if (wrapped)
 		log->l_curr_cycle++;
 	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
 	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
@@ -1014,12 +1330,13 @@ xlog_find_tail(
 	} else {
 		hblks = 1;
 	}
-	after_umount_blk = (i + hblks + (int)
-		BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
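+	/*
+	 * An unmount record, if present, occupies the block(s) immediately
+	 * after the last record header. Compute the block past it for
+	 * comparison against the head; both addresses may wrap around the
+	 * end of the physical log.
+	 */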
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
 	tail_lsn = atomic64_read(&log->l_tail_lsn);
 	if (*head_blk == after_umount_blk &&
 	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = (i + hblks) % log->l_logBBsize;
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
 		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
 		if (error)
 			goto done;
@@ -3204,6 +3521,7 @@ xlog_recover_dquot_ra_pass2(
 	struct xfs_disk_dquot	*recddq;
 	struct xfs_dq_logformat	*dq_f;
 	uint			type;
+	int			len;
 
 	if (mp->m_qflags == 0)
@@ -3224,8 +3542,12 @@ xlog_recover_dquot_ra_pass2(
 	ASSERT(dq_f);
 	ASSERT(dq_f->qlf_len == 1);
 
-	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
-			  XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
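+	/*
+	 * A transaction in the log may cancel this dquot buffer, in which
+	 * case it is never replayed; skip the readahead for cancelled
+	 * buffers.
+	 */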
+	len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
+	if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
+		return;
+
+	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
+			  &xfs_dquot_buf_ra_ops);
 }
 
 STATIC void
@@ -4118,26 +4440,69 @@ xlog_recover_process_iunlinks(
 	mp->m_dmevmask = mp_dmevmask;
 }
 
+STATIC int
+xlog_unpack_data(
+	struct xlog_rec_header	*rhead,
+	char			*dp,
+	struct xlog		*log)
+{
+	int			i, j, k;
+
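+	/*
+	 * When a log record is written, the cycle number is stamped into the
+	 * first four bytes of every basic block in the record and the
+	 * original contents are saved in h_cycle_data[]. Restore them here.
+	 */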
+	for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
+		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
+		*(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
+		dp += BBSIZE;
+	}
+
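+	/*
+	 * Records larger than XLOG_HEADER_CYCLE_SIZE carry the remaining
+	 * saved words in the extended headers that follow the main record
+	 * header.
+	 */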
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
+		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
+			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+			*(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
+			dp += BBSIZE;
+		}
+	}
+
+	return 0;
+}
+
 /*
- * Upack the log buffer data and crc check it. If the check fails, issue a
- * warning if and only if the CRC in the header is non-zero. This makes the
- * check an advisory warning, and the zero CRC check will prevent failure
- * warnings from being emitted when upgrading the kernel from one that does not
- * add CRCs by default.
- *
- * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
- * corruption failure
+ * CRC check, unpack and process a log record.
  */
 STATIC int
-xlog_unpack_data_crc(
+xlog_recover_process(
+	struct xlog		*log,
+	struct hlist_head	rhash[],
 	struct xlog_rec_header	*rhead,
 	char			*dp,
-	struct xlog		*log)
+	int			pass)
 {
+	int			error;
 	__le32			crc;
 
 	crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
-	if (crc != rhead->h_crc) {
+
+	/*
+	 * Nothing else to do if this is a CRC verification pass: return
+	 * EFSBADCRC if a record with a non-zero crc fails to match, so the
+	 * callers up the stack know precisely what failed. Unfortunately,
+	 * mkfs always sets h_crc to 0, so a zero crc must be considered
+	 * valid even on v5 supers.
+	 */
+	if (pass == XLOG_RECOVER_CRCPASS) {
+		if (rhead->h_crc && crc != rhead->h_crc)
+			return -EFSBADCRC;
+		return 0;
+	}
+
+	/*
+	 * We're in the normal recovery path. Issue a warning if and only if the
+	 * CRC in the header is non-zero. This is an advisory warning and the
+	 * zero CRC check prevents warnings from being emitted when upgrading
+	 * the kernel from one that does not add CRCs by default.
+	 */
+	if (crc != rhead->h_crc) {
 		if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
 			xfs_alert(log->l_mp,
 		"log record CRC mismatch: found 0x%x, expected 0x%x.",
@@ -4147,47 +4512,18 @@ xlog_unpack_data_crc(
 		}
 
 		/*
-		 * If we've detected a log record corruption, then we can't
-		 * recover past this point. Abort recovery if we are enforcing
-		 * CRC protection by punting an error back up the stack.
+		 * If the filesystem is CRC enabled, this mismatch becomes a
+		 * fatal log corruption failure.
 		 */
 		if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
 			return -EFSCORRUPTED;
 	}
 
-	return 0;
-}
-
-STATIC int
-xlog_unpack_data(
-	struct xlog_rec_header	*rhead,
-	char			*dp,
-	struct xlog		*log)
-{
-	int			i, j, k;
-	int			error;
-
-	error = xlog_unpack_data_crc(rhead, dp, log);
+	error = xlog_unpack_data(rhead, dp, log);
 	if (error)
 		return error;
 
-	for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
-		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
-		*(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
-		dp += BBSIZE;
-	}
-
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
-		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
-			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-			*(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
-			dp += BBSIZE;
-		}
-	}
-
-	return 0;
+	return xlog_recover_process_data(log, rhash, rhead, dp, pass);
 }
 
 STATIC int
@@ -4239,18 +4575,21 @@ xlog_do_recovery_pass(
 	struct xlog		*log,
 	xfs_daddr_t		head_blk,
 	xfs_daddr_t		tail_blk,
-	int			pass)
+	int			pass,
+	xfs_daddr_t		*first_bad)	/* out: first bad log rec */
 {
 	xlog_rec_header_t	*rhead;
 	xfs_daddr_t		blk_no;
+	xfs_daddr_t		rhead_blk;
 	char			*offset;
 	xfs_buf_t		*hbp, *dbp;
-	int			error = 0, h_size;
+	int			error = 0, h_size, h_len;
 	int			bblks, split_bblks;
 	int			hblks, split_hblks, wrapped_hblks;
 	struct hlist_head	rhash[XLOG_RHASH_SIZE];
 
 	ASSERT(head_blk != tail_blk);
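+	/*
+	 * rhead_blk tracks the start block of the record currently being
+	 * processed, so a failing CRC pass can report it via first_bad.
+	 */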
+	rhead_blk = 0;
 
 	/*
 	 * Read the header of the tail block and get the iclog buffer size from
@@ -4274,7 +4613,31 @@ xlog_do_recovery_pass(
 	error = xlog_valid_rec_header(log, rhead, tail_blk);
 	if (error)
 		goto bread_err1;
+
+	/*
+	 * xfsprogs has a bug where record length is based on lsunit but
+	 * h_size (iclog size) is hardcoded to 32k. Now that we
+	 * unconditionally CRC verify the unmount record, this means the
+	 * log buffer can be too small for the record and cause an
+	 * overrun.
+	 *
+	 * Detect this condition here. Use lsunit for the buffer size as
+	 * long as this looks like the mkfs case. Otherwise, return an
+	 * error to avoid a buffer overrun.
+	 */
 	h_size = be32_to_cpu(rhead->h_size);
+	h_len = be32_to_cpu(rhead->h_len);
+	if (h_len > h_size) {
+		if (h_len <= log->l_mp->m_logbsize &&
+		    be32_to_cpu(rhead->h_num_logops) == 1) {
+			xfs_warn(log->l_mp,
+		"invalid iclog size (%d bytes), using lsunit (%d bytes)",
+				 h_size, log->l_mp->m_logbsize);
+			h_size = log->l_mp->m_logbsize;
+		} else
+			return -EFSCORRUPTED;
+	}
+
 	if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
 	    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
 		hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -4301,7 +4664,7 @@ xlog_do_recovery_pass(
 	}
 
 	memset(rhash, 0, sizeof(rhash));
-	blk_no = tail_blk;
+	blk_no = rhead_blk = tail_blk;
 	if (tail_blk > head_blk) {
 		/*
 		 * Perform recovery around the end of the physical log.
@@ -4408,19 +4771,18 @@ xlog_do_recovery_pass(
 				goto bread_err2;
 			}
 
-			error = xlog_unpack_data(rhead, offset, log);
+			error = xlog_recover_process(log, rhash, rhead, offset,
+						     pass);
 			if (error)
 				goto bread_err2;
 
-			error = xlog_recover_process_data(log, rhash,
-							rhead, offset, pass);
-			if (error)
-				goto bread_err2;
 			blk_no += bblks;
+			rhead_blk = blk_no;
 		}
 
 		ASSERT(blk_no >= log->l_logBBsize);
 		blk_no -= log->l_logBBsize;
+		rhead_blk = blk_no;
 	}
 
 	/* read first part of physical log */
@@ -4441,21 +4803,22 @@ xlog_do_recovery_pass(
 		if (error)
 			goto bread_err2;
 
-		error = xlog_unpack_data(rhead, offset, log);
+		error = xlog_recover_process(log, rhash, rhead, offset, pass);
 		if (error)
 			goto bread_err2;
 
-		error = xlog_recover_process_data(log, rhash,
-						rhead, offset, pass);
-		if (error)
-			goto bread_err2;
 		blk_no += bblks + hblks;
+		rhead_blk = blk_no;
 	}
 
 bread_err2:
	xlog_put_bp(dbp);
 bread_err1:
 	xlog_put_bp(hbp);
+
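+	/*
+	 * On failure, report the start block of the record that was being
+	 * processed so CRC pass callers can walk the head back to the last
+	 * good record.
+	 */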
+	if (error && first_bad)
+		*first_bad = rhead_blk;
+
 	return error;
 }
|
|
|
|
@@ -4493,7 +4856,7 @@ xlog_do_log_recovery(
|
|
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
|
|
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
|
|
|
|
|
|
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
|
|
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
|
|
- XLOG_RECOVER_PASS1);
|
|
|
|
|
|
+ XLOG_RECOVER_PASS1, NULL);
|
|
if (error != 0) {
|
|
if (error != 0) {
|
|
kmem_free(log->l_buf_cancel_table);
|
|
kmem_free(log->l_buf_cancel_table);
|
|
log->l_buf_cancel_table = NULL;
|
|
log->l_buf_cancel_table = NULL;
|
|
@@ -4504,7 +4867,7 @@ xlog_do_log_recovery(
|
|
* When it is complete free the table of buf cancel items.
|
|
* When it is complete free the table of buf cancel items.
|
|
*/
|
|
*/
|
|
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
|
|
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
|
|
- XLOG_RECOVER_PASS2);
|
|
|
|
|
|
+ XLOG_RECOVER_PASS2, NULL);
|
|
#ifdef DEBUG
|
|
#ifdef DEBUG
|
|
if (!error) {
|
|
if (!error) {
|
|
int i;
|
|
int i;
|