[packages/kernel/LINUX_4_1] - v3 version of the patch (split into two chunks)

arekm arekm at pld-linux.org
Mon Jan 11 07:27:39 CET 2016


commit 1977e358d7684f4649cfef206687024a028adfc9
Author: Arkadiusz Miśkiewicz <arekm at maven.pl>
Date:   Mon Jan 11 07:27:30 2016 +0100

    - v3 version of the patch (split into two chunks)

 kernel-small_fixes.patch | 177 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 140 insertions(+), 37 deletions(-)
---
diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch
index 1478ec4..406d677 100644
--- a/kernel-small_fixes.patch
+++ b/kernel-small_fixes.patch
@@ -228,6 +228,132 @@ cgit v0.11.2
 
 From: Dave Chinner <dchinner at redhat.com>
 
+When we do inode readahead in log recovery, we do can do the
+readahead before we've replayed the icreate transaction that stamps
+the buffer with inode cores. The inode readahead verifier catches
+this and marks the buffer as !done to indicate that it doesn't yet
+contain valid inodes.
+
+In adding buffer error notification  (i.e. setting b_error = -EIO at
+the same time as as we clear the done flag) to such a readahead
+verifier failure, we can then get subsequent inode recovery failing
+with this error:
+
+XFS (dm-0): metadata I/O error: block 0xa00060 ("xlog_recover_do..(read#2)") error 5 numblks 32
+
+This occurs when readahead completion races with icreate item replay
+such as:
+
+	inode readahead
+		find buffer
+		lock buffer
+		submit RA io
+	....
+	icreate recovery
+	    xfs_trans_get_buffer
+		find buffer
+		lock buffer
+		<blocks on RA completion>
+	.....
+	<ra completion>
+		fails verifier
+		clear XBF_DONE
+		set bp->b_error = -EIO
+		release and unlock buffer
+	<icreate gains lock>
+	icreate initialises buffer
+	marks buffer as done
+	adds buffer to delayed write queue
+	releases buffer
+
+At this point, we have an initialised inode buffer that is up to
+date but has an -EIO state registered against it. When we finally
+get to recovering an inode in that buffer:
+
+	inode item recovery
+	    xfs_trans_read_buffer
+		find buffer
+		lock buffer
+		sees XBF_DONE is set, returns buffer
+	    sees bp->b_error is set
+		fail log recovery!
+
+Essentially, we need xfs_trans_get_buf_map() to clear the error status of
+the buffer when doing a lookup. This function returns uninitialised
+buffers, so the buffer returned can not be in an error state and
+none of the code that uses this function expects b_error to be set
+on return. Indeed, there is an ASSERT(!bp->b_error); in the
+transaction case in xfs_trans_get_buf_map() that would have caught
+this if log recovery used transactions....
+
+This patch firstly changes the inode readahead failure to set -EIO
+on the buffer, and secondly changes xfs_buf_get_map() to never
+return a buffer with an error state set so this first change doesn't
+cause unexpected log recovery failures.
+
+Signed-off-by: Dave Chinner <dchinner at redhat.com>
+---
+ fs/xfs/libxfs/xfs_inode_buf.c | 12 +++++++-----
+ fs/xfs/xfs_buf.c              |  7 +++++++
+ 2 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
+index 1b8d98a..ff17c48 100644
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -62,11 +62,12 @@ xfs_inobp_check(
+  * has not had the inode cores stamped into it. Hence for readahead, the buffer
+  * may be potentially invalid.
+  *
+- * If the readahead buffer is invalid, we don't want to mark it with an error,
+- * but we do want to clear the DONE status of the buffer so that a followup read
+- * will re-read it from disk. This will ensure that we don't get an unnecessary
+- * warnings during log recovery and we don't get unnecssary panics on debug
+- * kernels.
++ * If the readahead buffer is invalid, we need to mark it with an error and
++ * clear the DONE status of the buffer so that a followup read will re-read it
++ * from disk. We don't report the error otherwise to avoid warnings during log
++ * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
++ * because all we want to do is say readahead failed; there is no-one to report
++ * the error to, so this will distinguish it from a non-ra verifier failure.
+  */
+ static void
+ xfs_inode_buf_verify(
+@@ -93,6 +94,7 @@ xfs_inode_buf_verify(
+ 						XFS_RANDOM_ITOBP_INOTOBP))) {
+ 			if (readahead) {
+ 				bp->b_flags &= ~XBF_DONE;
++				xfs_buf_ioerror(bp, -EIO);
+ 				return;
+ 			}
+ 
+diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
+index 45a8ea7..ae86b16 100644
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -604,6 +604,13 @@ found:
+ 		}
+ 	}
+ 
++	/*
++	 * Clear b_error if this is a lookup from a caller that doesn't expect
++	 * valid data to be found in the buffer.
++	 */
++	if (!(flags & XBF_READ))
++		xfs_buf_ioerror(bp, 0);
++
+ 	XFS_STATS_INC(xb_get);
+ 	trace_xfs_buf_get(bp, flags, _RET_IP_);
+ 	return bp;
+-- 
+2.5.0
+
+_______________________________________________
+xfs mailing list
+xfs at oss.sgi.com
+http://oss.sgi.com/mailman/listinfo/xfs
+From: Dave Chinner <dchinner at redhat.com>
+
 When we do dquot readahead in log recovery, we do not use a verifier
 as the underlying buffer may not have dquots in it. e.g. the
 allocation operation hasn't yet been replayed. Hence we do not want
@@ -266,21 +392,15 @@ together so we don't forget this behavioural link in future.
 
 cc: <stable at vger.kernel.org> # 3.12 - current
 Signed-off-by: Dave Chinner <dchinner at redhat.com>
+Reviewed-by: Brian Foster <bfoster at redhat.com>
+Signed-off-by: Dave Chinner <david at fromorbit.com>
 ---
-
-Version 2
-- fix logic error in determining if verify failed
-- set error on buffer when verifier fails
-- fix inode buffer readahead verifier to set error when it fails
-- better comments, link dquot and inode buffer ra verifiers in the
-  comments
-
  fs/xfs/libxfs/xfs_dquot_buf.c  | 36 ++++++++++++++++++++++++++++++------
- fs/xfs/libxfs/xfs_inode_buf.c  | 14 +++++++++-----
+ fs/xfs/libxfs/xfs_inode_buf.c  |  2 ++
  fs/xfs/libxfs/xfs_quota_defs.h |  2 +-
  fs/xfs/libxfs/xfs_shared.h     |  1 +
  fs/xfs/xfs_log_recover.c       |  9 +++++++--
- 5 files changed, 48 insertions(+), 14 deletions(-)
+ 5 files changed, 41 insertions(+), 9 deletions(-)
 
 diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
 index 11cefb2..3cc3cf7 100644
@@ -369,37 +489,18 @@ index 11cefb2..3cc3cf7 100644
 +	.verify_write = xfs_dquot_buf_write_verify,
 +};
 diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
-index 1b8d98a..4816209 100644
+index ff17c48..1aabfda 100644
 --- a/fs/xfs/libxfs/xfs_inode_buf.c
 +++ b/fs/xfs/libxfs/xfs_inode_buf.c
-@@ -62,11 +62,14 @@ xfs_inobp_check(
-  * has not had the inode cores stamped into it. Hence for readahead, the buffer
-  * may be potentially invalid.
-  *
-- * If the readahead buffer is invalid, we don't want to mark it with an error,
-- * but we do want to clear the DONE status of the buffer so that a followup read
-- * will re-read it from disk. This will ensure that we don't get an unnecessary
-- * warnings during log recovery and we don't get unnecssary panics on debug
-- * kernels.
-+ * If the readahead buffer is invalid, we need to mark it with an error and
-+ * clear the DONE status of the buffer so that a followup read will re-read it
-+ * from disk. We don't report the error otherwise to avoid warnings during log
-+ * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
-+ * because all we want to do is say readahead failed; there is no-one to report
-+ * the error to, so this will distinguish it from a non-ra verifier failure.
+@@ -68,6 +68,8 @@ xfs_inobp_check(
+  * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
+  * because all we want to do is say readahead failed; there is no-one to report
+  * the error to, so this will distinguish it from a non-ra verifier failure.
 + * Changes to this readahead error behavour also need to be reflected in
 + * xfs_dquot_buf_readahead_verify().
   */
  static void
  xfs_inode_buf_verify(
-@@ -92,6 +95,7 @@ xfs_inode_buf_verify(
- 						XFS_ERRTAG_ITOBP_INOTOBP,
- 						XFS_RANDOM_ITOBP_INOTOBP))) {
- 			if (readahead) {
-+				xfs_buf_ioerror(bp, -EIO);
- 				bp->b_flags &= ~XBF_DONE;
- 				return;
- 			}
 diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
 index 1b0a083..f51078f 100644
 --- a/fs/xfs/libxfs/xfs_quota_defs.h
@@ -426,10 +527,10 @@ index 5be5297..15c3ceb 100644
  extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
  extern const struct xfs_buf_ops xfs_symlink_buf_ops;
 diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
-index 26e67b4..da37beb 100644
+index c5ecaac..5991cdc 100644
 --- a/fs/xfs/xfs_log_recover.c
 +++ b/fs/xfs/xfs_log_recover.c
-@@ -3521,6 +3521,7 @@ xlog_recover_dquot_ra_pass2(
+@@ -3204,6 +3204,7 @@ xlog_recover_dquot_ra_pass2(
  	struct xfs_disk_dquot	*recddq;
  	struct xfs_dq_logformat	*dq_f;
  	uint			type;
@@ -437,7 +538,7 @@ index 26e67b4..da37beb 100644
  
  
  	if (mp->m_qflags == 0)
-@@ -3541,8 +3542,12 @@ xlog_recover_dquot_ra_pass2(
+@@ -3224,8 +3225,12 @@ xlog_recover_dquot_ra_pass2(
  	ASSERT(dq_f);
  	ASSERT(dq_f->qlf_len == 1);
  
@@ -452,6 +553,8 @@ index 26e67b4..da37beb 100644
  }
  
  STATIC void
+-- 
+2.5.0
 
 _______________________________________________
 xfs mailing list
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/kernel.git/commitdiff/1977e358d7684f4649cfef206687024a028adfc9



More information about the pld-cvs-commit mailing list