[packages/kernel/LINUX_3_14] - xfs: log vector rounding leaks log space patch applied upstream

baggins baggins at pld-linux.org
Sun Aug 17 10:40:58 CEST 2014


commit ff7ccd27b120a2ef78b4a1aefdc9672c67efdc93
Author: Jan Rękorajski <baggins at pld-linux.org>
Date:   Sun Aug 17 10:39:41 2014 +0200

    - xfs: log vector rounding leaks log space patch applied upstream

 kernel-small_fixes.patch | 138 -----------------------------------------------
 1 file changed, 138 deletions(-)
---
diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch
index 6199b0a..c64eee1 100644
--- a/kernel-small_fixes.patch
+++ b/kernel-small_fixes.patch
@@ -113,141 +113,3 @@ index 4e565c8..732648b 100644
 -- 
 cgit v0.10.1
 
-commit 110dc24ad2ae4e9b94b08632fe1eb2fcdff83045
-Author: Dave Chinner <dchinner at redhat.com>
-Date:   Tue May 20 08:18:09 2014 +1000
-
-    xfs: log vector rounding leaks log space
-    
-    The addition of direct formatting of log items into the CIL
-    linear buffer added alignment restrictions that the start of each
-    vector needed to be 64 bit aligned. Hence padding was added in
-    xlog_finish_iovec() to round up the vector length to ensure the next
-    vector started with the correct alignment.
-    
-    This adds a small number of bytes to the size of
-    the linear buffer that is otherwise unused. The issue is that we
-    then use the linear buffer size to determine the log space used by
-    the log item, and this includes the unused space. Hence when we
-    account for space used by the log item, it's more than is actually
-    written into the iclogs, and hence we slowly leak this space.
-    
-    This results on log hangs when reserving space, with threads getting
-    stuck with these stack traces:
-    
-    Call Trace:
-    [<ffffffff81d15989>] schedule+0x29/0x70
-    [<ffffffff8150d3a2>] xlog_grant_head_wait+0xa2/0x1a0
-    [<ffffffff8150d55d>] xlog_grant_head_check+0xbd/0x140
-    [<ffffffff8150ee33>] xfs_log_reserve+0x103/0x220
-    [<ffffffff814b7f05>] xfs_trans_reserve+0x2f5/0x310
-    .....
-    
-    The 4 bytes is significant. Brain Foster did all the hard work in
-    tracking down a reproducable leak to inode chunk allocation (it went
-    away with the ikeep mount option). His rough numbers were that
-    creating 50,000 inodes leaked 11 log blocks. This turns out to be
-    roughly 800 inode chunks or 1600 inode cluster buffers. That
-    works out at roughly 4 bytes per cluster buffer logged, and at that
-    I started looking for a 4 byte leak in the buffer logging code.
-    
-    What I found was that a struct xfs_buf_log_format structure for an
-    inode cluster buffer is 28 bytes in length. This gets rounded up to
-    32 bytes, but the vector length remains 28 bytes. Hence the CIL
-    ticket reservation is decremented by 32 bytes (via lv->lv_buf_len)
-    for that vector rather than 28 bytes which are written into the log.
-    
-    The fix for this problem is to separately track the bytes used by
-    the log vectors in the item and use that instead of the buffer
-    length when accounting for the log space that will be used by the
-    formatted log item.
-    
-    Again, thanks to Brian Foster for doing all the hard work and long
-    hours to isolate this leak and make finding the bug relatively
-    simple.
-    
-    Signed-off-by: Dave Chinner <dchinner at redhat.com>
-    Reviewed-by: Christoph Hellwig <hch at lst.de>
-    Reviewed-by: Brian Foster <bfoster at redhat.com>
-    Signed-off-by: Dave Chinner <david at fromorbit.com>
-
-diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
-index 2c40044..84e0deb 100644
---- a/fs/xfs/xfs_log.h
-+++ b/fs/xfs/xfs_log.h
-@@ -24,7 +24,8 @@ struct xfs_log_vec {
- 	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
- 	struct xfs_log_item	*lv_item;	/* owner */
- 	char			*lv_buf;	/* formatted buffer */
--	int			lv_buf_len;	/* size of formatted buffer */
-+	int			lv_bytes;	/* accounted space in buffer */
-+	int			lv_buf_len;	/* aligned size of buffer */
- 	int			lv_size;	/* size of allocated lv */
- };
- 
-@@ -52,15 +53,21 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
- 	return vec->i_addr;
- }
- 
-+/*
-+ * We need to make sure the next buffer is naturally aligned for the biggest
-+ * basic data type we put into it.  We already accounted for this padding when
-+ * sizing the buffer.
-+ *
-+ * However, this padding does not get written into the log, and hence we have to
-+ * track the space used by the log vectors separately to prevent log space hangs
-+ * due to inaccurate accounting (i.e. a leak) of the used log space through the
-+ * CIL context ticket.
-+ */
- static inline void
- xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
- {
--	/*
--	 * We need to make sure the next buffer is naturally aligned for the
--	 * biggest basic data type we put into it.  We already accounted for
--	 * this when sizing the buffer.
--	 */
- 	lv->lv_buf_len += round_up(len, sizeof(uint64_t));
-+	lv->lv_bytes += len;
- 	vec->i_len = len;
- }
- 
-diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
-index 7e54553..de835da 100644
---- a/fs/xfs/xfs_log_cil.c
-+++ b/fs/xfs/xfs_log_cil.c
-@@ -97,7 +97,7 @@ xfs_cil_prepare_item(
- {
- 	/* Account for the new LV being passed in */
- 	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
--		*diff_len += lv->lv_buf_len;
-+		*diff_len += lv->lv_bytes;
- 		*diff_iovecs += lv->lv_niovecs;
- 	}
- 
-@@ -111,7 +111,7 @@ xfs_cil_prepare_item(
- 	else if (old_lv != lv) {
- 		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
- 
--		*diff_len -= old_lv->lv_buf_len;
-+		*diff_len -= old_lv->lv_bytes;
- 		*diff_iovecs -= old_lv->lv_niovecs;
- 		kmem_free(old_lv);
- 	}
-@@ -239,7 +239,7 @@ xlog_cil_insert_format_items(
- 			 * that the space reservation accounting is correct.
- 			 */
- 			*diff_iovecs -= lv->lv_niovecs;
--			*diff_len -= lv->lv_buf_len;
-+			*diff_len -= lv->lv_bytes;
- 		} else {
- 			/* allocate new data chunk */
- 			lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
-@@ -259,6 +259,7 @@ xlog_cil_insert_format_items(
- 
- 		/* The allocated data region lies beyond the iovec region */
- 		lv->lv_buf_len = 0;
-+		lv->lv_bytes = 0;
- 		lv->lv_buf = (char *)lv + buf_size - nbytes;
- 		ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
- 
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/kernel.git/commitdiff/ff7ccd27b120a2ef78b4a1aefdc9672c67efdc93



More information about the pld-cvs-commit mailing list