[packages/kernel] - xfs hang fix
arekm
arekm at pld-linux.org
Sat Aug 9 00:13:13 CEST 2014
commit fc3fc2cf178dbaef089a75182333d8c707a6a32f
Author: Arkadiusz Miśkiewicz <arekm at maven.pl>
Date: Sat Aug 9 00:12:58 2014 +0200
- xfs hang fix
kernel-small_fixes.patch | 138 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 138 insertions(+)
---
diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch
index 392d121..59f4a4b 100644
--- a/kernel-small_fixes.patch
+++ b/kernel-small_fixes.patch
@@ -46,3 +46,141 @@ index 7a0c800..ec5ebbb 100644
SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops);
dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
+commit 110dc24ad2ae4e9b94b08632fe1eb2fcdff83045
+Author: Dave Chinner <dchinner at redhat.com>
+Date: Tue May 20 08:18:09 2014 +1000
+
+ xfs: log vector rounding leaks log space
+
+ The addition of direct formatting of log items into the CIL
+ linear buffer added alignment restrictions that the start of each
+ vector needed to be 64 bit aligned. Hence padding was added in
+ xlog_finish_iovec() to round up the vector length to ensure the next
+ vector started with the correct alignment.
+
+ This adds a small number of bytes to the size of
+ the linear buffer that is otherwise unused. The issue is that we
+ then use the linear buffer size to determine the log space used by
+ the log item, and this includes the unused space. Hence when we
+ account for space used by the log item, it's more than is actually
+ written into the iclogs, and hence we slowly leak this space.
+
+ This results on log hangs when reserving space, with threads getting
+ stuck with these stack traces:
+
+ Call Trace:
+ [<ffffffff81d15989>] schedule+0x29/0x70
+ [<ffffffff8150d3a2>] xlog_grant_head_wait+0xa2/0x1a0
+ [<ffffffff8150d55d>] xlog_grant_head_check+0xbd/0x140
+ [<ffffffff8150ee33>] xfs_log_reserve+0x103/0x220
+ [<ffffffff814b7f05>] xfs_trans_reserve+0x2f5/0x310
+ .....
+
+ The 4 bytes is significant. Brain Foster did all the hard work in
+ tracking down a reproducable leak to inode chunk allocation (it went
+ away with the ikeep mount option). His rough numbers were that
+ creating 50,000 inodes leaked 11 log blocks. This turns out to be
+ roughly 800 inode chunks or 1600 inode cluster buffers. That
+ works out at roughly 4 bytes per cluster buffer logged, and at that
+ I started looking for a 4 byte leak in the buffer logging code.
+
+ What I found was that a struct xfs_buf_log_format structure for an
+ inode cluster buffer is 28 bytes in length. This gets rounded up to
+ 32 bytes, but the vector length remains 28 bytes. Hence the CIL
+ ticket reservation is decremented by 32 bytes (via lv->lv_buf_len)
+ for that vector rather than 28 bytes which are written into the log.
+
+ The fix for this problem is to separately track the bytes used by
+ the log vectors in the item and use that instead of the buffer
+ length when accounting for the log space that will be used by the
+ formatted log item.
+
+ Again, thanks to Brian Foster for doing all the hard work and long
+ hours to isolate this leak and make finding the bug relatively
+ simple.
+
+ Signed-off-by: Dave Chinner <dchinner at redhat.com>
+ Reviewed-by: Christoph Hellwig <hch at lst.de>
+ Reviewed-by: Brian Foster <bfoster at redhat.com>
+ Signed-off-by: Dave Chinner <david at fromorbit.com>
+
+diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
+index 2c40044..84e0deb 100644
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -24,7 +24,8 @@ struct xfs_log_vec {
+ struct xfs_log_iovec *lv_iovecp; /* iovec array */
+ struct xfs_log_item *lv_item; /* owner */
+ char *lv_buf; /* formatted buffer */
+- int lv_buf_len; /* size of formatted buffer */
++ int lv_bytes; /* accounted space in buffer */
++ int lv_buf_len; /* aligned size of buffer */
+ int lv_size; /* size of allocated lv */
+ };
+
+@@ -52,15 +53,21 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
+ return vec->i_addr;
+ }
+
++/*
++ * We need to make sure the next buffer is naturally aligned for the biggest
++ * basic data type we put into it. We already accounted for this padding when
++ * sizing the buffer.
++ *
++ * However, this padding does not get written into the log, and hence we have to
++ * track the space used by the log vectors separately to prevent log space hangs
++ * due to inaccurate accounting (i.e. a leak) of the used log space through the
++ * CIL context ticket.
++ */
+ static inline void
+ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
+ {
+- /*
+- * We need to make sure the next buffer is naturally aligned for the
+- * biggest basic data type we put into it. We already accounted for
+- * this when sizing the buffer.
+- */
+ lv->lv_buf_len += round_up(len, sizeof(uint64_t));
++ lv->lv_bytes += len;
+ vec->i_len = len;
+ }
+
+diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
+index 7e54553..de835da 100644
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -97,7 +97,7 @@ xfs_cil_prepare_item(
+ {
+ /* Account for the new LV being passed in */
+ if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+- *diff_len += lv->lv_buf_len;
++ *diff_len += lv->lv_bytes;
+ *diff_iovecs += lv->lv_niovecs;
+ }
+
+@@ -111,7 +111,7 @@ xfs_cil_prepare_item(
+ else if (old_lv != lv) {
+ ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+
+- *diff_len -= old_lv->lv_buf_len;
++ *diff_len -= old_lv->lv_bytes;
+ *diff_iovecs -= old_lv->lv_niovecs;
+ kmem_free(old_lv);
+ }
+@@ -239,7 +239,7 @@ xlog_cil_insert_format_items(
+ * that the space reservation accounting is correct.
+ */
+ *diff_iovecs -= lv->lv_niovecs;
+- *diff_len -= lv->lv_buf_len;
++ *diff_len -= lv->lv_bytes;
+ } else {
+ /* allocate new data chunk */
+ lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+@@ -259,6 +259,7 @@ xlog_cil_insert_format_items(
+
+ /* The allocated data region lies beyond the iovec region */
+ lv->lv_buf_len = 0;
++ lv->lv_bytes = 0;
+ lv->lv_buf = (char *)lv + buf_size - nbytes;
+ ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
+
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/kernel.git/commitdiff/fc3fc2cf178dbaef089a75182333d8c707a6a32f
More information about the pld-cvs-commit
mailing list