packages: kernel/kernel-xfs-delaylog.patch (NEW) - fixes from .38 and enabl...
arekm
arekm at pld-linux.org
Thu Apr 7 20:46:18 CEST 2011
Author: arekm Date: Thu Apr 7 18:46:18 2011 GMT
Module: packages Tag: HEAD
---- Log message:
- fixes from .38 and enable delaylog by default
---- Files affected:
packages/kernel:
kernel-xfs-delaylog.patch (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: packages/kernel/kernel-xfs-delaylog.patch
diff -u /dev/null packages/kernel/kernel-xfs-delaylog.patch:1.1
--- /dev/null Thu Apr 7 20:46:18 2011
+++ packages/kernel/kernel-xfs-delaylog.patch Thu Apr 7 20:46:13 2011
@@ -0,0 +1,526 @@
+commit 0e57f6a36f9be03e5abb755f524ee91c4aebe854
+Author: Dave Chinner <dchinner at redhat.com>
+Date: Mon Dec 20 12:02:19 2010 +1100
+
+ xfs: bulk AIL insertion during transaction commit
+
+ When inserting items into the AIL from the transaction committed
+ callbacks, we take the AIL lock for every single item that is to be
+ inserted. For a CIL checkpoint commit, this can be tens of thousands
+ of individual inserts, yet almost all of the items will be inserted
+ at the same point in the AIL because they have the same index.
+
+ To reduce the overhead and contention on the AIL lock for such
+ operations, introduce a "bulk insert" operation which allows a list
+ of log items with the same LSN to be inserted in a single operation
+ via a list splice. To do this, we need to pre-sort the log items
+ being committed into a temporary list for insertion.
+
+ The complexity is that not every log item will end up with the same
+ LSN, and not every item is actually inserted into the AIL. Items
+ that don't match the commit LSN will be inserted and unpinned as per
+ the current one-at-a-time method (relatively rare), while items that
+ are not to be inserted will be unpinned and freed immediately. Items
+ that are to be inserted at the given commit lsn are placed in a
+ temporary array and inserted into the AIL in bulk each time the
+ array fills up.
+
+ As a result of this, we trade off AIL hold time for a significant
+ reduction in traffic. lock_stat output shows that the worst case
+ hold time is unchanged, but contention from AIL inserts drops by an
+ order of magnitude and the number of lock traversals decreases
+ significantly.
+
+ Signed-off-by: Dave Chinner <dchinner at redhat.com>
+ Reviewed-by: Christoph Hellwig <hch at lst.de>
+
+diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
+index 23d6ceb..f36f1a2 100644
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -361,15 +361,10 @@ xlog_cil_committed(
+ int abort)
+ {
+ struct xfs_cil_ctx *ctx = args;
+- struct xfs_log_vec *lv;
+- int abortflag = abort ? XFS_LI_ABORTED : 0;
+ struct xfs_busy_extent *busyp, *n;
+
+- /* unpin all the log items */
+- for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
+- xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
+- abortflag);
+- }
++ xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
++ ctx->start_lsn, abort);
+
+ list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
+ xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
+diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
+index f6d956b..f80a067 100644
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs(
+ * they could be immediately flushed and we'd have to race with the flusher
+ * trying to pull the item from the AIL as we add it.
+ */
+-void
++static void
+ xfs_trans_item_committed(
+ struct xfs_log_item *lip,
+ xfs_lsn_t commit_lsn,
+@@ -1425,6 +1425,83 @@ xfs_trans_committed(
+ xfs_trans_free(tp);
+ }
+
++static inline void
++xfs_log_item_batch_insert(
++ struct xfs_ail *ailp,
++ struct xfs_log_item **log_items,
++ int nr_items,
++ xfs_lsn_t commit_lsn)
++{
++ int i;
++
++ spin_lock(&ailp->xa_lock);
++ /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
++ xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
++
++ for (i = 0; i < nr_items; i++)
++ IOP_UNPIN(log_items[i], 0);
++}
++
++/*
++ * Bulk operation version of xfs_trans_committed that takes a log vector of
++ * items to insert into the AIL. This uses bulk AIL insertion techniques to
++ * minimise lock traffic.
++ */
++void
++xfs_trans_committed_bulk(
++ struct xfs_ail *ailp,
++ struct xfs_log_vec *log_vector,
++ xfs_lsn_t commit_lsn,
++ int aborted)
++{
++#define LOG_ITEM_BATCH_SIZE 32
++ struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
++ struct xfs_log_vec *lv;
++ int i = 0;
++
++ /* unpin all the log items */
++ for (lv = log_vector; lv; lv = lv->lv_next ) {
++ struct xfs_log_item *lip = lv->lv_item;
++ xfs_lsn_t item_lsn;
++
++ if (aborted)
++ lip->li_flags |= XFS_LI_ABORTED;
++ item_lsn = IOP_COMMITTED(lip, commit_lsn);
++
++ /* item_lsn of -1 means the item was freed */
++ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
++ continue;
++
++ if (item_lsn != commit_lsn) {
++
++ /*
++ * Not a bulk update option due to unusual item_lsn.
++ * Push into AIL immediately, rechecking the lsn once
++ * we have the ail lock. Then unpin the item.
++ */
++ spin_lock(&ailp->xa_lock);
++ if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
++ xfs_trans_ail_update(ailp, lip, item_lsn);
++ else
++ spin_unlock(&ailp->xa_lock);
++ IOP_UNPIN(lip, 0);
++ continue;
++ }
++
++ /* Item is a candidate for bulk AIL insert. */
++ log_items[i++] = lv->lv_item;
++ if (i >= LOG_ITEM_BATCH_SIZE) {
++ xfs_log_item_batch_insert(ailp, log_items,
++ LOG_ITEM_BATCH_SIZE, commit_lsn);
++ i = 0;
++ }
++ }
++
++ /* make sure we insert the remainder! */
++ if (i)
++ xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
++}
++
+ /*
+ * Called from the trans_commit code when we notice that
+ * the filesystem is in the middle of a forced shutdown.
+diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
+index 645928c..fe991a7 100644
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -29,6 +29,7 @@
+ #include "xfs_error.h"
+
+ STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
++STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
+ STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
+ STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
+ STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
+@@ -502,6 +503,79 @@ xfs_trans_ail_update(
+ } /* xfs_trans_update_ail */
+
+ /*
++ * xfs_trans_ail_update_bulk - bulk AIL insertion operation.
++ *
++ * @xfs_trans_ail_update_bulk takes an array of log items that all need to
++ * be positioned at the same LSN in the AIL. If an item is not in the AIL, it
++ * will be added. Otherwise, it will be repositioned by removing it and
++ * re-adding it to the AIL. If we move the first item in the AIL, update the
++ * log tail to match the new minimum LSN in the AIL.
++ *
++ * This function executes the update operations on all the items in the
++ * array under a single hold of the AIL lock, which the caller must already
++ * have taken. Because the items were selected before the lock was taken, we
++ * need to check each log item LSN to confirm it needs to be moved forward.
++ *
++ * To optimise the insert operation, we delete all the items from the AIL in
++ * the first pass, moving them into a temporary list, then splice the temporary
++ * list into the correct position in the AIL. This avoids needing to do an
++ * insert operation on every item.
++ *
++ * This function must be called with the AIL lock held. The lock is dropped
++ * before returning.
++ */
++void
++xfs_trans_ail_update_bulk(
++ struct xfs_ail *ailp,
++ struct xfs_log_item **log_items,
++ int nr_items,
++ xfs_lsn_t lsn) __releases(ailp->xa_lock)
++{
++ xfs_log_item_t *mlip;
++ xfs_lsn_t tail_lsn;
++ int mlip_changed = 0;
++ int i;
++ LIST_HEAD(tmp);
++
++ mlip = xfs_ail_min(ailp);
++
++ for (i = 0; i < nr_items; i++) {
++ struct xfs_log_item *lip = log_items[i];
++ if (lip->li_flags & XFS_LI_IN_AIL) {
++ /* check if we really need to move the item */
++ if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
++ continue;
++
++ xfs_ail_delete(ailp, lip);
++ if (mlip == lip)
++ mlip_changed = 1;
++ } else {
++ lip->li_flags |= XFS_LI_IN_AIL;
++ }
++ lip->li_lsn = lsn;
++ list_add(&lip->li_ail, &tmp);
++ }
++
++ xfs_ail_splice(ailp, &tmp, lsn);
++
++ if (!mlip_changed) {
++ spin_unlock(&ailp->xa_lock);
++ return;
++ }
++
++ /*
++ * It is not safe to access mlip after the AIL lock is dropped, so we
++ * must get a copy of li_lsn before we do so. This is especially
++ * important on 32-bit platforms where accessing and updating 64-bit
++ * values like li_lsn is not atomic.
++ */
++ mlip = xfs_ail_min(ailp);
++ tail_lsn = mlip->li_lsn;
++ spin_unlock(&ailp->xa_lock);
++ xfs_log_move_tail(ailp->xa_mount, tail_lsn);
++}
++
++/*
+ * Delete the given item from the AIL. It must already be in
+ * the AIL.
+ *
+@@ -642,8 +716,8 @@ xfs_ail_insert(
+ break;
+ }
+
+- ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
+- (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0));
++ ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
++ XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0);
+
+ list_add(&lip->li_ail, &next_lip->li_ail);
+
+@@ -652,6 +726,37 @@ xfs_ail_insert(
+ }
+
+ /*
++ * splice the log item list into the AIL at the given LSN.
++ */
++STATIC void
++xfs_ail_splice(
++ struct xfs_ail *ailp,
++ struct list_head *list,
++ xfs_lsn_t lsn)
++{
++ xfs_log_item_t *next_lip;
++
++ /*
++ * If the AIL is empty, just splice the new list straight in.
++ */
++ if (list_empty(&ailp->xa_ail)) {
++ list_splice(list, &ailp->xa_ail);
++ return;
++ }
++
++ list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
++ if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
++ break;
++ }
++
++ ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
++ (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
++
++ list_splice_init(list, &next_lip->li_ail);
++ return;
++}
++
++/*
+ * Delete the given item from the AIL. Return a pointer to the item.
+ */
+ STATIC void
+diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
+index 62da86c..e039729 100644
+--- a/fs/xfs/xfs_trans_priv.h
++++ b/fs/xfs/xfs_trans_priv.h
+@@ -22,15 +22,17 @@ struct xfs_log_item;
+ struct xfs_log_item_desc;
+ struct xfs_mount;
+ struct xfs_trans;
++struct xfs_ail;
++struct xfs_log_vec;
+
+ void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
+ void xfs_trans_del_item(struct xfs_log_item *);
+ void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
+ int flags);
+-void xfs_trans_item_committed(struct xfs_log_item *lip,
+- xfs_lsn_t commit_lsn, int aborted);
+ void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
+
++void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
++ xfs_lsn_t commit_lsn, int aborted);
+ /*
+ * AIL traversal cursor.
+ *
+@@ -76,6 +78,10 @@ struct xfs_ail {
+ void xfs_trans_ail_update(struct xfs_ail *ailp,
+ struct xfs_log_item *lip, xfs_lsn_t lsn)
+ __releases(ailp->xa_lock);
++void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
++ struct xfs_log_item **log_items,
++ int nr_items, xfs_lsn_t lsn)
++ __releases(ailp->xa_lock);
+ void xfs_trans_ail_delete(struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+ __releases(ailp->xa_lock);
+commit 7db37c5e6575b229a5051be1d3ef15257ae0ba5d
+Author: Dave Chinner <dchinner at redhat.com>
+Date: Thu Jan 27 12:02:00 2011 +1100
+
+ xfs: fix log ticket leak on forced shutdown.
+
+ The kmemleak detector shows this after test 139:
+
+ unreferenced object 0xffff880079b88bb0 (size 264):
+ comm "xfs_io", pid 4904, jiffies 4294909382 (age 276.824s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N..........
+ ff ff ff ff ff ff ff ff 48 7b c9 82 ff ff ff ff ........H{......
+ backtrace:
+ [<ffffffff81afb04d>] kmemleak_alloc+0x2d/0x60
+ [<ffffffff8115c6cf>] kmem_cache_alloc+0x13f/0x2b0
+ [<ffffffff814aaa97>] kmem_zone_alloc+0x77/0xf0
+ [<ffffffff814aab2e>] kmem_zone_zalloc+0x1e/0x50
+ [<ffffffff8148f394>] xlog_ticket_alloc+0x34/0x170
+ [<ffffffff81494444>] xlog_cil_push+0xa4/0x3f0
+ [<ffffffff81494eca>] xlog_cil_force_lsn+0x15a/0x160
+ [<ffffffff814933a5>] _xfs_log_force_lsn+0x75/0x2d0
+ [<ffffffff814a264d>] _xfs_trans_commit+0x2bd/0x2f0
+ [<ffffffff8148bfdd>] xfs_iomap_write_allocate+0x1ad/0x350
+ [<ffffffff814ac17f>] xfs_map_blocks+0x21f/0x370
+ [<ffffffff814ad1b7>] xfs_vm_writepage+0x1c7/0x550
+ [<ffffffff8112200a>] __writepage+0x1a/0x50
+ [<ffffffff81122df2>] write_cache_pages+0x1c2/0x4c0
+ [<ffffffff81123117>] generic_writepages+0x27/0x30
+ [<ffffffff814aba5d>] xfs_vm_writepages+0x5d/0x80
+
+ By inspection, the leak occurs when xlog_write() returns an error
+ and we jump to the abort path without dropping the reference on the
+ active ticket.
+
+ Signed-off-by: Dave Chinner <dchinner at redhat.com>
+ Reviewed-by: Christoph Hellwig <hch at lst.de>
+ Reviewed-by: Alex Elder <aelder at sgi.com>
+
+diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
+index 9dc8125..c7eac5a 100644
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -543,7 +543,7 @@ xlog_cil_push(
+
+ error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
+ if (error)
+- goto out_abort;
++ goto out_abort_free_ticket;
+
+ /*
+ * now that we've written the checkpoint into the log, strictly
+@@ -569,8 +569,9 @@ restart:
+ }
+ spin_unlock(&cil->xc_cil_lock);
+
++ /* xfs_log_done always frees the ticket on error. */
+ commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
+- if (error || commit_lsn == -1)
++ if (commit_lsn == -1)
+ goto out_abort;
+
+ /* attach all the transactions w/ busy extents to iclog */
+@@ -600,6 +601,8 @@ out_free_ticket:
+ kmem_free(new_ctx);
+ return 0;
+
++out_abort_free_ticket:
++ xfs_log_ticket_put(tic);
+ out_abort:
+ xlog_cil_committed(ctx, XFS_LI_ABORTED);
+ return XFS_ERROR(EIO);
+commit c6f990d1ff8e4e53b12f4175eb7d7ea710c3ca73
+Author: Dave Chinner <dchinner at redhat.com>
+Date: Thu Jan 27 13:23:28 2011 +1100
+
+ xfs: handle CIl transaction commit failures correctly
+
+ Failure to commit a transaction into the CIL is not handled
+ correctly. This currently can only happen when racing with a
+ shutdown and requires an explicit shutdown check, so it is rare and can
+ be avoided. Remove the shutdown check and make the CIL commit a void
+ function to indicate it will always succeed, thereby removing the
+ incorrectly handled failure case.
+
+ Signed-off-by: Dave Chinner <dchinner at redhat.com>
+ Reviewed-by: Christoph Hellwig <hch at lst.de>
+ Reviewed-by: Alex Elder <aelder at sgi.com>
+
+diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
+index 916eb7d..3bd3291 100644
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -191,7 +191,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket);
+
+ xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
+
+-int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
++void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+ struct xfs_log_vec *log_vector,
+ xfs_lsn_t *commit_lsn, int flags);
+ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
+diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
+index c7eac5a..9ca59be 100644
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -625,7 +625,7 @@ out_abort:
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+-int
++void
+ xfs_log_commit_cil(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+@@ -640,11 +640,6 @@ xfs_log_commit_cil(
+ if (flags & XFS_TRANS_RELEASE_LOG_RES)
+ log_flags = XFS_LOG_REL_PERM_RESERV;
+
+- if (XLOG_FORCED_SHUTDOWN(log)) {
+- xlog_cil_free_logvec(log_vector);
+- return XFS_ERROR(EIO);
+- }
+-
+ /*
+ * do all the hard work of formatting items (including memory
+ * allocation) outside the CIL context lock. This prevents stalling CIL
+@@ -704,7 +699,6 @@ xfs_log_commit_cil(
+ */
+ if (push)
+ xlog_cil_push(log, 0);
+- return 0;
+ }
+
+ /*
+diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
+index 29f5e54..7692279 100644
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -1755,7 +1755,6 @@ xfs_trans_commit_cil(
+ int flags)
+ {
+ struct xfs_log_vec *log_vector;
+- int error;
+
+ /*
+ * Get each log item to allocate a vector structure for
+@@ -1766,9 +1765,7 @@ xfs_trans_commit_cil(
+ if (!log_vector)
+ return ENOMEM;
+
+- error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
+- if (error)
+- return error;
++ xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
+
+ current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+ xfs_trans_free(tp);
+commit 20ad9ea9becd34a3c16252ca9d815f2c74f8f30f
+Author: Christoph Hellwig <hch at infradead.org>
+Date: Sun Feb 13 12:06:34 2011 +0000
+
+ xfs: enable delaylog by default
+
+ Signed-off-by: Christoph Hellwig <hch at lst.de>
+ Signed-off-by: Dave Chinner <dchinner at redhat.com>
+ Signed-off-by: Alex Elder <aelder at sgi.com>
+
+diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt
+index 7445bf3..5282e3e 100644
+--- a/Documentation/filesystems/xfs-delayed-logging-design.txt
++++ b/Documentation/filesystems/xfs-delayed-logging-design.txt
+@@ -791,10 +791,3 @@ mount option. Fundamentally, there is no reason why the log manager would not
+ be able to swap methods automatically and transparently depending on load
+ characteristics, but this should not be necessary if delayed logging works as
+ designed.
+-
+-Roadmap:
+-
+-2.6.39 Switch default mount option to use delayed logging
+- => should be roughly 12 months after initial merge
+- => enough time to shake out remaining problems before next round of
+- enterprise distro kernel rebases
+diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
+index 9731898..7ec1fb8 100644
+--- a/fs/xfs/linux-2.6/xfs_super.c
++++ b/fs/xfs/linux-2.6/xfs_super.c
+@@ -189,6 +189,7 @@ xfs_parseargs(
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
++ mp->m_flags |= XFS_MOUNT_DELAYLOG;
+
+ /*
+ * These can be overridden by the mount option parsing.
================================================================
More information about the pld-cvs-commit
mailing list