packages: kernel/kernel-small_fixes.patch - xfs fixes from upstream

arekm arekm at pld-linux.org
Fri Oct 14 19:56:02 CEST 2011


Author: arekm                        Date: Fri Oct 14 17:56:02 2011 GMT
Module: packages                      Tag: HEAD
---- Log message:
- xfs fixes from upstream

---- Files affected:
packages/kernel:
   kernel-small_fixes.patch (1.38 -> 1.39) 

---- Diffs:

================================================================
Index: packages/kernel/kernel-small_fixes.patch
diff -u packages/kernel/kernel-small_fixes.patch:1.38 packages/kernel/kernel-small_fixes.patch:1.39
--- packages/kernel/kernel-small_fixes.patch:1.38	Wed Oct 12 19:27:48 2011
+++ packages/kernel/kernel-small_fixes.patch	Fri Oct 14 19:55:57 2011
@@ -802,3 +802,427 @@
  	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
  
 
+From: Christoph Hellwig <hch at infradead.org>
+Subject: [PATCH 1/4] xfs: do not update xa_last_pushed_lsn for locked items
+
+If an item is locked, we should not update xa_last_pushed_lsn and thus
+skip it when restarting the AIL scan, as we need to be able to lock and
+write it out as soon as possible.  Otherwise heavy lock contention might
+starve AIL pushing too easily, especially given the larger backoff once
+we have moved xa_last_pushed_lsn all the way to the target lsn.
+
+Signed-off-by: Christoph Hellwig <hch at lst.de>
+Reported-by: Stefan Priebe <s.priebe at profihost.ag>
+Tested-by: Stefan Priebe <s.priebe at profihost.ag>
+
+Index: xfs/fs/xfs/xfs_trans_ail.c
+===================================================================
+--- xfs.orig/fs/xfs/xfs_trans_ail.c	2011-10-11 15:48:49.302003241 +0200
++++ xfs/fs/xfs/xfs_trans_ail.c	2011-10-11 15:49:10.307505812 +0200
+@@ -440,7 +440,6 @@ xfs_ail_worker(
+ 
+ 		case XFS_ITEM_LOCKED:
+ 			XFS_STATS_INC(xs_push_ail_locked);
+-			ailp->xa_last_pushed_lsn = lsn;
+ 			stuck++;
+ 			break;
+ 
+
+_______________________________________________
+xfs mailing list
+xfs at oss.sgi.com
+http://oss.sgi.com/mailman/listinfo/xfs
+
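For context, the effect of the hunk above is easiest to see in reduced
form.  The sketch below condenses the surrounding xfs_ail_worker() switch
as of this kernel; only the XFS_ITEM_SUCCESS and XFS_ITEM_LOCKED cases
are shown, and the success-case statements are assumed from the pre-patch
code rather than quoted from this diff:

	switch (IOP_TRYLOCK(lip)) {
	case XFS_ITEM_SUCCESS:
		XFS_STATS_INC(xs_push_ail_success);
		IOP_PUSH(lip);
		/* real progress: the next scan may resume past this item */
		ailp->xa_last_pushed_lsn = lsn;
		break;
	case XFS_ITEM_LOCKED:
		XFS_STATS_INC(xs_push_ail_locked);
		/* xa_last_pushed_lsn deliberately not updated, so a
		 * restarted scan retries the locked item promptly */
		stuck++;
		break;
	}
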
+From: Christoph Hellwig <hch at infradead.org>
+Subject: [PATCH 2/4] xfs: force the log if we encounter pinned buffers in
+	.iop_pushbuf
+
+We need to check for pinned buffers even in .iop_pushbuf, given that inode
+items flush into the same buffers that may be pinned directly due to
+operations on the unlinked inode list operating directly on buffers.  To
+handle this, add a return value to .iop_pushbuf that tells the AIL push
+about this condition, and use the existing log force mechanisms to unpin
+the buffer.
+
+Signed-off-by: Christoph Hellwig <hch at lst.de>
+Reported-by: Stefan Priebe <s.priebe at profihost.ag>
+Tested-by: Stefan Priebe <s.priebe at profihost.ag>
+
+Index: xfs/fs/xfs/quota/xfs_dquot_item.c
+===================================================================
+--- xfs.orig/fs/xfs/quota/xfs_dquot_item.c	2011-10-11 15:48:49.290003546 +0200
++++ xfs/fs/xfs/quota/xfs_dquot_item.c	2011-10-11 15:49:17.727006849 +0200
+@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait(
+  * search the buffer cache can be a time consuming thing, and AIL lock is a
+  * spinlock.
+  */
+-STATIC void
++STATIC bool
+ xfs_qm_dquot_logitem_pushbuf(
+ 	struct xfs_log_item	*lip)
+ {
+ 	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
+ 	struct xfs_dquot	*dqp = qlip->qli_dquot;
+ 	struct xfs_buf		*bp;
++	bool			ret = true;
+ 
+ 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ 
+@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf(
+ 	if (completion_done(&dqp->q_flush) ||
+ 	    !(lip->li_flags & XFS_LI_IN_AIL)) {
+ 		xfs_dqunlock(dqp);
+-		return;
++		return true;
+ 	}
+ 
+ 	bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
+ 			dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+ 	xfs_dqunlock(dqp);
+ 	if (!bp)
+-		return;
++		return true;
+ 	if (XFS_BUF_ISDELAYWRITE(bp))
+ 		xfs_buf_delwri_promote(bp);
++	if (XFS_BUF_ISPINNED(bp))
++		ret = false;
+ 	xfs_buf_relse(bp);
++	return ret;
+ }
+ 
+ /*
+Index: xfs/fs/xfs/xfs_buf_item.c
+===================================================================
+--- xfs.orig/fs/xfs/xfs_buf_item.c	2011-10-11 15:48:49.286004461 +0200
++++ xfs/fs/xfs/xfs_buf_item.c	2011-10-11 15:49:17.727006849 +0200
+@@ -629,7 +629,7 @@ xfs_buf_item_push(
+  * the xfsbufd to get this buffer written. We have to unlock the buffer
+  * to allow the xfsbufd to write it, too.
+  */
+-STATIC void
++STATIC bool
+ xfs_buf_item_pushbuf(
+ 	struct xfs_log_item	*lip)
+ {
+@@ -643,6 +643,7 @@ xfs_buf_item_pushbuf(
+ 
+ 	xfs_buf_delwri_promote(bp);
+ 	xfs_buf_relse(bp);
++	return true;
+ }
+ 
+ STATIC void
+Index: xfs/fs/xfs/xfs_inode_item.c
+===================================================================
+--- xfs.orig/fs/xfs/xfs_inode_item.c	2011-10-11 15:48:40.750005198 +0200
++++ xfs/fs/xfs/xfs_inode_item.c	2011-10-11 15:49:17.735004729 +0200
+@@ -708,13 +708,14 @@ xfs_inode_item_committed(
+  * marked delayed write. If that's the case, we'll promote it and that will
+  * allow the caller to write the buffer by triggering the xfsbufd to run.
+  */
+-STATIC void
++STATIC bool
+ xfs_inode_item_pushbuf(
+ 	struct xfs_log_item	*lip)
+ {
+ 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ 	struct xfs_inode	*ip = iip->ili_inode;
+ 	struct xfs_buf		*bp;
++	bool			ret = true;
+ 
+ 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
+ 
+@@ -725,7 +726,7 @@ xfs_inode_item_pushbuf(
+ 	if (completion_done(&ip->i_flush) ||
+ 	    !(lip->li_flags & XFS_LI_IN_AIL)) {
+ 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+-		return;
++		return true;
+ 	}
+ 
+ 	bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
+@@ -733,10 +734,13 @@ xfs_inode_item_pushbuf(
+ 
+ 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ 	if (!bp)
+-		return;
++		return true;
+ 	if (XFS_BUF_ISDELAYWRITE(bp))
+ 		xfs_buf_delwri_promote(bp);
++	if (XFS_BUF_ISPINNED(bp))
++		ret = false;
+ 	xfs_buf_relse(bp);
++	return ret;
+ }
+ 
+ /*
+Index: xfs/fs/xfs/xfs_trans.h
+===================================================================
+--- xfs.orig/fs/xfs/xfs_trans.h	2011-10-11 15:48:40.758004637 +0200
++++ xfs/fs/xfs/xfs_trans.h	2011-10-11 15:49:17.743032550 +0200
+@@ -350,7 +350,7 @@ typedef struct xfs_item_ops {
+ 	void (*iop_unlock)(xfs_log_item_t *);
+ 	xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
+ 	void (*iop_push)(xfs_log_item_t *);
+-	void (*iop_pushbuf)(xfs_log_item_t *);
++	bool (*iop_pushbuf)(xfs_log_item_t *);
+ 	void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
+ } xfs_item_ops_t;
+ 
+Index: xfs/fs/xfs/xfs_trans_ail.c
+===================================================================
+--- xfs.orig/fs/xfs/xfs_trans_ail.c	2011-10-11 15:49:10.307505812 +0200
++++ xfs/fs/xfs/xfs_trans_ail.c	2011-10-11 17:07:49.826504898 +0200
+@@ -427,8 +427,13 @@ xfs_ail_worker(
+ 
+ 		case XFS_ITEM_PUSHBUF:
+ 			XFS_STATS_INC(xs_push_ail_pushbuf);
+-			IOP_PUSHBUF(lip);
+-			ailp->xa_last_pushed_lsn = lsn;
++
++			if (!IOP_PUSHBUF(lip)) {
++				stuck++;
++				flush_log = 1;
++			} else {
++				ailp->xa_last_pushed_lsn = lsn;
++			}
+ 			push_xfsbufd = 1;
+ 			break;
+ 
+
+_______________________________________________
+xfs mailing list
+xfs at oss.sgi.com
+http://oss.sgi.com/mailman/listinfo/xfs
+
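The contract change above is the same in all three ->iop_pushbuf
implementations, so a condensed sketch may help.  The xfs_foo_* name and
the lookup_backing_buffer() helper are placeholders standing in for the
per-item buffer lookup (xfs_incore() in the real hunks); everything else
mirrors the dquot and inode versions above:

	STATIC bool
	xfs_foo_item_pushbuf(
		struct xfs_log_item	*lip)
	{
		struct xfs_buf		*bp;
		bool			ret = true;

		bp = lookup_backing_buffer(lip);	/* placeholder */
		if (!bp)
			return true;	/* nothing to push, no log force needed */
		if (XFS_BUF_ISDELAYWRITE(bp))
			xfs_buf_delwri_promote(bp);
		if (XFS_BUF_ISPINNED(bp))
			ret = false;	/* caller forces the log to unpin */
		xfs_buf_relse(bp);
		return ret;
	}
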
+From: Christoph Hellwig <hch at infradead.org>
+Subject: [PATCH 3/4] xfs: revert to using a kthread for AIL pushing
+
+Currently we have a few issues with the way the workqueue code is used to
+implement AIL pushing:
+
+ - it accidentally uses the same workqueue as the syncer action, and thus
+   can be prevented from running if there are enough sync actions active
+   in the system.
+ - it doesn't use the HIGHPRI flag to queue at the head of the queue of
+   work items
+
+At this point I'm not confident enough in getting all the workqueue flags and
+tweaks right to provide a perfectly reliable execution context for AIL
+pushing, which is the most important piece in XFS for making forward progress
+when the log fills.
+
+Revert to using a kthread per filesystem, which fixes all the above issues
+at the cost of keeping a task struct and stack around for each mounted
+filesystem.  In addition, this gives us much better ways to diagnose
+any issues involving hung AIL pushing, and removes a small amount of code.
+
+Signed-off-by: Christoph Hellwig <hch at lst.de>
+Reported-by: Stefan Priebe <s.priebe at profihost.ag>
+Tested-by: Stefan Priebe <s.priebe at profihost.ag>
+
+Index: xfs/fs/xfs/linux-2.6/xfs_super.c
+===================================================================
+--- xfs.orig/fs/xfs/linux-2.6/xfs_super.c	2011-10-11 15:48:49.000000000 +0200
++++ xfs/fs/xfs/linux-2.6/xfs_super.c	2011-10-11 15:52:13.383505329 +0200
+@@ -1652,24 +1652,13 @@ xfs_init_workqueues(void)
+ 	 */
+ 	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+ 	if (!xfs_syncd_wq)
+-		goto out;
+-
+-	xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+-	if (!xfs_ail_wq)
+-		goto out_destroy_syncd;
+-
++		return -ENOMEM;
+ 	return 0;
+-
+-out_destroy_syncd:
+-	destroy_workqueue(xfs_syncd_wq);
+-out:
+-	return -ENOMEM;
+ }
+ 
+ STATIC void
+ xfs_destroy_workqueues(void)
+ {
+-	destroy_workqueue(xfs_ail_wq);
+ 	destroy_workqueue(xfs_syncd_wq);
+ }
+ 
+Index: xfs/fs/xfs/xfs_trans_ail.c
+===================================================================
+--- xfs.orig/fs/xfs/xfs_trans_ail.c	2011-10-11 15:51:58.546005158 +0200
++++ xfs/fs/xfs/xfs_trans_ail.c	2011-10-11 15:52:13.383505329 +0200
+@@ -28,8 +28,6 @@
+ #include "xfs_trans_priv.h"
+ #include "xfs_error.h"
+ 
+-struct workqueue_struct	*xfs_ail_wq;	/* AIL workqueue */
+-
+ #ifdef DEBUG
+ /*
+  * Check that the list is sorted as it should be.
+@@ -356,16 +354,10 @@ xfs_ail_delete(
+ 	xfs_trans_ail_cursor_clear(ailp, lip);
+ }
+ 
+-/*
+- * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
+- * to run at a later time if there is more work to do to complete the push.
+- */
+-STATIC void
+-xfs_ail_worker(
+-	struct work_struct	*work)
++static long
++xfsaild_push(
++	struct xfs_ail		*ailp)
+ {
+-	struct xfs_ail		*ailp = container_of(to_delayed_work(work),
+-					struct xfs_ail, xa_work);
+ 	xfs_mount_t		*mp = ailp->xa_mount;
+ 	struct xfs_ail_cursor	cur;
+ 	xfs_log_item_t		*lip;
+@@ -505,20 +497,6 @@ out_done:
+ 		/* We're past our target or empty, so idle */
+ 		ailp->xa_last_pushed_lsn = 0;
+ 
+-		/*
+-		 * We clear the XFS_AIL_PUSHING_BIT first before checking
+-		 * whether the target has changed. If the target has changed,
+-		 * this pushes the requeue race directly onto the result of the
+-		 * atomic test/set bit, so we are guaranteed that either the
+-		 * the pusher that changed the target or ourselves will requeue
+-		 * the work (but not both).
+-		 */
+-		clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
+-		smp_rmb();
+-		if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
+-		    test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
+-			return;
+-
+ 		tout = 50;
+ 	} else if (XFS_LSN_CMP(lsn, target) >= 0) {
+ 		/*
+@@ -541,9 +519,30 @@ out_done:
+ 		tout = 20;
+ 	}
+ 
+-	/* There is more to do, requeue us.  */
+-	queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
+-					msecs_to_jiffies(tout));
++	return tout;
++}
++
++static int
++xfsaild(
++	void		*data)
++{
++	struct xfs_ail	*ailp = data;
++	long		tout = 0;	/* milliseconds */
++
++	while (!kthread_should_stop()) {
++		if (tout && tout <= 20)
++			__set_current_state(TASK_KILLABLE);
++		else
++			__set_current_state(TASK_INTERRUPTIBLE);
++		schedule_timeout(tout ?
++				 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
++
++		try_to_freeze();
++
++		tout = xfsaild_push(ailp);
++	}
++
++	return 0;
+ }
+ 
+ /*
+@@ -578,8 +577,9 @@ xfs_ail_push(
+ 	 */
+ 	smp_wmb();
+ 	xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
+-	if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
+-		queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
++	smp_wmb();
++
++	wake_up_process(ailp->xa_task);
+ }
+ 
+ /*
+@@ -817,9 +817,18 @@ xfs_trans_ail_init(
+ 	INIT_LIST_HEAD(&ailp->xa_ail);
+ 	INIT_LIST_HEAD(&ailp->xa_cursors);
+ 	spin_lock_init(&ailp->xa_lock);
+-	INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
++
++	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
++			ailp->xa_mount->m_fsname);
++	if (IS_ERR(ailp->xa_task))
++		goto out_free_ailp;
++
+ 	mp->m_ail = ailp;
+ 	return 0;
++
++out_free_ailp:
++	kmem_free(ailp);
++	return ENOMEM;
+ }
+ 
+ void
+@@ -828,6 +837,6 @@ xfs_trans_ail_destroy(
+ {
+ 	struct xfs_ail	*ailp = mp->m_ail;
+ 
+-	cancel_delayed_work_sync(&ailp->xa_work);
++	kthread_stop(ailp->xa_task);
+ 	kmem_free(ailp);
+ }
+Index: xfs/fs/xfs/xfs_trans_priv.h
+===================================================================
+--- xfs.orig/fs/xfs/xfs_trans_priv.h	2011-10-11 15:48:40.000000000 +0200
++++ xfs/fs/xfs/xfs_trans_priv.h	2011-10-11 15:53:35.382504829 +0200
+@@ -64,23 +64,17 @@ struct xfs_ail_cursor {
+  */
+ struct xfs_ail {
+ 	struct xfs_mount	*xa_mount;
++	struct task_struct	*xa_task;
+ 	struct list_head	xa_ail;
+ 	xfs_lsn_t		xa_target;
+ 	struct xfs_ail_cursor	xa_cursors;
+ 	spinlock_t		xa_lock;
+-	struct delayed_work	xa_work;
+ 	xfs_lsn_t		xa_last_pushed_lsn;
+-	unsigned long		xa_flags;
+ };
+ 
+-#define XFS_AIL_PUSHING_BIT	0
+-
+ /*
+  * From xfs_trans_ail.c
+  */
+-
+-extern struct workqueue_struct	*xfs_ail_wq;	/* AIL workqueue */
+-
+ void	xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
+ 				struct xfs_ail_cursor *cur,
+ 				struct xfs_log_item **log_items, int nr_items,
+Index: xfs/fs/xfs/linux-2.6/xfs_linux.h
+===================================================================
+--- xfs.orig/fs/xfs/linux-2.6/xfs_linux.h	2011-10-11 15:48:49.000000000 +0200
++++ xfs/fs/xfs/linux-2.6/xfs_linux.h	2011-10-11 15:52:13.383505329 +0200
+@@ -68,6 +68,8 @@
+ #include <linux/ctype.h>
+ #include <linux/writeback.h>
+ #include <linux/capability.h>
++#include <linux/kthread.h>
++#include <linux/freezer.h>
+ #include <linux/list_sort.h>
+ 
+ #include <asm/page.h>
+
+_______________________________________________
+xfs mailing list
+xfs at oss.sgi.com
+http://oss.sgi.com/mailman/listinfo/xfs
+
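The kthread pattern this patch reverts to is a standard one; below is a
reduced sketch of the loop shape taken from the xfsaild() hunk above,
with do_push_work() as a hypothetical stand-in for xfsaild_push():

	#include <linux/kthread.h>
	#include <linux/freezer.h>

	static int push_thread(void *data)
	{
		long	tout = 0;	/* ms; 0 means sleep until woken */

		while (!kthread_should_stop()) {
			/* set the task state before sleeping so a concurrent
			 * wake_up_process() cannot be lost */
			__set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(tout ? msecs_to_jiffies(tout)
					      : MAX_SCHEDULE_TIMEOUT);
			try_to_freeze();
			tout = do_push_work(data);
		}
		return 0;
	}

	/*
	 * Start:  kthread_run(push_thread, ailp, "xfsaild/%s", fsname);
	 * Wake:   wake_up_process(task), as in xfs_ail_push() above.
	 * Stop:   kthread_stop(task), as in xfs_trans_ail_destroy() above.
	 */
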
================================================================

---- CVS-web:
    http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel/kernel-small_fixes.patch?r1=1.38&r2=1.39&f=u


