packages: kernel/kernel-lru_add_drain_all.patch (NEW), kernel/kernel-waiter...

blues blues at pld-linux.org
Wed Sep 2 15:23:39 CEST 2009


Author: blues                        Date: Wed Sep  2 13:23:39 2009 GMT
Module: packages                      Tag: HEAD
---- Log message:
- git 

---- Files affected:
packages/kernel:
   kernel-lru_add_drain_all.patch (NONE -> 1.1)  (NEW), kernel-waiter_starvation.patch (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: packages/kernel/kernel-lru_add_drain_all.patch
diff -u /dev/null packages/kernel/kernel-lru_add_drain_all.patch:1.1
--- /dev/null	Wed Sep  2 15:23:39 2009
+++ packages/kernel/kernel-lru_add_drain_all.patch	Wed Sep  2 15:23:34 2009
@@ -0,0 +1,70 @@
+commit f31f0e205efd969ebb8045e5244a934a6899f670
+Author: KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+Date:   Tue Dec 9 13:14:16 2008 -0800
+
+    mm: remove UP version of lru_add_drain_all()
+    
+    commit 6841c8e26357904ef462650273f5d5015f7bb370 upstream.
+    
+    Currently, lru_add_drain_all() has two versions:
+      (1) use schedule_on_each_cpu()
+      (2) don't use schedule_on_each_cpu()
+    
+    Gerald Schaefer reported it doesn't work well on SMP (not NUMA) S390
+    machine.
+    
+      offline_pages() calls lru_add_drain_all() followed by drain_all_pages().
+      While drain_all_pages() works on each cpu, lru_add_drain_all() only runs
+      on the current cpu for architectures w/o CONFIG_NUMA. This let us run
+      into the BUG_ON(!PageBuddy(page)) in __offline_isolated_pages() during
+      memory hotplug stress test on s390. The page in question was still on the
+      pcp list, because of a race with lru_add_drain_all() and drain_all_pages()
+      on different cpus.
+    
+    In practice, almost every machine has CONFIG_UNEVICTABLE_LRU=y, so almost every
+    machine uses version (1) of lru_add_drain_all() even when the machine is UP.
+    
+    So this ifdef is not valuable;
+    simply removing it is better.
+    
+    Signed-off-by: KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+    Cc: Christoph Lameter <cl at linux-foundation.org>
+    Cc: Lee Schermerhorn <Lee.Schermerhorn at hp.com>
+    Acked-by: Gerald Schaefer <gerald.schaefer at de.ibm.com>
+    Cc: Dave Hansen <dave at linux.vnet.ibm.com>
+    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
+    Cc: Nick Piggin <npiggin at suse.de>
+    Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+
+diff --git a/mm/swap.c b/mm/swap.c
+index 9e0cb31..65e6825 100644
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -244,7 +244,6 @@ void lru_add_drain(void)
+ 	put_cpu();
+ }
+ 
+-#ifdef CONFIG_NUMA
+ static void lru_add_drain_per_cpu(struct work_struct *dummy)
+ {
+ 	lru_add_drain();
+@@ -258,18 +257,6 @@ int lru_add_drain_all(void)
+ 	return schedule_on_each_cpu(lru_add_drain_per_cpu);
+ }
+ 
+-#else
+-
+-/*
+- * Returns 0 for success
+- */
+-int lru_add_drain_all(void)
+-{
+-	lru_add_drain();
+-	return 0;
+-}
+-#endif
+-
+ /*
+  * Batched page_cache_release().  Decrement the reference count on all the
+  * passed pages.  If it fell to zero then remove the page from the LRU and

================================================================
Index: packages/kernel/kernel-waiter_starvation.patch
diff -u /dev/null packages/kernel/kernel-waiter_starvation.patch:1.1
--- /dev/null	Wed Sep  2 15:23:39 2009
+++ packages/kernel/kernel-waiter_starvation.patch	Wed Sep  2 15:23:34 2009
@@ -0,0 +1,190 @@
+commit 777c6c5f1f6e757ae49ecca2ed72d6b1f523c007
+Author: Johannes Weiner <hannes at cmpxchg.org>
+Date:   Wed Feb 4 15:12:14 2009 -0800
+
+    wait: prevent exclusive waiter starvation
+    
+    With exclusive waiters, every process woken up through the wait queue must
+    ensure that the next waiter down the line is woken when it has finished.
+    
+    Interruptible waiters don't do that when aborting due to a signal.  And if
+    an aborting waiter is concurrently woken up through the waitqueue, no one
+    will ever wake up the next waiter.
+    
+    This has been observed with __wait_on_bit_lock() used by
+    lock_page_killable(): the first contender on the queue was aborting when
+    the actual lock holder woke it up concurrently.  The aborted contender
+    didn't acquire the lock and therefore never did an unlock followed by
+    waking up the next waiter.
+    
+    Add abort_exclusive_wait() which removes the process' wait descriptor from
+    the waitqueue, iff still queued, or wakes up the next waiter otherwise.
+    It does so under the waitqueue lock.  Racing with a wake up means the
+    aborting process is either already woken (removed from the queue) and will
+    wake up the next waiter, or it will remove itself from the queue and the
+    concurrent wake up will apply to the next waiter after it.
+    
+    Use abort_exclusive_wait() in __wait_event_interruptible_exclusive() and
+    __wait_on_bit_lock() when they were interrupted by other means than a wake
+    up through the queue.
+    
+    [akpm at linux-foundation.org: coding-style fixes]
+    Reported-by: Chris Mason <chris.mason at oracle.com>
+    Signed-off-by: Johannes Weiner <hannes at cmpxchg.org>
+    Mentored-by: Oleg Nesterov <oleg at redhat.com>
+    Cc: Peter Zijlstra <a.p.zijlstra at chello.nl>
+    Cc: Matthew Wilcox <matthew at wil.cx>
+    Cc: Chuck Lever <cel at citi.umich.edu>
+    Cc: Nick Piggin <nickpiggin at yahoo.com.au>
+    Cc: Ingo Molnar <mingo at elte.hu>
+    Cc: <stable at kernel.org>		["after some testing"]
+    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
+
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index ef609f8..a210ede 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -132,6 +132,8 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
+ 	list_del(&old->task_list);
+ }
+ 
++void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
++			int nr_exclusive, int sync, void *key);
+ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
+ extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+ extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
+@@ -333,16 +335,19 @@ do {									\
+ 	for (;;) {							\
+ 		prepare_to_wait_exclusive(&wq, &__wait,			\
+ 					TASK_INTERRUPTIBLE);		\
+-		if (condition)						\
++		if (condition) {					\
++			finish_wait(&wq, &__wait);			\
+ 			break;						\
++		}							\
+ 		if (!signal_pending(current)) {				\
+ 			schedule();					\
+ 			continue;					\
+ 		}							\
+ 		ret = -ERESTARTSYS;					\
++		abort_exclusive_wait(&wq, &__wait, 			\
++				TASK_INTERRUPTIBLE, NULL);		\
+ 		break;							\
+ 	}								\
+-	finish_wait(&wq, &__wait);					\
+ } while (0)
+ 
+ #define wait_event_interruptible_exclusive(wq, condition)		\
+@@ -431,6 +436,8 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
+ void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
+ void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
+ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
++void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
++			unsigned int mode, void *key);
+ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+ 
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 242d0d4..8ee437a 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -4697,8 +4697,8 @@ EXPORT_SYMBOL(default_wake_function);
+  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
+  * zero in this (rare) case, and we handle it by continuing to scan the queue.
+  */
+-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+-			     int nr_exclusive, int sync, void *key)
++void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
++			int nr_exclusive, int sync, void *key)
+ {
+ 	wait_queue_t *curr, *next;
+ 
+diff --git a/kernel/wait.c b/kernel/wait.c
+index cd87131..42a2dbc 100644
+--- a/kernel/wait.c
++++ b/kernel/wait.c
+@@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+ }
+ EXPORT_SYMBOL(prepare_to_wait_exclusive);
+ 
++/*
++ * finish_wait - clean up after waiting in a queue
++ * @q: waitqueue waited on
++ * @wait: wait descriptor
++ *
++ * Sets current thread back to running state and removes
++ * the wait descriptor from the given waitqueue if still
++ * queued.
++ */
+ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+ {
+ 	unsigned long flags;
+@@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+ }
+ EXPORT_SYMBOL(finish_wait);
+ 
++/*
++ * abort_exclusive_wait - abort exclusive waiting in a queue
++ * @q: waitqueue waited on
++ * @wait: wait descriptor
++ * @mode: runstate of the waiter to be woken
++ * @key: key to identify a wait bit queue or %NULL
++ *
++ * Sets current thread back to running state and removes
++ * the wait descriptor from the given waitqueue if still
++ * queued.
++ *
++ * Wakes up the next waiter if the caller is concurrently
++ * woken up through the queue.
++ *
++ * This prevents waiter starvation where an exclusive waiter
++ * aborts and is woken up concurrently and no one wakes up
++ * the next waiter.
++ */
++void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
++			unsigned int mode, void *key)
++{
++	unsigned long flags;
++
++	__set_current_state(TASK_RUNNING);
++	spin_lock_irqsave(&q->lock, flags);
++	if (!list_empty(&wait->task_list))
++		list_del_init(&wait->task_list);
++	else if (waitqueue_active(q))
++		__wake_up_common(q, mode, 1, 0, key);
++	spin_unlock_irqrestore(&q->lock, flags);
++}
++EXPORT_SYMBOL(abort_exclusive_wait);
++
+ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+ {
+ 	int ret = default_wake_function(wait, mode, sync, key);
+@@ -177,17 +219,20 @@ int __sched
+ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
+ 			int (*action)(void *), unsigned mode)
+ {
+-	int ret = 0;
+-
+ 	do {
++		int ret;
++
+ 		prepare_to_wait_exclusive(wq, &q->wait, mode);
+-		if (test_bit(q->key.bit_nr, q->key.flags)) {
+-			if ((ret = (*action)(q->key.flags)))
+-				break;
+-		}
++		if (!test_bit(q->key.bit_nr, q->key.flags))
++			continue;
++		ret = action(q->key.flags);
++		if (!ret)
++			continue;
++		abort_exclusive_wait(wq, &q->wait, mode, &q->key);
++		return ret;
+ 	} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
+ 	finish_wait(wq, &q->wait);
+-	return ret;
++	return 0;
+ }
+ EXPORT_SYMBOL(__wait_on_bit_lock);
+ 
================================================================


More information about the pld-cvs-commit mailing list