packages: kernel/kernel-lru_add_drain_all.patch (NEW), kernel/kernel-waiter_starvation.patch (NEW)
blues
blues at pld-linux.org
Wed Sep 2 15:23:39 CEST 2009
Author: blues Date: Wed Sep 2 13:23:39 2009 GMT
Module: packages Tag: HEAD
---- Log message:
- git
---- Files affected:
packages/kernel:
kernel-lru_add_drain_all.patch (NONE -> 1.1) (NEW), kernel-waiter_starvation.patch (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: packages/kernel/kernel-lru_add_drain_all.patch
diff -u /dev/null packages/kernel/kernel-lru_add_drain_all.patch:1.1
--- /dev/null Wed Sep 2 15:23:39 2009
+++ packages/kernel/kernel-lru_add_drain_all.patch Wed Sep 2 15:23:34 2009
@@ -0,0 +1,70 @@
+commit f31f0e205efd969ebb8045e5244a934a6899f670
+Author: KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+Date: Tue Dec 9 13:14:16 2008 -0800
+
+ mm: remove UP version of lru_add_drain_all()
+
+ commit 6841c8e26357904ef462650273f5d5015f7bb370 upstream.
+
+ Currently, lru_add_drain_all() has two version.
+ (1) use schedule_on_each_cpu()
+ (2) don't use schedule_on_each_cpu()
+
+ Gerald Schaefer reported it doesn't work well on SMP (not NUMA) S390
+ machine.
+
+ offline_pages() calls lru_add_drain_all() followed by drain_all_pages().
+ While drain_all_pages() works on each cpu, lru_add_drain_all() only runs
+ on the current cpu for architectures w/o CONFIG_NUMA. This let us run
+ into the BUG_ON(!PageBuddy(page)) in __offline_isolated_pages() during
+ memory hotplug stress test on s390. The page in question was still on the
+ pcp list, because of a race with lru_add_drain_all() and drain_all_pages()
+ on different cpus.
+
+ Actually, Almost machine has CONFIG_UNEVICTABLE_LRU=y. Then almost machine use
+ (1) version lru_add_drain_all although the machine is UP.
+
+ Then this ifdef is not valueable.
+ simple removing is better.
+
+ Signed-off-by: KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+ Cc: Christoph Lameter <cl at linux-foundation.org>
+ Cc: Lee Schermerhorn <Lee.Schermerhorn at hp.com>
+ Acked-by: Gerald Schaefer <gerald.schaefer at de.ibm.com>
+ Cc: Dave Hansen <dave at linux.vnet.ibm.com>
+ Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
+ Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
+ Cc: Nick Piggin <npiggin at suse.de>
+ Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+
+diff --git a/mm/swap.c b/mm/swap.c
+index 9e0cb31..65e6825 100644
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -244,7 +244,6 @@ void lru_add_drain(void)
+ put_cpu();
+ }
+
+-#ifdef CONFIG_NUMA
+ static void lru_add_drain_per_cpu(struct work_struct *dummy)
+ {
+ lru_add_drain();
+@@ -258,18 +257,6 @@ int lru_add_drain_all(void)
+ return schedule_on_each_cpu(lru_add_drain_per_cpu);
+ }
+
+-#else
+-
+-/*
+- * Returns 0 for success
+- */
+-int lru_add_drain_all(void)
+-{
+- lru_add_drain();
+- return 0;
+-}
+-#endif
+-
+ /*
+ * Batched page_cache_release(). Decrement the reference count on all the
+ * passed pages. If it fell to zero then remove the page from the LRU and
================================================================
Index: packages/kernel/kernel-waiter_starvation.patch
diff -u /dev/null packages/kernel/kernel-waiter_starvation.patch:1.1
--- /dev/null Wed Sep 2 15:23:39 2009
+++ packages/kernel/kernel-waiter_starvation.patch Wed Sep 2 15:23:34 2009
@@ -0,0 +1,190 @@
+commit 777c6c5f1f6e757ae49ecca2ed72d6b1f523c007
+Author: Johannes Weiner <hannes at cmpxchg.org>
+Date: Wed Feb 4 15:12:14 2009 -0800
+
+ wait: prevent exclusive waiter starvation
+
+ With exclusive waiters, every process woken up through the wait queue must
+ ensure that the next waiter down the line is woken when it has finished.
+
+ Interruptible waiters don't do that when aborting due to a signal. And if
+ an aborting waiter is concurrently woken up through the waitqueue, noone
+ will ever wake up the next waiter.
+
+ This has been observed with __wait_on_bit_lock() used by
+ lock_page_killable(): the first contender on the queue was aborting when
+ the actual lock holder woke it up concurrently. The aborted contender
+ didn't acquire the lock and therefor never did an unlock followed by
+ waking up the next waiter.
+
+ Add abort_exclusive_wait() which removes the process' wait descriptor from
+ the waitqueue, iff still queued, or wakes up the next waiter otherwise.
+ It does so under the waitqueue lock. Racing with a wake up means the
+ aborting process is either already woken (removed from the queue) and will
+ wake up the next waiter, or it will remove itself from the queue and the
+ concurrent wake up will apply to the next waiter after it.
+
+ Use abort_exclusive_wait() in __wait_event_interruptible_exclusive() and
+ __wait_on_bit_lock() when they were interrupted by other means than a wake
+ up through the queue.
+
+ [akpm at linux-foundation.org: coding-style fixes]
+ Reported-by: Chris Mason <chris.mason at oracle.com>
+ Signed-off-by: Johannes Weiner <hannes at cmpxchg.org>
+ Mentored-by: Oleg Nesterov <oleg at redhat.com>
+ Cc: Peter Zijlstra <a.p.zijlstra at chello.nl>
+ Cc: Matthew Wilcox <matthew at wil.cx>
+ Cc: Chuck Lever <cel at citi.umich.edu>
+ Cc: Nick Piggin <nickpiggin at yahoo.com.au>
+ Cc: Ingo Molnar <mingo at elte.hu>
+ Cc: <stable at kernel.org> ["after some testing"]
+ Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
+ Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
+
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index ef609f8..a210ede 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -132,6 +132,8 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
+ list_del(&old->task_list);
+ }
+
++void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
++ int nr_exclusive, int sync, void *key);
+ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
+ extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+ extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
+@@ -333,16 +335,19 @@ do { \
+ for (;;) { \
+ prepare_to_wait_exclusive(&wq, &__wait, \
+ TASK_INTERRUPTIBLE); \
+- if (condition) \
++ if (condition) { \
++ finish_wait(&wq, &__wait); \
+ break; \
++ } \
+ if (!signal_pending(current)) { \
+ schedule(); \
+ continue; \
+ } \
+ ret = -ERESTARTSYS; \
++ abort_exclusive_wait(&wq, &__wait, \
++ TASK_INTERRUPTIBLE, NULL); \
+ break; \
+ } \
+- finish_wait(&wq, &__wait); \
+ } while (0)
+
+ #define wait_event_interruptible_exclusive(wq, condition) \
+@@ -431,6 +436,8 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
+ void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
+ void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
+ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
++void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
++ unsigned int mode, void *key);
+ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 242d0d4..8ee437a 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -4697,8 +4697,8 @@ EXPORT_SYMBOL(default_wake_function);
+ * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
+ * zero in this (rare) case, and we handle it by continuing to scan the queue.
+ */
+-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+- int nr_exclusive, int sync, void *key)
++void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
++ int nr_exclusive, int sync, void *key)
+ {
+ wait_queue_t *curr, *next;
+
+diff --git a/kernel/wait.c b/kernel/wait.c
+index cd87131..42a2dbc 100644
+--- a/kernel/wait.c
++++ b/kernel/wait.c
+@@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+ }
+ EXPORT_SYMBOL(prepare_to_wait_exclusive);
+
++/*
++ * finish_wait - clean up after waiting in a queue
++ * @q: waitqueue waited on
++ * @wait: wait descriptor
++ *
++ * Sets current thread back to running state and removes
++ * the wait descriptor from the given waitqueue if still
++ * queued.
++ */
+ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+ {
+ unsigned long flags;
+@@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+ }
+ EXPORT_SYMBOL(finish_wait);
+
++/*
++ * abort_exclusive_wait - abort exclusive waiting in a queue
++ * @q: waitqueue waited on
++ * @wait: wait descriptor
++ * @state: runstate of the waiter to be woken
++ * @key: key to identify a wait bit queue or %NULL
++ *
++ * Sets current thread back to running state and removes
++ * the wait descriptor from the given waitqueue if still
++ * queued.
++ *
++ * Wakes up the next waiter if the caller is concurrently
++ * woken up through the queue.
++ *
++ * This prevents waiter starvation where an exclusive waiter
++ * aborts and is woken up concurrently and noone wakes up
++ * the next waiter.
++ */
++void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
++ unsigned int mode, void *key)
++{
++ unsigned long flags;
++
++ __set_current_state(TASK_RUNNING);
++ spin_lock_irqsave(&q->lock, flags);
++ if (!list_empty(&wait->task_list))
++ list_del_init(&wait->task_list);
++ else if (waitqueue_active(q))
++ __wake_up_common(q, mode, 1, 0, key);
++ spin_unlock_irqrestore(&q->lock, flags);
++}
++EXPORT_SYMBOL(abort_exclusive_wait);
++
+ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+ {
+ int ret = default_wake_function(wait, mode, sync, key);
+@@ -177,17 +219,20 @@ int __sched
+ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
+ int (*action)(void *), unsigned mode)
+ {
+- int ret = 0;
+-
+ do {
++ int ret;
++
+ prepare_to_wait_exclusive(wq, &q->wait, mode);
+- if (test_bit(q->key.bit_nr, q->key.flags)) {
+- if ((ret = (*action)(q->key.flags)))
+- break;
+- }
++ if (!test_bit(q->key.bit_nr, q->key.flags))
++ continue;
++ ret = action(q->key.flags);
++ if (!ret)
++ continue;
++ abort_exclusive_wait(wq, &q->wait, mode, &q->key);
++ return ret;
+ } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
+ finish_wait(wq, &q->wait);
+- return ret;
++ return 0;
+ }
+ EXPORT_SYMBOL(__wait_on_bit_lock);
+
================================================================
More information about the pld-cvs-commit
mailing list