packages (Titanium): kernel-desktop/kernel-desktop-sched-bfs.patch - up to ...
cactus <cactus at pld-linux.org>
Fri Oct 16 02:04:34 CEST 2009
Author: cactus Date: Fri Oct 16 00:04:35 2009 GMT
Module: packages Tag: Titanium
---- Log message:
- up to BFS 303
---- Files affected:
packages/kernel-desktop:
kernel-desktop-sched-bfs.patch (1.1.2.11 -> 1.1.2.12)
---- Diffs:
================================================================
Index: packages/kernel-desktop/kernel-desktop-sched-bfs.patch
diff -u packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.11 packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.12
--- packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.11 Thu Oct 1 17:29:50 2009
+++ packages/kernel-desktop/kernel-desktop-sched-bfs.patch Fri Oct 16 02:04:27 2009
@@ -1,4 +1,4 @@
-The Brain Fuck Scheduler v0.232 by Con Kolivas.
+The Brain Fuck Scheduler v0.303 by Con Kolivas.
A single shared runqueue O(n) strict fairness earliest deadline first design.
@@ -23,37 +23,32 @@
---
Documentation/sysctl/kernel.txt | 26
- Makefile | 2
+ Makefile | 4
fs/pipe.c | 4
fs/proc/base.c | 2
include/linux/init_task.h | 15
include/linux/ioprio.h | 2
- include/linux/sched.h | 194
+ include/linux/sched.h | 193 -
init/Kconfig | 61
- kernel/Kconfig.preempt | 19
+ init/main.c | 2
kernel/Makefile | 4
kernel/delayacct.c | 2
- kernel/exit.c | 6
- kernel/fork.c | 2
- kernel/kthread.c | 4
+ kernel/exit.c | 7
+ kernel/fork.c | 1
+ kernel/kthread.c | 3
kernel/posix-cpu-timers.c | 14
- kernel/sched.c |10583 ----------------------------------------
- kernel/sched_bfs.c | 6150 +++++++++++++++++++++++
- kernel/sched_debug.c | 509 -
- kernel/sched_fair.c | 1842 ------
- kernel/sched_idletask.c | 129
- kernel/sched_rt.c | 1787 ------
+ kernel/sched_bfs.c | 6295 ++++++++++++++++++++++++++++++++++++++++
kernel/sysctl.c | 156
kernel/timer.c | 3
kernel/trace/trace.c | 4
kernel/workqueue.c | 2
mm/oom_kill.c | 2
- 26 files changed, 6259 insertions(+), 15265 deletions(-)
+ 21 files changed, 6404 insertions(+), 398 deletions(-)
Index: linux-2.6.31-bfs/Documentation/sysctl/kernel.txt
===================================================================
---- linux-2.6.31-bfs.orig/Documentation/sysctl/kernel.txt 2009-09-10 11:43:10.000000000 +1000
-+++ linux-2.6.31-bfs/Documentation/sysctl/kernel.txt 2009-09-22 18:42:26.751614798 +1000
+--- linux-2.6.31-bfs.orig/Documentation/sysctl/kernel.txt 2009-10-06 12:23:21.309990236 +1100
++++ linux-2.6.31-bfs/Documentation/sysctl/kernel.txt 2009-10-06 12:23:34.040742058 +1100
@@ -27,6 +27,7 @@ show up in /proc/sys/kernel:
- domainname
- hostname
@@ -110,8 +105,8 @@
The file rtsig-max can be used to tune the maximum number
Index: linux-2.6.31-bfs/fs/pipe.c
===================================================================
---- linux-2.6.31-bfs.orig/fs/pipe.c 2009-09-10 11:45:24.000000000 +1000
-+++ linux-2.6.31-bfs/fs/pipe.c 2009-09-22 18:42:26.777615327 +1000
+--- linux-2.6.31-bfs.orig/fs/pipe.c 2009-10-06 12:23:21.314996180 +1100
++++ linux-2.6.31-bfs/fs/pipe.c 2009-10-06 12:23:34.042742313 +1100
@@ -78,10 +78,6 @@ void pipe_wait(struct pipe_inode_info *p
{
DEFINE_WAIT(wait);
@@ -125,8 +120,8 @@
schedule();
Index: linux-2.6.31-bfs/include/linux/init_task.h
===================================================================
---- linux-2.6.31-bfs.orig/include/linux/init_task.h 2009-09-10 11:45:32.000000000 +1000
-+++ linux-2.6.31-bfs/include/linux/init_task.h 2009-09-22 18:42:26.793618650 +1000
+--- linux-2.6.31-bfs.orig/include/linux/init_task.h 2009-10-06 12:23:21.394990154 +1100
++++ linux-2.6.31-bfs/include/linux/init_task.h 2009-10-06 12:23:34.075750347 +1100
@@ -116,21 +116,16 @@ extern struct cred init_cred;
.usage = ATOMIC_INIT(2), \
.flags = PF_KTHREAD, \
@@ -156,8 +151,8 @@
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
Index: linux-2.6.31-bfs/include/linux/sched.h
===================================================================
---- linux-2.6.31-bfs.orig/include/linux/sched.h 2009-09-10 11:45:35.000000000 +1000
-+++ linux-2.6.31-bfs/include/linux/sched.h 2009-09-22 18:42:26.810615979 +1000
+--- linux-2.6.31-bfs.orig/include/linux/sched.h 2009-10-06 12:23:21.483019489 +1100
++++ linux-2.6.31-bfs/include/linux/sched.h 2009-10-06 12:23:34.078766233 +1100
@@ -36,8 +36,11 @@
#define SCHED_FIFO 1
#define SCHED_RR 2
@@ -357,7 +352,7 @@
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
-@@ -1172,17 +1026,18 @@ struct task_struct {
+@@ -1172,17 +1026,16 @@ struct task_struct {
int lock_depth; /* BKL lock depth */
@@ -367,7 +362,6 @@
-#endif
-#endif
-
-+ int load_weight; /* for niceness load balancing purposes */
int prio, static_prio, normal_prio;
+ int time_slice, first_time_slice;
+ unsigned long deadline;
@@ -376,15 +370,14 @@
- const struct sched_class *sched_class;
- struct sched_entity se;
- struct sched_rt_entity rt;
-+ unsigned long long timestamp, last_ran;
++ u64 last_ran;
+ u64 sched_time; /* sched_clock time spent running */
+
-+ int rt_nr_cpus_allowed;
+ unsigned long rt_timeout;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
-@@ -1205,6 +1060,9 @@ struct task_struct {
+@@ -1205,6 +1058,9 @@ struct task_struct {
unsigned int policy;
cpumask_t cpus_allowed;
@@ -394,7 +387,7 @@
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
-@@ -1273,6 +1131,7 @@ struct task_struct {
+@@ -1273,6 +1129,7 @@ struct task_struct {
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
cputime_t utime, stime, utimescaled, stimescaled;
@@ -402,7 +395,7 @@
cputime_t gtime;
cputime_t prev_utime, prev_stime;
unsigned long nvcsw, nivcsw; /* context switch counts */
-@@ -1497,11 +1356,14 @@ struct task_struct {
+@@ -1497,11 +1354,14 @@ struct task_struct {
* priority to a value higher than any user task. Note:
* MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
*/
@@ -420,7 +413,7 @@
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
static inline int rt_prio(int prio)
-@@ -1785,11 +1647,7 @@ task_sched_runtime(struct task_struct *t
+@@ -1785,11 +1645,7 @@ task_sched_runtime(struct task_struct *t
extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
@@ -432,10 +425,18 @@
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
-Index: linux-2.6.31-bfs/kernel/sched.c
+@@ -1939,6 +1795,7 @@ extern void wake_up_new_task(struct task
+ static inline void kick_process(struct task_struct *tsk) { }
+ #endif
+ extern void sched_fork(struct task_struct *p, int clone_flags);
++extern void sched_exit(struct task_struct *p);
+ extern void sched_dead(struct task_struct *p);
+
+ extern void proc_caches_init(void);
+Index: linux-2.6.31-bfs/kernel/sysctl.c
===================================================================
---- linux-2.6.31-bfs.orig/kernel/sysctl.c 2009-09-10 11:45:40.000000000 +1000
-+++ linux-2.6.31-bfs/kernel/sysctl.c 2009-09-22 18:42:26.823615207 +1000
+--- linux-2.6.31-bfs.orig/kernel/sysctl.c 2009-10-06 12:23:21.369989419 +1100
++++ linux-2.6.31-bfs/kernel/sysctl.c 2009-10-06 12:23:34.145991666 +1100
@@ -86,6 +86,8 @@ extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
@@ -625,8 +626,8 @@
.ctl_name = KERN_SPIN_RETRY,
Index: linux-2.6.31-bfs/kernel/workqueue.c
===================================================================
---- linux-2.6.31-bfs.orig/kernel/workqueue.c 2009-09-10 11:45:41.000000000 +1000
-+++ linux-2.6.31-bfs/kernel/workqueue.c 2009-09-22 18:42:26.827615694 +1000
+--- linux-2.6.31-bfs.orig/kernel/workqueue.c 2009-10-06 12:23:21.374989705 +1100
++++ linux-2.6.31-bfs/kernel/workqueue.c 2009-10-06 12:23:34.223741968 +1100
@@ -317,8 +317,6 @@ static int worker_thread(void *__cwq)
if (cwq->wq->freezeable)
set_freezable();
@@ -636,11 +637,11 @@
for (;;) {
prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
if (!freezing(current) &&
-Index: linux-2.6.31-bfs/kernel/sched_fair.c
+Index: linux-2.6.31-bfs/kernel/sched_bfs.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-09-22 19:16:28.271354482 +1000
-@@ -0,0 +1,6150 @@
++++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-10-06 12:37:32.317752798 +1100
+@@ -0,0 +1,6295 @@
+/*
+ * kernel/sched_bfs.c, was sched.c
+ *
@@ -790,13 +791,24 @@
+ */
+int sched_iso_cpu __read_mostly = 70;
+
-+int prio_ratios[PRIO_RANGE] __read_mostly;
++/*
++ * The relative length of deadline for each priority(nice) level.
++ */
++static int prio_ratios[PRIO_RANGE] __read_mostly;
+
++/*
++ * The quota handed out to tasks of all priority levels when refilling their
++ * time_slice.
++ */
+static inline unsigned long timeslice(void)
+{
+ return MS_TO_US(rr_interval);
+}
+
++/*
++ * The global runqueue data that all CPUs work off. All data is protected
++ * by grq.lock.
++ */
+struct global_rq {
+ spinlock_t lock;
+ unsigned long nr_running;
@@ -812,11 +824,12 @@
+#endif
+};
+
++/* There can be only one */
+static struct global_rq grq;
+
+/*
+ * This is the main, per-CPU runqueue data structure.
-+ * All this is protected by the global_rq lock.
++ * This data should only be modified by the local cpu.
+ */
+struct rq {
+#ifdef CONFIG_SMP
@@ -827,28 +840,28 @@
+
+ struct task_struct *curr, *idle;
+ struct mm_struct *prev_mm;
-+ struct list_head queue; /* Place to store currently running task */
+
+ /* Stored data about rq->curr to work outside grq lock */
+ unsigned long rq_deadline;
+ unsigned int rq_policy;
+ int rq_time_slice;
++ u64 rq_last_ran;
+ int rq_prio;
+
+ /* Accurate timekeeping data */
+ u64 timekeep_clock;
+ unsigned long user_pc, nice_pc, irq_pc, softirq_pc, system_pc,
-+ iowait_pc, idle_pc;
++ iowait_pc, idle_pc;
+ atomic_t nr_iowait;
+
+ int cpu; /* cpu of this runqueue */
-+ int online;
+
+#ifdef CONFIG_SMP
++ int online;
++
+ struct root_domain *rd;
+ struct sched_domain *sd;
-+
-+ struct list_head migration_queue;
++ unsigned long *cpu_locality; /* CPU relative cache distance */
+#endif
+
+ u64 clock;
@@ -915,7 +928,6 @@
+ * members (mimicking the global state we have today).
+ */
+static struct root_domain def_root_domain;
-+
+#endif
+
+static inline int cpu_of(struct rq *rq)
@@ -937,10 +949,18 @@
+#define for_each_domain(cpu, __sd) \
+ for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+
++#ifdef CONFIG_SMP
+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
+#define this_rq() (&__get_cpu_var(runqueues))
+#define task_rq(p) cpu_rq(task_cpu(p))
+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
++#else /* CONFIG_SMP */
++static struct rq *uprq;
++#define cpu_rq(cpu) (uprq)
++#define this_rq() (uprq)
++#define task_rq(p) (uprq)
++#define cpu_curr(cpu) ((uprq)->curr)
++#endif
+
+#include "sched_stats.h"
+
@@ -951,6 +971,11 @@
+# define finish_arch_switch(prev) do { } while (0)
+#endif
+
++/*
++ * All common locking functions performed on grq.lock. rq->clock is local to
++ * the cpu accessing it so it can be modified just with interrupts disabled,
++ * but looking up task_rq must be done under grq.lock to be safe.
++ */
+inline void update_rq_clock(struct rq *rq)
+{
+ rq->clock = sched_clock_cpu(cpu_of(rq));
@@ -964,7 +989,6 @@
+static inline void grq_lock(void)
+ __acquires(grq.lock)
+{
-+ smp_mb();
+ spin_lock(&grq.lock);
+}
+
@@ -977,15 +1001,14 @@
+static inline void grq_lock_irq(void)
+ __acquires(grq.lock)
+{
-+ smp_mb();
+ spin_lock_irq(&grq.lock);
+}
+
+static inline void time_lock_grq(struct rq *rq)
+ __acquires(grq.lock)
+{
-+ grq_lock();
+ update_rq_clock(rq);
++ grq_lock();
+}
+
+static inline void grq_unlock_irq(void)
@@ -997,8 +1020,7 @@
+static inline void grq_lock_irqsave(unsigned long *flags)
+ __acquires(grq.lock)
+{
-+ local_irq_save(*flags);
-+ grq_lock();
++ spin_lock_irqsave(&grq.lock, *flags);
+}
+
+static inline void grq_unlock_irqrestore(unsigned long *flags)
@@ -1024,24 +1046,21 @@
+ return rq;
+}
+
-+static inline struct rq
-+*task_grq_lock_irq(struct task_struct *p)
++static inline struct rq *task_grq_lock_irq(struct task_struct *p)
+ __acquires(grq.lock)
+{
+ grq_lock_irq();
+ return task_rq(p);
+}
+
-+static inline void
-+time_task_grq_lock_irq(struct task_struct *p)
++static inline void time_task_grq_lock_irq(struct task_struct *p)
+ __acquires(grq.lock)
+{
+ struct rq *rq = task_grq_lock_irq(p);
+ update_rq_clock(rq);
+}
+
-+static inline void
-+task_grq_unlock_irq(void)
++static inline void task_grq_unlock_irq(void)
+ __releases(grq.lock)
+{
+ grq_unlock_irq();
@@ -1060,12 +1079,12 @@
+ * This interface allows printk to be called with the runqueue lock
+ * held and know whether or not it is OK to wake up the klogd.
+ */
-+int grunqueue_is_locked(void)
++inline int grunqueue_is_locked(void)
+{
+ return spin_is_locked(&grq.lock);
+}
+
-+void grq_unlock_wait(void)
++inline void grq_unlock_wait(void)
+ __releases(grq.lock)
+{
+ smp_mb(); /* spin-unlock-wait is not a full memory barrier */
@@ -1134,24 +1153,16 @@
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+/*
-+ * A task that is queued will be on the grq run list.
++ * A task that is queued but not running will be on the grq run list.
+ * A task that is not running or queued will not be on the grq run list.
-+ * A task that is currently running will have ->oncpu set and be queued
-+ * temporarily in its own rq queue.
-+ * A task that is running and no longer queued will be seen only on
-+ * context switch exit.
++ * A task that is currently running will have ->oncpu set but not on the
++ * grq run list.
+ */
-+
+static inline int task_queued(struct task_struct *p)
+{
+ return (!list_empty(&p->run_list));
+}
+
-+static inline int task_queued_only(struct task_struct *p)
-+{
-+ return (!list_empty(&p->run_list) && !task_running(p));
-+}
-+
+/*
+ * Removing from the global runqueue. Enter with grq locked.
+ */
@@ -1210,7 +1221,7 @@
+ sched_info_queued(p);
+}
+
-+static inline int pratio(struct task_struct *p)
++static inline int task_prio_ratio(struct task_struct *p)
+{
+ return prio_ratios[TASK_USER_PRIO(p)];
+}
@@ -1222,7 +1233,7 @@
+ */
+static inline int task_timeslice(struct task_struct *p)
+{
-+ return (rr_interval * pratio(p) / 100);
++ return (rr_interval * task_prio_ratio(p) / 100);
+}
+
+#ifdef CONFIG_SMP
@@ -1266,6 +1277,22 @@
+ wake_up_idle_cpu(first_cpu(tmp));
+}
+
++/*
++ * The cpu cache locality difference between CPUs is used to determine how far
++ * to offset the virtual deadline. "One" difference in locality means that one
++ * timeslice difference is allowed longer for the cpu local tasks. This is
++ * enough in the common case when tasks are up to 2* number of CPUs to keep
++ * tasks within their shared cache CPUs only. CPUs on different nodes or not
++ * even in this domain (NUMA) have "3" difference, allowing 4 times longer
++ * deadlines before being taken onto another cpu, allowing for 2* the double
++ * seen by separate CPUs above. See sched_init_smp for how locality is
++ * determined.
++ */
++static inline int
++cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
++{
++ return rq->cpu_locality[task_rq->cpu] * task_timeslice(p);
++}
+#else /* CONFIG_SMP */
+static inline void inc_qnr(void)
+{
@@ -1289,7 +1316,7 @@
+}
+
+/* Always called from a busy cpu on UP */
-+static int suitable_idle_cpus(struct task_struct *p)
++static inline int suitable_idle_cpus(struct task_struct *p)
+{
+ return 0;
+}
@@ -1297,6 +1324,12 @@
+static inline void resched_suitable_idle(struct task_struct *p)
+{
+}
++
++static inline int
++cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
++{
++ return 0;
++}
+#endif /* CONFIG_SMP */
+
+/*
@@ -1345,7 +1378,10 @@
+ */
+static void activate_task(struct task_struct *p, struct rq *rq)
+{
-+ u64 now = rq->clock;
++ u64 now;
++
++ update_rq_clock(rq);
++ now = rq->clock;
+
+ /*
+ * Sleep time is in units of nanosecs, so shift by 20 to get a
@@ -1355,11 +1391,10 @@
+ if (unlikely(prof_on == SLEEP_PROFILING)) {
+ if (p->state == TASK_UNINTERRUPTIBLE)
+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p),
-+ (now - p->timestamp) >> 20);
++ (now - p->last_ran) >> 20);
+ }
+
+ p->prio = effective_prio(p);
-+ p->timestamp = now;
+ if (task_contributes_to_load(p))
+ grq.nr_uninterruptible--;
+ enqueue_task(p);
@@ -1382,6 +1417,7 @@
+void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+ trace_sched_migrate_task(p, cpu);
++ perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
+ /*
+ * After ->cpu is set up to a new value, task_grq_lock(p, ...) can be
+ * successfuly executed on another CPU. We must ensure that updates of
@@ -1400,7 +1436,6 @@
+{
+ set_task_cpu(p, rq->cpu);
+ dequeue_task(p);
-+ list_add(&p->run_list, &rq->queue);
+ dec_qnr();
+}
+
@@ -1410,7 +1445,6 @@
+ */
+static inline void return_task(struct task_struct *p, int deactivate)
+{
-+ list_del_init(&p->run_list);
+ if (deactivate)
+ deactivate_task(p);
+ else {
@@ -1551,10 +1585,17 @@
+ * We do the initial early heuristics without holding
+ * any task-queue locks at all. We'll only try to get
+ * the runqueue lock when things look like they will
-+ * work out!
++ * work out! In the unlikely event rq is dereferenced
++ * since we're lockless, grab it again.
+ */
++#ifdef CONFIG_SMP
++retry_rq:
+ rq = task_rq(p);
-+
++ if (unlikely(!rq))
++ goto retry_rq;
++#else /* CONFIG_SMP */
++ rq = task_rq(p);
++#endif
+ /*
+ * If the task is actively running on another CPU
+ * still, just relax and busy-wait without holding
@@ -1562,9 +1603,9 @@
+ *
+ * NOTE! Since we don't hold any locks, it's not
+ * even sure that "rq" stays as the right runqueue!
-+ * But we don't care, since this will
-+ * return false if the runqueue has changed and p
-+ * is actually now running somewhere else!
++ * But we don't care, since this will return false
++ * if the runqueue has changed and p is actually now
++ * running somewhere else!
+ */
+ while (task_running(p) && p == rq->curr) {
+ if (match_state && unlikely(p->state != match_state))
@@ -1659,71 +1700,57 @@
+
+/*
+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the
-+ * basis of earlier deadlines. SCHED_BATCH and SCHED_IDLEPRIO don't preempt,
-+ * they cooperatively multitask.
-+ */
-+static inline int task_preempts_curr(struct task_struct *p, struct rq *rq)
-+{
-+ int preempts = 0;
-+
-+ if (p->prio < rq->rq_prio)
-+ preempts = 1;
-+ else if (p->policy == SCHED_NORMAL && (p->prio == rq->rq_prio &&
-+ time_before(p->deadline, rq->rq_deadline)))
-+ preempts = 1;
-+ return preempts;
-+}
-+
-+/*
-+ * Wake up *any* suitable cpu to schedule this task.
++ * basis of earlier deadlines. SCHED_BATCH, ISO and IDLEPRIO don't preempt
++ * between themselves, they cooperatively multitask. An idle rq scores as
++ * prio PRIO_LIMIT so it is always preempted. The offset_deadline will choose
++ * an idle runqueue that is closer cache-wise in preference. latest_deadline
++ * and highest_prio_rq are initialised only to silence the compiler. When
++ * all else is equal, still prefer this_rq.
+ */
-+static void try_preempt(struct task_struct *p)
++static void try_preempt(struct task_struct *p, struct rq *this_rq)
+{
<<Diff was trimmed, longer than 597 lines>>
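(Not part of the patch above.) The cache_distance() hunk in the diff offsets a remote task's virtual deadline by locality * task_timeslice(), so a one-step locality difference costs one extra timeslice, and a NUMA-distant cpu (locality 3) allows roughly four times the deadline for a nice-0 task before it is pulled, as the patch comment describes. Below is a minimal standalone C sketch of that arithmetic only; the quantum, priority ratio, locality table and base deadline are made-up example values, not numbers taken from BFS.

/*
 * Illustrative sketch of the cache_distance() deadline offset.
 * All constants here are hypothetical example values.
 */
#include <stdio.h>

static const int rr_interval = 6;        /* example quantum, in ms */
static const int prio_ratio = 100;       /* example nice-0 ratio (1.00) */

/* Mirrors task_timeslice(): rr_interval * task_prio_ratio(p) / 100 */
static int task_timeslice(void)
{
	return rr_interval * prio_ratio / 100;
}

int main(void)
{
	/* Hypothetical locality values: 0 = same CPU, 1 = shared cache,
	 * 3 = different NUMA node, as sketched in the patch comment. */
	const int locality[] = { 0, 1, 3 };
	const unsigned long deadline = 1000;	/* arbitrary base deadline */
	int i;

	for (i = 0; i < 3; i++) {
		int offset = locality[i] * task_timeslice();
		printf("locality %d: remote cpu sees deadline %lu (offset %d)\n",
		       locality[i], deadline + offset, offset);
	}
	return 0;
}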
---- CVS-web:
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel-desktop/kernel-desktop-sched-bfs.patch?r1=1.1.2.11&r2=1.1.2.12&f=u