packages (Titanium): kernel-desktop/kernel-desktop-sched-bfs.patch - bfs up...
cactus
cactus at pld-linux.org
Fri Oct 16 13:59:22 CEST 2009
Author: cactus Date: Fri Oct 16 11:59:22 2009 GMT
Module: packages Tag: Titanium
---- Log message:
- bfs up to 304
---- Files affected:
packages/kernel-desktop:
kernel-desktop-sched-bfs.patch (1.1.2.12 -> 1.1.2.13)
---- Diffs:
================================================================
Index: packages/kernel-desktop/kernel-desktop-sched-bfs.patch
diff -u packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.12 packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.13
--- packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.12 Fri Oct 16 02:04:27 2009
+++ packages/kernel-desktop/kernel-desktop-sched-bfs.patch Fri Oct 16 13:59:17 2009
@@ -1,4 +1,4 @@
-The Brain Fuck Scheduler v0.303 by Con Kolivas.
+The Brain Fuck Scheduler v0.304 by Con Kolivas.
A single shared runqueue O(n) strict fairness earliest deadline first design.
@@ -22,33 +22,34 @@
cpu usage may be very different.
---
- Documentation/sysctl/kernel.txt | 26
- Makefile | 4
- fs/pipe.c | 4
- fs/proc/base.c | 2
- include/linux/init_task.h | 15
- include/linux/ioprio.h | 2
- include/linux/sched.h | 193 -
- init/Kconfig | 61
- init/main.c | 2
- kernel/Makefile | 4
- kernel/delayacct.c | 2
- kernel/exit.c | 7
- kernel/fork.c | 1
- kernel/kthread.c | 3
- kernel/posix-cpu-timers.c | 14
- kernel/sched_bfs.c | 6295 ++++++++++++++++++++++++++++++++++++++++
- kernel/sysctl.c | 156
- kernel/timer.c | 3
- kernel/trace/trace.c | 4
- kernel/workqueue.c | 2
- mm/oom_kill.c | 2
- 21 files changed, 6404 insertions(+), 398 deletions(-)
+ Documentation/scheduler/sched-BFS.txt | 335 +
+ Documentation/sysctl/kernel.txt | 26
+ Makefile | 4
+ fs/pipe.c | 4
+ fs/proc/base.c | 2
+ include/linux/init_task.h | 15
+ include/linux/ioprio.h | 2
+ include/linux/sched.h | 193 -
+ init/Kconfig | 61
+ init/main.c | 2
+ kernel/Makefile | 4
+ kernel/delayacct.c | 2
+ kernel/exit.c | 7
+ kernel/fork.c | 1
+ kernel/kthread.c | 3
+ kernel/posix-cpu-timers.c | 14
+ kernel/sched_bfs.c | 6336 ++++++++++++++++++++++++++++++++++
+ kernel/sysctl.c | 156
+ kernel/timer.c | 3
+ kernel/trace/trace.c | 4
+ kernel/workqueue.c | 2
+ mm/oom_kill.c | 2
+ 22 files changed, 6780 insertions(+), 398 deletions(-)
Index: linux-2.6.31-bfs/Documentation/sysctl/kernel.txt
===================================================================
---- linux-2.6.31-bfs.orig/Documentation/sysctl/kernel.txt 2009-10-06 12:23:21.309990236 +1100
-+++ linux-2.6.31-bfs/Documentation/sysctl/kernel.txt 2009-10-06 12:23:34.040742058 +1100
+--- linux-2.6.31-bfs.orig/Documentation/sysctl/kernel.txt 2009-10-06 21:06:26.175820508 +1100
++++ linux-2.6.31-bfs/Documentation/sysctl/kernel.txt 2009-10-06 21:06:48.532821648 +1100
@@ -27,6 +27,7 @@ show up in /proc/sys/kernel:
- domainname
- hostname
@@ -66,9 +67,9 @@
- rtsig-nr
- sem
@@ -171,6 +173,16 @@ Default value is "/sbin/hotplug".
-
+
==============================================================
-
+
+iso_cpu:
+
+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can
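
A minimal userspace sketch (not from the patch): the iso_cpu text is cut short
by the diff context, but the tunable itself lands in /proc/sys/kernel like its
neighbours, so it can be read the usual way; its default value comes from the
sysctl table in the trimmed part of the diff.

#include <stdio.h>

/* Read the SCHED_ISO cpu cap that this patch adds. */
int main(void)
{
    FILE *f = fopen("/proc/sys/kernel/iso_cpu", "r");
    int pct;

    if (!f) {
        perror("iso_cpu");
        return 1;
    }
    if (fscanf(f, "%d", &pct) == 1)
        printf("SCHED_ISO cpu cap: %d%%\n", pct);
    fclose(f);
    return 0;
}
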
@@ -80,12 +81,12 @@
+==============================================================
+
l2cr: (PPC only)
-
+
This flag controls the L2 cache of G3 processor boards. If
@@ -333,6 +345,20 @@ rebooting. ???
-
+
==============================================================
-
+
+rr_interval:
+
+This is the smallest duration that any cpu process scheduling unit
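
As with iso_cpu above, rr_interval becomes a plain integer under
/proc/sys/kernel once the patch is applied. A minimal sketch of adjusting it
at runtime (not from the patch; needs root, the value 6 is only an example,
and the accepted range is enforced by the sysctl table in the trimmed part of
the diff):

#include <stdio.h>

/* Set the base scheduling quantum, in milliseconds. */
int main(void)
{
    FILE *f = fopen("/proc/sys/kernel/rr_interval", "w");

    if (!f) {
        perror("rr_interval");
        return 1;
    }
    fprintf(f, "%d\n", 6);
    return fclose(f) == 0 ? 0 : 1;
}
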
@@ -101,16 +102,16 @@
+==============================================================
+
rtsig-max & rtsig-nr:
-
+
The file rtsig-max can be used to tune the maximum number
Index: linux-2.6.31-bfs/fs/pipe.c
===================================================================
---- linux-2.6.31-bfs.orig/fs/pipe.c 2009-10-06 12:23:21.314996180 +1100
-+++ linux-2.6.31-bfs/fs/pipe.c 2009-10-06 12:23:34.042742313 +1100
+--- linux-2.6.31-bfs.orig/fs/pipe.c 2009-10-06 21:06:26.150821027 +1100
++++ linux-2.6.31-bfs/fs/pipe.c 2009-10-06 21:06:48.533821285 +1100
@@ -78,10 +78,6 @@ void pipe_wait(struct pipe_inode_info *p
{
DEFINE_WAIT(wait);
-
+
- /*
- * Pipes are system-local resources, so sleeping on them
- * is considered a noninteractive wait:
@@ -120,8 +121,8 @@
schedule();
Index: linux-2.6.31-bfs/include/linux/init_task.h
===================================================================
---- linux-2.6.31-bfs.orig/include/linux/init_task.h 2009-10-06 12:23:21.394990154 +1100
-+++ linux-2.6.31-bfs/include/linux/init_task.h 2009-10-06 12:23:34.075750347 +1100
+--- linux-2.6.31-bfs.orig/include/linux/init_task.h 2009-10-06 21:06:26.181821043 +1100
++++ linux-2.6.31-bfs/include/linux/init_task.h 2009-10-06 21:06:48.562821138 +1100
@@ -116,21 +116,16 @@ extern struct cred init_cred;
.usage = ATOMIC_INIT(2), \
.flags = PF_KTHREAD, \
@@ -151,8 +152,8 @@
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
Index: linux-2.6.31-bfs/include/linux/sched.h
===================================================================
---- linux-2.6.31-bfs.orig/include/linux/sched.h 2009-10-06 12:23:21.483019489 +1100
-+++ linux-2.6.31-bfs/include/linux/sched.h 2009-10-06 12:23:34.078766233 +1100
+--- linux-2.6.31-bfs.orig/include/linux/sched.h 2009-10-06 21:06:26.192821918 +1100
++++ linux-2.6.31-bfs/include/linux/sched.h 2009-10-08 22:59:46.191157813 +1100
@@ -36,8 +36,11 @@
#define SCHED_FIFO 1
#define SCHED_RR 2
@@ -164,12 +165,12 @@
+
+#define SCHED_MAX (SCHED_IDLEPRIO)
+#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX)
-
+
#ifdef __KERNEL__
-
+
@@ -144,13 +147,10 @@ extern u64 cpu_nr_migrations(int cpu);
extern unsigned long get_parent_ip(unsigned long addr);
-
+
struct seq_file;
-struct cfs_rq;
struct task_group;
@@ -181,7 +182,7 @@
#else
static inline void
proc_sched_show_task(struct task_struct *p, struct seq_file *m)
-@@ -159,10 +159,6 @@ proc_sched_show_task(struct task_struct
+@@ -159,10 +159,6 @@ proc_sched_show_task(struct task_struct
static inline void proc_sched_set_task(struct task_struct *p)
{
}
@@ -190,23 +191,23 @@
-{
-}
#endif
-
+
extern unsigned long long time_sync_thresh;
@@ -254,8 +250,8 @@ extern asmlinkage void schedule_tail(str
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
-
+
-extern int runqueue_is_locked(void);
-extern void task_rq_unlock_wait(struct task_struct *p);
+extern int grunqueue_is_locked(void);
+extern void grq_unlock_wait(void);
-
+
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
@@ -1021,148 +1017,6 @@ struct uts_namespace;
struct rq;
struct sched_domain;
-
+
-struct sched_class {
- const struct sched_class *next;
-
@@ -353,9 +354,9 @@
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
@@ -1172,17 +1026,16 @@ struct task_struct {
-
+
int lock_depth; /* BKL lock depth */
-
+
-#ifdef CONFIG_SMP
-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
int oncpu;
@@ -374,22 +375,22 @@
+ u64 sched_time; /* sched_clock time spent running */
+
+ unsigned long rt_timeout;
-
+
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
@@ -1205,6 +1058,9 @@ struct task_struct {
-
+
unsigned int policy;
cpumask_t cpus_allowed;
+#ifdef CONFIG_HOTPLUG_CPU
+ cpumask_t unplugged_mask;
+#endif
-
+
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -1273,6 +1129,7 @@ struct task_struct {
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
-
+
cputime_t utime, stime, utimescaled, stimescaled;
+ unsigned long utime_pc, stime_pc;
cputime_t gtime;
@@ -411,18 +412,18 @@
+#define IDLE_PRIO (MAX_RT_PRIO + 2)
+#define PRIO_LIMIT ((IDLE_PRIO) + 1)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
-
+
static inline int rt_prio(int prio)
@@ -1785,11 +1645,7 @@ task_sched_runtime(struct task_struct *t
extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
-
+
/* sched_exec is called by processes performing an exec */
-#ifdef CONFIG_SMP
-extern void sched_exec(void);
-#else
#define sched_exec() {}
-#endif
-
+
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
@@ -1939,6 +1795,7 @@ extern void wake_up_new_task(struct task
@@ -431,12 +432,12 @@
extern void sched_fork(struct task_struct *p, int clone_flags);
+extern void sched_exit(struct task_struct *p);
extern void sched_dead(struct task_struct *p);
-
+
extern void proc_caches_init(void);
Index: linux-2.6.31-bfs/kernel/sysctl.c
===================================================================
---- linux-2.6.31-bfs.orig/kernel/sysctl.c 2009-10-06 12:23:21.369989419 +1100
-+++ linux-2.6.31-bfs/kernel/sysctl.c 2009-10-06 12:23:34.145991666 +1100
+--- linux-2.6.31-bfs.orig/kernel/sysctl.c 2009-10-06 21:06:26.223820846 +1100
++++ linux-2.6.31-bfs/kernel/sysctl.c 2009-10-06 21:06:48.595826016 +1100
@@ -86,6 +86,8 @@ extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
@@ -448,7 +449,7 @@
#endif
@@ -100,10 +102,11 @@ static int neg_one = -1;
#endif
-
+
static int zero;
-static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
@@ -457,13 +458,13 @@
+static int __read_mostly one = 1;
+static int __read_mostly one_hundred = 100;
+static int __read_mostly five_thousand = 5000;
-
+
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -238,134 +241,7 @@ static struct ctl_table root_table[] = {
{ .ctl_name = 0 }
};
-
+
-#ifdef CONFIG_SCHED_DEBUG
-static int min_sched_granularity_ns = 100000; /* 100 usecs */
-static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
@@ -626,12 +627,12 @@
.ctl_name = KERN_SPIN_RETRY,
Index: linux-2.6.31-bfs/kernel/workqueue.c
===================================================================
---- linux-2.6.31-bfs.orig/kernel/workqueue.c 2009-10-06 12:23:21.374989705 +1100
-+++ linux-2.6.31-bfs/kernel/workqueue.c 2009-10-06 12:23:34.223741968 +1100
+--- linux-2.6.31-bfs.orig/kernel/workqueue.c 2009-10-06 21:06:26.256820979 +1100
++++ linux-2.6.31-bfs/kernel/workqueue.c 2009-10-06 21:06:48.600828706 +1100
@@ -317,8 +317,6 @@ static int worker_thread(void *__cwq)
if (cwq->wq->freezeable)
set_freezable();
-
+
- set_user_nice(current, -5);
-
for (;;) {
@@ -640,8 +641,8 @@
Index: linux-2.6.31-bfs/kernel/sched_bfs.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-10-06 12:37:32.317752798 +1100
-@@ -0,0 +1,6295 @@
++++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-10-16 21:19:59.899777179 +1100
+@@ -0,0 +1,6336 @@
+/*
+ * kernel/sched_bfs.c, was sched.c
+ *
@@ -816,8 +817,8 @@
+ unsigned long long nr_switches;
+ struct list_head queue[PRIO_LIMIT];
+ DECLARE_BITMAP(prio_bitmap, PRIO_LIMIT + 1);
-+ unsigned long iso_ticks;
-+ unsigned short iso_refractory;
++ int iso_ticks;
++ int iso_refractory;
+#ifdef CONFIG_SMP
+ unsigned long qnr; /* queued not running */
+ cpumask_t cpu_idle_map;
@@ -854,9 +855,8 @@
+ iowait_pc, idle_pc;
+ atomic_t nr_iowait;
+
-+ int cpu; /* cpu of this runqueue */
-+
+#ifdef CONFIG_SMP
++ int cpu; /* cpu of this runqueue */
+ int online;
+
+ struct root_domain *rd;
@@ -983,7 +983,7 @@
+
+static inline int task_running(struct task_struct *p)
+{
-+ return (!!p->oncpu);
++ return p->oncpu;
+}
+
+static inline void grq_lock(void)
@@ -1173,18 +1173,31 @@
+ __clear_bit(p->prio, grq.prio_bitmap);
+}
+
++/*
++ * When a task is freshly forked, the first_time_slice flag is set to say
++ * it has taken time_slice from its parent and if it exits on this first
++ * time_slice it can return its time_slice back to the parent.
++ */
+static inline void reset_first_time_slice(struct task_struct *p)
+{
+ if (unlikely(p->first_time_slice))
+ p->first_time_slice = 0;
+}
+
++/*
++ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as
++ * an idle task, we ensure none of the following conditions are met.
++ */
+static int idleprio_suitable(struct task_struct *p)
+{
+ return (!freezing(p) && !signal_pending(p) &&
+ !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING)));
+}
+
++/*
++ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check
++ * that the iso_refractory flag is not set.
++ */
+static int isoprio_suitable(void)
+{
+ return !grq.iso_refractory;
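
The three comments added above document the predicates gating SCHED_IDLEPRIO
and SCHED_ISO. A minimal standalone model of the idleprio test (the struct
and field names below are stand-ins for task_struct fields, not patch code):

#include <stdio.h>
#include <stdbool.h>

struct fake_task {
    bool freezing;            /* being frozen for suspend/hibernate */
    bool signal_pending;      /* has an unhandled signal */
    bool contributes_to_load; /* in uninterruptible sleep */
    bool exiting;             /* PF_EXITING equivalent */
};

/* A task may run at true idle priority only if none of these hold;
 * otherwise it is treated as normal so it cannot stall e.g. exit or
 * signal delivery while starved of cpu. */
static bool idleprio_suitable(const struct fake_task *p)
{
    return !p->freezing && !p->signal_pending &&
           !p->contributes_to_load && !p->exiting;
}

int main(void)
{
    struct fake_task t = { .signal_pending = true };

    printf("suitable: %d\n", idleprio_suitable(&t)); /* 0 */
    t.signal_pending = false;
    printf("suitable: %d\n", idleprio_suitable(&t)); /* 1 */
    return 0;
}
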
@@ -1221,6 +1234,10 @@
+ sched_info_queued(p);
+}
+
++/*
++ * Returns the relative length of deadline all compared to the shortest
++ * deadline which is that of nice -20.
++ */
+static inline int task_prio_ratio(struct task_struct *p)
+{
+ return prio_ratios[TASK_USER_PRIO(p)];
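
task_prio_ratio() indexes a per-nice-level table whose initialisation sits in
the trimmed part of the patch. A sketch of how such a table can be built,
assuming the roughly-10%-per-nice-step growth BFS uses (the constants 128 and
11/10 are an assumption here, not quoted from this diff):

#include <stdio.h>

#define PRIO_RANGE 40 /* nice -20 .. +19 */

static int prio_ratios[PRIO_RANGE];

int main(void)
{
    prio_ratios[0] = 128; /* nice -20: the shortest relative deadline */
    for (int i = 1; i < PRIO_RANGE; i++)
        prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;

    /* deadline offsets scale with the ratio, so higher nice means a
     * proportionally later virtual deadline */
    printf("nice -20: %d\n", prio_ratios[0]);
    printf("nice   0: %d\n", prio_ratios[20]);
    printf("nice +19: %d\n", prio_ratios[39]);
    return 0;
}
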
@@ -1237,6 +1254,11 @@
+}
+
+#ifdef CONFIG_SMP
++/*
++ * qnr is the "queued but not running" count which is the total number of
++ * tasks on the global runqueue list waiting for cpu time but not actually
++ * currently running on a cpu.
++ */
+static inline void inc_qnr(void)
+{
+ grq.qnr++;
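
The qnr count is plain bookkeeping, only ever touched under the grq lock. A
toy standalone version (a bare integer in place of the locked grq field)
showing how it lets an idle cpu cheaply test for waiting work:

#include <stdio.h>

static unsigned long qnr; /* queued-but-not-running tasks */

static void inc_qnr(void) { qnr++; }
static void dec_qnr(void) { qnr--; }
static unsigned long queued_notrunning(void) { return qnr; }

int main(void)
{
    inc_qnr(); /* task enqueued on the global runqueue */
    inc_qnr(); /* a second task enqueued */
    dec_qnr(); /* one task taken by a cpu */
    printf("waiting: %lu\n", queued_notrunning()); /* 1 */
    return 0;
}
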
@@ -1252,6 +1274,10 @@
+ return grq.qnr;
+}
+
++/*
++ * The cpu_idle_map stores a bitmap of all the cpus currently idle to
++ * allow easy lookup of whether any suitable idle cpus are available.
++ */
+static inline void set_cpuidle_map(unsigned long cpu)
+{
+ cpu_set(cpu, grq.cpu_idle_map);
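
The idle map works the same way in miniature; a sketch with a plain unsigned
long standing in for the kernel's cpumask_t (illustrative only, so it caps
out at the machine word size):

#include <stdio.h>

static unsigned long cpu_idle_map;

static void set_cpuidle_map(unsigned int cpu)   { cpu_idle_map |= 1UL << cpu; }
static void clear_cpuidle_map(unsigned int cpu) { cpu_idle_map &= ~(1UL << cpu); }
static int any_cpu_idle(void)                   { return cpu_idle_map != 0; }

int main(void)
{
    set_cpuidle_map(2);   /* cpu 2 went idle */
    set_cpuidle_map(5);
    clear_cpuidle_map(2); /* cpu 2 picked up a task */
    printf("idle cpus remain: %d\n", any_cpu_idle()); /* 1 */
    return 0;
}
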
@@ -1285,13 +1311,14 @@
+ * tasks within their shared cache CPUs only. CPUs on different nodes or not
+ * even in this domain (NUMA) have "3" difference, allowing 4 times longer
+ * deadlines before being taken onto another cpu, allowing for 2* the double
-+ * seen by separate CPUs above. See sched_init_smp for how locality is
-+ * determined.
++ * seen by separate CPUs above.
++ * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
++ * on separate CPUs and quadruple in separate NUMA nodes.
+ */
+static inline int
+cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
+{
-+ return rq->cpu_locality[task_rq->cpu] * task_timeslice(p);
++ return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
+}
+#else /* CONFIG_SMP */
+static inline void inc_qnr(void)
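
A small numeric illustration of the locality offset: the deadline a remote
runqueue compares against is pushed out by cpu_locality * task_timeslice(p),
so shared-cache cpus see the raw deadline, separate cpus an effectively
doubled one, and other NUMA nodes a quadrupled one (the timeslice and
locality values below are made up for illustration):

#include <stdio.h>

int main(void)
{
    unsigned long timeslice = 6000000; /* ~6 ms in ns, illustrative */
    /* 0 = shared cache, 1 = separate cpu, 3 = other NUMA node */
    int locality[] = { 0, 1, 3 };
    const char *where[] = { "shared cache", "separate cpu", "other node" };

    for (int i = 0; i < 3; i++)
        printf("%-13s deadline offset %lu ns\n",
               where[i], locality[i] * timeslice);
    return 0;
}
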
@@ -1373,15 +1400,11 @@
+}
+
+/*
-+ * activate_task - move a task to the runqueue. Enter with grq locked. The rq
-+ * doesn't really matter but gives us the local clock.
++ * activate_task - move a task to the runqueue. Enter with grq locked.
+ */
+static void activate_task(struct task_struct *p, struct rq *rq)
+{
-+ u64 now;
-+
+ update_rq_clock(rq);
-+ now = rq->clock;
+
+ /*
+ * Sleep time is in units of nanosecs, so shift by 20 to get a
@@ -1391,7 +1414,7 @@
+ if (unlikely(prof_on == SLEEP_PROFILING)) {
+ if (p->state == TASK_UNINTERRUPTIBLE)
+ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p),
-+ (now - p->last_ran) >> 20);
++ (rq->clock - p->last_ran) >> 20);
+ }
+
+ p->prio = effective_prio(p);
@@ -1404,7 +1427,7 @@
+
+/*
+ * deactivate_task - If it's running, it's not on the grq and we can just
-+ * decrement the nr_running.
++ * decrement the nr_running. Enter with grq locked.
+ */
+static inline void deactivate_task(struct task_struct *p)
+{
@@ -1434,7 +1457,7 @@
+ */
+static inline void take_task(struct rq *rq, struct task_struct *p)
+{
-+ set_task_cpu(p, rq->cpu);
++ set_task_cpu(p, cpu_of(rq));
+ dequeue_task(p);
+ dec_qnr();
+}
@@ -1707,6 +1730,7 @@
+ * and highest_prio_rq are initialised only to silence the compiler. When
+ * all else is equal, still prefer this_rq.
+ */
++#ifdef CONFIG_SMP
+static void try_preempt(struct task_struct *p, struct rq *this_rq)
+{
+ unsigned long latest_deadline = 0, cpu;
@@ -1754,6 +1778,16 @@
+ resched_task(highest_prio_rq->curr);
+ return;
+}
++#else /* CONFIG_SMP */
++static void try_preempt(struct task_struct *p, struct rq *this_rq)
++{
++ if (p->prio < this_rq->rq_prio ||
++ (p->prio == this_rq->rq_prio && p->policy == SCHED_NORMAL &&
++ time_before(p->deadline, this_rq->rq_deadline)))
++ resched_task(this_rq->curr);
++ return;
++}
++#endif /* CONFIG_SMP */
+
+/**
+ * task_oncpu_function_call - call a function on the cpu on which a task runs
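
The new UP try_preempt() boils down to a two-clause test: better (lower) prio
always preempts; equal prio preempts only with an earlier virtual deadline. A
self-contained model (simplified: the SCHED_NORMAL policy check is dropped,
and a signed difference stands in for time_before()):

#include <stdio.h>

struct running { int prio; long deadline; };

static int should_preempt(int prio, long deadline, const struct running *rq)
{
    if (prio < rq->prio)
        return 1;
    return prio == rq->prio && deadline - rq->deadline < 0;
}

int main(void)
{
    struct running cur = { .prio = 120, .deadline = 1000 };

    printf("%d\n", should_preempt(110, 2000, &cur)); /* 1: better prio */
    printf("%d\n", should_preempt(120,  900, &cur)); /* 1: earlier deadline */
    printf("%d\n", should_preempt(120, 1100, &cur)); /* 0 */
    return 0;
}
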
@@ -2439,9 +2473,9 @@
+ * negative/overflow. time_diff is only used for internal scheduler
+ * time_slice accounting.
+ */
-+ if (time_diff <= 0)
++ if (unlikely(time_diff <= 0))
+ time_diff = JIFFIES_TO_NS(1) / 2;
-+ else if (time_diff > JIFFIES_TO_NS(1))
++ else if (unlikely(time_diff > JIFFIES_TO_NS(1)))
+ time_diff = JIFFIES_TO_NS(1);
+
+ rq->rq_time_slice -= time_diff / 1000;
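
This hunk only adds branch-prediction hints: in the kernel, unlikely()
expands to GCC's __builtin_expect, steering block layout toward the common
path. The same clamp with the same hint in standalone form (the
NSEC_PER_JIFFY value assumes HZ=250, purely for illustration):

#include <stdio.h>

#define unlikely(x) __builtin_expect(!!(x), 0)
#define NSEC_PER_JIFFY 4000000L /* assumes HZ=250 */

/* Bound a measured slice to (0, one jiffy], as update_cpu_clock() does. */
static long clamp_time_diff(long time_diff)
{
    if (unlikely(time_diff <= 0))
        time_diff = NSEC_PER_JIFFY / 2;
    else if (unlikely(time_diff > NSEC_PER_JIFFY))
        time_diff = NSEC_PER_JIFFY;
    return time_diff;
}

int main(void)
{
    printf("%ld\n", clamp_time_diff(-5));      /* 2000000 */
    printf("%ld\n", clamp_time_diff(9000000)); /* 4000000 */
    printf("%ld\n", clamp_time_diff(123456));  /* 123456, unchanged */
    return 0;
}
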
@@ -2462,7 +2496,7 @@
+ if (p == rq->curr) {
+ update_rq_clock(rq);
+ ns = rq->clock - rq->rq_last_ran;
-+ if ((s64)ns < 0)
++ if (unlikely((s64)ns < 0))
+ ns = 0;
+ }
+
@@ -2815,8 +2849,8 @@
+ * same nice value, it proportions cpu according to nice level, it means the
+ * task that last woke up the longest ago has the earliest deadline, thus
+ * ensuring that interactive tasks get low latency on wake up. The CPU
-+ * proportion works out to the square of the difference, so this equation will
-+ * give nice 19 3% CPU compared to nice 0 and nice 0 3% compared to nice -20.
++ * proportion works out to the square of the virtual deadline difference, so
++ * this equation will give nice 19 3% CPU compared to nice 0.
+ */
+static inline int prio_deadline_diff(int user_prio)
+{
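
A quick check of the "~3%" figure in the rewritten comment, assuming the
~10%-per-nice-level deadline growth of the ratio table (that growth factor is
an assumption, not quoted from this diff): cpu share falls with the square of
the deadline stretch.

#include <stdio.h>
#include <math.h>

int main(void)
{
    double stretch = pow(1.1, 19); /* deadline ratio, nice 0 -> nice +19 */
    double share = 100.0 / (stretch * stretch);

    printf("deadline x%.2f, ~%.1f%% of nice 0's cpu\n", stretch, share);
    return 0; /* build with: cc share.c -lm */
}
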
@@ -2876,13 +2910,18 @@
+ * earliest deadline.
+ * Finally if no SCHED_NORMAL tasks are found, SCHED_IDLEPRIO tasks are
+ * selected by the earliest deadline.
++ * Once deadlines are expired (jiffies has passed them) tasks are chosen in FIFO
++ * order. Note that very few tasks will be FIFO for very long because they
++ * only end up that way if they sleep for long or if there are enough fully
++ * cpu bound tasks to push the load to ~8 higher than the number of CPUs for
++ * nice 0.
+ */
+static inline struct
+task_struct *earliest_deadline_task(struct rq *rq, struct task_struct *idle)
+{
+ unsigned long dl, earliest_deadline = 0; /* Initialise to silence compiler */
+ struct task_struct *p, *edt;
-+ unsigned int cpu = rq->cpu;
++ unsigned int cpu = cpu_of(rq);
+ struct list_head *queue;
+ int idx = 0;
+
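
A minimal model of the selection rule just described: earliest virtual
deadline wins, and once deadlines have expired the scan degenerates to FIFO
(queue) order. Plain arrays stand in for the kernel's per-priority list_head
queues:

#include <stdio.h>

struct task { const char *name; long deadline; };

/* Pick the index of the task to run at time 'now'. */
static int earliest_deadline(const struct task *q, int n, long now)
{
    long best = 0;
    int edt = -1;

    for (int i = 0; i < n; i++) {
        if (q[i].deadline <= now)
            return i; /* expired: first in queue (FIFO) order wins */
        if (edt < 0 || q[i].deadline < best) {
            best = q[i].deadline;
            edt = i;
        }
    }
    return edt;
}

int main(void)
{
    struct task q[] = { { "A", 150 }, { "B", 120 }, { "C", 180 } };

    printf("now=100 -> %s\n", q[earliest_deadline(q, 3, 100)].name); /* B */
    printf("now=160 -> %s\n", q[earliest_deadline(q, 3, 160)].name); /* A */
    return 0;
}
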
@@ -3009,7 +3048,6 @@
+ unsigned long *switch_count;
+ int deactivate, cpu;
+ struct rq *rq;
-+ u64 now;
+
+need_resched:
+ preempt_disable();
@@ -3029,7 +3067,6 @@
+
+ local_irq_disable();
+ update_rq_clock(rq);
-+ now = rq->clock;
+ update_cpu_clock(rq, prev, 0);
+
+ grq_lock();
@@ -3070,7 +3107,7 @@
+ else
+ clear_cpuidle_map(cpu);
+
-+ prev->last_ran = now;
++ prev->last_ran = rq->clock;
+
+ if (likely(prev != next)) {
+ sched_info_switch(prev, next);
@@ -3240,7 +3277,7 @@
+
+ list_for_each_safe(tmp, next, &q->task_list) {
+ wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
-+ unsigned flags = curr->flags;
++ unsigned int flags = curr->flags;
+
+ if (curr->func(curr, mode, sync, key) &&
+ (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
@@ -3815,20 +3852,22 @@
+static void
+__setscheduler(struct task_struct *p, struct rq *rq, int policy, int prio)
+{
<<Diff was trimmed, longer than 597 lines>>
---- CVS-web:
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel-desktop/kernel-desktop-sched-bfs.patch?r1=1.1.2.12&r2=1.1.2.13&f=u