packages (Titanium): kernel-desktop/kernel-desktop-sched-bfs.patch - up to bfs 0.221 for kernel 2.6.31
cactus
cactus at pld-linux.org
Mon Sep 14 22:25:17 CEST 2009
Author: cactus Date: Mon Sep 14 20:25:16 2009 GMT
Module: packages Tag: Titanium
---- Log message:
- up to bfs 0.221 for kernel 2.6.31
---- Files affected:
packages/kernel-desktop:
kernel-desktop-sched-bfs.patch (1.1.2.4 -> 1.1.2.5)
---- Diffs:
================================================================
Index: packages/kernel-desktop/kernel-desktop-sched-bfs.patch
diff -u packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.4 packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.5
--- packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.4 Mon Sep 7 20:20:39 2009
+++ packages/kernel-desktop/kernel-desktop-sched-bfs.patch Mon Sep 14 22:25:11 2009
@@ -1,4 +1,4 @@
-The Brain Fuck Scheduler v0.209 by Con Kolivas.
+The Brain Fuck Scheduler v0.221 by Con Kolivas.
A single shared runqueue O(n) strict fairness earliest deadline first design.
@@ -7,7 +7,6 @@
Scalability is optimal when your workload is equal to the number of CPUs on
bfs. ie you should ONLY do make -j4 on quad core, -j2 on dual core and so on.
-It's actually faster than higher numbers of jobs on *any* scheduler.
Features SCHED_IDLEPRIO and SCHED_ISO scheduling policies as well.
@@ -19,36 +18,43 @@
schedtool -I -e amarok
+Now includes accurate sub-tick accounting of tasks so userspace reported
+cpu usage may be very different.
+
---
- Documentation/sysctl/kernel.txt | 25
+ Documentation/sysctl/kernel.txt | 28
+ Makefile | 2
fs/pipe.c | 4
fs/proc/base.c | 2
include/linux/init_task.h | 15
include/linux/ioprio.h | 2
- include/linux/sched.h | 193
+ include/linux/sched.h | 194
init/Kconfig | 61
+ init/main.c | 5
+ kernel/Kconfig.preempt | 19
kernel/Makefile | 4
kernel/delayacct.c | 2
kernel/exit.c | 6
kernel/fork.c | 2
kernel/kthread.c | 4
- kernel/posix-cpu-timers.c | 12
- kernel/sched.c |10241 ----------------------------------------
- kernel/sched_bfs.c | 5793 ++++++++++++++++++++++
+ kernel/posix-cpu-timers.c | 14
+ kernel/sched.c |10583 ----------------------------------------
+ kernel/sched_bfs.c | 6203 +++++++++++++++++++++++
kernel/sched_debug.c | 509 -
- kernel/sched_fair.c | 1835 -------
- kernel/sched_idletask.c | 128
- kernel/sched_rt.c | 1771 ------
- kernel/sysctl.c | 145
+ kernel/sched_fair.c | 1842 ------
+ kernel/sched_idletask.c | 129
+ kernel/sched_rt.c | 1787 ------
+ kernel/sysctl.c | 156
+ kernel/timer.c | 3
kernel/trace/trace.c | 4
kernel/workqueue.c | 2
mm/oom_kill.c | 2
- 23 files changed, 5896 insertions(+), 14866 deletions(-)
+ 27 files changed, 6319 insertions(+), 15265 deletions(-)
-Index: linux-2.6.30-bfs/Documentation/sysctl/kernel.txt
+Index: linux-2.6.31-bfs/Documentation/sysctl/kernel.txt
===================================================================
---- linux-2.6.30-bfs.orig/Documentation/sysctl/kernel.txt 2009-09-03 19:50:51.796053865 +1000
-+++ linux-2.6.30-bfs/Documentation/sysctl/kernel.txt 2009-09-03 19:51:10.160055089 +1000
+--- linux-2.6.31-bfs.orig/Documentation/sysctl/kernel.txt 2009-09-13 13:45:35.267511949 +1000
++++ linux-2.6.31-bfs/Documentation/sysctl/kernel.txt 2009-09-13 13:45:45.782386464 +1000
@@ -27,6 +27,7 @@
- domainname
- hostname
@@ -57,7 +63,7 @@
- java-appletviewer [ binfmt_java, obsolete ]
- java-interpreter [ binfmt_java, obsolete ]
- kstack_depth_to_print [ X86 only ]
-@@ -48,6 +49,7 @@
+@@ -49,6 +50,7 @@
- randomize_va_space
- real-root-dev ==> Documentation/initrd.txt
- reboot-cmd [ SPARC only ]
@@ -65,7 +71,7 @@
- rtsig-max
- rtsig-nr
- sem
-@@ -170,6 +172,16 @@
+@@ -171,6 +173,16 @@
==============================================================
@@ -82,7 +88,7 @@
l2cr: (PPC only)
This flag controls the L2 cache of G3 processor boards. If
-@@ -322,6 +334,19 @@
+@@ -333,6 +345,22 @@
==============================================================
@@ -91,21 +97,24 @@
+This is the smallest duration that any cpu process scheduling unit
+will run for. Increasing this value can increase throughput of cpu
+bound tasks substantially but at the expense of increased latencies
-+overall. This value is in milliseconds and the default value chosen
-+depends on the number of cpus available at scheduler initialisation
-+with a minimum of 6.
++overall. Conversely decreasing it will decrease average and maximum
++latencies but at the expense of throughput. This value is in
++milliseconds and the default value chosen depends on the number of
++cpus available at scheduler initialisation with a minimum of 6. The
++value can be set to 0 which means no more than one tick (limited
++by HZ resolution).
+
-+Valid values are from 1-5000.
++Valid values are from 0-5000.
+
+==============================================================
+
rtsig-max & rtsig-nr:
The file rtsig-max can be used to tune the maximum number
-Index: linux-2.6.30-bfs/fs/pipe.c
+Index: linux-2.6.31-bfs/fs/pipe.c
===================================================================
---- linux-2.6.30-bfs.orig/fs/pipe.c 2009-09-03 19:50:51.757054464 +1000
-+++ linux-2.6.30-bfs/fs/pipe.c 2009-09-03 19:51:10.181054636 +1000
+--- linux-2.6.31-bfs.orig/fs/pipe.c 2009-09-13 13:45:35.238512036 +1000
++++ linux-2.6.31-bfs/fs/pipe.c 2009-09-13 13:45:45.794386717 +1000
@@ -78,10 +78,6 @@
{
DEFINE_WAIT(wait);
@@ -117,11 +126,11 @@
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
pipe_unlock(pipe);
schedule();
-Index: linux-2.6.30-bfs/include/linux/init_task.h
+Index: linux-2.6.31-bfs/include/linux/init_task.h
===================================================================
---- linux-2.6.30-bfs.orig/include/linux/init_task.h 2009-09-03 19:50:51.802053428 +1000
-+++ linux-2.6.30-bfs/include/linux/init_task.h 2009-09-06 18:56:46.759601885 +1000
-@@ -119,21 +119,16 @@
+--- linux-2.6.31-bfs.orig/include/linux/init_task.h 2009-09-13 13:45:35.272511904 +1000
++++ linux-2.6.31-bfs/include/linux/init_task.h 2009-09-13 13:45:45.811386843 +1000
+@@ -116,21 +116,16 @@
.usage = ATOMIC_INIT(2), \
.flags = PF_KTHREAD, \
.lock_depth = -1, \
@@ -148,10 +157,10 @@
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
-Index: linux-2.6.30-bfs/include/linux/sched.h
+Index: linux-2.6.31-bfs/include/linux/sched.h
===================================================================
---- linux-2.6.30-bfs.orig/include/linux/sched.h 2009-09-03 19:50:51.813054751 +1000
-+++ linux-2.6.30-bfs/include/linux/sched.h 2009-09-03 23:38:22.267262950 +1000
+--- linux-2.6.31-bfs.orig/include/linux/sched.h 2009-09-13 13:45:35.281511942 +1000
++++ linux-2.6.31-bfs/include/linux/sched.h 2009-09-13 13:46:11.880566952 +1000
@@ -36,8 +36,11 @@
#define SCHED_FIFO 1
#define SCHED_RR 2
@@ -166,7 +175,7 @@
#ifdef __KERNEL__
-@@ -141,13 +144,10 @@
+@@ -144,13 +147,10 @@
extern unsigned long get_parent_ip(unsigned long addr);
struct seq_file;
@@ -180,7 +189,7 @@
#else
static inline void
proc_sched_show_task(struct task_struct *p, struct seq_file *m)
-@@ -156,10 +156,6 @@
+@@ -159,10 +159,6 @@
static inline void proc_sched_set_task(struct task_struct *p)
{
}
@@ -191,7 +200,7 @@
#endif
extern unsigned long long time_sync_thresh;
-@@ -251,8 +247,8 @@
+@@ -254,8 +250,8 @@
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
@@ -202,11 +211,7 @@
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
-@@ -969,151 +965,9 @@
- struct mempolicy;
- struct pipe_inode_info;
- struct uts_namespace;
--
+@@ -1021,148 +1017,6 @@
struct rq;
struct sched_domain;
@@ -289,9 +294,10 @@
- u64 last_wakeup;
- u64 avg_overlap;
-
+- u64 nr_migrations;
+-
- u64 start_runtime;
- u64 avg_wakeup;
-- u64 nr_migrations;
-
-#ifdef CONFIG_SCHEDSTATS
- u64 wait_start;
@@ -354,7 +360,7 @@
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
-@@ -1123,17 +977,18 @@
+@@ -1172,17 +1026,18 @@
int lock_depth; /* BKL lock depth */
@@ -381,7 +387,7 @@
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
-@@ -1156,6 +1011,9 @@
+@@ -1205,6 +1060,9 @@
unsigned int policy;
cpumask_t cpus_allowed;
@@ -391,7 +397,15 @@
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
-@@ -1446,11 +1304,14 @@
+@@ -1273,6 +1131,7 @@
+ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
+
+ cputime_t utime, stime, utimescaled, stimescaled;
++ unsigned long utime_pc, stime_pc;
+ cputime_t gtime;
+ cputime_t prev_utime, prev_stime;
+ unsigned long nvcsw, nivcsw; /* context switch counts */
+@@ -1497,11 +1356,14 @@
* priority to a value higher than any user task. Note:
* MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
*/
@@ -409,7 +423,7 @@
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
static inline int rt_prio(int prio)
-@@ -1733,11 +1594,7 @@
+@@ -1785,11 +1647,7 @@
extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
@@ -421,11 +435,11 @@
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
-Index: linux-2.6.30-bfs/kernel/sched.c
+Index: linux-2.6.31-bfs/kernel/sched.c
===================================================================
---- linux-2.6.30-bfs.orig/kernel/sysctl.c 2009-09-03 19:50:51.867053380 +1000
-+++ linux-2.6.30-bfs/kernel/sysctl.c 2009-09-03 19:51:10.311054330 +1000
-@@ -83,6 +83,8 @@
+--- linux-2.6.31-bfs.orig/kernel/sysctl.c 2009-09-13 13:45:35.325511987 +1000
++++ linux-2.6.31-bfs/kernel/sysctl.c 2009-09-13 13:45:45.824386781 +1000
+@@ -86,6 +86,8 @@
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
@@ -434,7 +448,7 @@
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif
-@@ -97,10 +99,11 @@
+@@ -100,10 +102,11 @@
#endif
static int zero;
@@ -448,7 +462,7 @@
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
-@@ -234,123 +237,7 @@
+@@ -238,134 +241,7 @@
{ .ctl_name = 0 }
};
@@ -544,6 +558,17 @@
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "timer_migration",
+- .data = &sysctl_timer_migration,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_minmax,
+- .strategy = &sysctl_intvec,
+- .extra1 = &zero,
+- .extra2 = &one,
+- },
-#endif
- {
- .ctl_name = CTL_UNNUMBERED,
@@ -572,7 +597,7 @@
#ifdef CONFIG_PROVE_LOCKING
{
.ctl_name = CTL_UNNUMBERED,
-@@ -756,6 +643,28 @@
+@@ -798,6 +674,28 @@
.proc_handler = &proc_dointvec,
},
#endif
@@ -584,7 +609,7 @@
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
-+ .extra1 = &one,
++ .extra1 = &zero,
+ .extra2 = &five_thousand,
+ },
+ {
@@ -601,11 +626,11 @@
#if defined(CONFIG_S390) && defined(CONFIG_SMP)
{
.ctl_name = KERN_SPIN_RETRY,
-Index: linux-2.6.30-bfs/kernel/workqueue.c
+Index: linux-2.6.31-bfs/kernel/workqueue.c
===================================================================
---- linux-2.6.30-bfs.orig/kernel/workqueue.c 2009-09-03 19:50:51.895053538 +1000
-+++ linux-2.6.30-bfs/kernel/workqueue.c 2009-09-03 19:51:10.366061008 +1000
-@@ -320,8 +320,6 @@
+--- linux-2.6.31-bfs.orig/kernel/workqueue.c 2009-09-13 13:45:35.345512018 +1000
++++ linux-2.6.31-bfs/kernel/workqueue.c 2009-09-13 13:45:45.836549400 +1000
+@@ -317,8 +317,6 @@
if (cwq->wq->freezeable)
set_freezable();
@@ -614,11 +639,11 @@
for (;;) {
prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
if (!freezing(current) &&
-Index: linux-2.6.30-bfs/kernel/sched_fair.c
+Index: linux-2.6.31-bfs/kernel/sched_fair.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.30-bfs/kernel/sched_bfs.c 2009-09-06 18:56:58.389602008 +1000
-@@ -0,0 +1,5793 @@
++++ linux-2.6.31-bfs/kernel/sched_bfs.c 2009-09-14 16:41:30.090860676 +1000
+@@ -0,0 +1,6203 @@
+/*
+ * kernel/sched_bfs.c, was sched.c
+ *
@@ -662,6 +687,7 @@
+#include <linux/completion.h>
+#include <linux/kernel_stat.h>
+#include <linux/debug_locks.h>
++#include <linux/perf_counter.h>
+#include <linux/security.h>
+#include <linux/notifier.h>
+#include <linux/profile.h>
@@ -687,20 +713,25 @@
+#include <linux/delayacct.h>
+#include <linux/reciprocal_div.h>
+#include <linux/log2.h>
++#include <linux/bootmem.h>
+#include <linux/ftrace.h>
-+#include <trace/sched.h>
+
+#include <asm/tlb.h>
+#include <asm/unistd.h>
+
++#define CREATE_TRACE_POINTS
++#include <trace/events/sched.h>
++
+#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO)
+#define rt_task(p) rt_prio((p)->prio)
++#define rt_queue(rq) rt_prio((rq)->rq_prio)
+#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH))
+#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \
+ (policy) == SCHED_RR)
+#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy))
+#define idleprio_task(p) unlikely((p)->policy == SCHED_IDLEPRIO)
+#define iso_task(p) unlikely((p)->policy == SCHED_ISO)
++#define iso_queue(rq) unlikely((rq)->rq_policy == SCHED_ISO)
+#define ISO_PERIOD ((5 * HZ * num_online_cpus()) + 1)
+
+/*
@@ -727,13 +758,6 @@
+#define MS_TO_NS(TIME) ((TIME) * 1000000)
+#define MS_TO_US(TIME) ((TIME) * 1000)
+
-+
-+DEFINE_TRACE(sched_wait_task);
-+DEFINE_TRACE(sched_wakeup);
-+DEFINE_TRACE(sched_wakeup_new);
-+DEFINE_TRACE(sched_switch);
-+DEFINE_TRACE(sched_migrate_task);
-+
+#ifdef CONFIG_SMP
+/*
+ * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@ -788,6 +812,7 @@
+#ifdef CONFIG_SMP
+ cpumask_t cpu_idle_map;
+#endif
++ void (*wunt)(struct task_struct *, struct rq *, unsigned long);
+};
+
+static struct global_rq grq;
@@ -807,8 +832,19 @@
+ struct task_struct *curr, *idle;
+ struct mm_struct *prev_mm;
+
-+ unsigned long queued_deadline;
-+ int queued_prio;
++ /* Stored data about rq->curr to work outside grq lock */
++ unsigned long rq_deadline;
++ unsigned int rq_policy;
++ int rq_time_slice;
++ int rq_prio;
++
++ /* Accurate timekeeping data */
++ u64 timekeep_clock;
++ unsigned long user_ns, nice_ns, irq_ns, softirq_ns, system_ns,
++ iowait_ns, idle_ns;
++ unsigned long user_pc, nice_pc, irq_pc, softirq_pc, system_pc,
++ iowait_pc, idle_pc;
++ unsigned long total_ns, last_total_ns;
+
+ atomic_t nr_iowait;
+
@@ -922,7 +958,7 @@
+# define finish_arch_switch(prev) do { } while (0)
+#endif
+
-+static inline void update_rq_clock(struct rq *rq)
++inline void update_rq_clock(struct rq *rq)
+{
+ rq->clock = sched_clock_cpu(cpu_of(rq));
+}
@@ -935,6 +971,7 @@
+static inline void grq_lock(void)
+ __acquires(grq.lock)
+{
++ smp_mb();
+ spin_lock(&grq.lock);
+}
+
@@ -947,14 +984,15 @@
+static inline void grq_lock_irq(void)
+ __acquires(grq.lock)
+{
++ smp_mb();
+ spin_lock_irq(&grq.lock);
+}
+
-+static inline void time_lock_rq(struct rq *rq)
++static inline void time_lock_grq(struct rq *rq)
+ __acquires(grq.lock)
+{
-+ grq_lock();
+ update_rq_clock(rq);
++ grq_lock();
+}
+
+static inline void grq_unlock_irq(void)
@@ -967,7 +1005,7 @@
+ __acquires(grq.lock)
+{
+ local_irq_save(*flags);
-+ spin_lock(&grq.lock);
++ grq_lock();
+}
+
+static inline void grq_unlock_irqrestore(unsigned long *flags)
@@ -991,9 +1029,10 @@
+{
+ struct rq *rq;
+
-+ grq_lock_irqsave(flags);
+ rq = task_rq(p);
++ local_irq_save(*flags);
+ update_rq_clock(rq);
++ grq_lock();
+ return rq;
+}
+
@@ -1026,7 +1065,7 @@
+ __acquires(grq.lock)
+{
+ local_irq_save(*flags);
-+ time_lock_rq(rq);
++ time_lock_grq(rq);
+}
+
+static inline struct rq *__task_grq_lock(struct task_struct *p)
@@ -1131,17 +1170,12 @@
+/*
+ * Adding to the global runqueue. Enter with grq locked.
+ */
-+static inline void enqueue_task(struct task_struct *p)
++static void enqueue_task(struct task_struct *p)
+{
-+ if (idleprio_task(p) && !rt_task(p)) {
-+ if (idleprio_suitable(p))
-+ p->prio = p->normal_prio;
-+ else
-+ p->prio = NORMAL_PRIO;
-+ }
-+
-+ if (iso_task(p) && !rt_task(p)) {
-+ if (isoprio_suitable())
++ if (!rt_task(p)) {
++ /* Check it hasn't gotten rt from PI */
++ if ((idleprio_task(p) && idleprio_suitable(p)) ||
++ (iso_task(p) && isoprio_suitable()))
+ p->prio = p->normal_prio;
+ else
+ p->prio = NORMAL_PRIO;
@@ -1166,7 +1200,7 @@
+
+static inline int prio_ratio(struct task_struct *p)
+{
-+ return prio_ratios[USER_PRIO(p->static_prio)];
++ return prio_ratios[TASK_USER_PRIO(p)];
+}
+
+/*
@@ -1174,7 +1208,7 @@
+ * length. CPU distribution is handled by giving different deadlines to
+ * tasks of different priorities.
+ */
-+static int task_timeslice(struct task_struct *p)
++static inline int task_timeslice(struct task_struct *p)
+{
+ return (rr_interval * prio_ratio(p) / 100);
+}
@@ -1260,7 +1294,7 @@
+#ifdef CONFIG_SMP
+void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
-+ trace_sched_migrate_task(p, task_cpu(p), cpu);
++ trace_sched_migrate_task(p, cpu);
+ /*
+ * After ->cpu is set up to a new value, task_grq_lock(p, ...) can be
+ * successfuly executed on another CPU. We must ensure that updates of
@@ -1355,6 +1389,49 @@
+};
+
+/*
++ * wait_task_context_switch - wait for a thread to complete at least one
++ * context switch.
++ *
++ * @p must not be current.
++ */
++void wait_task_context_switch(struct task_struct *p)
++{
++ unsigned long nvcsw, nivcsw, flags;
++ int running;
++ struct rq *rq;
++
++ nvcsw = p->nvcsw;
++ nivcsw = p->nivcsw;
++ for (;;) {
++ /*
++ * The runqueue is assigned before the actual context
++ * switch. We need to take the runqueue lock.
++ *
++ * We could check initially without the lock but it is
++ * very likely that we need to take the lock in every
++ * iteration.
++ */
++ rq = task_grq_lock(p, &flags);
++ running = task_running(p);
++ task_grq_unlock(&flags);
++
++ if (likely(!running))
++ break;
++ /*
++ * The switch count is incremented before the actual
++ * context switch. We thus wait for two switches to be
++ * sure at least one completed.
++ */
++ if ((p->nvcsw - nvcsw) > 1)
++ break;
++ if ((p->nivcsw - nivcsw) > 1)
++ break;
<<Diff was trimmed, longer than 597 lines>>
---- CVS-web:
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel-desktop/kernel-desktop-sched-bfs.patch?r1=1.1.2.4&r2=1.1.2.5&f=u
More information about the pld-cvs-commit
mailing list