packages: kernel-desktop/kernel-desktop-sched-bfs.patch (NEW) - bfs scheduler
cactus
cactus at pld-linux.org
Thu Sep 3 10:06:44 CEST 2009
Author: cactus Date: Thu Sep 3 08:06:44 2009 GMT
Module: packages Tag: HEAD
---- Log message:
- bfs scheduler
---- Files affected:
packages/kernel-desktop:
kernel-desktop-sched-bfs.patch (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: packages/kernel-desktop/kernel-desktop-sched-bfs.patch
diff -u /dev/null packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1
--- /dev/null Thu Sep 3 10:06:44 2009
+++ packages/kernel-desktop/kernel-desktop-sched-bfs.patch Thu Sep 3 10:06:38 2009
@@ -0,0 +1,9333 @@
+The Brain Fuck Scheduler v0.203 by Con Kolivas.
+
+A single shared runqueue O(n) strict fairness earliest deadline first design.
+
+Ultra low latency and excellent desktop performance.
+Not recommended for 4096 cpus.
+
+Scalability is optimal when your workload is equal to the number of CPUs on
+bfs; i.e., you should ONLY do make -j4 on quad core, -j2 on dual core and so on.
+It's actually faster than higher numbers of jobs on *any* scheduler.
+
+Features SCHED_IDLEPRIO and SCHED_ISO scheduling policies as well.
+
+To run something idleprio, use schedtool like so:
+
+schedtool -D -e make -j4
+
+To run something isoprio, use schedtool like so:
+
+schedtool -I -e amarok
+
+---
+ Documentation/sysctl/kernel.txt | 25
+ fs/pipe.c | 4
+ fs/proc/base.c | 2
+ include/linux/init_task.h | 15
+ include/linux/ioprio.h | 2
+ include/linux/sched.h | 198
+ init/Kconfig | 173
+ kernel/Makefile | 4
+ kernel/delayacct.c | 2
+ kernel/exit.c | 6
+ kernel/fork.c | 2
+ kernel/kthread.c | 4
+ kernel/posix-cpu-timers.c | 12
+ kernel/sched.c |10241 ----------------------------------------
+ kernel/sched_bfs.c | 5819 ++++++++++++++++++++++
+ kernel/sched_debug.c | 509 -
+ kernel/sched_fair.c | 1835 -------
+ kernel/sched_idletask.c | 128
+ kernel/sched_rt.c | 1771 ------
+ kernel/sysctl.c | 145
+ kernel/trace/trace.c | 4
+ kernel/workqueue.c | 2
+ mm/oom_kill.c | 2
+ 23 files changed, 5925 insertions(+), 14980 deletions(-)
+
+Index: linux-2.6.30-test/Documentation/sysctl/kernel.txt
+===================================================================
+--- linux-2.6.30-test.orig/Documentation/sysctl/kernel.txt 2009-09-01 09:56:45.475824529 +1000
++++ linux-2.6.30-test/Documentation/sysctl/kernel.txt 2009-09-01 09:57:21.504098535 +1000
+@@ -27,6 +27,7 @@
+ - domainname
+ - hostname
+ - hotplug
++- iso_cpu
+ - java-appletviewer [ binfmt_java, obsolete ]
+ - java-interpreter [ binfmt_java, obsolete ]
+ - kstack_depth_to_print [ X86 only ]
+@@ -48,6 +49,7 @@
+ - randomize_va_space
+ - real-root-dev ==> Documentation/initrd.txt
+ - reboot-cmd [ SPARC only ]
++- rr_interval
+ - rtsig-max
+ - rtsig-nr
+ - sem
+@@ -170,6 +172,16 @@
+
+ ==============================================================
+
++iso_cpu:
++
++This sets the percentage cpu that the unprivileged SCHED_ISO tasks can
++run effectively at realtime priority, averaged over a rolling five
++seconds over the -whole- system, meaning all cpus.
++
++Set to 70 (percent) by default.
++
++==============================================================
++
+ l2cr: (PPC only)
+
+ This flag controls the L2 cache of G3 processor boards. If
+@@ -322,6 +334,19 @@
+
+ ==============================================================
+
++rr_interval:
++
++This is the smallest duration that any cpu process scheduling unit
++will run for. Increasing this value can increase throughput of cpu
++bound tasks substantially but at the expense of increased latencies
++overall. This value is in milliseconds and the default value chosen
++depends on the number of cpus available at scheduler initialisation
++with a minimum of 6.
++
++Valid values are from 1-5000.
++
++==============================================================
++
+ rtsig-max & rtsig-nr:
+
+ The file rtsig-max can be used to tune the maximum number
+Index: linux-2.6.30-test/fs/pipe.c
+===================================================================
+--- linux-2.6.30-test.orig/fs/pipe.c 2009-09-01 09:56:45.479826436 +1000
++++ linux-2.6.30-test/fs/pipe.c 2009-09-01 09:57:21.505096986 +1000
+@@ -78,10 +78,6 @@
+ {
+ DEFINE_WAIT(wait);
+
+- /*
+- * Pipes are system-local resources, so sleeping on them
+- * is considered a noninteractive wait:
+- */
+ prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
+ pipe_unlock(pipe);
+ schedule();
+Index: linux-2.6.30-test/include/linux/init_task.h
+===================================================================
+--- linux-2.6.30-test.orig/include/linux/init_task.h 2009-09-01 09:56:45.560826284 +1000
++++ linux-2.6.30-test/include/linux/init_task.h 2009-09-01 09:57:21.505096986 +1000
+@@ -119,21 +119,16 @@
+ .usage = ATOMIC_INIT(2), \
+ .flags = PF_KTHREAD, \
+ .lock_depth = -1, \
+- .prio = MAX_PRIO-20, \
++ .prio = NORMAL_PRIO, \
+ .static_prio = MAX_PRIO-20, \
+- .normal_prio = MAX_PRIO-20, \
++ .normal_prio = NORMAL_PRIO, \
++ .deadline = 0, \
+ .policy = SCHED_NORMAL, \
+ .cpus_allowed = CPU_MASK_ALL, \
+ .mm = NULL, \
+ .active_mm = &init_mm, \
+- .se = { \
+- .group_node = LIST_HEAD_INIT(tsk.se.group_node), \
+- }, \
+- .rt = { \
+- .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
+- .time_slice = HZ, \
+- .nr_cpus_allowed = NR_CPUS, \
+- }, \
++ .run_list = LIST_HEAD_INIT(tsk.run_list), \
++ .time_slice = HZ, \
+ .tasks = LIST_HEAD_INIT(tsk.tasks), \
+ .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
+ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \
+Index: linux-2.6.30-test/include/linux/sched.h
+===================================================================
+--- linux-2.6.30-test.orig/include/linux/sched.h 2009-09-01 09:56:45.570847184 +1000
++++ linux-2.6.30-test/include/linux/sched.h 2009-09-01 09:57:21.506111012 +1000
+@@ -36,8 +36,11 @@
+ #define SCHED_FIFO 1
+ #define SCHED_RR 2
+ #define SCHED_BATCH 3
+-/* SCHED_ISO: reserved but not implemented yet */
+-#define SCHED_IDLE 5
++#define SCHED_ISO 4
++#define SCHED_IDLEPRIO 5
++
++#define SCHED_MAX (SCHED_IDLEPRIO)
++#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX)
+
+ #ifdef __KERNEL__
+
+@@ -141,13 +144,10 @@
+ extern unsigned long get_parent_ip(unsigned long addr);
+
+ struct seq_file;
+-struct cfs_rq;
+ struct task_group;
+ #ifdef CONFIG_SCHED_DEBUG
+ extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
+ extern void proc_sched_set_task(struct task_struct *p);
+-extern void
+-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+ #else
+ static inline void
+ proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+@@ -156,10 +156,6 @@
+ static inline void proc_sched_set_task(struct task_struct *p)
+ {
+ }
+-static inline void
+-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+-{
+-}
+ #endif
+
+ extern unsigned long long time_sync_thresh;
+@@ -251,8 +247,8 @@
+ extern void init_idle(struct task_struct *idle, int cpu);
+ extern void init_idle_bootup_task(struct task_struct *idle);
+
+-extern int runqueue_is_locked(void);
+-extern void task_rq_unlock_wait(struct task_struct *p);
++extern int grunqueue_is_locked(void);
++extern void grq_unlock_wait(void);
+
+ extern cpumask_var_t nohz_cpu_mask;
+ #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
+@@ -969,151 +965,9 @@
+ struct mempolicy;
+ struct pipe_inode_info;
+ struct uts_namespace;
+-
+ struct rq;
+ struct sched_domain;
+
+-struct sched_class {
+- const struct sched_class *next;
+-
+- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
+- void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+- void (*yield_task) (struct rq *rq);
+-
+- void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
+-
+- struct task_struct * (*pick_next_task) (struct rq *rq);
+- void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+-
+-#ifdef CONFIG_SMP
+- int (*select_task_rq)(struct task_struct *p, int sync);
+-
+- unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
+- struct rq *busiest, unsigned long max_load_move,
+- struct sched_domain *sd, enum cpu_idle_type idle,
+- int *all_pinned, int *this_best_prio);
+-
+- int (*move_one_task) (struct rq *this_rq, int this_cpu,
+- struct rq *busiest, struct sched_domain *sd,
+- enum cpu_idle_type idle);
+- void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+- int (*needs_post_schedule) (struct rq *this_rq);
+- void (*post_schedule) (struct rq *this_rq);
+- void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+-
+- void (*set_cpus_allowed)(struct task_struct *p,
+- const struct cpumask *newmask);
+-
+- void (*rq_online)(struct rq *rq);
+- void (*rq_offline)(struct rq *rq);
+-#endif
+-
+- void (*set_curr_task) (struct rq *rq);
+- void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+- void (*task_new) (struct rq *rq, struct task_struct *p);
+-
+- void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+- int running);
+- void (*switched_to) (struct rq *this_rq, struct task_struct *task,
+- int running);
+- void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+- int oldprio, int running);
+-
+-#ifdef CONFIG_FAIR_GROUP_SCHED
+- void (*moved_group) (struct task_struct *p);
+-#endif
+-};
+-
+-struct load_weight {
+- unsigned long weight, inv_weight;
+-};
+-
+-/*
+- * CFS stats for a schedulable entity (task, task-group etc)
+- *
+- * Current field usage histogram:
+- *
+- * 4 se->block_start
+- * 4 se->run_node
+- * 4 se->sleep_start
+- * 6 se->load.weight
+- */
+-struct sched_entity {
+- struct load_weight load; /* for load-balancing */
+- struct rb_node run_node;
+- struct list_head group_node;
+- unsigned int on_rq;
+-
+- u64 exec_start;
+- u64 sum_exec_runtime;
+- u64 vruntime;
+- u64 prev_sum_exec_runtime;
+-
+- u64 last_wakeup;
+- u64 avg_overlap;
+-
+- u64 start_runtime;
+- u64 avg_wakeup;
+- u64 nr_migrations;
+-
+-#ifdef CONFIG_SCHEDSTATS
+- u64 wait_start;
+- u64 wait_max;
+- u64 wait_count;
+- u64 wait_sum;
+-
+- u64 sleep_start;
+- u64 sleep_max;
+- s64 sum_sleep_runtime;
+-
+- u64 block_start;
+- u64 block_max;
+- u64 exec_max;
+- u64 slice_max;
+-
+- u64 nr_migrations_cold;
+- u64 nr_failed_migrations_affine;
+- u64 nr_failed_migrations_running;
+- u64 nr_failed_migrations_hot;
+- u64 nr_forced_migrations;
+- u64 nr_forced2_migrations;
+-
+- u64 nr_wakeups;
+- u64 nr_wakeups_sync;
+- u64 nr_wakeups_migrate;
+- u64 nr_wakeups_local;
+- u64 nr_wakeups_remote;
+- u64 nr_wakeups_affine;
+- u64 nr_wakeups_affine_attempts;
+- u64 nr_wakeups_passive;
+- u64 nr_wakeups_idle;
+-#endif
+-
+-#ifdef CONFIG_FAIR_GROUP_SCHED
+- struct sched_entity *parent;
+- /* rq on which this entity is (to be) queued: */
+- struct cfs_rq *cfs_rq;
+- /* rq "owned" by this entity/group: */
+- struct cfs_rq *my_q;
+-#endif
+-};
+-
+-struct sched_rt_entity {
+- struct list_head run_list;
+- unsigned long timeout;
+- unsigned int time_slice;
+- int nr_cpus_allowed;
+-
+- struct sched_rt_entity *back;
+-#ifdef CONFIG_RT_GROUP_SCHED
+- struct sched_rt_entity *parent;
+- /* rq on which this entity is (to be) queued: */
+- struct rt_rq *rt_rq;
+- /* rq "owned" by this entity/group: */
+- struct rt_rq *my_q;
+-#endif
+-};
+-
+ struct task_struct {
+ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
+ void *stack;
+@@ -1123,17 +977,18 @@
+
+ int lock_depth; /* BKL lock depth */
+
+-#ifdef CONFIG_SMP
+-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
+ int oncpu;
+-#endif
+-#endif
+-
++ int load_weight; /* for niceness load balancing purposes */
+ int prio, static_prio, normal_prio;
++ int time_slice, first_time_slice;
++ unsigned long deadline;
++ struct list_head run_list;
+ unsigned int rt_priority;
+- const struct sched_class *sched_class;
+- struct sched_entity se;
+- struct sched_rt_entity rt;
++ unsigned long long timestamp, last_ran;
++ u64 sched_time; /* sched_clock time spent running */
++
++ int rt_nr_cpus_allowed;
++ unsigned long rt_timeout;
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+ /* list of struct preempt_notifier: */
+@@ -1156,6 +1011,9 @@
+
+ unsigned int policy;
+ cpumask_t cpus_allowed;
++#ifdef CONFIG_HOTPLUG_CPU
++ cpumask_t unplugged_mask;
++#endif
+
+ #ifdef CONFIG_PREEMPT_RCU
+ int rcu_read_lock_nesting;
+@@ -1446,11 +1304,14 @@
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+-
++#define PRIO_RANGE (40)
+ #define MAX_USER_RT_PRIO 100
+ #define MAX_RT_PRIO MAX_USER_RT_PRIO
+-
+-#define MAX_PRIO (MAX_RT_PRIO + 40)
++#define MAX_PRIO (MAX_RT_PRIO + PRIO_RANGE)
++#define ISO_PRIO (MAX_RT_PRIO)
++#define NORMAL_PRIO (MAX_RT_PRIO + 1)
++#define IDLE_PRIO (MAX_RT_PRIO + 2)
++#define PRIO_LIMIT ((IDLE_PRIO) + 1)
+ #define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+
+ static inline int rt_prio(int prio)
+@@ -1733,11 +1594,7 @@
+ extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
+
+ /* sched_exec is called by processes performing an exec */
+-#ifdef CONFIG_SMP
+-extern void sched_exec(void);
+-#else
+ #define sched_exec() {}
+-#endif
+
+ extern void sched_clock_idle_sleep_event(void);
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+@@ -2283,7 +2140,10 @@
+ return task_thread_info(p)->cpu;
+ }
+
+-extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
++static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
++{
++ task_thread_info(p)->cpu = cpu;
++}
+
+ #else
+
+Index: linux-2.6.30-test/kernel/sysctl.c
+===================================================================
+--- linux-2.6.30-test.orig/kernel/sysctl.c 2009-09-01 09:56:45.546847267 +1000
++++ linux-2.6.30-test/kernel/sysctl.c 2009-09-01 09:57:21.516097550 +1000
+@@ -83,6 +83,8 @@
+ extern int compat_log;
+ extern int latencytop_enabled;
+ extern int sysctl_nr_open_min, sysctl_nr_open_max;
++extern int rr_interval;
++extern int sched_iso_cpu;
+ #ifndef CONFIG_MMU
+ extern int sysctl_nr_trim_pages;
+ #endif
+@@ -97,10 +99,11 @@
+ #endif
+
+ static int zero;
+-static int __maybe_unused one = 1;
+ static int __maybe_unused two = 2;
+ static unsigned long one_ul = 1;
+-static int one_hundred = 100;
++static int __read_mostly one = 1;
++static int __read_mostly one_hundred = 100;
++static int __read_mostly five_thousand = 5000;
+
+ /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
+ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
+@@ -234,123 +237,7 @@
+ { .ctl_name = 0 }
+ };
+
+-#ifdef CONFIG_SCHED_DEBUG
+-static int min_sched_granularity_ns = 100000; /* 100 usecs */
+-static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
+-static int min_wakeup_granularity_ns; /* 0 usecs */
+-static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
+-#endif
+-
+ static struct ctl_table kern_table[] = {
+-#ifdef CONFIG_SCHED_DEBUG
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_min_granularity_ns",
+- .data = &sysctl_sched_min_granularity,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &sched_nr_latency_handler,
+- .strategy = &sysctl_intvec,
+- .extra1 = &min_sched_granularity_ns,
+- .extra2 = &max_sched_granularity_ns,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_latency_ns",
+- .data = &sysctl_sched_latency,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &sched_nr_latency_handler,
+- .strategy = &sysctl_intvec,
+- .extra1 = &min_sched_granularity_ns,
+- .extra2 = &max_sched_granularity_ns,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_wakeup_granularity_ns",
+- .data = &sysctl_sched_wakeup_granularity,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_minmax,
+- .strategy = &sysctl_intvec,
+- .extra1 = &min_wakeup_granularity_ns,
+- .extra2 = &max_wakeup_granularity_ns,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_shares_ratelimit",
+- .data = &sysctl_sched_shares_ratelimit,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_shares_thresh",
+- .data = &sysctl_sched_shares_thresh,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_minmax,
+- .strategy = &sysctl_intvec,
+- .extra1 = &zero,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_child_runs_first",
+- .data = &sysctl_sched_child_runs_first,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_features",
+- .data = &sysctl_sched_features,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_migration_cost",
+- .data = &sysctl_sched_migration_cost,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_nr_migrate",
+- .data = &sysctl_sched_nr_migrate,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+-#endif
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_rt_period_us",
+- .data = &sysctl_sched_rt_period,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &sched_rt_handler,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_rt_runtime_us",
+- .data = &sysctl_sched_rt_runtime,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &sched_rt_handler,
+- },
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "sched_compat_yield",
+- .data = &sysctl_sched_compat_yield,
+- .maxlen = sizeof(unsigned int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+ #ifdef CONFIG_PROVE_LOCKING
+ {
+ .ctl_name = CTL_UNNUMBERED,
+@@ -756,6 +643,28 @@
+ .proc_handler = &proc_dointvec,
+ },
+ #endif
++ {
<<Diff was trimmed, longer than 597 lines>>
More information about the pld-cvs-commit
mailing list