packages (Titanium): kernel-desktop/kernel-desktop-sched-bfs.patch - BFS pa...
lmasko
lmasko at pld-linux.org
Sat May 22 11:56:58 CEST 2010
Author: lmasko Date: Sat May 22 09:56:58 2010 GMT
Module: packages Tag: Titanium
---- Log message:
- BFS patch updated to the version appropriate for 2.6.34.
---- Files affected:
packages/kernel-desktop:
kernel-desktop-sched-bfs.patch (1.1.2.17 -> 1.1.2.18)
---- Diffs:
================================================================
Index: packages/kernel-desktop/kernel-desktop-sched-bfs.patch
diff -u packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.17 packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.18
--- packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.17 Wed Apr 28 19:21:43 2010
+++ packages/kernel-desktop/kernel-desktop-sched-bfs.patch Sat May 22 11:56:53 2010
@@ -1,4 +1,4 @@
-The Brain Fuck Scheduler v0.316 by Con Kolivas.
+The Brain Fuck Scheduler v0.318 by Con Kolivas.
A single shared runqueue O(n) strict fairness earliest deadline first design.
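For orientation, the earliest-deadline-first pick this design describes can be sketched in a few lines of C. This is illustrative only; the struct and field names below are hypothetical, not identifiers from sched_bfs.c:

    /* Illustrative O(n) earliest-deadline-first pick over a single
     * shared runqueue: scan every queued task and take the one with
     * the earliest virtual deadline. All names are hypothetical. */
    struct task_sketch {
            unsigned long deadline;       /* virtual deadline */
            struct task_sketch *next;     /* one global list */
    };

    static struct task_sketch *edf_pick(struct task_sketch *head)
    {
            struct task_sketch *p, *best = head;

            for (p = head; p; p = p->next)        /* O(n) scan */
                    if (p->deadline < best->deadline)
                            best = p;
            return best;                  /* NULL when queue is empty */
    }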
@@ -20,7 +20,7 @@
schedtool -I -e amarok
-Now includes accurate sub-tick accounting of tasks so userspace reported
+Includes accurate sub-tick accounting of tasks so userspace reported
cpu usage may be very different if you have very short lived tasks.
---
@@ -32,23 +32,23 @@
include/linux/ioprio.h | 2
include/linux/sched.h | 107
init/Kconfig | 17
- init/main.c | 2
+ init/main.c | 1
kernel/delayacct.c | 2
kernel/exit.c | 2
kernel/kthread.c | 2
kernel/posix-cpu-timers.c | 14
kernel/sched.c | 4
- kernel/sched_bfs.c | 6723 ++++++++++++++++++++++++++++++
+ kernel/sched_bfs.c | 6898 ++++++++++++++++++++++++++++++
kernel/slow-work.c | 1
kernel/sysctl.c | 31
lib/Kconfig.debug | 2
mm/oom_kill.c | 2
- 19 files changed, 7336 insertions(+), 29 deletions(-)
+ 19 files changed, 7510 insertions(+), 29 deletions(-)
-Index: linux-2.6.33-bfs/Documentation/sysctl/kernel.txt
+Index: linux-2.6.34-ck1/Documentation/sysctl/kernel.txt
===================================================================
---- linux-2.6.33-bfs.orig/Documentation/sysctl/kernel.txt 2010-02-25 21:51:46.000000000 +1100
-+++ linux-2.6.33-bfs/Documentation/sysctl/kernel.txt 2010-04-25 15:33:47.431132113 +1000
+--- linux-2.6.34-ck1.orig/Documentation/sysctl/kernel.txt 2010-05-18 12:24:34.172444877 +1000
++++ linux-2.6.34-ck1/Documentation/sysctl/kernel.txt 2010-05-18 12:26:15.769444964 +1000
@@ -31,6 +31,7 @@ show up in /proc/sys/kernel:
- domainname
- hostname
@@ -103,11 +103,11 @@
rtsig-max & rtsig-nr:
The file rtsig-max can be used to tune the maximum number
-Index: linux-2.6.33-bfs/include/linux/init_task.h
+Index: linux-2.6.34-ck1/include/linux/init_task.h
===================================================================
---- linux-2.6.33-bfs.orig/include/linux/init_task.h 2010-02-25 21:51:52.000000000 +1100
-+++ linux-2.6.33-bfs/include/linux/init_task.h 2010-04-25 15:33:47.431132113 +1000
-@@ -115,6 +115,69 @@ extern struct cred init_cred;
+--- linux-2.6.34-ck1.orig/include/linux/init_task.h 2010-05-18 12:24:34.178444440 +1000
++++ linux-2.6.34-ck1/include/linux/init_task.h 2010-05-18 12:26:15.769444964 +1000
+@@ -107,6 +107,69 @@ extern struct cred init_cred;
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
*/
@@ -177,7 +177,7 @@
#define INIT_TASK(tsk) \
{ \
.state = 0, \
-@@ -181,7 +244,7 @@ extern struct cred init_cred;
+@@ -173,7 +236,7 @@ extern struct cred init_cred;
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
}
@@ -186,10 +186,10 @@
#define INIT_CPU_TIMERS(cpu_timers) \
{ \
-Index: linux-2.6.33-bfs/include/linux/sched.h
+Index: linux-2.6.34-ck1/include/linux/sched.h
===================================================================
---- linux-2.6.33-bfs.orig/include/linux/sched.h 2010-02-25 21:51:52.000000000 +1100
-+++ linux-2.6.33-bfs/include/linux/sched.h 2010-04-25 15:39:03.378132181 +1000
+--- linux-2.6.34-ck1.orig/include/linux/sched.h 2010-05-18 12:24:34.191445024 +1000
++++ linux-2.6.34-ck1/include/linux/sched.h 2010-05-18 12:26:15.769444964 +1000
@@ -36,8 +36,15 @@
#define SCHED_FIFO 1
#define SCHED_RR 2
@@ -207,7 +207,7 @@
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
#define SCHED_RESET_ON_FORK 0x40000000
-@@ -264,9 +271,6 @@ extern asmlinkage void schedule_tail(str
+@@ -268,9 +275,6 @@ extern asmlinkage void schedule_tail(str
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
@@ -217,7 +217,7 @@
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
-@@ -1238,17 +1242,31 @@ struct task_struct {
+@@ -1176,17 +1180,31 @@ struct task_struct {
int lock_depth; /* BKL lock depth */
@@ -249,7 +249,7 @@
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
-@@ -1270,6 +1288,9 @@ struct task_struct {
+@@ -1208,6 +1226,9 @@ struct task_struct {
unsigned int policy;
cpumask_t cpus_allowed;
@@ -259,7 +259,7 @@
#ifdef CONFIG_TREE_PREEMPT_RCU
int rcu_read_lock_nesting;
-@@ -1347,6 +1368,9 @@ struct task_struct {
+@@ -1287,6 +1308,9 @@ struct task_struct {
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
cputime_t utime, stime, utimescaled, stimescaled;
@@ -269,7 +269,7 @@
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
-@@ -1568,6 +1592,64 @@ struct task_struct {
+@@ -1507,6 +1531,64 @@ struct task_struct {
#endif
};
@@ -293,7 +293,7 @@
+
+static inline void print_scheduler_version(void)
+{
-+ printk(KERN_INFO"BFS CPU scheduler v0.316 by Con Kolivas.\n");
++ printk(KERN_INFO"BFS CPU scheduler v0.318 by Con Kolivas.\n");
+}
+
+static inline int iso_task(struct task_struct *p)
@@ -334,7 +334,7 @@
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
-@@ -1586,9 +1668,19 @@ struct task_struct {
+@@ -1525,9 +1607,19 @@ struct task_struct {
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
@@ -355,7 +355,7 @@
static inline int rt_prio(int prio)
{
-@@ -1897,7 +1989,7 @@ task_sched_runtime(struct task_struct *t
+@@ -1836,7 +1928,7 @@ task_sched_runtime(struct task_struct *t
extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
@@ -364,7 +364,7 @@
extern void sched_exec(void);
#else
#define sched_exec() {}
-@@ -2060,6 +2152,9 @@ extern void wake_up_new_task(struct task
+@@ -1999,6 +2091,9 @@ extern void wake_up_new_task(struct task
static inline void kick_process(struct task_struct *tsk) { }
#endif
extern void sched_fork(struct task_struct *p, int clone_flags);
@@ -374,11 +374,11 @@
extern void sched_dead(struct task_struct *p);
extern void proc_caches_init(void);
-Index: linux-2.6.33-bfs/kernel/sysctl.c
+Index: linux-2.6.34-ck1/kernel/sysctl.c
===================================================================
---- linux-2.6.33-bfs.orig/kernel/sysctl.c 2010-02-25 21:51:52.000000000 +1100
-+++ linux-2.6.33-bfs/kernel/sysctl.c 2010-04-25 15:33:47.434131967 +1000
-@@ -104,7 +104,12 @@ static int zero;
+--- linux-2.6.34-ck1.orig/kernel/sysctl.c 2010-05-18 12:24:34.224444706 +1000
++++ linux-2.6.34-ck1/kernel/sysctl.c 2010-05-18 12:26:15.770444796 +1000
+@@ -113,7 +113,12 @@ static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static unsigned long one_ul = 1;
@@ -392,7 +392,7 @@
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
-@@ -239,7 +244,7 @@ static struct ctl_table root_table[] = {
+@@ -229,7 +234,7 @@ static struct ctl_table root_table[] = {
{ }
};
@@ -401,7 +401,7 @@
static int min_sched_granularity_ns = 100000; /* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
static int min_wakeup_granularity_ns; /* 0 usecs */
-@@ -251,6 +256,7 @@ static int max_sched_shares_ratelimit =
+@@ -241,6 +246,7 @@ static int max_sched_shares_ratelimit =
#endif
static struct ctl_table kern_table[] = {
@@ -409,7 +409,7 @@
{
.procname = "sched_child_runs_first",
.data = &sysctl_sched_child_runs_first,
-@@ -364,6 +370,7 @@ static struct ctl_table kern_table[] = {
+@@ -354,6 +360,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
@@ -417,7 +417,7 @@
#ifdef CONFIG_PROVE_LOCKING
{
.procname = "prove_locking",
-@@ -761,6 +768,26 @@ static struct ctl_table kern_table[] = {
+@@ -751,6 +758,26 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
@@ -444,11 +444,11 @@
#if defined(CONFIG_S390) && defined(CONFIG_SMP)
{
.procname = "spin_retry",
-Index: linux-2.6.33-bfs/kernel/sched_bfs.c
+Index: linux-2.6.34-ck1/kernel/sched_bfs.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.33-bfs/kernel/sched_bfs.c 2010-04-25 15:39:03.378132181 +1000
-@@ -0,0 +1,6723 @@
++++ linux-2.6.34-ck1/kernel/sched_bfs.c 2010-05-18 12:26:15.774197412 +1000
+@@ -0,0 +1,6898 @@
+/*
+ * kernel/sched_bfs.c, was sched.c
+ *
@@ -519,6 +519,7 @@
+#include <linux/log2.h>
+#include <linux/bootmem.h>
+#include <linux/ftrace.h>
++#include <linux/slab.h>
+
+#include <asm/tlb.h>
+#include <asm/unistd.h>
@@ -687,6 +688,11 @@
+static DEFINE_MUTEX(sched_hotcpu_mutex);
+
+#ifdef CONFIG_SMP
++/*
++ * sched_domains_mutex serializes calls to arch_init_sched_domains,
++ * detach_destroy_domains and partition_sched_domains.
++ */
++static DEFINE_MUTEX(sched_domains_mutex);
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
@@ -733,6 +739,11 @@
+#endif
+}
+
++#define rcu_dereference_check_sched_domain(p) \
++ rcu_dereference_check((p), \
++ rcu_read_lock_sched_held() || \
++ lockdep_is_held(&sched_domains_mutex))
++
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See detach_destroy_domains: synchronize_sched for details.
@@ -741,7 +752,7 @@
+ * preempt-disabled sections.
+ */
+#define for_each_domain(cpu, __sd) \
-+ for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
++ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+
+#ifdef CONFIG_SMP
+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
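The rcu_dereference_check_sched_domain() wrapper above relies on rcu_dereference_check(), introduced in the 2.6.34 timeframe, which behaves like rcu_dereference() but also satisfies lockdep when the extra condition holds. The general shape, as a sketch of the standard API rather than patch code (gp and my_lock are hypothetical names):

    /* Fetch an RCU-protected pointer; lockdep accepts the access
     * either inside an RCU read-side critical section or whenever
     * the second argument is true (here: update-side mutex held). */
    struct foo *p = rcu_dereference_check(gp,
                                          lockdep_is_held(&my_lock));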
@@ -1711,6 +1722,8 @@
+ unsigned long flags;
+ struct rq *rq;
+
++ get_cpu();
++
+ /* This barrier is undocumented, probably for p->state? くそ */
+ smp_wmb();
+
@@ -1745,6 +1758,8 @@
+ p->state = TASK_RUNNING;
+out_unlock:
+ task_grq_unlock(&flags);
++ put_cpu();
++
+ return success;
+}
+
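The get_cpu()/put_cpu() pair added above (mirrored in the next hunk) pins the waker to its CPU by disabling preemption for the duration of the wakeup; the CPU id returned by get_cpu() is simply discarded. The pattern in isolation, a sketch of the standard kernel API rather than new patch code:

    int cpu = get_cpu();  /* preempt_disable() + smp_processor_id() */
    /* ... work that must not migrate to another CPU ... */
    put_cpu();            /* preempt_enable(); migration possible again */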
@@ -1868,6 +1883,7 @@
+ unsigned long flags;
+ struct rq *rq;
+
++ get_cpu();
+ rq = task_grq_lock(p, &flags);
+ BUG_ON(p->state != TASK_WAKING);
+ p->state = TASK_RUNNING;
@@ -1887,6 +1903,7 @@
+ } else
+ try_preempt(p, rq);
+ task_grq_unlock(&flags);
++ put_cpu();
+}
+
+/*
@@ -2040,7 +2057,13 @@
+ */
+ prev_state = prev->state;
+ finish_arch_switch(prev);
-+ perf_event_task_sched_in(current, cpu_of(rq));
++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
++ local_irq_disable();
++#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
++ perf_event_task_sched_in(current);
++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
++ local_irq_enable();
++#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+ finish_lock_switch(rq, prev);
+
+ fire_sched_in_preempt_notifiers(current);
@@ -2692,7 +2715,7 @@
+ */
+void scheduler_tick(void)
+{
-+ int cpu = smp_processor_id();
++ int cpu __maybe_unused = smp_processor_id();
+ struct rq *rq = cpu_rq(cpu);
+
+ sched_clock_tick();
@@ -2702,7 +2725,7 @@
+ task_running_tick(rq);
+ else
+ no_iso_tick();
-+ perf_event_task_tick(rq->curr, cpu);
++ perf_event_task_tick(rq->curr);
+}
+
+notrace unsigned long get_parent_ip(unsigned long addr)
@@ -3026,7 +3049,7 @@
+
+ if (likely(prev != next)) {
+ sched_info_switch(prev, next);
-+ perf_event_task_sched_out(prev, next, cpu);
++ perf_event_task_sched_out(prev, next);
+
+ set_rq_task(rq, next);
+ grq.nr_switches++;
@@ -3071,7 +3094,7 @@
+ * the mutex owner just released it and exited.
+ */
+ if (probe_kernel_address(&owner->cpu, cpu))
-+ goto out;
++ return 0;
+#else
+ cpu = owner->cpu;
+#endif
@@ -3081,14 +3104,14 @@
+ * the cpu field may no longer be valid.
+ */
+ if (cpu >= nr_cpumask_bits)
-+ goto out;
++ return 0;
+
+ /*
+ * We need to validate that we can do a
+ * get_cpu() and that we have the percpu area.
+ */
+ if (!cpu_online(cpu))
-+ goto out;
++ return 0;
+
+ rq = cpu_rq(cpu);
+
@@ -3107,7 +3130,7 @@
+
+ cpu_relax();
+ }
-+out:
++
+ return 1;
+}
+#endif
@@ -3662,7 +3685,7 @@
+ /* convert nice value [19,-20] to rlimit style value [1,40] */
+ int nice_rlim = 20 - nice;
+
-+ return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
++ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
+ capable(CAP_SYS_NICE));
+}
+
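The conversion above maps the nice range onto the RLIMIT_NICE scale: nice 19 (weakest) becomes 1 and nice -20 (strongest) becomes 40, so a task may move to a given nice level only if its rlimit is at least 20 - nice. A standalone check of the arithmetic (illustrative, not from the patch):

    #include <assert.h>

    int main(void)
    {
            /* nice_rlim = 20 - nice maps [19 .. -20] onto [1 .. 40] */
            assert(20 - 19 == 1);    /* nice 19  needs RLIMIT_NICE >= 1  */
            assert(20 - 0 == 20);    /* nice 0   needs RLIMIT_NICE >= 20 */
            assert(20 - -20 == 40);  /* nice -20 needs RLIMIT_NICE >= 40 */
            return 0;
    }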
@@ -3826,7 +3849,7 @@
+
+ if (!lock_task_sighand(p, &lflags))
+ return -ESRCH;
-+ rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
++ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
+ unlock_task_sighand(p, &lflags);
+ if (rlim_rtprio)
+ goto recheck;
@@ -4237,7 +4260,9 @@
+ int ret;
+ cpumask_var_t mask;
+
-+ if (len < cpumask_size())
++ if ((len * BITS_PER_BYTE) < nr_cpu_ids)
++ return -EINVAL;
++ if (len & (sizeof(unsigned long)-1))
+ return -EINVAL;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
@@ -4245,10 +4270,12 @@
+
+ ret = sched_getaffinity(pid, mask);
+ if (ret == 0) {
-+ if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
++ size_t retlen = min_t(size_t, len, cpumask_size());
++
++ if (copy_to_user(user_mask_ptr, mask, retlen))
+ ret = -EFAULT;
+ else
-+ ret = cpumask_size();
++ ret = retlen;
+ }
+ free_cpumask_var(mask);
+
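The reworked checks above accept any user buffer large enough to hold one bit per possible CPU, require its size to be a multiple of sizeof(unsigned long), and then cap the copy at cpumask_size(). For example, with nr_cpu_ids = 8 on a 64-bit machine, len = 4 now fails only the alignment test and len = 8 is the smallest accepted size. As a user-space predicate (a sketch, not patch code; nr_cpu_ids stands in for the kernel variable):

    #include <stddef.h>

    #define BITS_PER_BYTE 8

    /* Mirrors the two -EINVAL tests in the hunk above. */
    static int affinity_len_ok(size_t len, unsigned int nr_cpu_ids)
    {
            if (len * BITS_PER_BYTE < nr_cpu_ids)
                    return 0;   /* buffer cannot hold all CPU bits */
            if (len & (sizeof(unsigned long) - 1))
                    return 0;   /* size not a multiple of word size */
            return 1;
    }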
@@ -4625,6 +4652,131 @@
+ return atomic_read(&nohz.load_balancer);
+}
+
++#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
++/**
++ * lowest_flag_domain - Return lowest sched_domain containing flag.
++ * @cpu: The cpu whose lowest level of sched domain is to
++ * be returned.
++ * @flag: The flag to check for the lowest sched_domain
++ * for the given cpu.
++ *
++ * Returns the lowest sched_domain of a cpu which contains the given flag.
++ */
++static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
++{
++ struct sched_domain *sd;
++
++ for_each_domain(cpu, sd)
++ if (sd && (sd->flags & flag))
++ break;
++
++ return sd;
++}
++
++/**
++ * for_each_flag_domain - Iterates over sched_domains containing the flag.
++ * @cpu: The cpu whose domains we're iterating over.
++ * @sd: variable holding the value of the power_savings_sd
++ * for cpu.
++ * @flag: The flag to filter the sched_domains to be iterated.
++ *
++ * Iterates over all the scheduler domains for a given cpu that has the 'flag'
++ * set, starting from the lowest sched_domain to the highest.
++ */
++#define for_each_flag_domain(cpu, sd, flag) \
++ for (sd = lowest_flag_domain(cpu, flag); \
++ (sd && (sd->flags & flag)); sd = sd->parent)
++
++/**
++ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
++ * @ilb_group: group to be checked for semi-idleness
++ *
++ * Returns: 1 if the group is semi-idle. 0 otherwise.
++ *
++ * We define a sched_group to be semi idle if it has atleast one idle-CPU
++ * and atleast one non-idle CPU. This helper function checks if the given
++ * sched_group is semi-idle or not.
++ */
++static inline int is_semi_idle_group(struct sched_group *ilb_group)
++{
++ cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
++ sched_group_cpus(ilb_group));
++
++ /*
++ * A sched_group is semi-idle when it has atleast one busy cpu
++ * and atleast one idle cpu.
++ */
++ if (cpumask_empty(nohz.ilb_grp_nohz_mask))
++ return 0;
++
++ if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
++ return 0;
++
++ return 1;
++}
++
++/**
++ * find_new_ilb - Finds the optimum idle load balancer for nomination.
++ * @cpu: The cpu which is nominating a new idle_load_balancer.
++ *
++ * Returns: Returns the id of the idle load balancer if it exists,
++ * Else, returns >= nr_cpu_ids.
++ *
++ * This algorithm picks the idle load balancer such that it belongs to a
++ * semi-idle powersavings sched_domain. The idea is to try and avoid
++ * completely idle packages/cores just for the purpose of idle load balancing
++ * when there are other idle cpu's which are better suited for that job.
++ */
++static int find_new_ilb(int cpu)
++{
++ struct sched_domain *sd;
++ struct sched_group *ilb_group;
++
++ /*
++ * Have idle load balancer selection from semi-idle packages only
++ * when power-aware load balancing is enabled
++ */
++ if (!(sched_smt_power_savings || sched_mc_power_savings))
++ goto out_done;
++
++ /*
++ * Optimize for the case when we have no idle CPUs or only one
++ * idle CPU. Don't walk the sched_domain hierarchy in such cases
++ */
++ if (cpumask_weight(nohz.cpu_mask) < 2)
++ goto out_done;
++
++ for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
++ ilb_group = sd->groups;
++
++ do {
++ if (is_semi_idle_group(ilb_group))
++ return cpumask_first(nohz.ilb_grp_nohz_mask);
++
++ ilb_group = ilb_group->next;
++
++ } while (ilb_group != sd->groups);
++ }
++
++out_done:
++ return cpumask_first(nohz.cpu_mask);
++}
++#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
++static inline int find_new_ilb(int call_cpu)
++{
++ return cpumask_first(nohz.cpu_mask);
++}
++#endif
++
++static inline void resched_cpu(int cpu)
++{
++ unsigned long flags;
++
++ grq_lock_irqsave(&flags);
++ resched_task(cpu_curr(cpu));
++ grq_unlock_irqrestore(&flags);
++}
++
+/*
+ * This routine will try to nominate the ilb (idle load balancing)
+ * owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -4679,8 +4831,24 @@
+ /* make me the ilb owner */
+ if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
+ return 1;
-+ } else if (atomic_read(&nohz.load_balancer) == cpu)
++ } else if (atomic_read(&nohz.load_balancer) == cpu) {
++ int new_ilb;
++
++ if (!(sched_smt_power_savings ||
++ sched_mc_power_savings))
++ return 1;
++ /*
++ * Check to see if there is a more power-efficient
++ * ilb.
++ */
++ new_ilb = find_new_ilb(cpu);
++ if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
++ atomic_set(&nohz.load_balancer, -1);
++ resched_cpu(new_ilb);
++ return 0;
++ }
+ return 1;
++ }
+ } else {
+ if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
+ return 0;
@@ -4782,8 +4950,11 @@
+ /* Task is running on the wrong cpu now, reschedule it. */
+ set_tsk_need_resched(p);
+ running_wrong = 1;
-+ } else
++ } else {
<<Diff was trimmed, longer than 597 lines>>
---- CVS-web:
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel-desktop/kernel-desktop-sched-bfs.patch?r1=1.1.2.17&r2=1.1.2.18&f=u