packages (Titanium): kernel-desktop/kernel-desktop-sched-bfs.patch up to BFS 300
cactus
cactus at pld-linux.org
Thu Oct 1 17:29:55 CEST 2009
Author: cactus Date: Thu Oct 1 15:29:55 2009 GMT
Module: packages Tag: Titanium
---- Log message:
up to BFS 300
---- Files affected:
packages/kernel-desktop:
kernel-desktop-sched-bfs.patch (1.1.2.10 -> 1.1.2.11)
---- Diffs:
================================================================
Index: packages/kernel-desktop/kernel-desktop-sched-bfs.patch
diff -u packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.10 packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.11
--- packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.10 Mon Sep 28 13:15:03 2009
+++ packages/kernel-desktop/kernel-desktop-sched-bfs.patch Thu Oct 1 17:29:50 2009
@@ -8278,3 +8278,594 @@
k->flags |= PF_THREAD_BOUND;
}
EXPORT_SYMBOL(kthread_bind);
+Index: linux-2.6.31-test/kernel/sched_bfs.c
+===================================================================
+--- linux-2.6.31-test.orig/kernel/sched_bfs.c 2009-10-01 12:24:56.538334919 +1000
++++ linux-2.6.31-test/kernel/sched_bfs.c 2009-10-01 12:30:25.539335484 +1000
+@@ -147,13 +147,24 @@ int rr_interval __read_mostly = 6;
+ */
+ int sched_iso_cpu __read_mostly = 70;
+
++/*
++ * The relative length of deadline for each priority(nice) level.
++ */
+ int prio_ratios[PRIO_RANGE] __read_mostly;
+
++/*
++ * The quota handed out to tasks of all priority levels when refilling their
++ * time_slice.
++ */
+ static inline unsigned long timeslice(void)
+ {
+ return MS_TO_US(rr_interval);
+ }
+
++/*
++ * The global runqueue data that all CPUs work off. All data is protected
++ * by grq.lock.
++ */
+ struct global_rq {
+ spinlock_t lock;
+ unsigned long nr_running;
+@@ -169,11 +180,12 @@ struct global_rq {
+ #endif
+ };
+
++/* There can be only one */
+ static struct global_rq grq;
+
+ /*
+ * This is the main, per-CPU runqueue data structure.
+- * All this is protected by the global_rq lock.
++ * This data should only be modified by the local cpu.
+ */
+ struct rq {
+ #ifdef CONFIG_SMP
+@@ -204,6 +216,7 @@ struct rq {
+ #ifdef CONFIG_SMP
+ struct root_domain *rd;
+ struct sched_domain *sd;
++ unsigned long *cpu_locality; /* CPU relative cache distance */
+
+ struct list_head migration_queue;
+ #endif
+@@ -272,7 +285,6 @@ struct root_domain {
+ * members (mimicking the global state we have today).
+ */
+ static struct root_domain def_root_domain;
+-
+ #endif
+
+ static inline int cpu_of(struct rq *rq)
+@@ -308,6 +320,11 @@ static inline int cpu_of(struct rq *rq)
+ # define finish_arch_switch(prev) do { } while (0)
+ #endif
+
++/*
++ * All common locking functions performed on grq.lock. rq->clock is local to
++ * the cpu accessing it so it can be modified just with interrupts disabled,
++ * but looking up task_rq must be done under grq.lock to be safe.
++ */
+ inline void update_rq_clock(struct rq *rq)
+ {
+ rq->clock = sched_clock_cpu(cpu_of(rq));
+@@ -321,7 +338,6 @@ static inline int task_running(struct ta
+ static inline void grq_lock(void)
+ __acquires(grq.lock)
+ {
+- smp_mb();
+ spin_lock(&grq.lock);
+ }
+
+@@ -334,15 +350,14 @@ static inline void grq_unlock(void)
+ static inline void grq_lock_irq(void)
+ __acquires(grq.lock)
+ {
+- smp_mb();
+ spin_lock_irq(&grq.lock);
+ }
+
+ static inline void time_lock_grq(struct rq *rq)
+ __acquires(grq.lock)
+ {
+- grq_lock();
+ update_rq_clock(rq);
++ grq_lock();
+ }
+
+ static inline void grq_unlock_irq(void)
+@@ -354,8 +369,7 @@ static inline void grq_unlock_irq(void)
+ static inline void grq_lock_irqsave(unsigned long *flags)
+ __acquires(grq.lock)
+ {
+- local_irq_save(*flags);
+- grq_lock();
++ spin_lock_irqsave(&grq.lock, *flags);
+ }
+
+ static inline void grq_unlock_irqrestore(unsigned long *flags)
+@@ -491,14 +505,11 @@ static inline void finish_lock_switch(st
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
+- * A task that is queued will be on the grq run list.
++ * A task that is queued but not running will be on the grq run list.
+ * A task that is not running or queued will not be on the grq run list.
+- * A task that is currently running will have ->oncpu set and be queued
+- * temporarily in its own rq queue.
+- * A task that is running and no longer queued will be seen only on
+- * context switch exit.
++ * A task that is currently running will have ->oncpu set but not on the
++ * grq run list.
+ */
+-
+ static inline int task_queued(struct task_struct *p)
+ {
+ return (!list_empty(&p->run_list));
+@@ -618,6 +629,19 @@ static inline void resched_suitable_idle
+ wake_up_idle_cpu(first_cpu(tmp));
+ }
+
++/*
++ * The cpu cache locality difference between CPUs is used to determine how far
++ * to offset the virtual deadline. "One" difference in locality means that one
++ * timeslice difference is allowed longer for the cpu local tasks. This is
++ * enough in the common case when tasks are up to 2* number of CPUs to keep
++ * tasks within their shared cache CPUs only. See sched_init_smp for how
++ * locality is determined.
++ */
++static inline int
++cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
++{
++ return rq->cpu_locality[task_rq->cpu] * task_timeslice(p);
++}
+ #else /* CONFIG_SMP */
+ static inline void inc_qnr(void)
+ {
+@@ -649,6 +673,12 @@ static inline int suitable_idle_cpus(str
+ static inline void resched_suitable_idle(struct task_struct *p)
+ {
+ }
++
++static inline int
++cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
++{
++ return 0;
++}
+ #endif /* CONFIG_SMP */
+
+ /*
+@@ -904,9 +934,13 @@ unsigned long wait_task_inactive(struct
+ * We do the initial early heuristics without holding
+ * any task-queue locks at all. We'll only try to get
+ * the runqueue lock when things look like they will
+- * work out!
++ * work out! In the unlikely event rq is dereferenced
++ * since we're lockless, grab it again.
+ */
++retry_rq:
+ rq = task_rq(p);
++ if (unlikely(!rq))
++ goto retry_rq;
+
+ /*
+ * If the task is actively running on another CPU
+@@ -915,9 +949,9 @@ unsigned long wait_task_inactive(struct
+ *
+ * NOTE! Since we don't hold any locks, it's not
+ * even sure that "rq" stays as the right runqueue!
+- * But we don't care, since this will
+- * return false if the runqueue has changed and p
+- * is actually now running somewhere else!
++ * But we don't care, since this will return false
++ * if the runqueue has changed and p is actually now
++ * running somewhere else!
+ */
+ while (task_running(p) && p == rq->curr) {
+ if (match_state && unlikely(p->state != match_state))
+@@ -1012,19 +1046,22 @@ EXPORT_SYMBOL_GPL(kick_process);
+
+ /*
+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the
+- * basis of earlier deadlines. SCHED_BATCH and SCHED_IDLEPRIO don't preempt,
+- * they cooperatively multitask.
++ * basis of earlier deadlines. SCHED_BATCH, ISO and IDLEPRIO don't preempt
++ * between themselves, they cooperatively multitask.
+ */
+ static inline int task_preempts_curr(struct task_struct *p, struct rq *rq)
+ {
+- int preempts = 0;
+-
+ if (p->prio < rq->rq_prio)
+- preempts = 1;
+- else if (p->policy == SCHED_NORMAL && (p->prio == rq->rq_prio &&
+- time_before(p->deadline, rq->rq_deadline)))
+- preempts = 1;
+- return preempts;
++ return 1;
++ if (p->policy == SCHED_NORMAL) {
++ unsigned long p_deadline = p->deadline +
++ cache_distance(task_rq(p), rq, p);
++
++ if ((p->prio == rq->rq_prio &&
++ time_before(p_deadline, rq->rq_deadline)))
++ return 1;
++ }
++ return 0;
+ }
+
+ /*
+@@ -1119,6 +1156,9 @@ static int try_to_wake_up(struct task_st
+ int success = 0;
+ struct rq *rq;
+
++	/* This barrier is undocumented, probably for p->state? Damn. */
++ smp_wmb();
++
+ /*
+ * No need to do time_lock_grq as we only need to update the rq clock
+ * if we activate the task
+@@ -1126,7 +1166,7 @@ static int try_to_wake_up(struct task_st
+ rq = task_grq_lock(p, &flags);
+
+	/* state is a volatile long, why? I don't know */
+- if (!(unsigned int)p->state & state)
++ if (!((unsigned int)p->state & state))
+ goto out_unlock;
+
+ if (task_queued(p) || task_running(p))
+@@ -1273,7 +1313,7 @@ void wake_up_new_task(struct task_struct
+ /*
+ * Potentially available exiting-child timeslices are
+ * retrieved here - this way the parent does not get
+- * penalized for creating too many threads.
++ * penalised for creating too many threads.
+ *
+ * (this cannot be used to 'generate' timeslices
+ * artificially, because any timeslice recovered here
+@@ -1286,11 +1326,22 @@ void sched_exit(struct task_struct *p)
+ struct rq *rq;
+
+ if (p->first_time_slice) {
++ int *par_tslice, *p_tslice;
++
+ parent = p->parent;
+ rq = task_grq_lock(parent, &flags);
+- parent->time_slice += p->time_slice;
+- if (unlikely(parent->time_slice > timeslice()))
+- parent->time_slice = timeslice();
++ par_tslice = &parent->time_slice;
++ p_tslice = &p->time_slice;
++
++ /* The real time_slice of the "curr" task is on the rq var.*/
++ if (p == rq->curr)
++ p_tslice = &rq->rq_time_slice;
++ else if (parent == task_rq(parent)->curr)
++ par_tslice = &rq->rq_time_slice;
++
++ *par_tslice += *p_tslice;
++ if (unlikely(*par_tslice > timeslice()))
++ *par_tslice = timeslice();
+ task_grq_unlock(&flags);
+ }
+ }
+@@ -1940,20 +1991,17 @@ void account_idle_ticks(unsigned long ti
+ * quota as real time scheduling and convert them back to SCHED_NORMAL.
+ * Where possible, the data is tested lockless, to avoid grabbing grq_lock
+ * because the occasional inaccurate result won't matter. However the
+- * data is only ever modified under lock.
++ * tick data is only ever modified under lock. iso_refractory is only simply
++ * set to 0 or 1 so it's not worth grabbing the lock yet again for that.
+ */
+ static void set_iso_refractory(void)
+ {
+- grq_lock();
+ grq.iso_refractory = 1;
+- grq_unlock();
+ }
+
+ static void clear_iso_refractory(void)
+ {
+- grq_lock();
+ grq.iso_refractory = 0;
+- grq_unlock();
+ }
+
+ /*
+@@ -2133,7 +2181,7 @@ static inline int longest_deadline(void)
+ }
+
+ /*
+- * SCHED_IDLEPRIO tasks still have a deadline set, but offset by to nice +19.
++ * SCHED_IDLEPRIO tasks still have a deadline set, but offset by nice +19.
+ * This allows nice levels to work between IDLEPRIO tasks and gives a
+ * deadline longer than nice +19 for when they're scheduled as SCHED_NORMAL
+ * tasks.
+@@ -2202,10 +2250,9 @@ retry:
+ * there is no need to initialise earliest_deadline
+ * before. Normalise all old deadlines to now.
+ */
+- if (time_before(p->deadline, jiffies))
++ dl = p->deadline + cache_distance(task_rq(p), rq, p);
++ if (time_before(dl, jiffies))
+ dl = jiffies;
+- else
+- dl = p->deadline;
+
+ if (edt == idle ||
+ time_before(dl, earliest_deadline)) {
+@@ -2278,6 +2325,12 @@ static inline void set_rq_task(struct rq
+ rq->rq_prio = p->prio;
+ }
+
++static void reset_rq_task(struct rq *rq, struct task_struct *p)
++{
++ rq->rq_policy = p->policy;
++ rq->rq_prio = p->prio;
++}
++
+ /*
+ * schedule() is the main scheduler function.
+ */
+@@ -2361,7 +2414,7 @@ need_resched_nonpreemptible:
+ rq->curr = next;
+ ++*switch_count;
+
+- context_switch(rq, prev, next); /* unlocks the rq */
++ context_switch(rq, prev, next); /* unlocks the grq */
+ /*
+ * the context switch might have flipped the stack from under
+ * us, hence refresh the local variables.
+@@ -2522,7 +2575,7 @@ void __wake_up_locked_key(wait_queue_hea
+ *
+ * The sync wakeup differs that the waker knows that it will schedule
+ * away soon, so while the target thread will be woken up, it will not
+- * be migrated to another CPU - ie. the two threads are 'synchronized'
++ * be migrated to another CPU - ie. the two threads are 'synchronised'
+ * with each other. This can prevent needless bouncing between CPUs.
+ *
+ * On UP it can prevent extra preemption.
+@@ -2556,7 +2609,7 @@ EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+ *
+ * The sync wakeup differs that the waker knows that it will schedule
+ * away soon, so while the target thread will be woken up, it will not
+- * be migrated to another CPU - ie. the two threads are 'synchronized'
++ * be migrated to another CPU - ie. the two threads are 'synchronised'
+ * with each other. This can prevent needless bouncing between CPUs.
+ *
+ * On UP it can prevent extra preemption.
+@@ -2921,8 +2974,10 @@ void set_user_nice(struct task_struct *p
+ }
+
+ /* Just resched the task, schedule() will know what to do. */
+- if (task_running(p))
++ if (task_running(p)) {
+ resched_task(p);
++ reset_rq_task(rq, p);
++ }
+ out_unlock:
+ task_grq_unlock(&flags);
+ }
+@@ -3060,8 +3115,10 @@ __setscheduler(struct task_struct *p, st
+ * Reschedule if running. schedule() will know if it can continue
+ * running or not.
+ */
+- if (task_running(p))
++ if (task_running(p)) {
+ resched_task(p);
++ reset_rq_task(rq, p);
++ }
+ }
+
+ /*
+@@ -3824,7 +3881,7 @@ void show_state_filter(unsigned long sta
+ * NOTE: this function does not set the idle thread's NEED_RESCHED
+ * flag, to make booting more robust.
+ */
+-void __cpuinit init_idle(struct task_struct *idle, int cpu)
++void init_idle(struct task_struct *idle, int cpu)
+ {
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+@@ -3972,7 +4029,7 @@ void wake_up_idle_cpu(int cpu)
+ * This is safe, as this function is called with the timer
+ * wheel base lock of (cpu) held. When the CPU is on the way
+ * to idle and has not yet set rq->curr to idle then it will
+- * be serialized on the timer wheel base lock and take the new
++ * be serialised on the timer wheel base lock and take the new
+ * timer into account automatically.
+ */
+ if (unlikely(rq->curr != idle))
+@@ -4441,7 +4498,7 @@ early_initcall(migration_init);
+ #endif
+
+ /*
+- * sched_domains_mutex serializes calls to arch_init_sched_domains,
++ * sched_domains_mutex serialises calls to arch_init_sched_domains,
+ * detach_destroy_domains and partition_sched_domains.
+ */
+ static DEFINE_MUTEX(sched_domains_mutex);
+@@ -5077,7 +5134,7 @@ static void free_sched_groups(const stru
+ #endif /* CONFIG_NUMA */
+
+ /*
+- * Initialize sched groups cpu_power.
++ * Initialise sched groups cpu_power.
+ *
+ * cpu_power indicates the capacity of sched group, which is used while
+ * distributing the load between different sched groups in a sched domain.
+@@ -5129,7 +5186,7 @@ static void init_sched_groups_power(int
+ }
+
+ /*
+- * Initializers for schedule domains
++ * Initialisers for schedule domains
+ * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
+ */
+
+@@ -5536,7 +5593,7 @@ static struct sched_domain_attr *dattr_c
+ static cpumask_var_t fallback_doms;
+
+ /*
+- * arch_update_cpu_topology lets virtualized architectures update the
++ * arch_update_cpu_topology lets virtualised architectures update the
+ * cpu core maps. It is supposed to return 1 if the topology changed
+ * or 0 if it stayed the same.
+ */
+@@ -5827,6 +5884,9 @@ static int update_runtime(struct notifie
+
+ void __init sched_init_smp(void)
+ {
++ struct sched_domain *sd;
++ int cpu;
++
+ cpumask_var_t non_isolated_cpus;
+
+ alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
+@@ -5866,6 +5926,35 @@ void __init sched_init_smp(void)
+ * fashion.
+ */
+ rr_interval *= 1 + ilog2(num_online_cpus());
++
++ /*
++ * Set up the relative cache distance of each online cpu from each
++ * other in a simple array for quick lookup. Locality is determined
++ * by the closest sched_domain that CPUs are separated by. CPUs with
++ * shared cache in SMT and MC are treated as local. Separate CPUs
++ * (within the same package or physically) within the same node are
++ * treated as not local. CPUs not even in the same domain (different
++ * nodes) are treated as very distant.
++ */
++ for_each_online_cpu(cpu) {
++ for_each_domain(cpu, sd) {
++ struct rq *rq = cpu_rq(cpu);
++ unsigned long locality;
++ int other_cpu;
++
++ if (sd->level <= SD_LV_MC)
++ locality = 0;
++ else if (sd->level <= SD_LV_NODE)
++ locality = 1;
++ else
++ continue;
++
++ for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) {
++ if (locality < rq->cpu_locality[other_cpu])
++ rq->cpu_locality[other_cpu] = locality;
++ }
++ }
++ }
+ }
+ #else
+ void __init sched_init_smp(void)
+@@ -5882,7 +5971,7 @@ int in_sched_functions(unsigned long add
+ && addr < (unsigned long)__sched_text_end);
+ }
+
+-void sched_init(void)
++void __init sched_init(void)
+ {
+ int i;
+ int highest_cpu = 0;
+@@ -5925,6 +6014,18 @@ void sched_init(void)
+
+ #ifdef CONFIG_SMP
+ nr_cpu_ids = highest_cpu + 1;
++ for_each_possible_cpu(i) {
++ struct rq *rq = cpu_rq(i);
++ int j;
++
++ rq->cpu_locality = kmalloc(nr_cpu_ids * sizeof(unsigned long), GFP_NOWAIT);
++ for_each_possible_cpu(j) {
++ if (i == j)
++ rq->cpu_locality[j] = 0;
++ else
++ rq->cpu_locality[j] = 4;
++ }
++ }
+ #endif
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+@@ -6051,7 +6152,7 @@ struct task_struct *curr_task(int cpu)
+ * Description: This function must only be used when non-maskable interrupts
+ * are serviced on a separate stack. It allows the architecture to switch the
+ * notion of the current task on a cpu in a non-blocking manner. This function
+- * must be called with all CPU's synchronized, and interrupts disabled, the
++ * must be called with all CPU's synchronised, and interrupts disabled, the
+ * and caller must save the original value of the current task (see
+ * curr_task() above) and restore that value before reenabling interrupts and
+ * re-starting the system.
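Aside on the sched_bfs.c hunks above: each runqueue now carries a cpu_locality table, and cache_distance() turns that relative distance into an offset on a waking task's virtual deadline, so a remote waker must beat the running task's deadline by more than its cache distance before it preempts. The fragment below is an illustrative user-space sketch of that comparison only, not code from the patch; the 4-CPU locality matrix, the flat 6-unit timeslice and the plain unsigned comparison (no time_before()/jiffies wrapping) are all simplifying assumptions.

/*
 * Sketch only, not patch code.  Assumes a fixed 4-CPU locality matrix,
 * one flat timeslice for every task, and plain unsigned comparisons.
 */
#include <stdio.h>

#define NR_CPUS    4
#define TIMESLICE  6	/* stand-in for task_timeslice(p) */

/* locality[this][other]: 0 = shared cache, 1 = same node, 4 = distant */
static const unsigned long locality[NR_CPUS][NR_CPUS] = {
	{ 0, 0, 1, 4 },
	{ 0, 0, 1, 4 },
	{ 1, 1, 0, 4 },
	{ 4, 4, 4, 0 },
};

/* Same idea as cache_distance(): one step of locality costs one extra
 * timeslice of virtual deadline. */
static unsigned long cache_distance(int task_cpu, int this_cpu)
{
	return locality[this_cpu][task_cpu] * TIMESLICE;
}

/* Roughly task_preempts_curr() for two equal-priority SCHED_NORMAL
 * tasks: the waker only preempts if its deadline, pushed out by its
 * cache distance, is still earlier than the running task's. */
static int preempts(unsigned long dl, int task_cpu,
		    unsigned long curr_dl, int this_cpu)
{
	return dl + cache_distance(task_cpu, this_cpu) < curr_dl;
}

int main(void)
{
	/* Running task on CPU 0 has deadline 100. */
	printf("waker on CPU 1, deadline 90: %d\n", preempts(90, 1, 100, 0));
	printf("waker on CPU 3, deadline 90: %d\n", preempts(90, 3, 100, 0));
	printf("waker on CPU 3, deadline 70: %d\n", preempts(70, 3, 100, 0));
	return 0;
}

With these example numbers a task on the distant CPU 3 needs a deadline more than 4 * 6 = 24 units earlier than the local task before it is preferred.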
+Index: linux-2.6.31-test/kernel/Kconfig.preempt
+===================================================================
+--- linux-2.6.31-test.orig/kernel/Kconfig.preempt 2009-10-01 12:24:56.552354234 +1000
++++ linux-2.6.31-test/kernel/Kconfig.preempt 2009-10-01 12:30:25.539335484 +1000
+@@ -1,7 +1,7 @@
+
+ choice
+ prompt "Preemption Model"
+- default PREEMPT
++ default PREEMPT_NONE
+
+ config PREEMPT_NONE
+ bool "No Forced Preemption (Server)"
+@@ -16,6 +16,23 @@ config PREEMPT_NONE
+ raw processing power of the kernel, irrespective of scheduling
+ latencies.
+
++config PREEMPT_VOLUNTARY
++ bool "Voluntary Kernel Preemption (Desktop)"
++ help
++ This option reduces the latency of the kernel by adding more
++ "explicit preemption points" to the kernel code. These new
++ preemption points have been selected to reduce the maximum
++ latency of rescheduling, providing faster application reactions,
++ at the cost of slightly lower throughput.
++
++ This allows reaction to interactive events by allowing a
++ low priority process to voluntarily preempt itself even if it
++ is in kernel mode executing a system call. This allows
++ applications to run more 'smoothly' even when the system is
++ under load.
++
++ Select this if you are building a kernel for a desktop system.
++
+ config PREEMPT
+ bool "Preemptible Kernel (Low-Latency Desktop)"
+ help
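The Kconfig.preempt change re-adds the PREEMPT_VOLUNTARY choice and switches the default to PREEMPT_NONE. As a loose user-space analogue (an assumption of this note, not anything in the patch), a CPU-bound loop can offer the processor back at chosen points, which is roughly what the kernel's explicit rescheduling points (cond_resched() and friends) provide under voluntary preemption:

/* User-space analogue only: a long loop with explicit "preemption
 * points" so other runnable work gets in sooner. */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	unsigned long i, sum = 0;

	for (i = 0; i < 100000000UL; i++) {
		sum += i;
		if ((i & 0xfffff) == 0)
			sched_yield();	/* voluntary preemption point */
	}
	printf("sum = %lu\n", sum);
	return 0;
}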
+Index: linux-2.6.31-test/init/main.c
+===================================================================
+--- linux-2.6.31-test.orig/init/main.c 2009-09-10 11:45:38.000000000 +1000
++++ linux-2.6.31-test/init/main.c 2009-10-01 12:30:25.539335484 +1000
+@@ -843,6 +843,8 @@ static noinline int init_post(void)
+ system_state = SYSTEM_RUNNING;
+ numa_default_policy();
+
++ printk(KERN_INFO"Running BFS CPU scheduler v0.300 by Con Kolivas.\n");
++
+ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+ printk(KERN_WARNING "Warning: unable to open an initial console.\n");
+
+Index: linux-2.6.31-test/kernel/exit.c
+===================================================================
+--- linux-2.6.31-test.orig/kernel/exit.c 2009-10-01 12:24:56.541364845 +1000
++++ linux-2.6.31-test/kernel/exit.c 2009-10-01 12:30:25.541335390 +1000
+@@ -206,6 +206,7 @@ repeat:
+ leader->exit_state = EXIT_DEAD;
+ }
+
++ sched_exit(p);
+ write_unlock_irq(&tasklist_lock);
+ release_thread(p);
+ call_rcu(&p->rcu, delayed_put_task_struct);
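The exit.c hook ties into the sched_exit() change earlier in the diff: a child that never refilled its first time slice hands whatever is left back to its parent, clamped to one full timeslice so fork() cannot manufacture CPU time; the pointer juggling in the patch only covers the case where the live slice of a currently running task sits in rq->rq_time_slice rather than in the task struct. Below is a minimal stand-alone sketch of just the hand-back-and-clamp step, with plain ints in place of the task and runqueue fields (an illustration, not the patch code):

/* Sketch only: return an exiting child's unused first timeslice to the
 * parent, never letting the parent exceed one full refill. */
#include <stdio.h>

#define FULL_TIMESLICE 6000	/* assumed MS_TO_US(rr_interval) quota */

static void return_first_timeslice(int *parent_slice, int *child_slice)
{
	*parent_slice += *child_slice;
	if (*parent_slice > FULL_TIMESLICE)
		*parent_slice = FULL_TIMESLICE;
	*child_slice = 0;
}

int main(void)
{
	int parent = 4500, child = 2500;

	return_first_timeslice(&parent, &child);
	/* 4500 + 2500 exceeds the quota, so the parent is clamped to 6000. */
	printf("parent = %d, child = %d\n", parent, child);
	return 0;
}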
+Index: linux-2.6.31-test/include/linux/sched.h
+===================================================================
+--- linux-2.6.31-test.orig/include/linux/sched.h 2009-10-01 12:24:56.486614782 +1000
++++ linux-2.6.31-test/include/linux/sched.h 2009-10-01 12:30:25.543335645 +1000
+@@ -1795,6 +1795,7 @@ extern void wake_up_new_task(struct task
+ static inline void kick_process(struct task_struct *tsk) { }
+ #endif
+ extern void sched_fork(struct task_struct *p, int clone_flags);
++extern void sched_exit(struct task_struct *p);
<<Diff was trimmed, longer than 597 lines>>
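For reference, the sched_init()/sched_init_smp() hunks above build the per-runqueue cpu_locality table in two passes: every CPU starts at distance 0 from itself and 4 ("very distant") from everyone else, then the sched-domain walk lowers the value to 0 for CPUs sharing cache (SMT/MC) and 1 for CPUs in the same node, always keeping the lowest level seen. The sketch below replays that logic over a made-up 4-CPU topology (CPUs 0 and 1 sharing a cache, all four in one node); the spans and sizes are assumptions for illustration, not taken from the patch:

/* Sketch only: default-then-refine construction of a cpu_locality
 * table, mirroring the two loops in sched_init()/sched_init_smp(). */
#include <stdio.h>

#define NR_CPUS 4

static unsigned long cpu_locality[NR_CPUS][NR_CPUS];

static void set_span(const int *span, int n, unsigned long level)
{
	int i, j;

	/* Keep the lowest (closest) level seen for each CPU pair. */
	for (i = 0; i < n; i++)
		for (j = 0; j < n; j++)
			if (cpu_locality[span[i]][span[j]] > level)
				cpu_locality[span[i]][span[j]] = level;
}

int main(void)
{
	const int mc_span[] = { 0, 1 };		/* shared cache */
	const int node_span[] = { 0, 1, 2, 3 };	/* same node */
	int i, j;

	/* As in sched_init(): self = 0, everyone else = 4. */
	for (i = 0; i < NR_CPUS; i++)
		for (j = 0; j < NR_CPUS; j++)
			cpu_locality[i][j] = (i == j) ? 0 : 4;

	/* As in sched_init_smp(): refine per domain level. */
	set_span(mc_span, 2, 0);	/* SD_LV_MC and below */
	set_span(node_span, 4, 1);	/* SD_LV_NODE and below */

	for (i = 0; i < NR_CPUS; i++) {
		for (j = 0; j < NR_CPUS; j++)
			printf("%lu ", cpu_locality[i][j]);
		printf("\n");
	}
	return 0;
}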
---- CVS-web:
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel-desktop/kernel-desktop-sched-bfs.patch?r1=1.1.2.10&r2=1.1.2.11&f=u