SOURCES: kernel-desktop-ck.patch - updated to work with suspend2 p...

Thu Aug 2 14:09:05 CEST 2007

Author: czarny                       Date: Thu Aug  2 12:09:05 2007 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
- updated to work with suspend2 patch

---- Files affected:
SOURCES:
   kernel-desktop-ck.patch (1.9 -> 1.10) 

---- Diffs:

================================================================
Index: SOURCES/kernel-desktop-ck.patch
diff -u SOURCES/kernel-desktop-ck.patch:1.9 SOURCES/kernel-desktop-ck.patch:1.10

--- SOURCES/kernel-desktop-ck.patch:1.9	Mon Jul 30 17:45:26 2007
+++ SOURCES/kernel-desktop-ck.patch	Thu Aug  2 14:08:59 2007
@@ -1,5167 +1,5124 @@
-Index: linux-2.6.22-ck1/include/linux/sched.h
-===================================================================
---- linux-2.6.22-ck1.orig/include/linux/sched.h	2007-07-10 14:55:00.000000000 +1000
-+++ linux-2.6.22-ck1/include/linux/sched.h	2007-07-10 14:55:21.000000000 +1000
-@@ -34,9 +34,14 @@
- #define SCHED_FIFO		1
- #define SCHED_RR		2
- #define SCHED_BATCH		3
-+#define SCHED_ISO		4
-+#define SCHED_IDLEPRIO		5
- 
- #ifdef __KERNEL__
- 
-+#define SCHED_MAX		SCHED_IDLEPRIO
-+#define SCHED_RANGE(policy)	((policy) <= SCHED_MAX)
-+
- struct sched_param {
- 	int sched_priority;
- };
-@@ -129,7 +134,7 @@
- extern unsigned long nr_active(void);
- extern unsigned long nr_iowait(void);
- extern unsigned long weighted_cpuload(const int cpu);
--
-+extern int above_background_load(void);
+diff -urN linux-2.6.22.suspend2/arch/i386/defconfig linux-2.6.22.ck/arch/i386/defconfig
+--- linux-2.6.22.suspend2/arch/i386/defconfig	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/arch/i386/defconfig	2007-08-02 13:23:45.865825786 +0200
+@@ -226,10 +226,10 @@
+ # CONFIG_IRQBALANCE is not set
+ CONFIG_SECCOMP=y
+ # CONFIG_HZ_100 is not set
+-CONFIG_HZ_250=y
++# CONFIG_HZ_250 is not set
+ # CONFIG_HZ_300 is not set
+-# CONFIG_HZ_1000 is not set
+-CONFIG_HZ=250
++CONFIG_HZ_1000=y
++CONFIG_HZ=1000
+ # CONFIG_KEXEC is not set
+ # CONFIG_CRASH_DUMP is not set
+ CONFIG_PHYSICAL_START=0x100000
+diff -urN linux-2.6.22.suspend2/arch/i386/Kconfig linux-2.6.22.ck/arch/i386/Kconfig
+--- linux-2.6.22.suspend2/arch/i386/Kconfig	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/arch/i386/Kconfig	2007-08-02 13:23:45.861825804 +0200
+@@ -550,7 +550,7 @@
  
- /*
-  * Task state bitmask. NOTE! These bits are also
-@@ -150,8 +155,7 @@
- #define EXIT_ZOMBIE		16
- #define EXIT_DEAD		32
- /* in tsk->state again */
--#define TASK_NONINTERACTIVE	64
--#define TASK_DEAD		128
-+#define TASK_DEAD		64
+ choice
+ 	depends on EXPERIMENTAL
+-	prompt "Memory split" if EMBEDDED
++	prompt "Memory split"
+ 	default VMSPLIT_3G
+ 	help
+ 	  Select the desired split between kernel and user memory.
+@@ -569,17 +569,17 @@
+ 	  option alone!
  
- #define __set_task_state(tsk, state_value)		\
- 	do { (tsk)->state = (state_value); } while (0)
-@@ -537,14 +541,19 @@
+ 	config VMSPLIT_3G
+-		bool "3G/1G user/kernel split"
++		bool "Default 896MB lowmem (3G/1G user/kernel split)"
+ 	config VMSPLIT_3G_OPT
+ 		depends on !HIGHMEM
+-		bool "3G/1G user/kernel split (for full 1G low memory)"
++		bool "1GB lowmem (3G/1G user/kernel split)"
+ 	config VMSPLIT_2G
+-		bool "2G/2G user/kernel split"
++		bool "2GB lowmem (2G/2G user/kernel split)"
+ 	config VMSPLIT_2G_OPT
+ 		depends on !HIGHMEM
+-		bool "2G/2G user/kernel split (for full 2G low memory)"
++		bool "2GB lowmem (2G/2G user/kernel split)"
+ 	config VMSPLIT_1G
+-		bool "1G/3G user/kernel split"
++		bool "3GB lowmem (1G/3G user/kernel split)"
+ endchoice
  
- #define MAX_USER_RT_PRIO	100
- #define MAX_RT_PRIO		MAX_USER_RT_PRIO
-+#define PRIO_RANGE		(40)
-+#define ISO_PRIO		(MAX_RT_PRIO - 1)
+ config PAGE_OFFSET
+diff -urN linux-2.6.22.suspend2/arch/i386/kernel/cpu/proc.c linux-2.6.22.ck/arch/i386/kernel/cpu/proc.c
+--- linux-2.6.22.suspend2/arch/i386/kernel/cpu/proc.c	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/arch/i386/kernel/cpu/proc.c	2007-08-02 13:23:45.917825552 +0200
+@@ -157,7 +157,7 @@
  
--#define MAX_PRIO		(MAX_RT_PRIO + 40)
-+#define MAX_PRIO		(MAX_RT_PRIO + PRIO_RANGE)
+ 	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+ 		     c->loops_per_jiffy/(500000/HZ),
+-		     (c->loops_per_jiffy/(5000/HZ)) % 100);
++		     (c->loops_per_jiffy * 10/(50000/HZ)) % 100);
+ 	seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size);
  
--#define rt_prio(prio)		unlikely((prio) < MAX_RT_PRIO)
-+#define rt_prio(prio)		unlikely((prio) < ISO_PRIO)
- #define rt_task(p)		rt_prio((p)->prio)
- #define batch_task(p)		(unlikely((p)->policy == SCHED_BATCH))
--#define is_rt_policy(p)		((p) != SCHED_NORMAL && (p) != SCHED_BATCH)
-+#define is_rt_policy(policy)	((policy) == SCHED_FIFO || \
-+					(policy) == SCHED_RR)
- #define has_rt_policy(p)	unlikely(is_rt_policy((p)->policy))
-+#define iso_task(p)		unlikely((p)->policy == SCHED_ISO)
-+#define idleprio_task(p)	unlikely((p)->policy == SCHED_IDLEPRIO)
+ 	return 0;
+diff -urN linux-2.6.22.suspend2/arch/i386/kernel/smpboot.c linux-2.6.22.ck/arch/i386/kernel/smpboot.c
+--- linux-2.6.22.suspend2/arch/i386/kernel/smpboot.c	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/arch/i386/kernel/smpboot.c	2007-08-02 13:23:45.945825426 +0200
+@@ -1094,7 +1094,7 @@
+ 		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ 		cpucount+1,
+ 		bogosum/(500000/HZ),
+-		(bogosum/(5000/HZ))%100);
++		(bogosum * 10/(50000/HZ))%100);
+ 	
+ 	Dprintk("Before bogocount - setting activated=1.\n");
  
- /*
-  * Some day this will be a full-fledged user tracking system..
-@@ -809,13 +818,6 @@
- struct pipe_inode_info;
- struct uts_namespace;
+diff -urN linux-2.6.22.suspend2/arch/x86_64/defconfig linux-2.6.22.ck/arch/x86_64/defconfig
+--- linux-2.6.22.suspend2/arch/x86_64/defconfig	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/arch/x86_64/defconfig	2007-08-02 13:23:45.885825696 +0200
+@@ -185,10 +185,10 @@
+ CONFIG_SECCOMP=y
+ # CONFIG_CC_STACKPROTECTOR is not set
+ # CONFIG_HZ_100 is not set
+-CONFIG_HZ_250=y
++# CONFIG_HZ_250 is not set
+ # CONFIG_HZ_300 is not set
+-# CONFIG_HZ_1000 is not set
+-CONFIG_HZ=250
++CONFIG_HZ_1000=y
++CONFIG_HZ=1000
+ CONFIG_K8_NB=y
+ CONFIG_GENERIC_HARDIRQS=y
+ CONFIG_GENERIC_IRQ_PROBE=y
+diff -urN linux-2.6.22.suspend2/arch/x86_64/kernel/setup.c linux-2.6.22.ck/arch/x86_64/kernel/setup.c
+--- linux-2.6.22.suspend2/arch/x86_64/kernel/setup.c	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/arch/x86_64/kernel/setup.c	2007-08-02 13:23:45.957825372 +0200
+@@ -1047,7 +1047,7 @@
+ 		
+ 	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+ 		   c->loops_per_jiffy/(500000/HZ),
+-		   (c->loops_per_jiffy/(5000/HZ)) % 100);
++		   (c->loops_per_jiffy * 10/(50000/HZ)) % 100);
  
--enum sleep_type {
--	SLEEP_NORMAL,
--	SLEEP_NONINTERACTIVE,
--	SLEEP_INTERACTIVE,
--	SLEEP_INTERRUPTED,
--};
--
- struct prio_array;
+ 	if (c->x86_tlbsize > 0) 
+ 		seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
+diff -urN linux-2.6.22.suspend2/block/cfq-iosched.c linux-2.6.22.ck/block/cfq-iosched.c
+--- linux-2.6.22.suspend2/block/cfq-iosched.c	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/block/cfq-iosched.c	2007-08-02 13:23:45.737826362 +0200
+@@ -1276,10 +1276,12 @@
+ 			printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
+ 		case IOPRIO_CLASS_NONE:
+ 			/*
+-			 * no prio set, place us in the middle of the BE classes
++			 * Select class and ioprio according to policy and nice
+ 			 */
++			cfqq->ioprio_class = task_policy_ioprio_class(tsk);
+ 			cfqq->ioprio = task_nice_ioprio(tsk);
+-			cfqq->ioprio_class = IOPRIO_CLASS_BE;
++			if (cfqq->ioprio_class == IOPRIO_CLASS_IDLE)
++				cfq_clear_cfqq_idle_window(cfqq);
+ 			break;
+ 		case IOPRIO_CLASS_RT:
+ 			cfqq->ioprio = task_ioprio(tsk);
+diff -urN linux-2.6.22.suspend2/Documentation/filesystems/proc.txt linux-2.6.22.ck/Documentation/filesystems/proc.txt
+--- linux-2.6.22.suspend2/Documentation/filesystems/proc.txt	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/Documentation/filesystems/proc.txt	2007-08-02 13:23:45.817826002 +0200
+@@ -1333,6 +1333,14 @@
+ As this is a non-destructive operation and dirty objects are not freeable, the
+ user should run `sync' first.
  
- struct task_struct {
-@@ -835,20 +837,33 @@
- 	int load_weight;	/* for niceness load balancing purposes */
- 	int prio, static_prio, normal_prio;
- 	struct list_head run_list;
-+	/*
-+	 * This bitmap shows what priorities this task has received quota
-+	 * from for this major priority rotation on its current runqueue.
-+	 */
-+	DECLARE_BITMAP(bitmap, PRIO_RANGE + 1);
- 	struct prio_array *array;
-+	/* Which major runqueue rotation did this task run */
-+	unsigned long rotation;
++tail_largefiles
++---------------
++
++When enabled, reads from large files go to the tail end of the inactive lru list.
++This means that any cache from reading large files is dropped very quickly,
++preventing loss of mapped ram and useful pagecache when large files are read.
++This does, however, make caching less effective when working with large files.
++
  
- 	unsigned short ioprio;
- #ifdef CONFIG_BLK_DEV_IO_TRACE
- 	unsigned int btrace_seq;
- #endif
--	unsigned long sleep_avg;
- 	unsigned long long timestamp, last_ran;
- 	unsigned long long sched_time; /* sched_clock time spent running */
--	enum sleep_type sleep_type;
+ 2.5 /proc/sys/dev - Device specific parameters
+ ----------------------------------------------
+diff -urN linux-2.6.22.suspend2/Documentation/sched-design.txt linux-2.6.22.ck/Documentation/sched-design.txt
+--- linux-2.6.22.suspend2/Documentation/sched-design.txt	2007-07-09 01:32:17.000000000 +0200
++++ linux-2.6.22.ck/Documentation/sched-design.txt	2007-08-02 13:23:45.633826830 +0200
+@@ -1,11 +1,14 @@
+-		   Goals, Design and Implementation of the
+-		      new ultra-scalable O(1) scheduler
++ Goals, Design and Implementation of the ultra-scalable O(1) scheduler by
++ Ingo Molnar and the Staircase Deadline cpu scheduler policy designed by
++ Con Kolivas.
  
- 	unsigned int policy;
- 	cpumask_t cpus_allowed;
--	unsigned int time_slice, first_time_slice;
-+	/*
-+	 * How much this task is entitled to run at the current priority
-+	 * before being requeued at a lower priority.
-+	 */
-+	int time_slice;
-+	/* Is this the very first time_slice this task has ever run. */
-+	unsigned int first_time_slice;
-+	/* How much this task receives at each priority level */
-+	int quota;
  
- #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
- 	struct sched_info sched_info;
-@@ -1013,6 +1028,7 @@
- 	struct held_lock held_locks[MAX_LOCK_DEPTH];
- 	unsigned int lockdep_recursion;
- #endif
-+	unsigned long mutexes_held;
+-  This is an edited version of an email Ingo Molnar sent to
+-  lkml on 4 Jan 2002.  It describes the goals, design, and
+-  implementation of Ingo's new ultra-scalable O(1) scheduler.
+-  Last Updated: 18 April 2002.
++  This was originally an edited version of an email Ingo Molnar sent to
++  lkml on 4 Jan 2002.  It describes the goals, design, and implementation
++  of Ingo's ultra-scalable O(1) scheduler. It now contains a description
++  of the Staircase Deadline priority scheduler that was built on this
++  design.
++  Last Updated: Fri, 4 May 2007
  
- /* journalling filesystem info */
- 	void *journal_info;
-@@ -1181,9 +1197,11 @@
- #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
- #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
- #define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
-+#define PF_ISOREF	0x04000000	/* SCHED_ISO task has used up quota */
- #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
- #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
- #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezeable */
-+#define PF_NONSLEEP	0x80000000	/* Waiting on in-kernel activity */
  
- /*
-  * Only the _current_ task can read/write to tsk->flags, but other
-@@ -1253,7 +1271,7 @@
- #endif
+ Goal
+@@ -163,3 +166,222 @@
+ code is smaller than the old one.
  
- extern void set_user_nice(struct task_struct *p, long nice);
--extern int task_prio(const struct task_struct *p);
-+extern int task_prio(struct task_struct *p);
- extern int task_nice(const struct task_struct *p);
- extern int can_nice(const struct task_struct *p, const int nice);
- extern int task_curr(const struct task_struct *p);
-Index: linux-2.6.22-ck1/kernel/sched.c
-===================================================================
---- linux-2.6.22-ck1.orig/kernel/sched.c	2007-07-10 14:55:00.000000000 +1000
-+++ linux-2.6.22-ck1/kernel/sched.c	2007-07-10 14:55:24.000000000 +1000
-@@ -16,6 +16,7 @@
-  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
-  *  2003-09-03	Interactivity tuning by Con Kolivas.
-  *  2004-04-02	Scheduler domains code by Nick Piggin
-+ *  2007-03-02	Staircase deadline scheduling policy by Con Kolivas
-  */
- 
- #include <linux/mm.h>
-@@ -53,8 +54,9 @@
- #include <linux/kprobes.h>
- #include <linux/delayacct.h>
- #include <linux/reciprocal_div.h>
--
-+#include <linux/log2.h>
- #include <asm/tlb.h>
-+
- #include <asm/unistd.h>
- 
- /*
-@@ -84,147 +86,85 @@
- #define USER_PRIO(p)		((p)-MAX_RT_PRIO)
- #define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
- #define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
-+#define SCHED_PRIO(p)		((p)+MAX_RT_PRIO)
- 
--/*
-- * Some helpers for converting nanosecond timing to jiffy resolution
-- */
--#define NS_TO_JIFFIES(TIME)	((TIME) / (1000000000 / HZ))
-+/* Some helpers for converting to/from various scales.*/
- #define JIFFIES_TO_NS(TIME)	((TIME) * (1000000000 / HZ))
--
--/*
-- * These are the 'tuning knobs' of the scheduler:
-- *
-- * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger),
-- * default timeslice is 100 msecs, maximum timeslice is 800 msecs.
-- * Timeslices get refilled after they expire.
-- */
--#define MIN_TIMESLICE		max(5 * HZ / 1000, 1)
--#define DEF_TIMESLICE		(100 * HZ / 1000)
--#define ON_RUNQUEUE_WEIGHT	 30
--#define CHILD_PENALTY		 95
--#define PARENT_PENALTY		100
--#define EXIT_WEIGHT		  3
--#define PRIO_BONUS_RATIO	 25
--#define MAX_BONUS		(MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
--#define INTERACTIVE_DELTA	  2
--#define MAX_SLEEP_AVG		(DEF_TIMESLICE * MAX_BONUS)
--#define STARVATION_LIMIT	(MAX_SLEEP_AVG)
--#define NS_MAX_SLEEP_AVG	(JIFFIES_TO_NS(MAX_SLEEP_AVG))
--
--/*
-- * If a task is 'interactive' then we reinsert it in the active
-- * array after it has expired its current timeslice. (it will not
-- * continue to run immediately, it will still roundrobin with
-- * other interactive tasks.)
-- *
-- * This part scales the interactivity limit depending on niceness.
-- *
-- * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
-- * Here are a few examples of different nice levels:
-- *
-- *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
-- *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
-- *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
-- *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
-- *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
-- *
-- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
-- *  priority range a task can explore, a value of '1' means the
-- *  task is rated interactive.)
-- *
-- * Ie. nice +19 tasks can never get 'interactive' enough to be
-- * reinserted into the active array. And only heavily CPU-hog nice -20
-- * tasks will be expired. Default nice 0 tasks are somewhere between,
-- * it takes some effort for them to get interactive, but it's not
-- * too hard.
-- */
--
--#define CURRENT_BONUS(p) \
--	(NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
--		MAX_SLEEP_AVG)
--
--#define GRANULARITY	(10 * HZ / 1000 ? : 1)
--
--#ifdef CONFIG_SMP
--#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
--		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
--			num_online_cpus())
--#else
--#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
--		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
--#endif
--
--#define SCALE(v1,v1_max,v2_max) \
--	(v1) * (v2_max) / (v1_max)
--
--#define DELTA(p) \
--	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
--		INTERACTIVE_DELTA)
--
--#define TASK_INTERACTIVE(p) \
--	((p)->prio <= (p)->static_prio - DELTA(p))
--
--#define INTERACTIVE_SLEEP(p) \
--	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
--		(MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
--
--#define TASK_PREEMPTS_CURR(p, rq) \
--	((p)->prio < (rq)->curr->prio)
--
--#define SCALE_PRIO(x, prio) \
--	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
--
--static unsigned int static_prio_timeslice(int static_prio)
--{
--	if (static_prio < NICE_TO_PRIO(0))
--		return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
--	else
--		return SCALE_PRIO(DEF_TIMESLICE, static_prio);
--}
--
--#ifdef CONFIG_SMP
--/*
-- * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
-- * Since cpu_power is a 'constant', we can use a reciprocal divide.
-+#define MS_TO_NS(TIME)		((TIME) * 1000000)
-+#define MS_TO_US(TIME)		((TIME) * 1000)
-+#define US_TO_MS(TIME)		((TIME) / 1000)
-+
-+#define TASK_PREEMPTS_CURR(p, curr)	((p)->prio < (curr)->prio)
+ 	Ingo
 +
-+/*
-+ * This is the time all tasks within the same priority round robin.
-+ * Value is in ms and set to a minimum of 10ms. Scales with number of cpus.
-+ * Tunable via /proc interface.
-+ */
-+int rr_interval __read_mostly = 6;
-+int sched_interactive __read_mostly = 1;
 +
-+/*
-+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks
-+ * are allowed to run (over ISO_PERIOD seconds) as real time tasks.
-+ * sched_iso_period - sysctl which determines the number of seconds over
-+ * which cpu usage of SCHED_ISO tasks is averaged to determine if they are
-+ * exceeding their allowable bandwidth.
-+*/
-+int sched_iso_cpu __read_mostly = 80;
-+int sched_iso_period __read_mostly = 5;
++Staircase Deadline cpu scheduler policy
++================================================
 +
-+#define ISO_PERIOD	((sched_iso_period * HZ) + 1)
++Design summary
++==============
 +
-+/*
-+ * This contains a bitmap for each dynamic priority level with empty slots
-+ * for the valid priorities each different nice level can have. It allows
-+ * us to stagger the slots where differing priorities run in a way that
-+ * keeps latency differences between different nice levels at a minimum.
-+ * The purpose of a pre-generated matrix is for rapid lookup of next slot in
-+ * O(1) time without having to recalculate every time priority gets demoted.
-+ * All nice levels use priority slot 39 as this allows less niced tasks to
-+ * get all priority slots better than that before expiration is forced.
-+ * ie, where 0 means a slot for that priority, priority running from left to
-+ * right is from prio 0 to prio 39:
-+ * nice -20 0000000000000000000000000000000000000000
-+ * nice -10 1000100010001000100010001000100010010000
-+ * nice   0 1010101010101010101010101010101010101010
-+ * nice   5 1011010110110101101101011011010110110110
-+ * nice  10 1110111011101110111011101110111011101110
-+ * nice  15 1111111011111110111111101111111011111110
-+ * nice  19 1111111111111111111111111111111111111110
-  */
--static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
--{
--	return reciprocal_divide(load, sg->reciprocal_cpu_power);
--}
-+static unsigned long prio_matrix[PRIO_RANGE][BITS_TO_LONGS(PRIO_RANGE)]
-+				 __read_mostly;
- 
--/*
-- * Each time a sched group cpu_power is changed,
-- * we must compute its reciprocal value
-- */
--static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
--{
--	sg->__cpu_power += val;
--	sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power);
--}
--#endif
-+struct rq;
- 
- /*
-- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
-- * to time slice values: [800ms ... 100ms ... 5ms]
-- *
-- * The higher a thread's priority, the bigger timeslices
-- * it gets during one round of execution. But even the lowest
-- * priority thread gets MIN_TIMESLICE worth of execution time.
-+ * These are the runqueue data structures:
-  */
-+struct prio_array {
-+	/* Tasks queued at each priority */
-+	struct list_head queue[MAX_PRIO + 1];
- 
--static inline unsigned int task_timeslice(struct task_struct *p)
--{
--	return static_prio_timeslice(p->static_prio);
--}
-+	/*
-+	 * The bitmap of priorities queued for this array. While the expired
-+	 * array will never have realtime tasks on it, it is simpler to have
-+	 * equal sized bitmaps for a cheap array swap. Include 1 bit for
-+	 * delimiter.
-+	 */
-+	DECLARE_BITMAP(prio_bitmap, MAX_PRIO + 1);
- 
--/*
-- * These are the runqueue data structures:
-- */
-+	/*
-+	 * The best static priority (of the dynamic priority tasks) queued
-+	 * this array.
-+	 */
-+	int best_static_prio;
- 
--struct prio_array {
--	unsigned int nr_active;
--	DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
--	struct list_head queue[MAX_PRIO];
-+#ifdef CONFIG_SMP
-+	/* For convenience looks back at rq */
-+	struct rq *rq;
-+#endif
- };
- 
- /*
-@@ -260,14 +200,28 @@
- 	 */
- 	unsigned long nr_uninterruptible;
- 
--	unsigned long expired_timestamp;
- 	/* Cached timestamp set by update_cpu_clock() */
- 	unsigned long long most_recent_timestamp;
- 	struct task_struct *curr, *idle;
- 	unsigned long next_balance;
- 	struct mm_struct *prev_mm;
--	struct prio_array *active, *expired, arrays[2];
--	int best_expired_prio;
++A novel design which incorporates a foreground-background descending priority
++system (the staircase) via a bandwidth allocation matrix according to nice
++level.
 +
-+	struct prio_array *active, *expired, *idleprio, arrays[2];
-+	unsigned long *dyn_bitmap, *exp_bitmap;
 +
-+	/*
-+	 * The current dynamic priority level this runqueue is at per static
-+	 * priority level.
-+	 */
-+	int prio_level[PRIO_RANGE];
++Features
++========
 +
-+	/* How many times we have rotated the priority queue */
-+	unsigned long prio_rotation;
-+	unsigned long iso_ticks;
-+	unsigned short iso_refractory;
++A starvation free, strict fairness O(1) scalable design with interactivity
++as good as the above restrictions can provide. There is no interactivity
++estimator, no sleep/run measurements and only simple fixed accounting.
++The design and its accounting are strict enough that task behaviour
++can be modelled and maximum scheduling latencies can be predicted by
++the virtual deadline mechanism that manages runqueues. The prime concern
++in this design is to maintain fairness at all costs determined by nice level,
++yet to maintain as good interactivity as can be allowed within the
++constraints of strict fairness.
 +
-+	/* Number of idleprio tasks running */
-+	unsigned long nr_idleprio;
- 	atomic_t nr_iowait;
- 
- #ifdef CONFIG_SMP
-@@ -606,12 +560,9 @@
- #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
- /*
-  * Called when a process is dequeued from the active array and given
-- * the cpu.  We should note that with the exception of interactive
<<Diff was trimmed, longer than 597 lines>>

---- CVS-web:
    http://cvs.pld-linux.org/SOURCES/kernel-desktop-ck.patch?r1=1.9&r2=1.10&f=u