SOURCES: kernel-desktop-ck.patch - recovered part which was in sus...

sparky sparky at pld-linux.org
Sat Jul 1 00:26:19 CEST 2006


Author: sparky                       Date: Fri Jun 30 22:26:19 2006 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
- recovered part which was in suspend2.patch before

---- Files affected:
SOURCES:
   kernel-desktop-ck.patch (1.2 -> 1.3) 

---- Diffs:

================================================================
Index: SOURCES/kernel-desktop-ck.patch
diff -u SOURCES/kernel-desktop-ck.patch:1.2 SOURCES/kernel-desktop-ck.patch:1.3
--- SOURCES/kernel-desktop-ck.patch:1.2	Fri Jun 23 21:14:34 2006
+++ SOURCES/kernel-desktop-ck.patch	Sat Jul  1 00:26:13 2006
@@ -2806,8 +2806,8 @@
 --- linux-2.6.17-ck1.orig/include/linux/sysctl.h	2006-06-18 15:32:49.000000000 +1000
 +++ linux-2.6.17-ck1/include/linux/sysctl.h	2006-06-18 15:34:43.000000000 +1000
 @@ -148,6 +148,9 @@ enum
- 	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
- 	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
+	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
+	KERN_IA64_UNALIGNED=72,	/* int: ia64 unaligned userland trap enable */
 	KERN_MAX_LOCK_DEPTH=80,
 +	KERN_INTERACTIVE=83,	/* interactive tasks can have cpu bursts */
 +	KERN_COMPUTE=84,	/* adjust timeslices for a compute server */
@@ -4145,7 +4145,7 @@
  
  #include <asm/tlbflush.h>
  #include <asm/div64.h>
-@@ -61,6 +63,8 @@
+@@ -61,6 +63,8 @@ struct scan_control {
  	 * In this context, it doesn't matter that we scan the
  	 * whole list at once. */
  	int swap_cluster_max;
@@ -4154,7 +4154,7 @@
  };
  
  /*
-@@ -105,10 +109,11 @@
+@@ -105,10 +109,11 @@ struct shrinker {
  #endif
  
  /*
@@ -4169,7 +4169,7 @@
  
  static LIST_HEAD(shrinker_list);
  static DECLARE_RWSEM(shrinker_rwsem);
-@@ -380,6 +385,7 @@
+@@ -380,6 +385,7 @@ int remove_mapping(struct address_space 
  
  	if (PageSwapCache(page)) {
  		swp_entry_t swap = { .val = page_private(page) };
@@ -4177,7 +4177,7 @@
  		__delete_from_swap_cache(page);
  		write_unlock_irq(&mapping->tree_lock);
  		swap_free(swap);
-@@ -738,10 +744,14 @
+@@ -738,10 +744,14 @@ static void shrink_active_list(unsigned 
  		 * The distress ratio is important - we don't want to start
  		 * going oom.
  		 *
@@ -4195,7 +4195,7 @@
  
  		/*
  		 * Now use this metric to decide whether to start moving mapped
-@@ -888,6 +898,40 @
+@@ -888,6 +898,40 @@ static unsigned long shrink_zone(int pri
  }
  
  /*
@@ -4236,7 +4236,7 @@
   * This is the direct reclaim path, for page-allocating processes.  We only
   * try to reclaim pages from zones which will satisfy the caller's allocation
   * request.
-@@ -943,7 +987,8 @
+@@ -943,7 +987,8 @@ static unsigned long shrink_zones(int pr
   * holds filesystem locks which prevent writeout this might not work, and the
   * allocation attempt will fail.
   */
@@ -4246,7 +4246,7 @@
  {
  	int priority;
  	int ret = 0;
-@@ -951,14 +996,20 @
+@@ -951,14 +996,20 @@ unsigned long try_to_free_pages(struct z
  	unsigned long nr_reclaimed = 0;
  	struct reclaim_state *reclaim_state = current->reclaim_state;
  	unsigned long lru_pages = 0;
@@ -4268,7 +4268,7 @@
  	inc_page_state(allocstall);
  
  	for (i = 0; zones[i] != NULL; i++) {
-@@ -967,11 +1018,11 @
+@@ -967,11 +1018,11 @@ unsigned long try_to_free_pages(struct z
  		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
  			continue;
  
@@ -4282,7 +4282,7 @@
  		sc.nr_mapped = read_page_state(nr_mapped);
  		sc.nr_scanned = 0;
  		if (!priority)
-@@ -1002,7 +1053,7 @
+@@ -1002,7 +1053,7 @@ unsigned long try_to_free_pages(struct z
  		}
  
  		/* Take a nap, wait for some writeback to complete */
@@ -4291,10 +4291,26 @@
  			blk_congestion_wait(WRITE, HZ/10);
  	}
  out:
-@@ -1040,9 +1091,9 @
+@@ -1021,10 +1072,6 @@ out:
+  * For kswapd, balance_pgdat() will work across all this node's zones until
+  * they are all at pages_high.
+  *
+- * If `nr_pages' is non-zero then it is the number of pages which are to be
+- * reclaimed, regardless of the zone occupancies.  This is a software suspend
+- * special.
+- *
+  * Returns the number of pages which were actually freed.
+  *
+  * There is special handling here for zones which are full of pinned pages.
+@@ -1042,22 +1089,23 @@ out:
+  * the page allocator fallback scheme to ensure that aging of pages is balanced
+  * across the zones.
   */
- static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
+-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
+-				int order)
++static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
  {
+-	unsigned long to_free = nr_pages;
 -	int all_zones_ok;
 +	int all_zones_ok = 0;
  	int priority;
@@ -4303,10 +4319,11 @@
  	unsigned long total_scanned;
  	unsigned long nr_reclaimed;
  	struct reclaim_state *reclaim_state = current->reclaim_state;
-@@ -1050,8 +1101,11 @
+ 	struct scan_control sc = {
  		.gfp_mask = GFP_KERNEL,
  		.may_swap = 1,
- 		.swap_cluster_max = SWAP_CLUSTER_MAX,
+-		.swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
++		.swap_cluster_max = SWAP_CLUSTER_MAX,
 +		.mapped = vm_mapped,
  	};
  
@@ -4315,7 +4332,7 @@
  loop_again:
  	total_scanned = 0;
  	nr_reclaimed = 0;
-@@ -1063,10 +1117,10 @
+@@ -1069,10 +1117,10 @@ loop_again:
  	for (i = 0; i < pgdat->nr_zones; i++) {
  		struct zone *zone = pgdat->node_zones + i;
  
@@ -4328,22 +4345,42 @@
  		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
  		unsigned long lru_pages = 0;
  
-@@ -1082,19 +1136,26 @
- 		 */
- 		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
- 			struct zone *zone = pgdat->node_zones + i;
-+			unsigned long watermark;
+@@ -1082,31 +1130,34 @@ loop_again:
  
- 			if (!populated_zone(zone))
- 				continue;
+ 		all_zones_ok = 1;
  
--			if (zone->all_unreclaimable &&
--					priority != DEF_PRIORITY)
+-		if (nr_pages == 0) {
+-			/*
+-			 * Scan in the highmem->dma direction for the highest
+-			 * zone which needs scanning
+-			 */
+-			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+-				struct zone *zone = pgdat->node_zones + i;
++		/*
++		 * Scan in the highmem->dma direction for the highest
++		 * zone which needs scanning
++		 */
++		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
++			struct zone *zone = pgdat->node_zones + i;
++			unsigned long watermark;
++
++			if (!populated_zone(zone))
++				continue;
+ 
+-				if (!populated_zone(zone))
+-					continue;
 +			if (zone->all_unreclaimable && priority != scan_priority)
- 				continue;
++				continue;
  
--			if (!zone_watermark_ok(zone, order, zone->pages_high,
--					       0, 0)) {
+-				if (zone->all_unreclaimable &&
+-						priority != DEF_PRIORITY)
+-					continue;
+-
+-				if (!zone_watermark_ok(zone, order,
+-						zone->pages_high, 0, 0)) {
+-					end_zone = i;
+-					goto scan;
+-				}
 +			/*
 +			 * The watermark is relaxed depending on the
 +			 * level of "priority" till it drops to
@@ -4352,14 +4389,19 @@
 +			watermark = zone->pages_high + (zone->pages_high *
 +				    priority / scan_priority);
 +			if (!zone_watermark_ok(zone, order, watermark, 0, 0)) {
- 				end_zone = i;
- 				goto scan;
++				end_zone = i;
++				goto scan;
  			}
+-			goto out;
+-		} else {
+-			end_zone = pgdat->nr_zones - 1;
 +
  		}
- 		goto out;
++		goto out;
  scan:
-@@ -1116,14 +1177,18 @
+ 		for (i = 0; i <= end_zone; i++) {
+ 			struct zone *zone = pgdat->node_zones + i;
+@@ -1126,18 +1177,20 @@ scan:
  		for (i = 0; i <= end_zone; i++) {
  			struct zone *zone = pgdat->node_zones + i;
  			int nr_slab;
@@ -4372,15 +4414,29 @@
 +			if (zone->all_unreclaimable && priority != scan_priority)
  				continue;
  
--			if (!zone_watermark_ok(zone, order, zone->pages_high,
+-			if (nr_pages == 0) {	/* Not software suspend */
+-				if (!zone_watermark_ok(zone, order,
+-						zone->pages_high, end_zone, 0))
+-					all_zones_ok = 0;
+-			}
 +			watermark = zone->pages_high + (zone->pages_high *
 +				    priority / scan_priority);
 +
 +			if (!zone_watermark_ok(zone, order, watermark,
- 					       end_zone, 0))
- 				all_zones_ok = 0;
++					       end_zone, 0))
++				all_zones_ok = 0;
  			zone->temp_priority = priority;
-@@ -1156,7 +1221,7 @
+ 			if (zone->prev_priority > priority)
+ 				zone->prev_priority = priority;
+@@ -1162,15 +1215,13 @@ scan:
+ 			    total_scanned > nr_reclaimed + nr_reclaimed / 2)
+ 				sc.may_writepage = 1;
+ 		}
+-		if (nr_pages && to_free > nr_reclaimed)
+-			continue;	/* swsusp: need to do more work */
+ 		if (all_zones_ok)
+ 			break;		/* kswapd: all done */
+ 		/*
  		 * OK, kswapd is getting into trouble.  Take a nap, then take
  		 * another pass across the zones.
  		 */
@@ -4389,7 +4445,16 @@
  			blk_congestion_wait(WRITE, HZ/10);
  
  		/*
-@@ -1182,6 +1247,8 @
+@@ -1179,7 +1230,7 @@ scan:
+ 		 * matches the direct reclaim path behaviour in terms of impact
+ 		 * on zone->*_priority.
+ 		 */
+-		if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
++		if (nr_reclaimed >= SWAP_CLUSTER_MAX)
+ 			break;
+ 	}
+ out:
+@@ -1196,6 +1247,8 @@ out:
  	return nr_reclaimed;
  }
  
@@ -4398,16 +4463,16 @@
  /*
   * The background pageout daemon, started as a kernel thread
   * from the init process. 
-@@ -1233,6 +1300,8 @
- 		if (try_to_freeze())
- 			pgdat->kswapd_max_order = 0;
+@@ -1246,6 +1299,8 @@ static int kswapd(void *p)
+ 
+ 		try_to_freeze();
  
 +		/* kswapd has been busy so delay watermark_timer */
 +		mod_timer(&pgdat->watermark_timer, jiffies + WT_EXPIRY);
  		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
  		new_order = pgdat->kswapd_max_order;
  		pgdat->kswapd_max_order = 0;
-@@ -1243,6 +1311,7 @
+@@ -1256,12 +1311,13 @@ static int kswapd(void *p)
  			 */
  			order = new_order;
  		} else {
@@ -4415,7 +4480,14 @@
  			schedule();
  			order = pgdat->kswapd_max_order;
  		}
-@@ -1256,9 +1325,10 @
+ 		finish_wait(&pgdat->kswapd_wait, &wait);
+ 
+-		balance_pgdat(pgdat, 0, order);
++		balance_pgdat(pgdat, order);
+ 	}
+ 	return 0;
+ }
+@@ -1269,9 +1325,10 @@ static int kswapd(void *p)
  /*
   * A zone is low on free memory, so wake its kswapd task to service it.
   */
@@ -4427,7 +4499,7 @@
  
  	if (!populated_zone(zone))
  		return;
-@@ -1270,7 +1340,9 @
+@@ -1283,42 +1340,163 @@ void wakeup_kswapd(struct zone *zone, in
  		pgdat->kswapd_max_order = order;
  	if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
  		return;
@@ -4438,53 +4510,178 @@
  		return;
  	wake_up_interruptible(&pgdat->kswapd_wait);
  }
-@@ -1332,14 +1404,15 @
+ 
+ #ifdef CONFIG_PM
+ /*
+- * Try to free `nr_pages' of memory, system-wide.  Returns the number of freed
+- * pages.
++ * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
++ * from LRU lists system-wide, for given pass and priority, and returns the
++ * number of reclaimed pages
++ *
++ * For pass > 3 we also try to shrink the LRU lists that contain a few pages
++ */
++unsigned long shrink_all_zones(unsigned long nr_pages, int pass, int prio,
++				struct scan_control *sc)
++{
++	struct zone *zone;
++	unsigned long nr_to_scan, ret = 0;
++
++	for_each_zone(zone) {
++
++		if (!populated_zone(zone))
++			continue;
++
++		if (zone->all_unreclaimable && prio != DEF_PRIORITY)
++			continue;
++
++		/* For pass = 0 we don't shrink the active list */
++		if (pass > 0) {
++			zone->nr_scan_active += (zone->nr_active >> prio) + 1;
++			if (zone->nr_scan_active >= nr_pages || pass > 3) {
++				zone->nr_scan_active = 0;
++				nr_to_scan = min(nr_pages, zone->nr_active);
++				shrink_active_list(nr_to_scan, zone, sc);
++			}
++		}
++
++		zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
++		if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
++			zone->nr_scan_inactive = 0;
++			nr_to_scan = min(nr_pages, zone->nr_inactive);
++			ret += shrink_inactive_list(nr_to_scan, zone, sc);
++			if (ret >= nr_pages)
++				return ret;
++		}
++	}
++
++	return ret;
++}
++
++/*
++ * Try to free `nr_pages' of memory, system-wide, and return the number of
++ * freed pages.
++ *
++ * Rather than trying to age LRUs the aim is to preserve the overall
++ * LRU order by reclaiming preferentially
++ * inactive > active > active referenced > active mapped
+  */
+ unsigned long shrink_all_memory(unsigned long nr_pages)
  {
- 	unsigned long lru_pages, nr_slab;
+-	pg_data_t *pgdat;
+-	unsigned long nr_to_free = nr_pages;
++	unsigned long lru_pages, nr_slab;
  	unsigned long ret = 0;
--	int swappiness = vm_swappiness, pass;
+-	unsigned retry = 2;
+-	struct reclaim_state reclaim_state = {
+-		.reclaimed_slab = 0,
 +	int pass;
- 	struct reclaim_state reclaim_state;
- 	struct zone *zone;
- 	struct scan_control sc = {
- 		.gfp_mask = GFP_KERNEL,
--		.may_swap = 1,
++	struct reclaim_state reclaim_state;
++	struct zone *zone;
++	struct scan_control sc = {
++		.gfp_mask = GFP_KERNEL,
 +		.may_swap = 0,
- 		.swap_cluster_max = nr_pages,
- 		.may_writepage = 1,
++		.swap_cluster_max = nr_pages,
++		.may_writepage = 1,
 +		.mapped = vm_mapped,
  	};
  
  	current->reclaim_state = &reclaim_state;
-@@ -1381,15 +1455,16 @
- 			}
- 
- 		/* Force reclaiming mapped pages in the passes #3 and #4 */
--		if (pass > 2)
--			vm_swappiness = 100;
+-repeat:
+-	for_each_online_pgdat(pgdat) {
+-		unsigned long freed;
+ 
+-		freed = balance_pgdat(pgdat, nr_to_free, 0);
+-		ret += freed;
+-		nr_to_free -= freed;
+-		if ((long)nr_to_free <= 0)
++	lru_pages = 0;
++	for_each_zone(zone)
++		lru_pages += zone->nr_active + zone->nr_inactive;
++
++	nr_slab = read_page_state(nr_slab);
++	/* If slab caches are huge, it's better to hit them first */
++	while (nr_slab >= lru_pages) {
++		reclaim_state.reclaimed_slab = 0;
++		shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
++		if (!reclaim_state.reclaimed_slab)
+ 			break;
++
++		ret += reclaim_state.reclaimed_slab;
++		if (ret >= nr_pages)
++			goto out;
++
++		nr_slab -= reclaim_state.reclaimed_slab;
+ 	}
+-	if (retry-- && ret < nr_pages) {
+-		blk_congestion_wait(WRITE, HZ/5);
+-		goto repeat;
++
++	/*
++	 * We try to shrink LRUs in 5 passes:
++	 * 0 = Reclaim from inactive_list only
++	 * 1 = Reclaim from active list but don't reclaim mapped
++	 * 2 = 2nd pass of type 1
++	 * 3 = Reclaim mapped (normal reclaim)
++	 * 4 = 2nd pass of type 3
++	 */
++	for (pass = 0; pass < 5; pass++) {
++		int prio;
++
++		/* Needed for shrinking slab caches later on */
++		if (!lru_pages)
++			for_each_zone(zone) {
++				lru_pages += zone->nr_active;
++				lru_pages += zone->nr_inactive;
++			}
++
++		/* Force reclaiming mapped pages in the passes #3 and #4 */
 +		if (pass > 2) {
 +			sc.may_swap = 1;
 +			sc.mapped = 0;
 +		}
- 
- 		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
- 			unsigned long nr_to_scan = nr_pages - ret;
- 
- 			sc.nr_mapped = read_page_state(nr_mapped);
- 			sc.nr_scanned = 0;
--			sc.swap_cluster_max = nr_pages - ret;
- 
- 			ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
- 			if (ret >= nr_pages)
-@@ -1422,7 +1496,6 @
- 
- out:
++
++		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
++			unsigned long nr_to_scan = nr_pages - ret;
++
++			sc.nr_mapped = read_page_state(nr_mapped);
++			sc.nr_scanned = 0;
++
++			ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
++			if (ret >= nr_pages)
++				goto out;
++
++			reclaim_state.reclaimed_slab = 0;
++			shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
++			ret += reclaim_state.reclaimed_slab;
++			if (ret >= nr_pages)
++				goto out;
++
++			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
++				blk_congestion_wait(WRITE, HZ / 10);
++		}
++
++		lru_pages = 0;
+ 	}
++
++	/*
++	 * If ret = 0, we could not shrink LRUs, but there may be something
++	 * in slab caches
++	 */
++	if (!ret)
++		do {
++			reclaim_state.reclaimed_slab = 0;
++			shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
++			ret += reclaim_state.reclaimed_slab;
++		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
++
++out:
  	current->reclaim_state = NULL;
--	vm_swappiness = swappiness;
- 
++
  	return ret;
  }
-@@ -1451,12 +1524,41 @
+ #endif
+@@ -1346,12 +1524,41 @@ static int cpu_callback(struct notifier_
  }
  #endif /* CONFIG_HOTPLUG_CPU */
  
@@ -4526,7 +4723,7 @@
  		pid_t pid;
  
  		pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
-@@ -1464,6 +1566,11 @
+@@ -1359,6 +1566,11 @@ static int __init kswapd_init(void)
  		read_lock(&tasklist_lock);
  		pgdat->kswapd = find_task_by_pid(pid);
  		read_unlock(&tasklist_lock);
@@ -4538,7 +4735,7 @@
  	}
  	total_memory = nr_free_pagecache_pages();
  	hotcpu_notifier(cpu_callback, 0);
-@@ -1521,6 +1628,7 @
+@@ -1416,6 +1628,7 @@ static int __zone_reclaim(struct zone *z
  		.swap_cluster_max = max_t(unsigned long, nr_pages,
  					SWAP_CLUSTER_MAX),
  		.gfp_mask = gfp_mask,
@@ -4624,6 +4821,38 @@
 +#endif	/* CONFIG_SWAP_PREFETCH */
 +
 +#endif		/* SWAP_PREFETCH_H_INCLUDED */
+Index: linux-2.6.17-ck1/kernel/power/swsusp.c
+===================================================================
+--- linux-2.6.17-ck1.orig/kernel/power/swsusp.c	2006-06-18 15:32:49.000000000 +1000
++++ linux-2.6.17-ck1/kernel/power/swsusp.c	2006-06-18 15:34:38.000000000 +1000
+@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struc
+  */
+ 
+ #define SHRINK_BITE	10000
++static inline unsigned long __shrink_memory(long tmp)
++{
++	if (tmp > SHRINK_BITE)
++		tmp = SHRINK_BITE;
++	return shrink_all_memory(tmp);
++}
+ 
+ int swsusp_shrink_memory(void)
+ {
+@@ -195,12 +201,12 @@ int swsusp_shrink_memory(void)
+ 			if (!is_highmem(zone))
+ 				tmp -= zone->free_pages;
+ 		if (tmp > 0) {
+-			tmp = shrink_all_memory(SHRINK_BITE);
++			tmp = __shrink_memory(tmp);
+ 			if (!tmp)
+ 				return -ENOMEM;
+ 			pages += tmp;
+ 		} else if (size > image_size / PAGE_SIZE) {
+-			tmp = shrink_all_memory(SHRINK_BITE);
++			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
+ 			pages += tmp;
+ 		}
+ 		printk("\b%c", p[i++%4]);
 Index: linux-2.6.17-ck1/include/linux/mmzone.h
 ===================================================================
 --- linux-2.6.17-ck1.orig/include/linux/mmzone.h	2006-06-18 15:32:49.000000000 +1000
@@ -4667,13 +4896,13 @@
 --- linux-2.6.17-ck1.orig/mm/page_alloc.c	2006-06-18 15:32:49.000000000 +1000
 +++ linux-2.6.17-ck1/mm/page_alloc.c	2006-06-18 15:34:40.000000000 +1000
<<Diff was trimmed, longer than 597 lines>>

---- CVS-web:
    http://cvs.pld-linux.org/SOURCES/kernel-desktop-ck.patch?r1=1.2&r2=1.3&f=u
