[packages/percona-server/v5.0.x: 101/202] - for 5.0.75

glen glen at pld-linux.org
Wed Oct 21 16:17:31 CEST 2015


commit eccb488f1f659d1774a04d96e1cd228b6aa6f200
Author: Elan Ruusamäe <glen at pld-linux.org>
Date:   Fri Jan 9 11:04:53 2009 +0000

    - for 5.0.75
    
    Changed files:
        mysql-innodb_check_fragmentation.patch -> 1.1.2.1
        mysql-innodb_fsync_source.patch -> 1.1.2.1
        mysql-innodb_io_patches.patch -> 1.1.2.1
        mysql-innodb_io_pattern.patch -> 1.1.2.1
        mysql-innodb_locks_held.patch -> 1.1.2.1
        mysql-innodb_rw_lock.patch -> 1.1.2.1
        mysql-innodb_show_bp.patch -> 1.1.2.1
        mysql-innodb_show_hashed_memory.patch -> 1.1.2.1
        mysql-microsec_process.patch -> 1.1.2.1

 mysql-innodb_check_fragmentation.patch |  275 ++++++
 mysql-innodb_fsync_source.patch        |  594 +++++++++++++
 mysql-innodb_io_patches.patch          |  487 +++++++++++
 mysql-innodb_io_pattern.patch          |  688 +++++++++++++++
 mysql-innodb_locks_held.patch          |  168 ++++
 mysql-innodb_rw_lock.patch             | 1459 ++++++++++++++++++++++++++++++++
 mysql-innodb_show_bp.patch             |  447 ++++++++++
 mysql-innodb_show_hashed_memory.patch  |  275 ++++++
 mysql-microsec_process.patch           |  281 ++++++
 9 files changed, 4674 insertions(+)
---
diff --git a/mysql-innodb_check_fragmentation.patch b/mysql-innodb_check_fragmentation.patch
new file mode 100644
index 0000000..4b16731
--- /dev/null
+++ b/mysql-innodb_check_fragmentation.patch
@@ -0,0 +1,275 @@
+diff -r 936d427a9a15 innobase/btr/btr0cur.c
+--- a/innobase/btr/btr0cur.c	Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/btr/btr0cur.c	Mon Dec 22 00:33:11 2008 -0800
+@@ -516,6 +516,14 @@
+ 						== index->table->comp);
+ 			}
+ 
++			if (level == 0) {
++				/* Initializes status counters */
++				innobase_mysql_thd_init_innodb_scan_cont();
++				innobase_mysql_thd_init_innodb_scan_jump();
++				innobase_mysql_thd_init_innodb_scan_data();
++				innobase_mysql_thd_init_innodb_scan_garbage();
++			}
++
+ 			break;
+ 		}
+ 
+@@ -663,6 +671,12 @@
+ 			        btr_cur_add_path_info(cursor, height,
+ 						      root_height);
+ 		        }
++
++			/* Initializes status counters */
++			innobase_mysql_thd_init_innodb_scan_cont();
++			innobase_mysql_thd_init_innodb_scan_jump();
++			innobase_mysql_thd_init_innodb_scan_data();
++			innobase_mysql_thd_init_innodb_scan_garbage();
+ 
+ 			break;
+ 		}
+diff -r 936d427a9a15 innobase/btr/btr0pcur.c
+--- a/innobase/btr/btr0pcur.c	Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/btr/btr0pcur.c	Mon Dec 22 00:33:11 2008 -0800
+@@ -381,6 +381,7 @@
+ 				last record of the current page */
+ 	mtr_t*		mtr)	/* in: mtr */
+ {
++	ulint	page_no;
+ 	ulint	next_page_no;
+ 	ulint	space;
+ 	page_t*	page;
+@@ -393,11 +394,22 @@
+ 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ 	
+ 	page = btr_pcur_get_page(cursor);
++	page_no = buf_frame_get_page_no(page);
+ 
+ 	next_page_no = btr_page_get_next(page, mtr);
+ 	space = buf_frame_get_space_id(page);
+ 
+ 	ut_ad(next_page_no != FIL_NULL);	
++
++	if (next_page_no - page_no == 1) {
++		innobase_mysql_thd_increment_innodb_scan_cont(1);
++	} else {
++		innobase_mysql_thd_increment_innodb_scan_jump(1);
++	}
++	innobase_mysql_thd_increment_innodb_scan_data(
++				page_get_data_size(page));
++	innobase_mysql_thd_increment_innodb_scan_garbage(
++				page_header_get_field(page, PAGE_GARBAGE));
+ 
+ 	next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
+ 	ut_a(page_is_comp(next_page) == page_is_comp(page));
+@@ -427,6 +439,7 @@
+ 				record of the current page */
+ 	mtr_t*		mtr)	/* in: mtr */
+ {
++	ulint	page_no;
+ 	ulint	prev_page_no;
+ 	ulint	space;
+ 	page_t*	page;
+@@ -462,9 +475,20 @@
+ 	btr_pcur_restore_position(latch_mode2, cursor, mtr);	
+ 
+ 	page = btr_pcur_get_page(cursor);
++	page_no = buf_frame_get_page_no(page);
+ 
+ 	prev_page_no = btr_page_get_prev(page, mtr);
+ 	space = buf_frame_get_space_id(page);
++
++	if (page_no - prev_page_no == 1) {
++		innobase_mysql_thd_increment_innodb_scan_cont(1);
++	} else {
++		innobase_mysql_thd_increment_innodb_scan_jump(1);
++	}
++	innobase_mysql_thd_increment_innodb_scan_data(
++				page_get_data_size(page));
++	innobase_mysql_thd_increment_innodb_scan_garbage(
++				page_header_get_field(page, PAGE_GARBAGE));
+ 
+ 	if (btr_pcur_is_before_first_on_page(cursor, mtr)
+ 					&& (prev_page_no != FIL_NULL)) {	
+diff -r 936d427a9a15 innobase/btr/btr0sea.c
+--- a/innobase/btr/btr0sea.c	Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/btr/btr0sea.c	Mon Dec 22 00:33:11 2008 -0800
+@@ -861,6 +861,12 @@
+ 
+ 	buf_pool->n_page_gets++;
+ 
++	/* Initializes status counters */
++	innobase_mysql_thd_init_innodb_scan_cont();
++	innobase_mysql_thd_init_innodb_scan_jump();
++	innobase_mysql_thd_init_innodb_scan_data();
++	innobase_mysql_thd_init_innodb_scan_garbage();
++
+ 	return(TRUE);	
+ 
+ 	/*-------------------------------------------*/
+diff -r 936d427a9a15 innobase/include/btr0cur.h
+--- a/innobase/include/btr0cur.h	Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/include/btr0cur.h	Mon Dec 22 00:33:11 2008 -0800
+@@ -697,6 +697,17 @@
+ extern ulint	btr_cur_n_non_sea_old;
+ extern ulint	btr_cur_n_sea_old;
+ 
++/*--------------------------------------*/
++/* prototypes for new functions added to ha_innodb.cc */
++void innobase_mysql_thd_init_innodb_scan_cont();
++void innobase_mysql_thd_increment_innodb_scan_cont(ulong length);
++void innobase_mysql_thd_init_innodb_scan_jump();
++void innobase_mysql_thd_increment_innodb_scan_jump(ulong length);
++void innobase_mysql_thd_init_innodb_scan_data();
++void innobase_mysql_thd_increment_innodb_scan_data(ulong length);
++void innobase_mysql_thd_init_innodb_scan_garbage();
++void innobase_mysql_thd_increment_innodb_scan_garbage(ulong length);
++
+ #ifndef UNIV_NONINL
+ #include "btr0cur.ic"
+ #endif
+diff -r 936d427a9a15 patch_info/innodb_check_fragmentation.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_check_fragmentation.info	Mon Dec 22 00:33:11 2008 -0800
+@@ -0,0 +1,6 @@
++File=innodb_check_fragmentation.patch
++Name=Session status to check fragmentation of the last InnoDB scan
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=The names are Innodb_scan_*
+diff -r 936d427a9a15 sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc	Mon Dec 22 00:33:03 2008 -0800
++++ b/sql/ha_innodb.cc	Mon Dec 22 00:33:11 2008 -0800
+@@ -760,6 +760,102 @@
+ }
+ 
+ /*************************************************************************
++Initializes Innodb_scan_pages_contiguous. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_cont()
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_cont = 0;
++	}
++}
++
++/*************************************************************************
++Increments Innodb_scan_pages_contiguous. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_cont(ulong length)
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_cont+= length;
++	}
++}
++
++/*************************************************************************
++Initializes Innodb_scan_pages_jumpy. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_jump()
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_jump = 0;
++	}
++}
++
++/*************************************************************************
++Increments Innodb_scan_pages_jumpy. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_jump(ulong length)
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_jump+= length;
++	}
++}
++
++/*************************************************************************
++Initializes Innodb_scan_data_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_data()
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_data = 0;
++	}
++}
++
++/*************************************************************************
++Increments Innodb_scan_data_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_data(ulong length)
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_data+= length;
++	}
++}
++
++/*************************************************************************
++Initializes Innodb_scan_garbages_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_garbage()
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_garbage = 0;
++	}
++}
++
++/*************************************************************************
++Increments Innodb_scan_garbages_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_garbage(ulong length)
++{
++	THD *thd=current_thd;
++	if (likely(thd != 0)) {
++		thd->status_var.innodb_scan_garbage+= length;
++	}
++}
++
++/*************************************************************************
+ Gets the InnoDB transaction handle for a MySQL handler object, creates
+ an InnoDB transaction struct if the corresponding MySQL thread struct still
+ lacks one. */
+diff -r 936d427a9a15 sql/mysqld.cc
+--- a/sql/mysqld.cc	Mon Dec 22 00:33:03 2008 -0800
++++ b/sql/mysqld.cc	Mon Dec 22 00:33:11 2008 -0800
+@@ -6673,6 +6673,10 @@
+   {"Handler_write",            (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS},
+ #ifdef HAVE_INNOBASE_DB
+   {"Innodb_",                  (char*) &innodb_status_variables, SHOW_VARS},
++  {"Innodb_scan_pages_contiguous",(char*) offsetof(STATUS_VAR, innodb_scan_cont), SHOW_LONGLONG_STATUS},
++  {"Innodb_scan_pages_jumpy",  (char*) offsetof(STATUS_VAR, innodb_scan_jump), SHOW_LONGLONG_STATUS},
++  {"Innodb_scan_data_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_data), SHOW_LONGLONG_STATUS},
++  {"Innodb_scan_garbages_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_garbage), SHOW_LONGLONG_STATUS},
+ #endif /*HAVE_INNOBASE_DB*/
+   {"Key_blocks_not_flushed",   (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG},
+   {"Key_blocks_unused",        (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG},
+diff -r 936d427a9a15 sql/sql_class.h
+--- a/sql/sql_class.h	Mon Dec 22 00:33:03 2008 -0800
++++ b/sql/sql_class.h	Mon Dec 22 00:33:11 2008 -0800
+@@ -729,6 +729,10 @@
+     sense to add to the /global/ status variable counter.
+   */
+   double last_query_cost;
++  ulonglong innodb_scan_cont;
++  ulonglong innodb_scan_jump;
++  ulonglong innodb_scan_data;
++  ulonglong innodb_scan_garbage;
+ } STATUS_VAR;
+ 
+ /*
diff --git a/mysql-innodb_fsync_source.patch b/mysql-innodb_fsync_source.patch
new file mode 100644
index 0000000..637a7d6
--- /dev/null
+++ b/mysql-innodb_fsync_source.patch
@@ -0,0 +1,594 @@
+diff -r 61031ebb48ce innobase/buf/buf0flu.c
+--- a/innobase/buf/buf0flu.c	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/buf/buf0flu.c	Mon Nov 03 05:07:56 2008 -0800
+@@ -341,7 +341,7 @@
+ 
+ 	/* Now flush the doublewrite buffer data to disk */
+ 
+-	fil_flush(TRX_SYS_SPACE);
++	fil_flush(TRX_SYS_SPACE, FLUSH_FROM_DIRTY_BUFFER);
+ 
+ 	/* We know that the writes have been flushed to disk now
+ 	and in recovery we will find them in the doublewrite buffer
+@@ -381,7 +381,7 @@
+ 
+ 	/* Now we flush the data to disk (for example, with fsync) */
+ 
+-	fil_flush_file_spaces(FIL_TABLESPACE);
++	fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_DIRTY_BUFFER);
+ 
+ 	/* We can now reuse the doublewrite memory buffer: */
+ 
+@@ -501,7 +501,8 @@
+ 	}
+ #else
+ 	/* Force the log to the disk before writing the modified block */
+-	log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
++	log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE,
++		LOG_WRITE_FROM_DIRTY_BUFFER);
+ #endif	
+ 	buf_flush_init_for_writing(block->frame, block->newest_modification,
+ 						block->space, block->offset);
+diff -r 61031ebb48ce innobase/fil/fil0fil.c
+--- a/innobase/fil/fil0fil.c	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/fil/fil0fil.c	Mon Nov 03 05:07:56 2008 -0800
+@@ -245,6 +245,7 @@
+ 					request */
+ 	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
+ 					/* list of all file spaces */
++	ulint flush_types[FLUSH_FROM_NUMBER];/* calls to fil_flush by caller */
+ };
+ 
+ /* The tablespace memory cache. This variable is NULL before the module is
+@@ -849,7 +850,7 @@
+ 	/* Flush tablespaces so that we can close modified files in the LRU
+ 	list */
+ 
+-	fil_flush_file_spaces(FIL_TABLESPACE);		
++	fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);		
+ 
+ 	count++;
+ 
+@@ -1309,7 +1310,10 @@
+ 
+ 	UT_LIST_INIT(system->unflushed_spaces);
+ 	UT_LIST_INIT(system->space_list);
+-
++	{
++		int x;
++		for (x = 0; x < FLUSH_FROM_NUMBER; ++x) system->flush_types[x] = 0;
++	}
+ 	return(system);
+ }
+ 
+@@ -1437,6 +1441,23 @@
+ 	}
+ 
+ 	mutex_exit(&(system->mutex));
++}
++
++/********************************************************************
++Prints how many times fil_flush() was called by each caller type. */
++
++void
++fil_print(FILE *file)
++{
++	fprintf(file,
++		"fsync callers: %lu buffer pool, %lu other, %lu checkpoint, "
++		"%lu log aio, %lu log sync, %lu archive\n",
++		fil_system->flush_types[FLUSH_FROM_DIRTY_BUFFER],
++		fil_system->flush_types[FLUSH_FROM_OTHER],
++		fil_system->flush_types[FLUSH_FROM_CHECKPOINT],
++		fil_system->flush_types[FLUSH_FROM_LOG_IO_COMPLETE],
++		fil_system->flush_types[FLUSH_FROM_LOG_WRITE_UP_TO],
++		fil_system->flush_types[FLUSH_FROM_ARCHIVE]);
+ }
+ 
+ /********************************************************************
+@@ -2256,7 +2277,7 @@
+ 
+ 		os_thread_sleep(20000);
+ 
+-		fil_flush(id);
++		fil_flush(id, FLUSH_FROM_OTHER);
+ 
+ 		goto retry;
+ 
+@@ -3574,7 +3595,7 @@
+                                         size_after_extend, *actual_size); */
+ 	mutex_exit(&(system->mutex));	
+ 
+-	fil_flush(space_id);
++	fil_flush(space_id, FLUSH_FROM_OTHER);
+ 
+ 	return(success);
+ }
+@@ -4166,8 +4187,9 @@
+ void
+ fil_flush(
+ /*======*/
+-	ulint	space_id)	/* in: file space id (this can be a group of
++	ulint	space_id,	/* in: file space id (this can be a group of
+ 				log files or a tablespace of the database) */
++	flush_from_type flush_type)/* in: identifies the caller */
+ {
+ 	fil_system_t*	system	= fil_system;
+ 	fil_space_t*	space;
+@@ -4176,7 +4198,7 @@
+ 	ib_longlong	old_mod_counter;
+ 
+ 	mutex_enter(&(system->mutex));
+-	
++	system->flush_types[flush_type]++;	
+ 	HASH_SEARCH(hash, system->spaces, space_id, space,
+ 							space->id == space_id);
+ 	if (!space || space->is_being_deleted) {
+@@ -4281,7 +4303,8 @@
+ void
+ fil_flush_file_spaces(
+ /*==================*/
+-	ulint	purpose)	/* in: FIL_TABLESPACE, FIL_LOG */
++	ulint	purpose,	/* in: FIL_TABLESPACE, FIL_LOG */
++	flush_from_type flush_type)/* in: identifies the caller */
+ {
+ 	fil_system_t*	system	= fil_system;
+ 	fil_space_t*	space;
+@@ -4322,7 +4345,7 @@
+ 	a non-existing space id. */
+ 	for (i = 0; i < n_space_ids; i++) {
+ 
+-		fil_flush(space_ids[i]);
++		fil_flush(space_ids[i], flush_type);
+ 	}
+ 
+ 	mem_free(space_ids);
+diff -r 61031ebb48ce innobase/include/fil0fil.h
+--- a/innobase/include/fil0fil.h	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/include/fil0fil.h	Mon Nov 03 05:07:56 2008 -0800
+@@ -197,6 +197,13 @@
+ fil_init(
+ /*=====*/
+ 	ulint	max_n_open);	/* in: max number of open files */
++/********************************************************************
++ * Prints internal counters. */
++
++void
++fil_print(
++	/*=====*/
++	FILE* file); /* in: output stream */
+ /***********************************************************************
+ Opens all log files and system tablespace data files. They stay open until the
+ database server shutdown. This should be called at a server startup after the
+@@ -621,14 +628,26 @@
+ 	ulint	segment);	/* in: the number of the segment in the aio
+ 				array to wait for */ 
+ /**************************************************************************
++Identifies the caller of fil_flush. */
++typedef enum {
++	FLUSH_FROM_DIRTY_BUFFER,
++	FLUSH_FROM_OTHER,
++	FLUSH_FROM_CHECKPOINT,
++	FLUSH_FROM_LOG_IO_COMPLETE,
++	FLUSH_FROM_LOG_WRITE_UP_TO,
++	FLUSH_FROM_ARCHIVE,
++	FLUSH_FROM_NUMBER
++} flush_from_type;
++/**************************************************************************
+ Flushes to disk possible writes cached by the OS. If the space does not exist
+ or is being dropped, does not do anything. */
+ 
+ void
+ fil_flush(
+ /*======*/
+-	ulint	space_id);	/* in: file space id (this can be a group of
++	ulint	space_id, 	/* in: file space id (this can be a group of
+ 				log files or a tablespace of the database) */
++	flush_from_type flush_type);/* in: identifies the caller */
+ /**************************************************************************
+ Flushes to disk writes in file spaces of the given type possibly cached by
+ the OS. */
+@@ -636,7 +655,8 @@
+ void
+ fil_flush_file_spaces(
+ /*==================*/
+-	ulint	purpose);	/* in: FIL_TABLESPACE, FIL_LOG */
++	ulint	purpose, 	/* in: FIL_TABLESPACE, FIL_LOG */
++	flush_from_type flush_type);/* in: identifies the caller */
+ /**********************************************************************
+ Checks the consistency of the tablespace cache. */
+ 
+diff -r 61031ebb48ce innobase/include/log0log.h
+--- a/innobase/include/log0log.h	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/include/log0log.h	Mon Nov 03 05:07:56 2008 -0800
+@@ -146,6 +146,22 @@
+ log_io_complete(
+ /*============*/
+ 	log_group_t*	group);	/* in: log group */
++
++/**********************************************************
++Describes the caller of log_write_up_to. */
++
++typedef enum {
++	LOG_WRITE_FROM_DIRTY_BUFFER,
++	LOG_WRITE_FROM_BACKGROUND_SYNC,
++	LOG_WRITE_FROM_BACKGROUND_ASYNC,
++	LOG_WRITE_FROM_INTERNAL,
++	LOG_WRITE_FROM_CHECKPOINT_SYNC,
++	LOG_WRITE_FROM_CHECKPOINT_ASYNC,
++	LOG_WRITE_FROM_LOG_ARCHIVE,
++	LOG_WRITE_FROM_COMMIT_SYNC,
++	LOG_WRITE_FROM_COMMIT_ASYNC,
++	LOG_WRITE_FROM_NUMBER
++} log_sync_type;
+ /**********************************************************
+ This function is called, e.g., when a transaction wants to commit. It checks
+ that the log has been written to the log file up to the last log entry written
+@@ -159,14 +175,21 @@
+ 			be written, ut_dulint_max if not specified */
+ 	ulint	wait,	/* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ 			or LOG_WAIT_ALL_GROUPS */
+-	ibool	flush_to_disk);
+-			/* in: TRUE if we want the written log also to be
+-			flushed to disk */
++	ibool	flush_to_disk,
++	/* in: TRUE if we want the written log also to be flushed to disk */
++	log_sync_type caller);/* in: identifies the caller */
+ /********************************************************************
+ Does a syncronous flush of the log buffer to disk. */
+ 
+ void
+ log_buffer_flush_to_disk(void);
++/*==========================*/
++/********************************************************************
++Flushes the log buffer. Forces it to disk depending on the value of
++the configuration parameter innodb_flush_log_at_trx_commit. */
++
++void
++log_buffer_flush_maybe_sync(void);
+ /*==========================*/
+ /********************************************************************
+ Advances the smallest lsn for which there are unflushed dirty blocks in the
+@@ -744,6 +767,12 @@
+ 					AND flushed to disk */
+ 	ulint		n_pending_writes;/* number of currently pending flushes
+ 					or writes */
++	ulint           log_sync_callers[LOG_WRITE_FROM_NUMBER];
++		/* counts calls to log_write_up_to */
++	ulint           log_sync_syncers[LOG_WRITE_FROM_NUMBER];
++		/* counts calls to log_write_up_to when log file is sync'd */
++	ulint           n_syncs;        /* number of fsyncs done for log file */
++	ulint           n_checkpoints;  /* number of calls to log_checkpoint */
+ 	/* NOTE on the 'flush' in names of the fields below: starting from
+ 	4.0.14, we separate the write of the log file and the actual fsync()
+ 	or other method to flush it to disk. The names below shhould really
+diff -r 61031ebb48ce innobase/log/log0log.c
+--- a/innobase/log/log0log.c	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/log/log0log.c	Mon Nov 03 05:07:56 2008 -0800
+@@ -782,6 +782,15 @@
+ 	log_sys->written_to_all_lsn = log_sys->lsn;
+ 	
+ 	log_sys->n_pending_writes = 0;
++	{
++		int x;
++		for (x = 0; x < LOG_WRITE_FROM_NUMBER; ++x) {
++			log_sys->log_sync_callers[x] = 0;
++			log_sys->log_sync_syncers[x] = 0;
++		}
++	}
++	log_sys->n_syncs = 0;
++        log_sys->n_checkpoints = 0;
+ 
+ 	log_sys->no_flush_event = os_event_create(NULL);
+ 
+@@ -1066,7 +1075,7 @@
+ 		if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
+ 		   && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+ 		
+-		        fil_flush(group->space_id);
++			fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE);
+ 		}
+ 
+ #ifdef UNIV_DEBUG
+@@ -1088,7 +1097,7 @@
+ 	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+ 	    && srv_flush_log_at_trx_commit != 2) {
+ 
+-	        fil_flush(group->space_id);
++		fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE);
+ 	}
+ 
+ 	mutex_enter(&(log_sys->mutex));
+@@ -1303,9 +1312,10 @@
+ 			be written, ut_dulint_max if not specified */
+ 	ulint	wait,	/* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ 			or LOG_WAIT_ALL_GROUPS */
+-	ibool	flush_to_disk)
++	ibool	flush_to_disk,
+ 			/* in: TRUE if we want the written log also to be
+ 			flushed to disk */
++	log_sync_type caller) /* in: identifies caller */
+ {
+ 	log_group_t*	group;
+ 	ulint		start_offset;
+@@ -1315,6 +1325,7 @@
+ 	ulint		loop_count;
+ 	ulint		unlock;
+ 
++	log_sys->log_sync_callers[caller]++;
+ 	if (recv_no_ibuf_operations) {
+ 		/* Recovery is running and no operations on the log files are
+ 		allowed yet (the variable name .._no_ibuf_.. is misleading) */
+@@ -1465,13 +1476,17 @@
+ 		so we have also flushed to disk what we have written */
+ 
+ 		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
++		log_sys->n_syncs++;
++		log_sys->log_sync_syncers[caller]++;
+ 
+ 	} else if (flush_to_disk) {
+ 
+ 		group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ 
+-	        fil_flush(group->space_id);
++		fil_flush(group->space_id, FLUSH_FROM_LOG_WRITE_UP_TO);
+ 		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
++		log_sys->n_syncs++;
++		log_sys->log_sync_syncers[caller]++;
+ 	}
+ 
+ 	mutex_enter(&(log_sys->mutex));
+@@ -1520,7 +1535,8 @@
+ 
+ 	mutex_exit(&(log_sys->mutex));
+ 
+-	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
++	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE,
++                        LOG_WRITE_FROM_BACKGROUND_SYNC);
+ }
+ 
+ /********************************************************************
+@@ -1551,7 +1567,7 @@
+ 	mutex_exit(&(log->mutex));
+ 
+ 	if (do_flush) {
+-		log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
++		log_write_up_to(lsn, LOG_NO_WAIT, FALSE, LOG_WRITE_FROM_INTERNAL);
+ 	}
+ }
+ 
+@@ -1921,11 +1937,11 @@
+ 	}
+ 
+ 	if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+-	        fil_flush_file_spaces(FIL_TABLESPACE);
++		fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_CHECKPOINT);
+ 	}
+ 
+ 	mutex_enter(&(log_sys->mutex));
+-
++	log_sys->n_checkpoints++;
+ 	oldest_lsn = log_buf_pool_get_oldest_modification();
+ 
+ 	mutex_exit(&(log_sys->mutex));
+@@ -1938,7 +1954,8 @@
+ 	write-ahead-logging algorithm ensures that the log has been flushed
+ 	up to oldest_lsn. */
+ 
+-	log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
++	log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE,
++		LOG_WRITE_FROM_CHECKPOINT_SYNC);
+ 
+ 	mutex_enter(&(log_sys->mutex));
+ 
+@@ -2566,7 +2583,7 @@
+ 
+ 	mutex_exit(&(log_sys->mutex));
+ 
+-	fil_flush(group->archive_space_id);
++	fil_flush(group->archive_space_id, FLUSH_FROM_ARCHIVE);
+ 	
+ 	mutex_enter(&(log_sys->mutex));
+ 	
+@@ -2647,7 +2664,8 @@
+ 
+ 		mutex_exit(&(log_sys->mutex));
+ 	
+-		log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
++		log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE,
++			LOG_WRITE_FROM_LOG_ARCHIVE);
+ 
+ 		calc_new_limit = FALSE;
+ 
+@@ -3184,8 +3202,8 @@
+ 	}
+ 	mutex_exit(&kernel_mutex);
+ 
+-	fil_flush_file_spaces(FIL_TABLESPACE);
+-	fil_flush_file_spaces(FIL_LOG);
++	fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
++	fil_flush_file_spaces(FIL_LOG, FLUSH_FROM_OTHER);
+ 
+ 	/* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
+ 	pool: therefore it is essential that the buffer pool has been
+@@ -3218,7 +3236,7 @@
+ 
+ 		fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
+ 
+-	fil_flush_file_spaces(FIL_TABLESPACE);
++	fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
+ 
+ 	fil_close_all_files();
+ 
+@@ -3331,15 +3349,45 @@
+ 	time_elapsed = 0.001 + difftime(current_time,
+ 					log_sys->last_printout_time);
+ 	fprintf(file,
+-	"%lu pending log writes, %lu pending chkp writes\n"
+-	"%lu log i/o's done, %.2f log i/o's/second\n",
+-	(ulong) log_sys->n_pending_writes,
+-	(ulong) log_sys->n_pending_checkpoint_writes,
+-	(ulong) log_sys->n_log_ios,
+-	((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed));
++		"%lu pending log writes, %lu pending chkp writes\n"
++		"%lu log i/o's done, %.2f log i/o's/second, %lu syncs, %lu checkpoints\n",
++		(ulong) log_sys->n_pending_writes,
++		(ulong) log_sys->n_pending_checkpoint_writes,
++		(ulong) log_sys->n_log_ios,
++		(log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed,
++		log_sys->n_syncs,
++		log_sys->n_checkpoints);
+ 
+ 	log_sys->n_log_ios_old = log_sys->n_log_ios;
+ 	log_sys->last_printout_time = current_time;
++
++	fprintf(file,
++		"log sync callers: %lu buffer pool, background %lu sync and %lu async, "
++		"%lu internal, checkpoint %lu sync and %lu async, %lu archive, "
++		"commit %lu sync and %lu async\n",
++		log_sys->log_sync_callers[LOG_WRITE_FROM_DIRTY_BUFFER],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_SYNC],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_ASYNC],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_INTERNAL],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_SYNC],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_ASYNC],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_LOG_ARCHIVE],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_SYNC],
++		log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_ASYNC]);
++
++	fprintf(file,
++		"log sync syncers: %lu buffer pool, background %lu sync and %lu async, "
++		"%lu internal, checkpoint %lu sync and %lu async, %lu archive, "
++		"commit %lu sync and %lu async\n",
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_DIRTY_BUFFER],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_SYNC],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_ASYNC],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_INTERNAL],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_SYNC],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_ASYNC],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_LOG_ARCHIVE],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_SYNC],
++		log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_ASYNC]);
+ 
+ 	mutex_exit(&(log_sys->mutex));
+ }
+diff -r 61031ebb48ce innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/srv/srv0srv.c	Mon Nov 03 05:07:56 2008 -0800
+@@ -1638,6 +1638,12 @@
+ 		(ulong)time_elapsed);
+ 
+ 	fputs("----------\n"
++               "BACKGROUND THREAD\n"
++               "----------\n", file);
++        fil_print(file);
++
++
++	fputs("----------\n"
+ 		"SEMAPHORES\n"
+ 		"----------\n", file);
+ 	sync_print(file);
+diff -r 61031ebb48ce innobase/trx/trx0sys.c
+--- a/innobase/trx/trx0sys.c	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/trx/trx0sys.c	Mon Nov 03 05:07:56 2008 -0800
+@@ -511,7 +511,7 @@
+ 		page += UNIV_PAGE_SIZE;
+ 	}
+ 
+-	fil_flush_file_spaces(FIL_TABLESPACE);
++	fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
+ 	
+ leave_func:
+ 	ut_free(unaligned_read_buf);
+diff -r 61031ebb48ce innobase/trx/trx0trx.c
+--- a/innobase/trx/trx0trx.c	Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/trx/trx0trx.c	Mon Nov 03 05:07:56 2008 -0800
+@@ -916,19 +916,21 @@
+                         if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+                              	/* Write the log but do not flush it to disk */
+ 
+-                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+-									FALSE);
++                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++                                                LOG_WRITE_FROM_COMMIT_ASYNC);
+                         } else {
+                                	/* Write the log to the log files AND flush
+                                	them to disk */
+ 
+-                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
++                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
++                                                LOG_WRITE_FROM_COMMIT_SYNC);
+                         }
+                 } else if (srv_flush_log_at_trx_commit == 2) {
+ 
+                         /* Write the log but do not flush it to disk */
+ 
+-                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++                                        LOG_WRITE_FROM_COMMIT_ASYNC);
+                 } else {
+                         ut_error;
+                 }
+@@ -1659,18 +1661,21 @@
+                 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+                         /* Write the log but do not flush it to disk */
+ 
+-                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++                                        LOG_WRITE_FROM_COMMIT_ASYNC);
+                 } else {
+                         /* Write the log to the log files AND flush them to
+                         disk */
+ 
+-                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
++                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
++                                        LOG_WRITE_FROM_COMMIT_SYNC);
+                 }
+         } else if (srv_flush_log_at_trx_commit == 2) {
+ 
+                 /* Write the log but do not flush it to disk */
+ 
+-                log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++                log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++                                LOG_WRITE_FROM_COMMIT_ASYNC);
+         } else {
+                 ut_error;
+         }
+@@ -1906,19 +1911,21 @@
+                    	if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+                         	/* Write the log but do not flush it to disk */
+ 
+-                        	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+-								FALSE);
++                        	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++                                          LOG_WRITE_FROM_COMMIT_ASYNC);
+                         } else {
+                                	/* Write the log to the log files AND flush
+                                	them to disk */
+ 
+-                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
++                               	log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
++                                                LOG_WRITE_FROM_COMMIT_SYNC);
+                         }
+                 } else if (srv_flush_log_at_trx_commit == 2) {
+ 
+                         /* Write the log but do not flush it to disk */
+ 
+-                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++                        log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++                                        LOG_WRITE_FROM_COMMIT_ASYNC);
+                 } else {
+                         ut_error;
+                 }
+diff -r 61031ebb48ce patch_info/innodb_fsync_source.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_fsync_source.info	Mon Nov 03 05:07:56 2008 -0800
+@@ -0,0 +1,9 @@
++File=innodb_fsync_source.patch
++Name=Information of fsync callers in InnoDB
++Version=1.0
++Author=Google
++License=GPL
++Comment=
++ChangeLog=
++2008-11-01 
++VT: Initial porting
diff --git a/mysql-innodb_io_patches.patch b/mysql-innodb_io_patches.patch
new file mode 100644
index 0000000..90af625
--- /dev/null
+++ b/mysql-innodb_io_patches.patch
@@ -0,0 +1,487 @@
+diff -r 45683461331d innobase/buf/buf0rea.c
+--- a/innobase/buf/buf0rea.c	Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/buf/buf0rea.c	Mon Dec 22 00:32:02 2008 -0800
+@@ -188,6 +188,10 @@
+ 	ulint		low, high;
+ 	ulint		err;
+ 	ulint		i;
++
++	if (!(srv_read_ahead & 1)) {
++		return(0);
++	}
+ 
+ 	if (srv_startup_is_before_trx_rollback_phase) {
+ 	        /* No read-ahead to avoid thread deadlocks */
+@@ -396,6 +400,10 @@
+ 	ulint		err;
+ 	ulint		i;
+ 	
++	if (!(srv_read_ahead & 2)) {
++		return(0);
++	}
++
+ 	if (srv_startup_is_before_trx_rollback_phase) {
+ 	        /* No read-ahead to avoid thread deadlocks */
+ 	        return(0);
+diff -r 45683461331d innobase/include/os0file.h
+--- a/innobase/include/os0file.h	Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/include/os0file.h	Mon Dec 22 00:32:02 2008 -0800
+@@ -551,8 +551,10 @@
+ /*========*/
+ 	ulint	n,		/* in: maximum number of pending aio operations
+ 				allowed; n must be divisible by n_segments */
+-	ulint	n_segments,	/* in: combined number of segments in the four
+-				first aio arrays; must be >= 4 */
++//	ulint	n_segments,	/* in: combined number of segments in the four
++//				first aio arrays; must be >= 4 */
++	ulint	n_read_threads,  /* in: number of read segments; n_segments == 2 + n_read_threads + n_write_threads */
++	ulint	n_write_threads, /* in: number of write segments */
+ 	ulint	n_slots_sync);	/* in: number of slots in the sync aio array */
+ /***********************************************************************
+ Requests an asynchronous i/o operation. */
+diff -r 45683461331d innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h	Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/include/srv0srv.h	Mon Dec 22 00:32:02 2008 -0800
+@@ -89,6 +89,8 @@
+ extern ulint	srv_lock_table_size;
+ 
+ extern ulint	srv_n_file_io_threads;
++extern ulint	srv_n_read_io_threads;
++extern ulint	srv_n_write_io_threads;
+ 
+ #ifdef UNIV_LOG_ARCHIVE
+ extern ibool	srv_log_archive_on;
+@@ -133,6 +135,10 @@
+ extern ulong	srv_max_purge_lag;
+ extern ibool	srv_use_awe;
+ extern ibool	srv_use_adaptive_hash_indexes;
++
++extern ulint	srv_io_capacity;
++extern ulint	srv_read_ahead;
++extern ulint	srv_adaptive_checkpoint;
+ /*-------------------------------------------*/
+ 
+ extern ulint	srv_n_rows_inserted;
+diff -r 45683461331d innobase/log/log0log.c
+--- a/innobase/log/log0log.c	Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/log/log0log.c	Mon Dec 22 00:32:02 2008 -0800
+@@ -3326,6 +3326,15 @@
+ 			(ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
+ 			(ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
+ 
++	fprintf(file,
++		"Max checkpoint age  %lu\n"
++		"Modified age        %lu\n"
++		"Checkpoint age      %lu\n",
++			(ulong) log_sys->max_checkpoint_age,
++			(ulong) ut_dulint_minus(log_sys->lsn,
++					log_buf_pool_get_oldest_modification()),
++			(ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn));
++
+ 	current_time = time(NULL);
+ 			
+ 	time_elapsed = 0.001 + difftime(current_time,
+diff -r 45683461331d innobase/os/os0file.c
+--- a/innobase/os/os0file.c	Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/os/os0file.c	Mon Dec 22 00:32:02 2008 -0800
+@@ -2877,8 +2877,10 @@
+ /*========*/
+ 	ulint	n,		/* in: maximum number of pending aio operations
+ 				allowed; n must be divisible by n_segments */
+-	ulint	n_segments,	/* in: combined number of segments in the four
+-				first aio arrays; must be >= 4 */
++//	ulint	n_segments,	/* in: combined number of segments in the four
++//				first aio arrays; must be >= 4 */
++	ulint	n_read_threads,  /* in: number of read segments; n_segments == 2 + n_read_threads + n_write_threads */
++	ulint	n_write_threads, /* in: number of write segments */
+ 	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
+ {
+ 	ulint	n_read_segs;
+@@ -2888,6 +2890,8 @@
+ #ifdef POSIX_ASYNC_IO
+ 	sigset_t   sigset;
+ #endif
++	ulint	n_segments = 2 + n_read_threads + n_write_threads;
++
+ 	ut_ad(n % n_segments == 0);
+ 	ut_ad(n_segments >= 4);
+ 
+@@ -2898,8 +2902,8 @@
+ 	}
+ 
+ 	n_per_seg = n / n_segments;
+-	n_write_segs = (n_segments - 2) / 2;
+-	n_read_segs = n_segments - 2 - n_write_segs;
++	n_write_segs = n_write_threads;
++	n_read_segs = n_read_threads;
+ 	
+ 	/* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
+ 
+@@ -3180,6 +3184,13 @@
+ 	struct aiocb*	control;
+ #endif
+ 	ulint		i;
++	ulint		prim_segment;
++	ulint		n;
++
++	n = array->n_slots / array->n_segments;
++	/* Pick a preferred segment by striping the offset in 64-block units ( aligning max(BUF_READ_AHEAD_AREA) ) */
++	prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
++
+ loop:
+ 	os_mutex_enter(array->mutex);
+ 
+@@ -3198,12 +3209,23 @@
+ 		goto loop;
+ 	}
+ 
++	for (i = prim_segment * n; i < array->n_slots; i++) {
++		slot = os_aio_array_get_nth_slot(array, i);
++
++		if (slot->reserved == FALSE) {
++			break;
++		}
++	}
++
++	if (slot->reserved == TRUE){
++		/* No free slot at or after the preferred segment; search again from the start of the array. */
+ 	for (i = 0;; i++) {
+ 		slot = os_aio_array_get_nth_slot(array, i);
+ 
+ 		if (slot->reserved == FALSE) {
+ 			break;
+ 		}
++	}
+ 	}
+ 
+ 	array->n_reserved++;
+diff -r 45683461331d innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c	Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/srv/srv0srv.c	Mon Dec 22 00:32:02 2008 -0800
+@@ -167,6 +167,8 @@
+ ulint	srv_lock_table_size	= ULINT_MAX;
+ 
+ ulint	srv_n_file_io_threads	= ULINT_MAX;
++ulint	srv_n_read_io_threads	= 1;
++ulint	srv_n_write_io_threads	= 1;
+ 
+ #ifdef UNIV_LOG_ARCHIVE
+ ibool	srv_log_archive_on	= FALSE;
+@@ -324,6 +326,15 @@
+ ibool	srv_use_awe			= FALSE;
+ ibool	srv_use_adaptive_hash_indexes 	= TRUE;
+ 
++ulint	srv_io_capacity = 100;
++
++/* Scales the configured IO capacity: PCT_IO(p) evaluates to p percent of
++srv_io_capacity, truncated to ulint (e.g. PCT_IO(5) is 5% of the capacity).
++The argument is parenthesized so that expression arguments expand safely. */
++#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0)))
++
++ulint	srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
++ulint	srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
+ /*-------------------------------------------*/
+ ulong	srv_n_spin_wait_rounds	= 20;
+ ulong	srv_n_free_tickets_to_enter = 500;
+@@ -2214,6 +2225,8 @@
+ 	ibool		skip_sleep	= FALSE;
+ 	ulint		i;
+ 	
++	dulint		oldest_lsn;
++	
+ #ifdef UNIV_DEBUG_THREAD_CREATION
+ 	fprintf(stderr, "Master thread starts, id %lu\n",
+ 			      os_thread_pf(os_thread_get_curr_id()));
+@@ -2302,9 +2315,9 @@
+ 						+ log_sys->n_pending_writes;
+ 		n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ 						+ buf_pool->n_pages_written;
+-		if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
++		if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) {
+ 			srv_main_thread_op_info = "doing insert buffer merge";
+-			ibuf_contract_for_n_pages(TRUE, 5);
++			ibuf_contract_for_n_pages(TRUE, PCT_IO(5));
+ 
+ 			srv_main_thread_op_info = "flushing log";
+ 
+@@ -2317,7 +2330,7 @@
+ 			/* Try to keep the number of modified pages in the
+ 			buffer pool under the limit wished by the user */
+ 			
+-			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ 							  ut_dulint_max);
+ 
+ 		        /* If we had to do the flush, it may have taken
+@@ -2326,6 +2339,44 @@
+ 			iteration of this loop. */
+ 			     
+ 			skip_sleep = TRUE;
++		} else if (srv_adaptive_checkpoint) {
++
++			/* Try to keep the modified age from exceeding the
++			max_checkpoint_age * 7/8 line */
++
++			mutex_enter(&(log_sys->mutex));
++
++			oldest_lsn = buf_pool_get_oldest_modification();
++			if (ut_dulint_is_zero(oldest_lsn)) {
++
++				mutex_exit(&(log_sys->mutex));
++
++			} else {
++				if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
++				    > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) {
++
++					/* 2nd defence line (max_checkpoint_age * 3/4) */
++
++					mutex_exit(&(log_sys->mutex));
++
++					n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
++									  ut_dulint_max);
++					skip_sleep = TRUE;
++				} else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
++					   > (log_sys->max_checkpoint_age)/2 ) {
++
++					/* 1st defence line (max_checkpoint_age * 1/2) */
++
++					mutex_exit(&(log_sys->mutex));
++
++					n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
++									  ut_dulint_max);
++					skip_sleep = TRUE;
++				} else {
++					mutex_exit(&(log_sys->mutex));
++				}
++			}
++
+ 		}
+ 
+ 		if (srv_activity_count == old_activity_count) {
+@@ -2352,10 +2403,10 @@
+ 	n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
+ 	n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ 						+ buf_pool->n_pages_written;
+-	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
++	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
+ 
+ 		srv_main_thread_op_info = "flushing buffer pool pages";
+-		buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++		buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
+ 
+ 		srv_main_thread_op_info = "flushing log";
+ 		log_buffer_flush_to_disk();
+@@ -2365,7 +2416,7 @@
+ 	even if the server were active */
+ 
+ 	srv_main_thread_op_info = "doing insert buffer merge";
+-	ibuf_contract_for_n_pages(TRUE, 5);
++	ibuf_contract_for_n_pages(TRUE, PCT_IO(5));
+ 
+ 	srv_main_thread_op_info = "flushing log";
+ 	log_buffer_flush_to_disk();
+@@ -2407,14 +2458,14 @@
+ 		(> 70 %), we assume we can afford reserving the disk(s) for
+ 		the time it requires to flush 100 pages */
+ 
+-	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ 							ut_dulint_max);
+ 	} else {
+ 	        /* Otherwise, we only flush a small number of pages so that
+ 		we do not unnecessarily use much disk i/o capacity from
+ 		other work */
+ 
+-	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
++	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
+ 							ut_dulint_max);
+ 	}
+ 
+@@ -2503,7 +2554,7 @@
+ 	if (srv_fast_shutdown && srv_shutdown_state > 0) {
+ 	        n_bytes_merged = 0;
+ 	} else {
+-	        n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
++	        n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
+ 	}
+ 
+ 	srv_main_thread_op_info = "reserving kernel mutex";
+@@ -2520,7 +2571,7 @@
+ 
+ 	if (srv_fast_shutdown < 2) {
+ 		n_pages_flushed =
+-			buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++			buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
+ 	} else {
+ 		/* In the fastest shutdown we do not flush the buffer pool
+ 		to data files: we set n_pages_flushed to 0 artificially. */
+diff -r 45683461331d innobase/srv/srv0start.c
+--- a/innobase/srv/srv0start.c	Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/srv/srv0start.c	Mon Dec 22 00:32:02 2008 -0800
+@@ -1205,24 +1205,28 @@
+ 		return(DB_ERROR);
+ 	}
+ 
++	/* Override innodb_file_io_threads with the total derived from the read and write thread counts */
++	srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads;
++
+ 	/* Restrict the maximum number of file i/o threads */
+ 	if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
+ 
+ 		srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
++		srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2;
+ 	}
+ 
+ 	if (!os_aio_use_native_aio) {
+  		/* In simulated aio we currently have use only for 4 threads */
+-		srv_n_file_io_threads = 4;
++		/*srv_n_file_io_threads = 4;*/
+ 
+ 		os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
+ 						* srv_n_file_io_threads,
+-					srv_n_file_io_threads,
+-					SRV_MAX_N_PENDING_SYNC_IOS);
++					srv_n_read_io_threads, srv_n_write_io_threads,
++					SRV_MAX_N_PENDING_SYNC_IOS * 8);
+ 	} else {
+ 		os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
+ 						* srv_n_file_io_threads,
+-					srv_n_file_io_threads,
++					srv_n_read_io_threads, srv_n_write_io_threads,
+ 					SRV_MAX_N_PENDING_SYNC_IOS);
+ 	}
+ 	
+diff -r 45683461331d patch_info/innodb_io_patches.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_io_patches.info	Mon Dec 22 00:32:02 2008 -0800
+@@ -0,0 +1,9 @@
++File=innodb_io_patches.patch
++Name=Cluster of past InnoDB IO patches
++Version=1.0
++Author=Percona
++License=GPL
++Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush)
++ChangeLog=
++2008-11-06
++YK: Initial release
+diff -r 45683461331d sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc	Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/ha_innodb.cc	Mon Dec 22 00:32:02 2008 -0800
+@@ -149,6 +149,7 @@
+      innobase_lock_wait_timeout, innobase_force_recovery,
+      innobase_open_files;
+ 
++long innobase_read_io_threads, innobase_write_io_threads;
+ longlong innobase_buffer_pool_size, innobase_log_file_size;
+ 
+ /* The default values for the following char* start-up parameters
+@@ -1403,6 +1404,8 @@
+ 	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
+ 
+ 	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
++	srv_n_read_io_threads = (ulint) innobase_read_io_threads;
++	srv_n_write_io_threads = (ulint) innobase_write_io_threads;
+ 
+ 	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
+ 	srv_force_recovery = (ulint) innobase_force_recovery;
+diff -r 45683461331d sql/ha_innodb.h
+--- a/sql/ha_innodb.h	Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/ha_innodb.h	Mon Dec 22 00:32:02 2008 -0800
+@@ -204,6 +204,7 @@
+ extern long innobase_additional_mem_pool_size;
+ extern long innobase_buffer_pool_awe_mem_mb;
+ extern long innobase_file_io_threads, innobase_lock_wait_timeout;
++extern long innobase_read_io_threads, innobase_write_io_threads;
+ extern long innobase_force_recovery;
+ extern long innobase_open_files;
+ extern char *innobase_data_home_dir, *innobase_data_file_path;
+@@ -234,6 +235,9 @@
+ extern ulong srv_thread_concurrency;
+ extern ulong srv_commit_concurrency;
+ extern ulong srv_flush_log_at_trx_commit;
++extern ulong srv_io_capacity;
++extern ulong srv_read_ahead;
++extern ulong srv_adaptive_checkpoint;
+ }
+ 
+ bool innobase_init(void);
+diff -r 45683461331d sql/mysqld.cc
+--- a/sql/mysqld.cc	Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/mysqld.cc	Mon Dec 22 00:32:02 2008 -0800
+@@ -5036,6 +5036,11 @@
+   OPT_INNODB_ROLLBACK_ON_TIMEOUT,
+   OPT_SECURE_FILE_PRIV,
+   OPT_KEEP_FILES_ON_CREATE,
++  OPT_INNODB_IO_CAPACITY,
++  OPT_INNODB_READ_AHEAD,
++  OPT_INNODB_ADAPTIVE_CHECKPOINT,
++  OPT_INNODB_READ_IO_THREADS,
++  OPT_INNODB_WRITE_IO_THREADS,
+   OPT_INNODB_ADAPTIVE_HASH_INDEX,
+   OPT_FEDERATED
+ };
+@@ -5344,6 +5349,26 @@
+    (gptr*) &global_system_variables.innodb_table_locks,
+    (gptr*) &global_system_variables.innodb_table_locks,
+    0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
++  {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY,
++   "Number of IO operations per second the server can do. Tunes background IO rate.",
++   (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity,
++   0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0},
++  {"innodb_read_ahead", OPT_INNODB_READ_AHEAD,
++   "Enable/Diasable read aheads bit0:random bit1:linear",
++   (gptr*) &srv_read_ahead, (gptr*) &srv_read_ahead,
++   0, GET_ULONG, REQUIRED_ARG, 3, 0, 3, 0, 0, 0},
++  {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT,
++   "Enable/Diasable flushing along modified age 0:disable 1:enable",
++   (gptr*) &srv_adaptive_checkpoint, (gptr*) &srv_adaptive_checkpoint,
++   0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
++  {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS,
++   "Number of background read I/O threads in InnoDB.",
++   (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads,
++   0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
++  {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS,
++   "Number of background write I/O threads in InnoDB.",
++   (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads,
++   0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
+ #endif /* End HAVE_INNOBASE_DB */
+   {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
+    (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
+diff -r 45683461331d sql/set_var.cc
+--- a/sql/set_var.cc	Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/set_var.cc	Mon Dec 22 00:32:02 2008 -0800
+@@ -484,6 +484,12 @@
+ sys_var_long_ptr  sys_innodb_flush_log_at_trx_commit(
+                                         "innodb_flush_log_at_trx_commit",
+                                         &srv_flush_log_at_trx_commit);
++sys_var_long_ptr	sys_innodb_io_capacity("innodb_io_capacity",
++                                               &srv_io_capacity);
++sys_var_long_ptr	sys_innodb_read_ahead("innodb_read_ahead",
++                                              &srv_read_ahead);
++sys_var_long_ptr	sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint",
++                                                      &srv_adaptive_checkpoint);
+ sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", 
+                                                &innobase_data_file_path);
+ sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", 
+@@ -847,6 +853,9 @@
+   &sys_innodb_thread_concurrency,
+   &sys_innodb_commit_concurrency,
+   &sys_innodb_flush_log_at_trx_commit,
++  &sys_innodb_io_capacity,
++  &sys_innodb_read_ahead,
++  &sys_innodb_adaptive_checkpoint,
+ #endif
+   &sys_trust_routine_creators,
+   &sys_trust_function_creators,
+@@ -982,6 +991,11 @@
+   {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
+   {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
+   {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
++  {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS},
++  {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS},
++  {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS},
++  {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG},
++  {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG},
+ #endif
+   {sys_interactive_timeout.name,(char*) &sys_interactive_timeout,   SHOW_SYS},
+   {sys_join_buffer_size.name,   (char*) &sys_join_buffer_size,	    SHOW_SYS},
diff --git a/mysql-innodb_io_pattern.patch b/mysql-innodb_io_pattern.patch
new file mode 100644
index 0000000..604404f
--- /dev/null
+++ b/mysql-innodb_io_pattern.patch
@@ -0,0 +1,688 @@
+diff -r 2bbfde0e0e70 include/mysql_com.h
+--- a/include/mysql_com.h	Mon Dec 22 00:33:11 2008 -0800
++++ b/include/mysql_com.h	Mon Dec 22 00:33:48 2008 -0800
+@@ -121,6 +121,9 @@
+ #define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */
+ #define REFRESH_DES_KEY_FILE	0x40000L
+ #define REFRESH_USER_RESOURCES	0x80000L
++
++/* TRUNCATE INFORMATION_SCHEMA.INNODB_IO_PATTERN */
++#define REFRESH_INNODB_IO_PATTERN	0x1000000L
+ 
+ #define CLIENT_LONG_PASSWORD	1	/* new more secure passwords */
+ #define CLIENT_FOUND_ROWS	2	/* Found instead of affected rows */
+diff -r 2bbfde0e0e70 innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c	Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/buf/buf0buf.c	Mon Dec 22 00:33:48 2008 -0800
+@@ -653,6 +653,9 @@
+ 	}
+ 
+ 	buf_pool->page_hash = hash_create(2 * max_size);
++	buf_pool->io_counter_hash = NULL;
++	buf_pool->io_counter_heap = NULL;
++	buf_pool->io_counters = 0;
+ 
+ 	buf_pool->n_pend_reads = 0;
+ 
+@@ -1966,6 +1969,9 @@
+ 	ulint		io_type;
+ 	ulint		read_page_no;
+ 	
++	buf_io_counter_t*	io_counter;
++	ulint		fold;
++	
+ 	ut_ad(block);
+ 
+ 	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+@@ -2067,6 +2073,26 @@
+ 		buf_pool->n_pages_read++;
+ 
+ 		rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
++		/* Record this completed read in the IO pattern counters, if tracing is enabled */
++		if (srv_io_pattern && srv_io_pattern_trace_running) {
++		fold = buf_page_address_fold(block->space, block->offset);
++		HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter,
++			(io_counter->space == block->space) && (io_counter->offset == block->offset));
++		if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) {
++			io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t)));
++			io_counter->space = block->space;
++			io_counter->offset = block->offset;
++			io_counter->n_read = 0;
++			io_counter->n_write = 0;
++			HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash,
++				buf_page_address_fold(block->space, block->offset), io_counter);
++			buf_pool->io_counters++;
++		}
++		if (io_counter != NULL) {
++			io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block)));
++			io_counter->n_read++;
++		}
++		}
+ 
+ #ifdef UNIV_DEBUG
+ 		if (buf_debug_prints) {
+@@ -2082,6 +2108,26 @@
+ 		buf_flush_write_complete(block);
+ 
+ 		rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
++		/* Record this completed write in the IO pattern counters, if tracing is enabled */
++		if (srv_io_pattern && srv_io_pattern_trace_running) {
++		fold = buf_page_address_fold(block->space, block->offset);
++		HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter,
++			(io_counter->space == block->space) && (io_counter->offset == block->offset));
++		if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) {
++			io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t)));
++			io_counter->space = block->space;
++			io_counter->offset = block->offset;
++			io_counter->n_read = 0;
++			io_counter->n_write = 0;
++			HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash,
++				buf_page_address_fold(block->space, block->offset), io_counter);
++			buf_pool->io_counters++;
++		}
++		if (io_counter != NULL) {
++			io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block)));
++			io_counter->n_write++;
++		}
++		}
+ 
+ 		buf_pool->n_pages_written++;
+ 
+@@ -2656,3 +2702,58 @@
+ return buf_pool_get_nth_block(buf_pool, i);
+ 
+ }
++
++/*************************************************************************
++Creates or frees the internal hash table used for IO pattern tracing,
++according to the innodb_io_pattern_trace setting (srv_io_pattern_trace). */
++
++void
++buf_io_counter_control(void)
++/*========================*/
++{
++       ulint   n;
++
++       mutex_enter(&(buf_pool->mutex));
++       if (srv_io_pattern_trace) {
++               if (buf_pool->io_counter_hash == NULL) {
++                       /* estimating (buf_pool * 10) */
++                       buf_pool->io_counter_hash = hash_create(20 * buf_pool->max_size);
++                       buf_pool->io_counter_heap = mem_heap_create(4096 * 1024);
++                       buf_pool->io_counters = 0;
++
++                       srv_io_pattern = TRUE;
++               }
++       } else {
++               if (buf_pool->io_counter_hash != NULL) {
++                       srv_io_pattern = FALSE;
++
++                       for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) {
++                               (buf_pool->io_counter_hash->array + n)->node = NULL;
++                       }
++                       mem_heap_free(buf_pool->io_counter_heap);
++                       buf_pool->io_counter_heap = NULL;
++                       buf_pool->io_counters = 0;
++
++                       hash_table_free(buf_pool->io_counter_hash);
++                       buf_pool->io_counter_hash = NULL;
++               }
++       }
++       mutex_exit(&(buf_pool->mutex));
++}
++
++void
++buf_io_counter_clear(void)
++/*======================*/
++{
++       ulint   n;
++
++       mutex_enter(&(buf_pool->mutex));
++       if (buf_pool->io_counter_hash != NULL) {
++               for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) {
++                       (buf_pool->io_counter_hash->array + n)->node = NULL;
++               }
++               mem_heap_empty(buf_pool->io_counter_heap);
++               buf_pool->io_counters = 0;
++       }
++       mutex_exit(&(buf_pool->mutex));
++}
+diff -r 2bbfde0e0e70 innobase/include/buf0buf.h
+--- a/innobase/include/buf0buf.h	Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/include/buf0buf.h	Mon Dec 22 00:33:48 2008 -0800
+@@ -709,6 +709,18 @@
+ void buf_pool_dump(void);
+ buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i);  
+ 
++
++/*************************************************************************
++Controls the internal hash table for IO pattern tracing
++along innodb_io_pattern_trace value.*/
++
++void
++buf_io_counter_control(void);
++/*=========================*/
++
++void
++buf_io_counter_clear(void);
++/*=======================*/
+ 			
+ /* The buffer control block structure */
+ 
+@@ -930,6 +942,9 @@
+ 	ulint		curr_size;	/* current pool size in pages;
+ 					currently always the same as
+ 					max_size */
++	hash_table_t*	io_counter_hash;
++	mem_heap_t*	io_counter_heap;
++	ulint		io_counters;
+ 	hash_table_t*	page_hash;	/* hash table of the file pages */
+ 
+ 	ulint		n_pend_reads;	/* number of pending read operations */
+@@ -1015,6 +1030,15 @@
+ 					locki table, are not in this list */
+ };
+ 
++struct buf_io_counter_struct{
++	ulint	space;
++	ulint	offset;
++	buf_io_counter_t*	hash;
++	ulint	index_id;
++	ulint	n_read;
++	ulint	n_write;
++};
++
+ /* States of a control block */
+ #define	BUF_BLOCK_NOT_USED	211	/* is in the free list */
+ #define BUF_BLOCK_READY_FOR_USE	212	/* when buf_get_free_block returns
+diff -r 2bbfde0e0e70 innobase/include/buf0types.h
+--- a/innobase/include/buf0types.h	Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/include/buf0types.h	Mon Dec 22 00:33:48 2008 -0800
+@@ -12,6 +12,8 @@
+ typedef	struct buf_block_struct		buf_block_t;
+ typedef	struct buf_pool_struct		buf_pool_t;
+ 
++typedef	struct buf_io_counter_struct	buf_io_counter_t;
++
+ /* The 'type' used of a buffer frame */
+ typedef	byte	buf_frame_t;
+ 
+diff -r 2bbfde0e0e70 innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h	Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/include/srv0srv.h	Mon Dec 22 00:33:48 2008 -0800
+@@ -141,6 +141,11 @@
+ extern ulint	srv_io_capacity;
+ extern ulint	srv_read_ahead;
+ extern ulint	srv_adaptive_checkpoint;
++
++extern volatile ibool srv_io_pattern;
++extern ulong	srv_io_pattern_trace;
++extern ulong	srv_io_pattern_trace_running;
++extern ulong	srv_io_pattern_size_limit;
+ /*-------------------------------------------*/
+ 
+ extern ulint	srv_n_rows_inserted;
+diff -r 2bbfde0e0e70 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c	Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/srv/srv0srv.c	Mon Dec 22 00:33:48 2008 -0800
+@@ -337,6 +337,11 @@
+ 
+ ulint	srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
+ ulint	srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
++
++volatile ibool srv_io_pattern = FALSE;
++ulint   srv_io_pattern_trace = 0;
++ulint   srv_io_pattern_trace_running = 0;
++ulint   srv_io_pattern_size_limit = ULINT_MAX - (1024 * 1024);
+ /*-------------------------------------------*/
+ ulong	srv_n_spin_wait_rounds	= 20;
+ ulong	srv_n_free_tickets_to_enter = 500;
+diff -r 2bbfde0e0e70 mysql-test/r/information_schema.result
+--- a/mysql-test/r/information_schema.result	Mon Dec 22 00:33:11 2008 -0800
++++ b/mysql-test/r/information_schema.result	Mon Dec 22 00:33:48 2008 -0800
+@@ -59,6 +59,7 @@
+ USER_PRIVILEGES
+ USER_STATISTICS
+ VIEWS
++INNODB_IO_PATTERN
+ columns_priv
+ db
+ func
+@@ -742,7 +743,7 @@
+ CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1;
+ CREATE VIEW a2 AS SELECT t_CRASHME FROM a1;
+ count(*)
+-108
++109
+ drop view a2, a1;
+ drop table t_crashme;
+ select table_schema,table_name, column_name from
+@@ -812,12 +813,13 @@
+ TABLE_PRIVILEGES	TABLE_NAME	select
+ TABLE_STATISTICS	TABLE_NAME	select
+ VIEWS	TABLE_NAME	select
++INNODB_IO_PATTERN	TABLE_NAME	select
+ delete from mysql.user where user='mysqltest_4';
+ delete from mysql.db where user='mysqltest_4';
+ flush privileges;
+ SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA;
+ table_schema	count(*)
+-information_schema	23
++information_schema	24
+ mysql	17
+ create table t1 (i int, j int);
+ create trigger trg1 before insert on t1 for each row
+@@ -1225,6 +1227,7 @@
+ USER_PRIVILEGES	GRANTEE
+ USER_STATISTICS	USER
+ VIEWS	TABLE_SCHEMA
++INNODB_IO_PATTERN	SPACE
+ SELECT t.table_name, c1.column_name
+ FROM information_schema.tables t
+ INNER JOIN
+@@ -1263,6 +1266,7 @@
+ USER_PRIVILEGES	GRANTEE
+ USER_STATISTICS	USER
+ VIEWS	TABLE_SCHEMA
++INNODB_IO_PATTERN	SPACE
+ SELECT MAX(table_name) FROM information_schema.tables;
+ MAX(table_name)
+ VIEWS
+@@ -1337,6 +1341,7 @@
+ COLUMN_PRIVILEGES	information_schema.COLUMN_PRIVILEGES	1
+ INDEX_STATISTICS	information_schema.INDEX_STATISTICS	1
+ INNODB_BUFFER_POOL_CONTENT	information_schema.INNODB_BUFFER_POOL_CONTENT	1
++INNODB_IO_PATTERN	information_schema.INNODB_IO_PATTERN	1
+ KEY_COLUMN_USAGE	information_schema.KEY_COLUMN_USAGE	1
+ PROCESSLIST	information_schema.PROCESSLIST	1
+ PROFILING	information_schema.PROFILING	1
+diff -r 2bbfde0e0e70 mysql-test/r/information_schema_db.result
+--- a/mysql-test/r/information_schema_db.result	Mon Dec 22 00:33:11 2008 -0800
++++ b/mysql-test/r/information_schema_db.result	Mon Dec 22 00:33:48 2008 -0800
+@@ -28,6 +28,7 @@
+ USER_PRIVILEGES
+ USER_STATISTICS
+ VIEWS
++INNODB_IO_PATTERN
+ show tables from INFORMATION_SCHEMA like 'T%';
+ Tables_in_information_schema (T%)
+ TABLES
+diff -r 2bbfde0e0e70 mysql-test/r/mysqlshow.result
+--- a/mysql-test/r/mysqlshow.result	Mon Dec 22 00:33:11 2008 -0800
++++ b/mysql-test/r/mysqlshow.result	Mon Dec 22 00:33:48 2008 -0800
+@@ -102,6 +102,7 @@
+ | USER_PRIVILEGES                       |
+ | USER_STATISTICS                       |
+ | VIEWS                                 |
++| INNODB_IO_PATTERN                     |
+ +---------------------------------------+
+ Database: INFORMATION_SCHEMA
+ +---------------------------------------+
+@@ -130,6 +131,7 @@
+ | USER_PRIVILEGES                       |
+ | USER_STATISTICS                       |
+ | VIEWS                                 |
++| INNODB_IO_PATTERN                     |
+ +---------------------------------------+
+ Wildcard: inf_rmation_schema
+ +--------------------+
+diff -r 2bbfde0e0e70 patch_info/innodb_io_pattern.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_io_pattern.info	Mon Dec 22 00:33:48 2008 -0800
+@@ -0,0 +1,8 @@
++File=innodb_io_pattern.patch
++Name=Information schema table of InnoDB IO counts for each datafile pages
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=INFORMATION_SCHEMA.INNODB_IO_PATTERN
++2008-12-01
++YK: fix for mysql-test
+diff -r 2bbfde0e0e70 sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/ha_innodb.cc	Mon Dec 22 00:33:48 2008 -0800
+@@ -1569,6 +1569,8 @@
+         pthread_cond_init(&commit_cond, NULL);
+ 	innodb_inited= 1;
+ 
++	buf_io_counter_control();
++
+ 	/* If this is a replication slave and we needed to do a crash recovery,
+ 	set the master binlog position to what InnoDB internally knew about
+ 	how far we got transactions durable inside InnoDB. There is a
+@@ -6527,6 +6529,28 @@
+ }
+ 
+ /****************************************************************************
++Controls the internal hash table for IO pattern tracing
++along innodb_io_pattern_trace value.*/
++
++void
++innodb_io_pattern_control(void)
++/*===========================*/
++{
++	if (innodb_inited) {
++		buf_io_counter_control();
++	}
++}
++
++void
++innodb_io_pattern_clear(void)
++/*=========================*/
++{
++	if (innodb_inited) {
++		buf_io_counter_clear();
++	}
++}
++
++/****************************************************************************
+ Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
+ Monitor to the client. */
+ 
+diff -r 2bbfde0e0e70 sql/ha_innodb.h
+--- a/sql/ha_innodb.h	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/ha_innodb.h	Mon Dec 22 00:33:48 2008 -0800
+@@ -240,6 +240,9 @@
+ extern ulong srv_adaptive_checkpoint;
+ extern ulong srv_show_locks_held;
+ extern ulong srv_show_verbose_locks;
++extern ulong srv_io_pattern_trace;
++extern ulong srv_io_pattern_trace_running;
++extern ulong srv_io_pattern_size_limit;
+ }
+ 
+ bool innobase_init(void);
+@@ -266,6 +269,9 @@
+ bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables);
+ bool innodb_mutex_show_status(THD* thd);
+ void innodb_export_status(void);
++
++void innodb_io_pattern_control(void);
++void innodb_io_pattern_clear(void);
+ 
+ void innobase_release_temporary_latches(THD *thd);
+ 
+diff -r 2bbfde0e0e70 sql/lex.h
+--- a/sql/lex.h	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/lex.h	Mon Dec 22 00:33:48 2008 -0800
+@@ -244,6 +244,7 @@
+   { "INNER",		SYM(INNER_SYM)},
+   { "INNOBASE",		SYM(INNOBASE_SYM)},
+   { "INNODB",		SYM(INNOBASE_SYM)},
++  { "INNODB_IO_PATTERN", SYM(INNODB_IO_PATTERN)},
+   { "INOUT",            SYM(INOUT_SYM)},
+   { "INSENSITIVE",      SYM(INSENSITIVE_SYM)},
+   { "INSERT",		SYM(INSERT)},
+diff -r 2bbfde0e0e70 sql/mysqld.cc
+--- a/sql/mysqld.cc	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/mysqld.cc	Mon Dec 22 00:33:48 2008 -0800
+@@ -4983,6 +4983,9 @@
+   OPT_INNODB_SYNC_SPIN_LOOPS,
+   OPT_INNODB_CONCURRENCY_TICKETS,
+   OPT_INNODB_THREAD_SLEEP_DELAY,
++  OPT_INNODB_IO_PATTERN_TRACE,
++  OPT_INNODB_IO_PATTERN_TRACE_RUNNING,
++  OPT_INNODB_IO_PATTERN_SIZE_LIMIT,
+   OPT_BDB_CACHE_SIZE,
+   OPT_BDB_LOG_BUFFER_SIZE,
+   OPT_BDB_MAX_LOCK,
+@@ -5382,6 +5385,18 @@
+    "Number of background write I/O threads in InnoDB.",
+    (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads,
+    0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
++  {"innodb_io_pattern_trace", OPT_INNODB_IO_PATTERN_TRACE,
++   "Create/Drop the internal hash table for IO pattern tracing.",
++   (gptr*) &srv_io_pattern_trace, (gptr*) &srv_io_pattern_trace,
++   0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
++  {"innodb_io_pattern_trace_running", OPT_INNODB_IO_PATTERN_TRACE_RUNNING,
++   "Control IO pattern trace running or not.",
++   (gptr*) &srv_io_pattern_trace_running, (gptr*) &srv_io_pattern_trace_running,
++   0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
++  {"innodb_io_pattern_size_limit", OPT_INNODB_IO_PATTERN_SIZE_LIMIT,
++   "Set max number of counters per data pages. (0 = disable counting).",
++   (gptr*) &srv_io_pattern_size_limit, (gptr*) &srv_io_pattern_size_limit,
++   0, GET_ULONG, REQUIRED_ARG, 0, 0, ULONG_MAX - (1024 * 1024), 0, 0, 0},
+ #endif /* End HAVE_INNOBASE_DB */
+   {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
+    (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
+diff -r 2bbfde0e0e70 sql/set_var.cc
+--- a/sql/set_var.cc	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/set_var.cc	Mon Dec 22 00:33:48 2008 -0800
+@@ -501,6 +501,12 @@
+ sys_var_long_ptr  sys_innodb_show_verbose_locks(
+                                         "innodb_show_verbose_locks",
+                                         &srv_show_verbose_locks);
++sys_var_innodb_io_pattern_trace sys_innodb_io_pattern_trace("innodb_io_pattern_trace",
++                                                            &srv_io_pattern_trace);
++sys_var_long_ptr        sys_innodb_io_pattern_trace_running("innodb_io_pattern_trace_running",
++                                                            &srv_io_pattern_trace_running);
++sys_var_long_ptr        sys_innodb_io_pattern_size_limit("innodb_io_pattern_size_limit",
++                                                         &srv_io_pattern_size_limit);
+ sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", 
+                                                &innobase_data_file_path);
+ sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", 
+@@ -870,6 +876,9 @@
+   &sys_innodb_adaptive_checkpoint,
+   &sys_innodb_show_locks_held,
+   &sys_innodb_show_verbose_locks,
++  &sys_innodb_io_pattern_trace,
++  &sys_innodb_io_pattern_trace_running,
++  &sys_innodb_io_pattern_size_limit,
+ #endif
+   &sys_trust_routine_creators,
+   &sys_trust_function_creators,
+@@ -1012,6 +1021,9 @@
+   {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS},
+   {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG},
+   {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG},
++  {sys_innodb_io_pattern_trace.name, (char*) &sys_innodb_io_pattern_trace, SHOW_SYS},
++  {sys_innodb_io_pattern_trace_running.name, (char*) &sys_innodb_io_pattern_trace_running, SHOW_SYS},
++  {sys_innodb_io_pattern_size_limit.name, (char*) &sys_innodb_io_pattern_size_limit, SHOW_SYS},
+ #endif
+   {sys_interactive_timeout.name,(char*) &sys_interactive_timeout,   SHOW_SYS},
+   {sys_join_buffer_size.name,   (char*) &sys_join_buffer_size,	    SHOW_SYS},
+@@ -3117,6 +3129,19 @@
+     thd->variables.lc_time_names= global_system_variables.lc_time_names;
+ }
+ 
++#ifdef HAVE_INNOBASE_DB
++bool sys_var_innodb_io_pattern_trace::update(THD *thd, set_var *var)
++{
++  bool ret;
++
++  ret = sys_var_long_ptr_global::update(thd, var);
++
++  innodb_io_pattern_control();
++
++  return ret;
++}
++#endif /* HAVE_INNOBASE_DB */
++
+ /*
+   Functions to update thd->options bits
+ */
+diff -r 2bbfde0e0e70 sql/set_var.h
+--- a/sql/set_var.h	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/set_var.h	Mon Dec 22 00:33:48 2008 -0800
+@@ -985,6 +985,17 @@
+   virtual void set_default(THD *thd, enum_var_type type);
+ };
+ 
++#ifdef HAVE_INNOBASE_DB
++/* sys_var_innodb_io_pattern_trace */
++class sys_var_innodb_io_pattern_trace :public sys_var_long_ptr
++{
++public:
++  sys_var_innodb_io_pattern_trace(const char *name_arg, ulong *value_ptr_arg)
++    :sys_var_long_ptr(name_arg,value_ptr_arg) {}
++  bool update(THD *thd, set_var *var);
++};
++#endif /* HAVE_INNOBASE_DB */
++
+ /****************************************************************************
+   Classes for parsing of the SET command
+ ****************************************************************************/
+diff -r 2bbfde0e0e70 sql/sql_parse.cc
+--- a/sql/sql_parse.cc	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/sql_parse.cc	Mon Dec 22 00:33:48 2008 -0800
+@@ -7998,6 +7998,13 @@
+     }
+     pthread_mutex_unlock(&LOCK_global_user_client_stats);
+   }
++#ifdef HAVE_INNOBASE_DB
++ if (options & REFRESH_INNODB_IO_PATTERN)
++ {
++   tmp_write_to_binlog= 0;
++   innodb_io_pattern_clear();
++ }
++#endif /* HAVE_INNOBASE_DB */
+  *write_to_binlog= tmp_write_to_binlog;
+  return result;
+ }
+diff -r 2bbfde0e0e70 sql/sql_show.cc
+--- a/sql/sql_show.cc	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/sql_show.cc	Mon Dec 22 00:33:48 2008 -0800
+@@ -32,6 +32,17 @@
+ #ifdef HAVE_INNOBASE_DB
+ #include "ha_innodb.h"
+ #endif
++
++#ifdef HAVE_INNOBASE_DB
++#define INSIDE_HA_INNOBASE_CC
++extern "C" {
++#include "srv0srv.h"
++#include "buf0buf.h"
++#include "dict0dict.h"
++}
++/* We need to undef it in InnoDB */
++#undef byte
++#endif /* HAVE_INNOBASE_DB */
+ 
+ #ifndef NO_EMBEDDED_ACCESS_CHECKS
+ static const char *grant_names[]={
+@@ -4074,6 +4085,67 @@
+   DBUG_RETURN(res);
+ }
+ 
++int innodb_io_pattern_fill_table(THD *thd, TABLE_LIST *tables, COND *cond)
++{
++  TABLE *table= (TABLE *) tables->table;
++
++  buf_io_counter_t* io_counter;
++  dict_index_t* index;
++
++  DBUG_ENTER("innodb_io_pattern_fill_table");
++  int returnable= 0;
++
++  /* We cannot use inline functions of InnoDB here */
++
++  /* !!!!!ATTENTION!!!!!: This function is not protected by a mutex, for performance.    */
++  /* Avoid using "DROP TABLE innodb_io_pattern" and INFORMATION_SCHEMA.INNODB_IO_PATTERN */
++  /* at the same time whenever possible.                                                 */
++
++  if (srv_io_pattern) {
++  for (ulint n=0; n < buf_pool->io_counter_hash->n_cells; n++) {
++    if (!srv_io_pattern)
++      goto end_func;
++
++    io_counter = (buf_io_counter_t*)(buf_pool->io_counter_hash->array + n)->node;
++    while (io_counter) {
++      if (!srv_io_pattern)
++        goto end_func;
++
++      if (dict_sys != NULL) {
++        dulint id;
++        id.high = 0;
++        id.low = io_counter->index_id;
++        index = dict_index_find_on_id_low(id);
++      } else {
++        index = NULL;
++      }
++
++      table->field[0]->store(io_counter->space);
++      table->field[1]->store(io_counter->offset);
++      table->field[2]->store(io_counter->index_id);
++      if (index != NULL) {
++        table->field[3]->store(index->table_name,strlen(index->table_name),system_charset_info);
++        table->field[4]->store(index->name,strlen(index->name),system_charset_info);
++      } else {
++        table->field[3]->store("",0,system_charset_info);
++        table->field[4]->store("",0,system_charset_info);
++      }
++      table->field[5]->store(io_counter->n_read);
++      table->field[6]->store(io_counter->n_write);
++      if (schema_table_store_record(thd, table))
++      {
++        returnable= 1;
++        goto end_func;
++      }
++      io_counter = io_counter->hash;
++    }
++  }
++  }
++
++ end_func:
++  DBUG_RETURN(returnable);
++}
++
+ /*
+   Find schema_tables elment by name
+ 
+@@ -4880,6 +4952,19 @@
+   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0}
+ };
+ 
++#ifdef HAVE_INNOBASE_DB
++ST_FIELD_INFO innodb_io_pattern_field_info[]=
++{
++  {"SPACE", 11, MYSQL_TYPE_LONG, 0, 0, "space_id"},
++  {"OFFSET", 11, MYSQL_TYPE_LONG, 0, 0, "offset"},
++  {"INDEX_ID", 11, MYSQL_TYPE_LONG, 0, 0, "index id"},
++  {"TABLE_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "table name"},
++  {"INDEX_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "index name"},
++  {"N_READ", 11, MYSQL_TYPE_LONG, 0, 0, "read ios"},
++  {"N_WRITE", 11, MYSQL_TYPE_LONG, 0, 0, "write ios"},
++  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0}
++};
++#endif
+ 
+ ST_FIELD_INFO variables_fields_info[]=
+ {
+@@ -5055,6 +5140,10 @@
+    make_old_format, 0, -1, -1, 1},
+   {"VIEWS", view_fields_info, create_schema_table, 
+     get_all_tables, 0, get_schema_views_record, 1, 2, 0},
++#ifdef HAVE_INNOBASE_DB
++  {"INNODB_IO_PATTERN", innodb_io_pattern_field_info, create_schema_table,
++    innodb_io_pattern_fill_table, 0, 0, -1, -1, 0},
++#endif
+   {0, 0, 0, 0, 0, 0, 0, 0, 0}
+ };
+ 
+diff -r 2bbfde0e0e70 sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy	Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/sql_yacc.yy	Mon Dec 22 00:33:48 2008 -0800
+@@ -685,6 +685,7 @@
+ %token  INFILE
+ %token  INNER_SYM
+ %token  INNOBASE_SYM
++%token  INNODB_IO_PATTERN
+ %token  INOUT_SYM
+ %token  INSENSITIVE_SYM
+ %token  INSERT
+@@ -8541,6 +8542,7 @@
+         | MASTER_SYM    { Lex->type|= REFRESH_MASTER; }
+ 	| DES_KEY_FILE	{ Lex->type|= REFRESH_DES_KEY_FILE; }
+  	| RESOURCES     { Lex->type|= REFRESH_USER_RESOURCES; }
++ 	| INNODB_IO_PATTERN { Lex->type|= REFRESH_INNODB_IO_PATTERN; }
+  	| CLIENT_STATS_SYM { Lex->type|= REFRESH_CLIENT_STATS; }
+  	| USER_STATS_SYM { Lex->type|= REFRESH_USER_STATS; }
+  	| TABLE_STATS_SYM { Lex->type|= REFRESH_TABLE_STATS; }
+@@ -9594,6 +9596,7 @@
+ 	| ISOLATION		{}
+ 	| ISSUER_SYM		{}
+ 	| INNOBASE_SYM		{}
++	| INNODB_IO_PATTERN	{}
+ 	| INSERT_METHOD		{}
+ 	| IO_SYM                {}
+ 	| IPC_SYM               {}
diff --git a/mysql-innodb_locks_held.patch b/mysql-innodb_locks_held.patch
new file mode 100644
index 0000000..416d50e
--- /dev/null
+++ b/mysql-innodb_locks_held.patch
@@ -0,0 +1,168 @@
+diff -r ae6708ab17e5 innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h	Mon Dec 22 00:32:07 2008 -0800
++++ b/innobase/include/srv0srv.h	Mon Dec 22 00:32:58 2008 -0800
+@@ -80,6 +80,8 @@
+ extern ulint	srv_log_file_size;
+ extern ulint	srv_log_buffer_size;
+ extern ulong	srv_flush_log_at_trx_commit;
++extern ulong	srv_show_locks_held;
++extern ulong	srv_show_verbose_locks;
+ 
+ extern byte	srv_latin1_ordering[256];/* The sort order table of the latin1
+ 					character set */
+diff -r ae6708ab17e5 innobase/lock/lock0lock.c
+--- a/innobase/lock/lock0lock.c	Mon Dec 22 00:32:07 2008 -0800
++++ b/innobase/lock/lock0lock.c	Mon Dec 22 00:32:58 2008 -0800
+@@ -4181,6 +4181,7 @@
+ #endif /* UNIV_SYNC_DEBUG */
+ 	}
+ 
++	if ( srv_show_verbose_locks ) {
+ 	for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+ 
+ 		if (lock_rec_get_nth_bit(lock, i)) {
+@@ -4198,6 +4199,7 @@
+ 			putc('\n', file);
+ 		}
+ 	}
++	} /* srv_show_verbose_locks */
+ 
+ 	mtr_commit(&mtr);
+ 	if (UNIV_LIKELY_NULL(heap)) {
+@@ -4369,7 +4371,7 @@
+ 		}
+ 	}
+ 
+-	if (!srv_print_innodb_lock_monitor) {
++	if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) {
+ 	  	nth_trx++;
+ 	  	goto loop;
+ 	}
+@@ -4426,9 +4428,9 @@
+ 
+ 	nth_lock++;
+ 
+-	if (nth_lock >= 10) {
++	if (nth_lock >= srv_show_locks_held) {
+ 		fputs(
+-		"10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n",
++		"TOO MANY LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n",
+ 			file);
+ 	
+ 		nth_trx++;
+diff -r ae6708ab17e5 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c	Mon Dec 22 00:32:07 2008 -0800
++++ b/innobase/srv/srv0srv.c	Mon Dec 22 00:32:58 2008 -0800
+@@ -116,6 +116,8 @@
+ ulint	srv_log_file_size	= ULINT_MAX;	/* size in database pages */ 
+ ulint	srv_log_buffer_size	= ULINT_MAX;	/* size in database pages */ 
+ ulong	srv_flush_log_at_trx_commit = 1;
++ulong	srv_show_locks_held	= 10;	/* max locks printed per trx in SHOW INNODB STATUS */
++ulong	srv_show_verbose_locks	= 0;	/* nonzero: also print the locked records */
+ 
+ byte	srv_latin1_ordering[256]	/* The sort order table of the latin1
+ 					character set. The following table is
+diff -r ae6708ab17e5 libmysqld/set_var.cc
+--- a/libmysqld/set_var.cc	Mon Dec 22 00:32:07 2008 -0800
++++ b/libmysqld/set_var.cc	Mon Dec 22 00:32:58 2008 -0800
+@@ -821,6 +821,8 @@
+   &sys_innodb_thread_concurrency,
+   &sys_innodb_commit_concurrency,
+   &sys_innodb_flush_log_at_trx_commit,
++  &sys_innodb_show_locks_held,
++  &sys_innodb_show_verbose_locks,
+ #endif
+   &sys_trust_routine_creators,
+   &sys_trust_function_creators,
+@@ -936,6 +938,8 @@
+   {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
+   {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL},
+   {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS},
++  {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS },
++  {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS },
+   {"innodb_flush_method",    (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR},
+   {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG },
+   {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG },
+diff -r ae6708ab17e5 patch_info/innodb_locks_held.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_locks_held.info	Mon Dec 22 00:32:58 2008 -0800
+@@ -0,0 +1,6 @@
++File=innodb_locks_held.patch
++Name=Add locks held, remove locked records in SHOW INNODB STATUS
++Version=1.0
++Author=Baron Schwartz <baron at xaprb.com>
++License=GPL
++Comment=Bug #29126 fix
+diff -r ae6708ab17e5 sql/ha_innodb.h
+--- a/sql/ha_innodb.h	Mon Dec 22 00:32:07 2008 -0800
++++ b/sql/ha_innodb.h	Mon Dec 22 00:32:58 2008 -0800
+@@ -238,6 +238,8 @@
+ extern ulong srv_io_capacity;
+ extern ulong srv_read_ahead;
+ extern ulong srv_adaptive_checkpoint;
++extern ulong srv_show_locks_held;
++extern ulong srv_show_verbose_locks;
+ }
+ 
+ bool innobase_init(void);
+diff -r ae6708ab17e5 sql/mysqld.cc
+--- a/sql/mysqld.cc	Mon Dec 22 00:32:07 2008 -0800
++++ b/sql/mysqld.cc	Mon Dec 22 00:32:58 2008 -0800
+@@ -4969,6 +4969,8 @@
+   OPT_INNODB_MAX_PURGE_LAG,
+   OPT_INNODB_FILE_IO_THREADS,
+   OPT_INNODB_LOCK_WAIT_TIMEOUT,
++  OPT_INNODB_SHOW_LOCKS_HELD,
++  OPT_INNODB_SHOW_VERBOSE_LOCKS,
+   OPT_INNODB_THREAD_CONCURRENCY,
+   OPT_INNODB_COMMIT_CONCURRENCY,
+   OPT_INNODB_FORCE_RECOVERY,
+@@ -5308,6 +5310,14 @@
+    (gptr*) &srv_flush_log_at_trx_commit,
+    (gptr*) &srv_flush_log_at_trx_commit,
+    0, GET_ULONG, OPT_ARG,  1, 0, 2, 0, 0, 0},
++  {"innodb_show_locks_held", OPT_INNODB_SHOW_LOCKS_HELD,
++   "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.",
++   (gptr*) &srv_show_locks_held, (gptr*) &srv_show_locks_held,
++   0, GET_LONG, OPT_ARG, 10, 0, 1000, 0, 1, 0},
++  {"innodb_show_verbose_locks", OPT_INNODB_SHOW_VERBOSE_LOCKS,
++   "Whether to show records locked in SHOW INNODB STATUS.",
++   (gptr*) &srv_show_verbose_locks, (gptr*) &srv_show_verbose_locks,
++   0, GET_LONG, OPT_ARG, 0, 0, 1, 0, 1, 0},
+   {"innodb_flush_method", OPT_INNODB_FLUSH_METHOD,
+    "With which method to flush data.", (gptr*) &innobase_unix_file_flush_method,
+    (gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
+diff -r ae6708ab17e5 sql/set_var.cc
+--- a/sql/set_var.cc	Mon Dec 22 00:32:07 2008 -0800
++++ b/sql/set_var.cc	Mon Dec 22 00:32:58 2008 -0800
+@@ -495,6 +495,12 @@
+                                               &srv_read_ahead);
+ sys_var_long_ptr	sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint",
+                                                       &srv_adaptive_checkpoint);
++sys_var_long_ptr  sys_innodb_show_locks_held(
++                                        "innodb_show_locks_held",
++                                        &srv_show_locks_held);
++sys_var_long_ptr  sys_innodb_show_verbose_locks(
++                                        "innodb_show_verbose_locks",
++                                        &srv_show_verbose_locks);
+ sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", 
+                                                &innobase_data_file_path);
+ sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", 
+@@ -862,6 +868,8 @@
+   &sys_innodb_io_capacity,
+   &sys_innodb_read_ahead,
+   &sys_innodb_adaptive_checkpoint,
++  &sys_innodb_show_locks_held,
++  &sys_innodb_show_verbose_locks,
+ #endif
+   &sys_trust_routine_creators,
+   &sys_trust_function_creators,
+@@ -977,6 +985,8 @@
+   {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
+   {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL},
+   {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS},
++  {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS },
++  {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS },
+   {"innodb_flush_method",    (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR},
+   {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG },
+   {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG },
diff --git a/mysql-innodb_rw_lock.patch b/mysql-innodb_rw_lock.patch
new file mode 100644
index 0000000..3070bb0
--- /dev/null
+++ b/mysql-innodb_rw_lock.patch
@@ -0,0 +1,1459 @@
+diff -r 962aec0d731c innobase/configure
+--- a/innobase/configure	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/configure	Thu Oct 09 08:30:28 2008 -0700
+@@ -20519,6 +20519,88 @@
+ 
+ fi
+ done
++
++
++# as http://lists.mysql.com/commits/40686 does
++{ echo "$as_me:$LINENO: checking whether the compiler provides atomic builtins" >&5
++echo $ECHO_N "checking whether the compiler provides atomic builtins... $ECHO_C" >&6; }
++if test "${mysql_cv_atomic_builtins+set}" = set; then
++  echo $ECHO_N "(cached) $ECHO_C" >&6
++else
++  if test "$cross_compiling" = yes; then
++  { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
++See \`config.log' for more details." >&5
++echo "$as_me: error: cannot run test program while cross compiling
++See \`config.log' for more details." >&2;}
++   { (exit 1); exit 1; }; }
++else
++  cat >conftest.$ac_ext <<_ACEOF
++/* confdefs.h.  */
++_ACEOF
++cat confdefs.h >>conftest.$ac_ext
++cat >>conftest.$ac_ext <<_ACEOF
++/* end confdefs.h.  */
++
++  int main()
++  {
++    int foo= -10; int bar= 10;
++    __sync_fetch_and_add(&foo, bar);
++    if (foo)
++      return -1;
++    bar= __sync_lock_test_and_set(&foo, bar);
++    if (bar || foo != 10)
++      return -1;
++    bar= __sync_val_compare_and_swap(&bar, foo, 15);
++    if (bar)
++      return -1;
++    return 0;
++  }
++
++_ACEOF
++rm -f conftest$ac_exeext
++if { (ac_try="$ac_link"
++case "(($ac_try" in
++  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
++  *) ac_try_echo=$ac_try;;
++esac
++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
++  (eval "$ac_link") 2>&5
++  ac_status=$?
++  echo "$as_me:$LINENO: \$? = $ac_status" >&5
++  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
++  { (case "(($ac_try" in
++  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
++  *) ac_try_echo=$ac_try;;
++esac
++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
++  (eval "$ac_try") 2>&5
++  ac_status=$?
++  echo "$as_me:$LINENO: \$? = $ac_status" >&5
++  (exit $ac_status); }; }; then
++  mysql_cv_atomic_builtins=yes
++else
++  echo "$as_me: program exited with status $ac_status" >&5
++echo "$as_me: failed program was:" >&5
++sed 's/^/| /' conftest.$ac_ext >&5
++
++( exit $ac_status )
++mysql_cv_atomic_builtins=no
++fi
++rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
++fi
++
++
++fi
++{ echo "$as_me:$LINENO: result: $mysql_cv_atomic_builtins" >&5
++echo "${ECHO_T}$mysql_cv_atomic_builtins" >&6; }
++
++if test "x$mysql_cv_atomic_builtins" = xyes; then
++
++cat >>confdefs.h <<\_ACEOF
++#define HAVE_ATOMIC_BUILTINS 1
++_ACEOF
++
++fi
+ 
+ #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args.
+ # Some versions of Unix only take 2 arguments.
+diff -r 962aec0d731c innobase/configure.in
+--- a/innobase/configure.in	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/configure.in	Thu Oct 09 08:30:28 2008 -0700
+@@ -42,6 +42,31 @@
+ AC_CHECK_FUNCS(sched_yield)
+ AC_CHECK_FUNCS(fdatasync)
+ AC_CHECK_FUNCS(localtime_r)
++
++# as http://lists.mysql.com/commits/40686 does
++AC_CACHE_CHECK([whether the compiler provides atomic builtins],
++               [mysql_cv_atomic_builtins], [AC_TRY_RUN([
++  int main()
++  {
++    int foo= -10; int bar= 10;
++    __sync_fetch_and_add(&foo, bar);
++    if (foo)
++      return -1;
++    bar= __sync_lock_test_and_set(&foo, bar);
++    if (bar || foo != 10)
++      return -1;
++    bar= __sync_val_compare_and_swap(&bar, foo, 15);
++    if (bar)
++      return -1;
++    return 0;
++  }
++], [mysql_cv_atomic_builtins=yes], [mysql_cv_atomic_builtins=no])])
++
++if test "x$mysql_cv_atomic_builtins" = xyes; then
++  AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1,
++            [Define to 1 if compiler provides atomic builtins.])
++fi
++
+ #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args.
+ # Some versions of Unix only take 2 arguments.
+ #AC_C_INLINE  Already checked in MySQL
+diff -r 962aec0d731c innobase/ib_config.h
+--- a/innobase/ib_config.h	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/ib_config.h	Thu Oct 09 08:30:28 2008 -0700
+@@ -3,6 +3,9 @@
+ 
+ /* Define to 1 if you have the <aio.h> header file. */
+ #define HAVE_AIO_H 1
++
++/* Define to 1 if compiler provides atomic builtins. */
++#define HAVE_ATOMIC_BUILTINS 1
+ 
+ /* Define to 1 if you have the <dlfcn.h> header file. */
+ #define HAVE_DLFCN_H 1
+diff -r 962aec0d731c innobase/ib_config.h.in
+--- a/innobase/ib_config.h.in	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/ib_config.h.in	Thu Oct 09 08:30:28 2008 -0700
+@@ -2,6 +2,9 @@
+ 
+ /* Define to 1 if you have the <aio.h> header file. */
+ #undef HAVE_AIO_H
++
++/* Define to 1 if compiler provides atomic builtins. */
++#undef HAVE_ATOMIC_BUILTINS
+ 
+ /* Define to 1 if you have the <dlfcn.h> header file. */
+ #undef HAVE_DLFCN_H
+diff -r 962aec0d731c innobase/include/sync0rw.h
+--- a/innobase/include/sync0rw.h	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/include/sync0rw.h	Thu Oct 09 08:30:28 2008 -0700
+@@ -325,7 +325,17 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
++/*==================*/
++	rw_lock_t*	lock);
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*==================*/
++	rw_lock_t*	lock);
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
+ /*================*/
+ 	rw_lock_t*	lock);
+ UNIV_INLINE
+@@ -408,6 +418,11 @@
+ 	rw_lock_debug_t*	info);	/* in: debug struct */
+ #endif /* UNIV_SYNC_DEBUG */
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++/* This value means NOT_LOCKED */
++#define RW_LOCK_BIAS		0x00100000
++#endif
++
+ /* NOTE! The structure appears here only for the compiler to know its size.
+ Do not use its fields directly! The structure used in the spin lock
+ implementation of a read-write lock. Several threads may have a shared lock
+@@ -417,9 +432,9 @@
+ field. Then no new readers are allowed in. */
+ 
+ struct rw_lock_struct {
+-	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
+-
+-#ifdef __WIN__
++			/* Used by sync0arr.c for thread queueing */
++	os_event_t	s_event;	/* Used for s_lock */
++	os_event_t	x_event;	/* Used for x_lock */
+ 	os_event_t	wait_ex_event;	/* This windows specific event is
+ 				used by the thread which has set the
+ 				lock state to RW_LOCK_WAIT_EX. The
+@@ -427,31 +442,35 @@
+ 				thread will be the next one to proceed
+ 				once the current the event gets
+ 				signalled. See LEMMA 2 in sync0sync.c */
++
++#ifdef HAVE_ATOMIC_BUILTINS
++	volatile lint	lock_word;	/* Lock state word; accessed via atomic builtins */
+ #endif
+ 
+-	ulint	reader_count;	/* Number of readers who have locked this
++	volatile ulint	reader_count;	/* Number of readers who have locked this
+ 				lock in the shared mode */
+-	ulint	writer; 	/* This field is set to RW_LOCK_EX if there
++	volatile ulint	writer; 	/* This field is set to RW_LOCK_EX if there
+ 				is a writer owning the lock (in exclusive
+ 				mode), RW_LOCK_WAIT_EX if a writer is
+ 				queueing for the lock, and
+ 				RW_LOCK_NOT_LOCKED, otherwise. */
+-	os_thread_id_t	writer_thread;
++	volatile os_thread_id_t	writer_thread;
+ 				/* Thread id of a possible writer thread */
+-	ulint	writer_count;	/* Number of times the same thread has
++	volatile ulint	writer_count;	/* Number of times the same thread has
+ 				recursively locked the lock in the exclusive
+ 				mode */
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_t	mutex;		/* The mutex protecting rw_lock_struct */
++#endif
+ 	ulint	pass; 		/* Default value 0. This is set to some
+ 				value != 0 given by the caller of an x-lock
+ 				operation, if the x-lock is to be passed to
+ 				another thread to unlock (which happens in
+ 				asynchronous i/o). */
+-	ulint	waiters;	/* This ulint is set to 1 if there are
+-				waiters (readers or writers) in the global
+-				wait array, waiting for this rw_lock.
+-				Otherwise, == 0. */
+-	ibool	writer_is_wait_ex;
++	volatile ulint	s_waiters; /* 1: there are waiters (s_lock) */
++	volatile ulint	x_waiters; /* 1: there are waiters (x_lock) */
++	volatile ulint	wait_ex_waiters; /* 1: there are waiters (wait_ex) */
++	volatile ibool	writer_is_wait_ex;
+ 				/* This is TRUE if the writer field is
+ 				RW_LOCK_WAIT_EX; this field is located far
+ 				from the memory update hotspot fields which
+diff -r 962aec0d731c innobase/include/sync0rw.ic
+--- a/innobase/include/sync0rw.ic	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/include/sync0rw.ic	Thu Oct 09 08:30:28 2008 -0700
+@@ -47,20 +47,52 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
+ /*================*/
+ 	rw_lock_t*	lock)
+ {
+-	return(lock->waiters);
++	return(lock->s_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*================*/
++	rw_lock_t*	lock)
++{
++	return(lock->x_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
++/*================*/
++	rw_lock_t*      lock)
++{
++	return(lock->wait_ex_waiters);
+ }
+ UNIV_INLINE
+ void
+-rw_lock_set_waiters(
+-/*================*/
++rw_lock_set_s_waiters(
+ 	rw_lock_t*	lock,
+ 	ulint		flag)
+ {
+-	lock->waiters = flag;
++	lock->s_waiters = flag;
++}
++UNIV_INLINE
++void
++rw_lock_set_x_waiters(
++	rw_lock_t*	lock,
++	ulint		flag)
++{
++	lock->x_waiters = flag;
++}
++UNIV_INLINE
++void
++rw_lock_set_wx_waiters(
++/*================*/
++	rw_lock_t*      lock,
++	ulint           flag)
++{
++	lock->wait_ex_waiters = flag;
+ }
+ UNIV_INLINE
+ ulint
+@@ -68,7 +100,19 @@
+ /*===============*/
+ 	rw_lock_t*	lock)
+ {
++#ifdef HAVE_ATOMIC_BUILTINS
++	if (lock->writer == RW_LOCK_NOT_LOCKED) {
++		return(RW_LOCK_NOT_LOCKED);
++	}
++
++	if (lock->writer_is_wait_ex) {
++		return(RW_LOCK_WAIT_EX);
++	} else {
++		return(RW_LOCK_EX);
++	}
++#else
+ 	return(lock->writer);
++#endif
+ }
+ UNIV_INLINE
+ void
+@@ -96,6 +140,7 @@
+ {
+ 	lock->reader_count = count;
+ }
++#ifndef HAVE_ATOMIC_BUILTINS
+ UNIV_INLINE
+ mutex_t*
+ rw_lock_get_mutex(
+@@ -104,6 +149,7 @@
+ {
+ 	return(&(lock->mutex));
+ }
++#endif
+ 
+ /**********************************************************************
+ Returns the value of writer_count for the lock. Does not reserve the lock
+@@ -133,14 +179,26 @@
+ 	const char*	file_name, /* in: file name where lock requested */
+ 	ulint		line)	/* in: line where requested */
+ {
+-#ifdef UNIV_SYNC_DEBUG
++#if defined(UNIV_SYNC_DEBUG) && !defined(HAVE_ATOMIC_BUILTINS)
+ 	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+ #endif /* UNIV_SYNC_DEBUG */
+ 	/* Check if the writer field is free */
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) {
++		/* try s-lock */
++		if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
++			/* fail */
++			__sync_fetch_and_add(&(lock->lock_word),1);
++			return(FALSE);	/* locking did not succeed */
++		}
++		/* success */
++		__sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ 	if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
+ 		/* Set the shared lock by incrementing the reader count */
+ 		lock->reader_count++;
++#endif
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 		rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
+@@ -167,11 +225,15 @@
+ 	const char*	file_name,	/* in: file name where requested */
+ 	ulint		line)		/* in: line where lock requested */
+ {
+-	ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
++	ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+ 	ut_ad(rw_lock_get_reader_count(lock) == 0);
+ 	
+ 	/* Set the shared lock by incrementing the reader count */
++#ifdef HAVE_ATOMIC_BUILTINS
++	__sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ 	lock->reader_count++;
++#endif
+ 
+ 	lock->last_s_file_name = file_name;
+ 	lock->last_s_line = line;
+@@ -199,7 +261,11 @@
+ 
+ 	rw_lock_set_writer(lock, RW_LOCK_EX);
+ 	lock->writer_thread = os_thread_get_curr_id();
++#ifdef HAVE_ATOMIC_BUILTINS
++	__sync_fetch_and_add(&(lock->writer_count),1);
++#else
+ 	lock->writer_count++;
++#endif
+ 	lock->pass = 0;
+ 			
+ 	lock->last_x_file_name = file_name;
+@@ -241,15 +307,21 @@
+ 	ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
+ #endif /* UNIV_SYNC_DEBUG */
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_enter(rw_lock_get_mutex(lock));
++#endif
+ 
+ 	if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 		return; /* Success */
+ 	} else {
+ 		/* Did not succeed, try spin wait */
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 		rw_lock_s_lock_spin(lock, pass, file_name, line);
+ 
+@@ -272,11 +344,23 @@
+ {
+ 	ibool	success	= FALSE;
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++		/* try s-lock */
++		if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
++			/* fail */
++			__sync_fetch_and_add(&(lock->lock_word),1);
++			return(FALSE);	/* locking did not succeed */
++		}
++		/* success */
++		__sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ 	mutex_enter(rw_lock_get_mutex(lock));
+ 
+ 	if (lock->writer == RW_LOCK_NOT_LOCKED) {
+ 		/* Set the shared lock by incrementing the reader count */
+ 		lock->reader_count++;
++#endif
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 		rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
+@@ -289,7 +373,9 @@
+ 		success = TRUE;
+ 	}
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 	return(success);
+ }
+@@ -309,6 +395,55 @@
+ {
+ 	ibool		success		= FALSE;
+ 	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
++#ifdef HAVE_ATOMIC_BUILTINS
++	if ((lock->lock_word == RW_LOCK_BIAS)
++			&& rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++		/* try x-lock */
++		if(__sync_sub_and_fetch(&(lock->lock_word),
++				RW_LOCK_BIAS) == 0) {
++			/* success */
++			/* try to lock writer */
++			if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++					== RW_LOCK_NOT_LOCKED) {
++				/* success */
++				lock->writer_thread = curr_thread;
++				lock->pass = 0;
++				lock->writer_is_wait_ex = FALSE;
++				/* the next call may act as a memory barrier */
++			relock:
++				__sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++				rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
++#endif
++
++				lock->last_x_file_name = file_name;
++				lock->last_x_line = line;
++
++				ut_ad(rw_lock_validate(lock));
++
++				return(TRUE);
++			} else {
++				/* x-unlock */
++				__sync_fetch_and_add(&(lock->lock_word),
++					RW_LOCK_BIAS);
++			}
++		} else {
++			/* fail (x-lock) */
++			__sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS);
++		}
++	}
++
++	if (lock->pass == 0
++			&& os_thread_eq(lock->writer_thread, curr_thread)
++			&& rw_lock_get_writer(lock) == RW_LOCK_EX) {
++		goto relock;
++	}
++
++	ut_ad(rw_lock_validate(lock));
++
++	return(FALSE);
++#else
+ 	mutex_enter(rw_lock_get_mutex(lock));
+ 
+ 	if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
+@@ -339,6 +474,7 @@
+         ut_ad(rw_lock_validate(lock));
+ 
+ 	return(success);
++#endif
+ }
+ 
+ /**********************************************************************
+@@ -354,16 +490,33 @@
+ #endif
+ 	)
+ {
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_t*	mutex	= &(lock->mutex);
+-	ibool		sg 	= FALSE;
++#endif
++	ibool		x_sg 	= FALSE;
++	ibool		wx_sg	= FALSE;
++#ifdef HAVE_ATOMIC_BUILTINS
++	ibool		last	= FALSE;
++#endif
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+         /* Acquire the mutex protecting the rw-lock fields */
+ 	mutex_enter(mutex);
++#endif
+ 
+ 	/* Reset the shared lock by decrementing the reader count */
+ 
+ 	ut_a(lock->reader_count > 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++	/* unlock lock_word */
++	__sync_fetch_and_add(&(lock->lock_word),1);
++
++	if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) {
++		last = TRUE;
++	}
++#else
+ 	lock->reader_count--;
++#endif
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 	rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
+@@ -372,20 +525,36 @@
+ 	/* If there may be waiters and this was the last s-lock,
+ 	signal the object */
+ 
+-	if (UNIV_UNLIKELY(lock->waiters)
++#ifdef HAVE_ATOMIC_BUILTINS
++	if (UNIV_UNLIKELY(last && lock->wait_ex_waiters)) {
++#else
++	if (UNIV_UNLIKELY(lock->wait_ex_waiters)
+ 			&& lock->reader_count == 0) {
+-	       	sg = TRUE;
++#endif
++	       	wx_sg = TRUE;
+ 
+-		rw_lock_set_waiters(lock, 0);
++		rw_lock_set_wx_waiters(lock, 0);
++	}
++#ifdef HAVE_ATOMIC_BUILTINS
++	else if (UNIV_UNLIKELY(last && lock->x_waiters)) {
++#else
++	else if (UNIV_UNLIKELY(lock->x_waiters)
++			&& lock->reader_count == 0) {
++#endif
++		x_sg = TRUE;
++
++		rw_lock_set_x_waiters(lock, 0);
+ 	}
+ 	
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_exit(mutex);
++#endif
+ 
+-	if (UNIV_UNLIKELY(sg)) {
+-#ifdef __WIN__
++	if (UNIV_UNLIKELY(wx_sg)) {
+ 		os_event_set(lock->wait_ex_event);
+-#endif
+-		os_event_set(lock->event);
++		sync_array_object_signalled(sync_primary_wait_array);
++	} else if (UNIV_UNLIKELY(x_sg)) {
++		os_event_set(lock->x_event);
+ 		sync_array_object_signalled(sync_primary_wait_array);
+ 	}
+ 
+@@ -409,13 +578,22 @@
+ 
+ 	ut_ad(lock->reader_count > 0);
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	__sync_sub_and_fetch(&(lock->reader_count),1);
++#else
+ 	lock->reader_count--;
++#endif
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 	rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
+ #endif
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	ut_ad(!lock->s_waiters);
++	ut_ad(!lock->x_waiters);
++#else
+ 	ut_ad(!lock->waiters);
++#endif
+         ut_ad(rw_lock_validate(lock));
+ #ifdef UNIV_SYNC_PERF_STAT
+ 	rw_s_exit_count++;
+@@ -435,41 +613,81 @@
+ #endif
+ 	)
+ {
+-	ibool	sg 	= FALSE;
++#ifdef HAVE_ATOMIC_BUILTINS
++	ibool	last	= FALSE;
++#endif
++	ibool	s_sg	= FALSE;
++	ibool	x_sg	= FALSE;
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+         /* Acquire the mutex protecting the rw-lock fields */
+ 	mutex_enter(&(lock->mutex));
++#endif
+ 
+ 	/* Reset the exclusive lock if this thread no longer has an x-mode
+ 	lock */
+ 
+ 	ut_ad(lock->writer_count > 0);
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++		last = TRUE;
++	}
++
++	if (last) {
++		/* unlock lock_word */
++		__sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS);
++
++		/* FIXME: -1 is a bad-mannered value for a pthread id,
++		          but we should not keep the ID of a non-owner. */
++		lock->writer_thread = -1;
++
++		/* atomic operation may be safer about memory order. */
++		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
++		__sync_synchronize();
++	}
++#else
+ 	lock->writer_count--;
+ 
+ 	if (lock->writer_count == 0) {
+ 		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ 	}
++#endif
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
+ #endif
+ 	
+ 	/* If there may be waiters, signal the lock */
+-	if (UNIV_UNLIKELY(lock->waiters)
+-			&& lock->writer_count == 0) {
+-
+-	       	sg = TRUE;
+-		rw_lock_set_waiters(lock, 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++	if (last) {
++#else
++	if (lock->writer_count == 0) {
++#endif
++		if(lock->s_waiters){
++			s_sg = TRUE;
++			rw_lock_set_s_waiters(lock, 0);
++		}
++		if(lock->x_waiters){
++			x_sg = TRUE;
++			rw_lock_set_x_waiters(lock, 0);
++		}
+ 	}
+ 	
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_exit(&(lock->mutex));
++#endif
+ 
+-	if (UNIV_UNLIKELY(sg)) {
++	if (UNIV_UNLIKELY(s_sg)) {
++		os_event_set(lock->s_event);
++		sync_array_object_signalled(sync_primary_wait_array);
++	}
++	if (UNIV_UNLIKELY(x_sg)) {
+ #ifdef __WIN__
++		/* I doubt the necessity of it. */
+ 		os_event_set(lock->wait_ex_event);
+ #endif
+-		os_event_set(lock->event);
++		os_event_set(lock->x_event);
+ 		sync_array_object_signalled(sync_primary_wait_array);
+ 	}
+ 
+@@ -494,9 +712,13 @@
+ 
+ 	ut_ad(lock->writer_count > 0);
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++#else
+ 	lock->writer_count--;
+ 
+ 	if (lock->writer_count == 0) {
++#endif
+ 		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ 	}
+ 
+@@ -504,7 +726,12 @@
+ 	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+ #endif
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	ut_ad(!lock->s_waiters);
++	ut_ad(!lock->x_waiters);
++#else
+ 	ut_ad(!lock->waiters);
++#endif
+         ut_ad(rw_lock_validate(lock));
+ 
+ #ifdef UNIV_SYNC_PERF_STAT
+diff -r 962aec0d731c innobase/sync/sync0arr.c
+--- a/innobase/sync/sync0arr.c	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/sync/sync0arr.c	Thu Oct 09 08:30:28 2008 -0700
+@@ -309,13 +309,13 @@
+ {
+ 	if (type == SYNC_MUTEX) {
+ 		return(os_event_reset(((mutex_t *) object)->event));
+-#ifdef __WIN__
+ 	} else if (type == RW_LOCK_WAIT_EX) {
+ 		return(os_event_reset(
+ 		       ((rw_lock_t *) object)->wait_ex_event));
+-#endif
+-	} else {
+-		return(os_event_reset(((rw_lock_t *) object)->event));
++	} else if (type == RW_LOCK_SHARED) {
++		return(os_event_reset(((rw_lock_t *) object)->s_event));
++	} else { /* RW_LOCK_EX */
++		return(os_event_reset(((rw_lock_t *) object)->x_event));
+ 	}
+ }		
+ 
+@@ -415,15 +415,12 @@
+ 
+ 	if (cell->request_type == SYNC_MUTEX) {
+ 		event = ((mutex_t*) cell->wait_object)->event;
+-#ifdef __WIN__
+-	/* On windows if the thread about to wait is the one which
+-	has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
+-	it waits on a special event i.e.: wait_ex_event. */
+ 	} else if (cell->request_type == RW_LOCK_WAIT_EX) {
+ 		event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
+-#endif
+-	} else {	
+-		event = ((rw_lock_t*) cell->wait_object)->event;
++	} else if (cell->request_type == RW_LOCK_SHARED) {
++		event = ((rw_lock_t*) cell->wait_object)->s_event;
++	} else {
++		event = ((rw_lock_t*) cell->wait_object)->x_event;
+ 	}
+ 
+        	cell->waiting = TRUE;
+@@ -464,6 +461,7 @@
+ 	mutex_t*	mutex;
+ 	rw_lock_t*	rwlock;
+ 	ulint		type;
++	ulint		writer;
+ 
+ 	type = cell->request_type;
+ 
+@@ -492,12 +490,10 @@
+ 			(ulong) mutex->waiters);
+ 
+ 	} else if (type == RW_LOCK_EX
+-#ifdef __WIN__
+ 		   || type == RW_LOCK_WAIT_EX
+-#endif
+ 		   || type == RW_LOCK_SHARED) {
+ 
+-		fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
++		fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file);
+ 
+ 		rwlock = cell->old_wait_rw_lock;
+ 
+@@ -505,21 +501,23 @@
+ 			" RW-latch at %p created in file %s line %lu\n",
+ 			rwlock, rwlock->cfile_name,
+ 			(ulong) rwlock->cline);
+-		if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
++		writer = rw_lock_get_writer(rwlock);
++		if (writer != RW_LOCK_NOT_LOCKED) {
+ 			fprintf(file,
+ 			"a writer (thread id %lu) has reserved it in mode %s",
+ 				(ulong) os_thread_pf(rwlock->writer_thread),
+-				rwlock->writer == RW_LOCK_EX
++				writer == RW_LOCK_EX
+ 				? " exclusive\n"
+ 				: " wait exclusive\n");
+ 		}
+ 		
+ 		fprintf(file,
+-			"number of readers %lu, waiters flag %lu\n"
++			"number of readers %lu, s_waiters flag %lu, x_waiters flag %lu\n"
+ 			"Last time read locked in file %s line %lu\n"
+ 			"Last time write locked in file %s line %lu\n",
+ 			(ulong) rwlock->reader_count,
+-			(ulong) rwlock->waiters,
++			(ulong) rwlock->s_waiters,
++			(ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters),
+ 			rwlock->last_s_file_name,
+ 			(ulong) rwlock->last_s_line,
+ 			rwlock->last_x_file_name,
+@@ -839,11 +837,15 @@
+ /*========================*/
+ 	sync_array_t*	arr)	/* in: wait array */
+ {
++#ifdef HAVE_ATOMIC_BUILTINS
++	__sync_fetch_and_add(&(arr->sg_count),1);
++#else
+         sync_array_enter(arr);
+ 
+ 	arr->sg_count++;
+ 
+         sync_array_exit(arr);
++#endif
+ }
+ 
+ /**************************************************************************
+@@ -880,19 +882,23 @@
+ 
+ 					mutex = cell->wait_object;
+ 					os_event_set(mutex->event);
+-#ifdef __WIN__
+ 				} else if (cell->request_type
+ 					   == RW_LOCK_WAIT_EX) {
+ 					rw_lock_t*	lock;
+ 
+ 					lock = cell->wait_object;
+ 					os_event_set(lock->wait_ex_event);
+-#endif
+-				} else {
++				} else if (cell->request_type
++					   == RW_LOCK_SHARED) {
+ 					rw_lock_t*	lock;
+ 
+ 					lock = cell->wait_object;
+-					os_event_set(lock->event);
++					os_event_set(lock->s_event);
++				} else {
++					rw_lock_t*      lock;
++
++					lock = cell->wait_object;
++					os_event_set(lock->x_event);
+ 				}
+                         }
+                 }
+diff -r 962aec0d731c innobase/sync/sync0rw.c
+--- a/innobase/sync/sync0rw.c	Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/sync/sync0rw.c	Thu Oct 09 08:30:28 2008 -0700
+@@ -99,6 +99,7 @@
+ 	object is created, then the following call initializes
+ 	the sync system. */
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_create(rw_lock_get_mutex(lock));
+ 	mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
+ 
+@@ -108,8 +109,14 @@
+ 	lock->mutex.cmutex_name = cmutex_name;
+ 	lock->mutex.mutex_type = 1;
+ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
++#endif /* !HAVE_ATOMIC_BUILTINS */
+   
+-	rw_lock_set_waiters(lock, 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++	lock->lock_word = RW_LOCK_BIAS;
++#endif
++	rw_lock_set_s_waiters(lock, 0);
++	rw_lock_set_x_waiters(lock, 0);
++	rw_lock_set_wx_waiters(lock, 0);
+ 	rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ 	lock->writer_count = 0;
+ 	rw_lock_set_reader_count(lock, 0);
+@@ -130,11 +137,9 @@
+ 	lock->last_x_file_name = "not yet reserved";
+ 	lock->last_s_line = 0;
+ 	lock->last_x_line = 0;
+-	lock->event = os_event_create(NULL);
+-
+-#ifdef __WIN__
++	lock->s_event = os_event_create(NULL);
++	lock->x_event = os_event_create(NULL);
+ 	lock->wait_ex_event = os_event_create(NULL);
+-#endif
+ 
+ 	mutex_enter(&rw_lock_list_mutex);
+ 	
+@@ -162,19 +167,21 @@
+ 	ut_a(rw_lock_validate(lock));
+ #endif /* UNIV_DEBUG */
+ 	ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+-	ut_a(rw_lock_get_waiters(lock) == 0);
++	ut_a(rw_lock_get_s_waiters(lock) == 0);
++	ut_a(rw_lock_get_x_waiters(lock) == 0);
++	ut_a(rw_lock_get_wx_waiters(lock) == 0);
+ 	ut_a(rw_lock_get_reader_count(lock) == 0);
+ 	
+ 	lock->magic_n = 0;
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_free(rw_lock_get_mutex(lock));
++#endif
+ 
+ 	mutex_enter(&rw_lock_list_mutex);
+-	os_event_free(lock->event);
+-
+-#ifdef __WIN__
++	os_event_free(lock->s_event);
++	os_event_free(lock->x_event);
+ 	os_event_free(lock->wait_ex_event);
+-#endif
+ 
+ 	if (UT_LIST_GET_PREV(list, lock)) {
+ 		ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+@@ -192,6 +199,8 @@
+ Checks that the rw-lock has been initialized and that there are no
+ simultaneous shared and exclusive locks. */
+ 
++/* MEMO: If HAVE_ATOMIC_BUILTINS, we should use this function statically. */
++
+ ibool
+ rw_lock_validate(
+ /*=============*/
+@@ -199,7 +208,9 @@
+ {
+ 	ut_a(lock);
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_enter(rw_lock_get_mutex(lock));
++#endif
+ 
+ 	ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
+ 	ut_a((rw_lock_get_reader_count(lock) == 0)
+@@ -207,11 +218,17 @@
+ 	ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
+ 	     || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
+ 	     || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
+-	ut_a((rw_lock_get_waiters(lock) == 0)
+-	     || (rw_lock_get_waiters(lock) == 1));
++	ut_a((rw_lock_get_s_waiters(lock) == 0)
++	     || (rw_lock_get_s_waiters(lock) == 1));
++	ut_a((rw_lock_get_x_waiters(lock) == 0)
++	     || (rw_lock_get_x_waiters(lock) == 1));
++	ut_a((rw_lock_get_wx_waiters(lock) == 0)
++	     || (rw_lock_get_wx_waiters(lock) == 1));
+ 	ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
+ 	     
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 	return(TRUE);
+ }
+@@ -237,13 +254,14 @@
+         ut_ad(rw_lock_validate(lock));
+ 
+ lock_loop:
++        i = 0;
++spin_loop:
+ 	rw_s_spin_wait_count++;
+ 
+ 	/* Spin waiting for the writer field to become free */
+-        i = 0;
+ 
+-        while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
+-						&& i < SYNC_SPIN_ROUNDS) {
++        while (i < SYNC_SPIN_ROUNDS
++			&& rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
+         	if (srv_spin_wait_delay) {
+         		ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+         	}
+@@ -262,15 +280,27 @@
+ 		lock->cfile_name, (ulong) lock->cline, (ulong) i);
+ 	}
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_enter(rw_lock_get_mutex(lock));
++#endif
+ 
+         /* We try once again to obtain the lock */
+ 
+ 	if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 		return; /* Success */
+ 	} else {
++#ifdef HAVE_ATOMIC_BUILTINS
++		/* same as sync0sync.c does */
++		i++;
++
++		if (i < SYNC_SPIN_ROUNDS) {
++			goto spin_loop;
++		}
++#endif
+ 		/* If we get here, locking did not succeed, we may
+ 		suspend the thread to wait in the wait array */
+ 
+@@ -281,9 +311,19 @@
+ 				file_name, line,
+ 				&index);
+ 
+-		rw_lock_set_waiters(lock, 1);
++		rw_lock_set_s_waiters(lock, 1);
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++		/* same as sync0sync.c does */
++		for (i = 0; i < 4; i++) {
++			if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
++				sync_array_free_cell(sync_primary_wait_array, index);
++				return; /* Success */
++			}
++		}
++#else
+ 		mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 		if (srv_print_latch_waits) {
+ 			fprintf(stderr,
+@@ -318,13 +358,19 @@
+ {
+ 	ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_enter(&(lock->mutex));
++#endif
+ 
+ 	lock->writer_thread = os_thread_get_curr_id();
+ 
+ 	lock->pass = 0;
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_exit(&(lock->mutex));
++#else
++	__sync_synchronize();
++#endif
+ }
+ 
+ /**********************************************************************
+@@ -342,6 +388,89 @@
+ 	const char*	file_name,/* in: file name where lock requested */
+ 	ulint		line)	/* in: line where requested */
+ {
++#ifdef HAVE_ATOMIC_BUILTINS
++	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
++
++	/* try to lock writer */
++	if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++			== RW_LOCK_NOT_LOCKED) {
++		/* success */
++		/* obtain RW_LOCK_WAIT_EX right */
++		lock->writer_thread = curr_thread;
++		lock->pass = pass;
++		lock->writer_is_wait_ex = TRUE;
++		/* atomic operation may be safer about memory order. */
++		__sync_synchronize();
++#ifdef UNIV_SYNC_DEBUG
++		rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
++					file_name, line);
++#endif
++	}
++
++	if (!os_thread_eq(lock->writer_thread, curr_thread)) {
++		return(RW_LOCK_NOT_LOCKED);
++	}
++
++	switch(rw_lock_get_writer(lock)) {
++	    case RW_LOCK_WAIT_EX:
++		/* have right to try x-lock */
++		if (lock->lock_word == RW_LOCK_BIAS) {
++			/* try x-lock */
++			if(__sync_sub_and_fetch(&(lock->lock_word),
++					RW_LOCK_BIAS) == 0) {
++				/* success */
++				lock->pass = pass;
++				lock->writer_is_wait_ex = FALSE;
++				__sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++				rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
++				rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++							file_name, line);
++#endif
++
++				lock->last_x_file_name = file_name;
++				lock->last_x_line = line;
++
++				/* Locking succeeded, we may return */
++				return(RW_LOCK_EX);
++			} else {
++				/* fail */
++				__sync_fetch_and_add(&(lock->lock_word),
++					RW_LOCK_BIAS);
++			}
++		}
++		/* There are readers, we have to wait */
++		return(RW_LOCK_WAIT_EX);
++
++		break;
++
++	    case RW_LOCK_EX:
++		/* already have x-lock */
++		if ((lock->pass == 0)&&(pass == 0)) {
++			__sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
++						line);
++#endif
++
++			lock->last_x_file_name = file_name;
++			lock->last_x_line = line;
++
++			/* Locking succeeded, we may return */
++			return(RW_LOCK_EX);
++		}
++
++		return(RW_LOCK_NOT_LOCKED);
++
++		break;
++
++	    default: /* ??? */
++		return(RW_LOCK_NOT_LOCKED);
++	}
++#else /* HAVE_ATOMIC_BUILTINS */
++
+ #ifdef UNIV_SYNC_DEBUG
+ 	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -423,6 +552,7 @@
+ 		/* Locking succeeded, we may return */
+ 		return(RW_LOCK_EX);
+ 	}
++#endif /* HAVE_ATOMIC_BUILTINS */
+ 
+ 	/* Locking did not succeed */
+ 	return(RW_LOCK_NOT_LOCKED);
+@@ -448,19 +578,33 @@
+ 	ulint		line)	/* in: line where requested */
+ {
+         ulint	index;  /* index of the reserved wait cell */
+-        ulint	state;	/* lock state acquired */
++        ulint	state = RW_LOCK_NOT_LOCKED;	/* lock state acquired */
++#ifdef HAVE_ATOMIC_BUILTINS
++	ulint	prev_state = RW_LOCK_NOT_LOCKED;
++#endif
+         ulint	i;	/* spin round count */
+         
+         ut_ad(rw_lock_validate(lock));
+ 
+ lock_loop:
++	i = 0;
++
++#ifdef HAVE_ATOMIC_BUILTINS
++	prev_state = state;
++#else
+         /* Acquire the mutex protecting the rw-lock fields */
+ 	mutex_enter_fast(&(lock->mutex));
++#endif
+ 
+ 	state = rw_lock_x_lock_low(lock, pass, file_name, line);
+ 		
++#ifdef HAVE_ATOMIC_BUILTINS
++	if (state != prev_state) i=0; /* if progress, reset counter. */
++#else
+ 	mutex_exit(&(lock->mutex));
++#endif
+         
++spin_loop:
+ 	if (state == RW_LOCK_EX) {
+ 
+ 		return;	/* Locking succeeded */
+@@ -468,10 +612,9 @@
+ 	} else if (state == RW_LOCK_NOT_LOCKED) {
+ 
+  		/* Spin waiting for the writer field to become free */
+-		i = 0;
+ 
+-        	while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED 
+-               					&& i < SYNC_SPIN_ROUNDS) {
++        	while (i < SYNC_SPIN_ROUNDS
++			&& rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
+         		if (srv_spin_wait_delay) {
+ 				ut_delay(ut_rnd_interval(0,
+ 							srv_spin_wait_delay));
+@@ -485,9 +628,12 @@
+         } else if (state == RW_LOCK_WAIT_EX) {
+ 
+  		/* Spin waiting for the reader count field to become zero */
+-		i = 0;
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++		while (lock->lock_word != RW_LOCK_BIAS
++#else
+         	while (rw_lock_get_reader_count(lock) != 0 
++#endif
+                					&& i < SYNC_SPIN_ROUNDS) {
+         		if (srv_spin_wait_delay) {
+ 				ut_delay(ut_rnd_interval(0,
+@@ -500,7 +646,6 @@
+ 			os_thread_yield();
+ 		}
+         } else {
+-		i = 0; /* Eliminate a compiler warning */
+ 		ut_error;
+ 	}	
+ 
+@@ -516,34 +661,69 @@
+         /* We try once again to obtain the lock. Acquire the mutex protecting
+ 	the rw-lock fields */
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	prev_state = state;
++#else
+ 	mutex_enter(rw_lock_get_mutex(lock));
++#endif
+ 
+ 	state = rw_lock_x_lock_low(lock, pass, file_name, line);
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	if (state != prev_state) i=0; /* if progress, reset counter. */
++#endif
++
+ 	if (state == RW_LOCK_EX) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 		return;	/* Locking succeeded */
+ 	}
++
++#ifdef HAVE_ATOMIC_BUILTINS
++	/* same as sync0sync.c does */
++	i++;
++
++	if (i < SYNC_SPIN_ROUNDS) {
++		goto spin_loop;
++	}
++#endif
+ 
+ 	rw_x_system_call_count++;
+ 
+         sync_array_reserve_cell(sync_primary_wait_array,
+ 				lock,
+-#ifdef __WIN__
+-				/* On windows RW_LOCK_WAIT_EX signifies
+-				that this thread should wait on the
+-				special wait_ex_event. */
+ 				(state == RW_LOCK_WAIT_EX)
+ 				 ? RW_LOCK_WAIT_EX :
+-#endif
+ 				RW_LOCK_EX,
+ 				file_name, line,
+ 				&index);
+ 
+-	rw_lock_set_waiters(lock, 1);
++	if (state == RW_LOCK_WAIT_EX) {
++		rw_lock_set_wx_waiters(lock, 1);
++	} else {
++		rw_lock_set_x_waiters(lock, 1);
++	}
+ 
++#ifdef HAVE_ATOMIC_BUILTINS
++	/* same as sync0sync.c does */
++	for (i = 0; i < 4; i++) {
++		prev_state = state;
++		state = rw_lock_x_lock_low(lock, pass, file_name, line);
++		if (state == RW_LOCK_EX) {
++			sync_array_free_cell(sync_primary_wait_array, index);
++			return; /* Locking succeeded */
++		}
++		if (state != prev_state) {
++			/* retry! */
++			sync_array_free_cell(sync_primary_wait_array, index);
++			goto lock_loop;
++		}
++	}
++#else
+ 	mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 
+ 	if (srv_print_latch_waits) {
+ 		fprintf(stderr,
+@@ -718,7 +898,9 @@
+ 	ut_ad(lock);
+ 	ut_ad(rw_lock_validate(lock));
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_enter(&(lock->mutex));
++#endif
+ 
+ 	info = UT_LIST_GET_FIRST(lock->debug_list);
+ 
+@@ -728,7 +910,9 @@
+ 		    && (info->pass == 0)
+ 		    && (info->lock_type == lock_type)) {
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 			mutex_exit(&(lock->mutex));
++#endif
+ 		    	/* Found! */
+ 
+ 		    	return(TRUE);
+@@ -736,7 +920,9 @@
+ 
+ 		info = UT_LIST_GET_NEXT(list, info);
+ 	}
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_exit(&(lock->mutex));
++#endif
+ 
+ 	return(FALSE);
+ }
+@@ -758,21 +944,25 @@
+ 	ut_ad(lock);
+ 	ut_ad(rw_lock_validate(lock));
+ 	
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_enter(&(lock->mutex));
++#endif
+ 
+ 	if (lock_type == RW_LOCK_SHARED) {
+ 		if (lock->reader_count > 0) {
+ 			ret = TRUE;
+ 		}
+ 	} else if (lock_type == RW_LOCK_EX) {
+-		if (lock->writer == RW_LOCK_EX) {
++		if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
+ 			ret = TRUE;
+ 		}
+ 	} else {
+ 		ut_error;
+ 	}
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 	mutex_exit(&(lock->mutex));
++#endif
+ 
+ 	return(ret);
+ }
+@@ -801,16 +991,26 @@
+ 
+ 		count++;
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_enter(&(lock->mutex));
++#endif
+ 
+ 		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ 		    || (rw_lock_get_reader_count(lock) != 0)
+-		    || (rw_lock_get_waiters(lock) != 0)) {
++		    || (rw_lock_get_s_waiters(lock) != 0)
++		    || (rw_lock_get_x_waiters(lock) != 0)
++		    || (rw_lock_get_wx_waiters(lock) != 0)) {
+ 
+ 			fprintf(stderr, "RW-LOCK: %p ", lock);
+ 
+-			if (rw_lock_get_waiters(lock)) {
+-				fputs(" Waiters for the lock exist\n", stderr);
++			if (rw_lock_get_s_waiters(lock)) {
++				fputs(" s_waiters for the lock exist,", stderr);
++			}
++			if (rw_lock_get_x_waiters(lock)) {
++				fputs(" x_waiters for the lock exist\n", stderr);
++			}
++			if (rw_lock_get_wx_waiters(lock)) {
++				fputs(" wait_ex_waiters for the lock exist\n", stderr);
+ 			} else {
+ 				putc('\n', stderr);
+ 			}
+@@ -822,7 +1022,9 @@
+ 			}
+ 		}
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_exit(&(lock->mutex));
++#endif
+ 		lock = UT_LIST_GET_NEXT(list, lock);
+ 	}
+ 
+@@ -847,10 +1049,18 @@
+ 
+ 	if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ 	    || (rw_lock_get_reader_count(lock) != 0)
+-	    || (rw_lock_get_waiters(lock) != 0)) {
++	    || (rw_lock_get_s_waiters(lock) != 0)
++	    || (rw_lock_get_x_waiters(lock) != 0)
++	    || (rw_lock_get_wx_waiters(lock) != 0)) {
+ 
+-		if (rw_lock_get_waiters(lock)) {
+-			fputs(" Waiters for the lock exist\n", stderr);
++		if (rw_lock_get_s_waiters(lock)) {
++			fputs(" s_waiters for the lock exist,", stderr);
++		}
++		if (rw_lock_get_x_waiters(lock)) {
++			fputs(" x_waiters for the lock exist\n", stderr);
++		}
++		if (rw_lock_get_wx_waiters(lock)) {
++			fputs(" wait_ex_waiters for the lock exist\n", stderr);
+ 		} else {
+ 			putc('\n', stderr);
+ 		}
+@@ -909,14 +1119,18 @@
+ 	lock = UT_LIST_GET_FIRST(rw_lock_list);
+ 
+ 	while (lock != NULL) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_enter(rw_lock_get_mutex(lock));
++#endif
+ 
+ 		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ 				|| (rw_lock_get_reader_count(lock) != 0)) {
+ 			count++;
+ 		}
+ 
++#ifndef HAVE_ATOMIC_BUILTINS
+ 		mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ 		lock = UT_LIST_GET_NEXT(list, lock);
+ 	}
+ 
+diff -r 962aec0d731c patch_info/innodb_rw_lock.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_rw_lock.info	Thu Oct 09 08:30:28 2008 -0700
+@@ -0,0 +1,6 @@
++File=innodb_rw_lock.patch
++Name=Fix of InnoDB rw_locks
++Version=1.0
++Author=Yasufumi Kinoshita
++License=BSD
++Comment=
diff --git a/mysql-innodb_show_bp.patch b/mysql-innodb_show_bp.patch
new file mode 100644
index 0000000..a56ae9a
--- /dev/null
+++ b/mysql-innodb_show_bp.patch
@@ -0,0 +1,447 @@
+diff -r fe944d2c6e1f innobase/btr/btr0btr.c
+--- a/innobase/btr/btr0btr.c	Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/btr/btr0btr.c	Mon Nov 10 19:48:24 2008 -0800
+@@ -2989,3 +2989,11 @@
+ 
+ 	return(TRUE);
+ }
++
++dulint
++btr_page_get_index_id_noninline(
++/*============*/	/* out: index id, as returned by btr_page_get_index_id() */
++        page_t* page)  /* in: index page */
++{
++  return btr_page_get_index_id(page); /* out-of-line wrapper around the UNIV_INLINE getter */
++}
+diff -r fe944d2c6e1f innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c	Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/buf/buf0buf.c	Mon Nov 10 19:48:24 2008 -0800
+@@ -2629,3 +2629,13 @@
+ 	buf_block_print(block);
+ }
+ 
++buf_block_t*
++buf_pool_get_nth_block_no_inline(
++/*===================*/
++                                /* out: pointer to block */
++        buf_pool_t*     buf_pool,/* in: buffer pool, passed through unchanged */
++        ulint           i)      /* in: index of the block, passed through unchanged */{
++/* Out-of-line wrapper: simply forwards to buf_pool_get_nth_block(). */
++return buf_pool_get_nth_block(buf_pool, i);
++
++}
+diff -r fe944d2c6e1f innobase/include/btr0btr.h
+--- a/innobase/include/btr0btr.h	Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/include/btr0btr.h	Mon Nov 10 19:48:24 2008 -0800
+@@ -69,6 +69,12 @@
+ UNIV_INLINE
+ dulint
+ btr_page_get_index_id(
++/*==================*/
++				/* out: index id */
++	page_t*		page);	/* in: index page */
++
++dulint
++btr_page_get_index_id_noninline(
+ /*==================*/
+ 				/* out: index id */
+ 	page_t*		page);	/* in: index page */
+diff -r fe944d2c6e1f innobase/include/buf0buf.h
+--- a/innobase/include/buf0buf.h	Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/include/buf0buf.h	Mon Nov 10 19:48:24 2008 -0800
+@@ -703,6 +703,8 @@
+ buf_get_free_list_len(void);
+ /*=======================*/
+ 
++void buf_pool_dump(void);
++buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i);  
+ 
+ 			
+ /* The buffer control block structure */
+diff -r fe944d2c6e1f innobase/include/page0page.h
+--- a/innobase/include/page0page.h	Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/include/page0page.h	Mon Nov 10 19:48:24 2008 -0800
+@@ -260,6 +260,12 @@
+ /*============*/
+ 			/* out: number of user records */
+ 	page_t*	page);	/* in: index page */
++
++ulint
++page_get_n_recs_noninline(
++/*============*/
++			/* out: number of user records */
++	page_t*	page);	/* in: index page */
+ /*******************************************************************
+ Returns the number of records before the given record in chain.
+ The number includes infimum and supremum records. */
+@@ -519,6 +525,12 @@
+ UNIV_INLINE
+ ulint
+ page_get_data_size(
++/*===============*/
++			/* out: data in bytes */
++	page_t*	page);	/* in: index page */
++
++ulint
++page_get_data_size_noninline(
+ /*===============*/
+ 			/* out: data in bytes */
+ 	page_t*	page);	/* in: index page */
+diff -r fe944d2c6e1f innobase/page/page0page.c
+--- a/innobase/page/page0page.c	Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/page/page0page.c	Mon Nov 10 19:48:24 2008 -0800
+@@ -1994,3 +1994,25 @@
+ 		page_cur_move_to_next(&cur);
+ 	}
+ }
++
++ulint
++page_get_n_recs_noninline(
++/*============*/
++                        /* out: number of user records */
++        page_t* page)  /* in: index page */
++{
++ return page_get_n_recs(page); /* out-of-line wrapper: forwards to page_get_n_recs() */
++}
++
++
++ulint
++page_get_data_size_noninline(
++/*============*/
++                        /* out: data in bytes */
++        page_t* page)  /* in: index page */
++{
++ return page_get_data_size(page); /* out-of-line wrapper around the UNIV_INLINE getter */
++}
++
++
++
+diff -r fe944d2c6e1f mysql-test/r/information_schema.result
+--- a/mysql-test/r/information_schema.result	Mon Nov 10 19:47:27 2008 -0800
++++ b/mysql-test/r/information_schema.result	Mon Nov 10 19:48:25 2008 -0800
+@@ -42,6 +42,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INNODB_BUFFER_POOL_CONTENT
+ INDEX_STATISTICS
+ KEY_COLUMN_USAGE
+ PROCESSLIST
+@@ -741,7 +742,7 @@
+ CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1;
+ CREATE VIEW a2 AS SELECT t_CRASHME FROM a1;
+ count(*)
+-107
++108
+ drop view a2, a1;
+ drop table t_crashme;
+ select table_schema,table_name, column_name from
+@@ -802,6 +803,7 @@
+ TABLE_NAME	COLUMN_NAME	PRIVILEGES
+ COLUMNS	TABLE_NAME	select
+ COLUMN_PRIVILEGES	TABLE_NAME	select
++INNODB_BUFFER_POOL_CONTENT	TABLE_NAME	select
+ INDEX_STATISTICS	TABLE_NAME	select
+ KEY_COLUMN_USAGE	TABLE_NAME	select
+ STATISTICS	TABLE_NAME	select
+@@ -815,7 +817,7 @@
+ flush privileges;
+ SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA;
+ table_schema	count(*)
+-information_schema	22
++information_schema	23
+ mysql	17
+ create table t1 (i int, j int);
+ create trigger trg1 before insert on t1 for each row
+@@ -1206,6 +1208,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY	COLLATION_NAME
+ COLUMNS	TABLE_SCHEMA
+ COLUMN_PRIVILEGES	TABLE_SCHEMA
++INNODB_BUFFER_POOL_CONTENT	TABLE_SCHEMA
+ INDEX_STATISTICS	TABLE_SCHEMA
+ KEY_COLUMN_USAGE	CONSTRAINT_SCHEMA
+ PROCESSLIST	ID
+@@ -1243,6 +1246,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY	COLLATION_NAME
+ COLUMNS	TABLE_SCHEMA
+ COLUMN_PRIVILEGES	TABLE_SCHEMA
++INNODB_BUFFER_POOL_CONTENT	TABLE_SCHEMA
+ INDEX_STATISTICS	TABLE_SCHEMA
+ KEY_COLUMN_USAGE	CONSTRAINT_SCHEMA
+ PROCESSLIST	ID
+@@ -1332,6 +1336,7 @@
+ COLUMNS	information_schema.COLUMNS	1
+ COLUMN_PRIVILEGES	information_schema.COLUMN_PRIVILEGES	1
+ INDEX_STATISTICS	information_schema.INDEX_STATISTICS	1
++INNODB_BUFFER_POOL_CONTENT	information_schema.INNODB_BUFFER_POOL_CONTENT	1
+ KEY_COLUMN_USAGE	information_schema.KEY_COLUMN_USAGE	1
+ PROCESSLIST	information_schema.PROCESSLIST	1
+ PROFILING	information_schema.PROFILING	1
+diff -r fe944d2c6e1f mysql-test/r/information_schema_db.result
+--- a/mysql-test/r/information_schema_db.result	Mon Nov 10 19:47:27 2008 -0800
++++ b/mysql-test/r/information_schema_db.result	Mon Nov 10 19:48:25 2008 -0800
+@@ -11,6 +11,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INNODB_BUFFER_POOL_CONTENT
+ INDEX_STATISTICS
+ KEY_COLUMN_USAGE
+ PROCESSLIST
+diff -r fe944d2c6e1f mysql-test/r/mysqlshow.result
+--- a/mysql-test/r/mysqlshow.result	Mon Nov 10 19:47:27 2008 -0800
++++ b/mysql-test/r/mysqlshow.result	Mon Nov 10 19:48:25 2008 -0800
+@@ -85,6 +85,7 @@
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS                               |
+ | COLUMN_PRIVILEGES                     |
++| INNODB_BUFFER_POOL_CONTENT            |
+ | INDEX_STATISTICS                      |
+ | KEY_COLUMN_USAGE                      |
+ | PROCESSLIST                           |
+@@ -112,6 +113,7 @@
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS                               |
+ | COLUMN_PRIVILEGES                     |
++| INNODB_BUFFER_POOL_CONTENT            |
+ | INDEX_STATISTICS                      |
+ | KEY_COLUMN_USAGE                      |
+ | PROCESSLIST                           |
+diff -r fe944d2c6e1f patch_info/innodb_show_bp.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_show_bp.info	Mon Nov 10 19:48:25 2008 -0800
+@@ -0,0 +1,6 @@
++File=innodb_show_bp.patch
++Name=show innodb buffer pool content
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=
+diff -r fe944d2c6e1f sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc	Mon Nov 10 19:47:27 2008 -0800
++++ b/sql/ha_innodb.cc	Mon Nov 10 19:48:25 2008 -0800
+@@ -128,10 +128,12 @@
+ #include "../innobase/include/lock0lock.h"
+ #include "../innobase/include/dict0crea.h"
+ #include "../innobase/include/btr0cur.h"
++#include "../innobase/include/buf0buf.h"
+ #include "../innobase/include/btr0btr.h"
+ #include "../innobase/include/fsp0fsp.h"
+ #include "../innobase/include/sync0sync.h"
+ #include "../innobase/include/fil0fil.h"
++#include "../innobase/include/page0page.h"
+ #include "../innobase/include/trx0xa.h"
+ }
+ 
+@@ -6483,6 +6485,116 @@
+   	DBUG_RETURN(FALSE);
+ }
+ 
++bool
++innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables)
++{
++	ulint		size;
++	ulint		i;
++	dulint		id;
<Skipped 769 lines>
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/percona-server.git/commitdiff/431f68fe79a66d5dfdd53f2655709e6c925fbc22



More information about the pld-cvs-commit mailing list