[packages/percona-server/v5.0.x: 77/202] - from http://www.percona.com/mysql/5.0.68/patches/

glen glen at pld-linux.org
Wed Oct 21 16:15:30 CEST 2015


commit 833f26399d2f12a1a80df4076aee97f40af0e57b
Author: Elan Ruusamäe <glen at pld-linux.org>
Date:   Wed Sep 17 14:22:43 2008 +0000

    - from http://www.percona.com/mysql/5.0.68/patches/
    
    Changed files:
        mysql-acc-pslist.patch -> 1.1.2.1
        mysql-control_flush_and_merge_and_read.patch -> 1.1.2.1
        mysql-control_io-threads.patch -> 1.1.2.1
        mysql-microslow_innodb.patch -> 1.1.2.1
        mysql-show_patches.patch -> 1.1.2.1
        mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch -> 1.1.2.1
        mysql-userstats-testsuite.patch -> 1.1.2.1
        mysql-userstats.patch -> 1.1.2.1

 mysql-acc-pslist.patch                             |  115 +
 mysql-control_flush_and_merge_and_read.patch       |  238 ++
 mysql-control_io-threads.patch                     |   69 +
 mysql-microslow_innodb.patch                       | 2333 ++++++++++++++++++++
 mysql-show_patches.patch                           |  294 +++
 ...plit_buf_pool_mutex_fixed_optimistic_safe.patch | 1302 +++++++++++
 mysql-userstats-testsuite.patch                    |  222 ++
 mysql-userstats.patch                              | 1453 ++++++++++++
 8 files changed, 6026 insertions(+)
---
diff --git a/mysql-acc-pslist.patch b/mysql-acc-pslist.patch
new file mode 100644
index 0000000..f54950e
--- /dev/null
+++ b/mysql-acc-pslist.patch
@@ -0,0 +1,115 @@
+diff -r 174803e7e869 mysql-test/r/create.result
+--- a/mysql-test/r/create.result	Thu Sep 04 12:17:56 2008 -0700
++++ b/mysql-test/r/create.result	Thu Sep 04 12:20:19 2008 -0700
+@@ -1720,7 +1720,8 @@
+   `COMMAND` varchar(16) NOT NULL DEFAULT '',
+   `TIME` bigint(7) NOT NULL DEFAULT '0',
+   `STATE` varchar(64) DEFAULT NULL,
+-  `INFO` longtext
++  `INFO` longtext,
++  `TIME_MS` decimal(22,3) NOT NULL DEFAULT '0.000'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create temporary table t1 like information_schema.processlist;
+@@ -1734,7 +1735,8 @@
+   `COMMAND` varchar(16) NOT NULL DEFAULT '',
+   `TIME` bigint(7) NOT NULL DEFAULT '0',
+   `STATE` varchar(64) DEFAULT NULL,
+-  `INFO` longtext
++  `INFO` longtext,
++  `TIME_MS` decimal(22,3) NOT NULL DEFAULT '0.000'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create table t1 like information_schema.character_sets;
+diff -r 174803e7e869 mysql-test/r/not_embedded_server.result
+--- a/mysql-test/r/not_embedded_server.result	Thu Sep 04 12:17:56 2008 -0700
++++ b/mysql-test/r/not_embedded_server.result	Thu Sep 04 12:20:19 2008 -0700
+@@ -1,7 +1,7 @@
+ prepare stmt1 from ' SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!=\'Daemon\' ';
+ execute stmt1;
+-ID	USER	HOST	DB	COMMAND	TIME	STATE	INFO
+-number	root	localhost	test	Query	time	executing	SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!='Daemon'
++ID	USER	HOST	DB	COMMAND	TIME	STATE	INFO	TIME_MS
++number	root	localhost	test	Query	time	executing	SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!='Daemon'	time_ms
+ deallocate prepare stmt1;
+ FLUSH STATUS;
+ SHOW GLOBAL STATUS LIKE 'com_select';
+diff -r 174803e7e869 mysql-test/t/not_embedded_server.test
+--- a/mysql-test/t/not_embedded_server.test	Thu Sep 04 12:17:56 2008 -0700
++++ b/mysql-test/t/not_embedded_server.test	Thu Sep 04 12:20:19 2008 -0700
+@@ -16,7 +16,7 @@
+ # End of 4.1 tests
+ 
+ prepare stmt1 from ' SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!=\'Daemon\' ';
+---replace_column 1 number 6 time 3 localhost
++--replace_column 1 number 6 time 3 localhost 9 time_ms
+ execute stmt1;
+ deallocate prepare stmt1;
+ 
+diff -r 174803e7e869 patch_info/acc-pslist.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/acc-pslist.info	Thu Sep 04 12:20:19 2008 -0700
+@@ -0,0 +1,6 @@
++File=acc-pslist.patch
++Name=Milliseconds in PROCESSLIST
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=
+diff -r 174803e7e869 sql/sql_show.cc
+--- a/sql/sql_show.cc	Thu Sep 04 12:17:56 2008 -0700
++++ b/sql/sql_show.cc	Thu Sep 04 12:20:19 2008 -0700
+@@ -1803,7 +1803,7 @@
+   TABLE *table= tables->table;
+   CHARSET_INFO *cs= system_charset_info;
+   char *user;
+-  time_t now= my_time(0);
++  ulonglong unow= my_micro_time();
+   DBUG_ENTER("fill_process_list");
+ 
+   user= thd->security_ctx->master_access & PROCESS_ACL ?
+@@ -1861,8 +1861,8 @@
+         table->field[4]->store(command_name[tmp->command].str,
+                                command_name[tmp->command].length, cs);
+       /* MYSQL_TIME */
+-      table->field[5]->store((uint32)(tmp->start_time ?
+-                                      now - tmp->start_time : 0), TRUE);
++      const ulonglong utime= tmp->start_utime ? unow - tmp->start_utime : 0;
++      table->field[5]->store(utime / 1000000, TRUE);
+       /* STATE */
+ #ifndef EMBEDDED_LIBRARY
+       val= (char*) (tmp->locked ? "Locked" :
+@@ -1896,11 +1896,15 @@
+         table->field[7]->set_notnull();
+       }
+ 
++      /* TIME_MS */
++      table->field[8]->store((double)(utime / 1000.0));
++
+       if (schema_table_store_record(thd, table))
+       {
+         VOID(pthread_mutex_unlock(&LOCK_thread_count));
+         DBUG_RETURN(1);
+       }
++
+     }
+   }
+ 
+@@ -5532,7 +5536,7 @@
+     into it two numbers, based on modulus of base-10 numbers.  In the ones
+     position is the number of decimals.  Tens position is unused.  In the
+     hundreds and thousands position is a two-digit decimal number representing
+-    length.  Encode this value with  (decimals*100)+length  , where
++    length.  Encode this value with  (length*100)+decimals  , where
+     0<decimals<10 and 0<=length<100 .
+ 
+   @param
+@@ -6540,6 +6544,8 @@
+   {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
+   {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
+    SKIP_OPEN_TABLE},
++  {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL,
++    0, 0, "Time_ms", SKIP_OPEN_TABLE},
+   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+ };
+ 
diff --git a/mysql-control_flush_and_merge_and_read.patch b/mysql-control_flush_and_merge_and_read.patch
new file mode 100644
index 0000000..aa87a0d
--- /dev/null
+++ b/mysql-control_flush_and_merge_and_read.patch
@@ -0,0 +1,238 @@
+diff -r 2fdaeb546d25 innobase/buf/buf0rea.c
+--- a/innobase/buf/buf0rea.c	Mon Sep 08 16:39:06 2008 -0700
++++ b/innobase/buf/buf0rea.c	Mon Sep 08 16:40:14 2008 -0700
+@@ -188,6 +188,10 @@
+ 	ulint		low, high;
+ 	ulint		err;
+ 	ulint		i;
++
++	if (!(srv_read_ahead & 1)) {
++		return(0);
++	}
+ 
+ 	if (srv_startup_is_before_trx_rollback_phase) {
+ 	        /* No read-ahead to avoid thread deadlocks */
+@@ -396,6 +400,10 @@
+ 	ulint		err;
+ 	ulint		i;
+ 	
++	if (!(srv_read_ahead & 2)) {
++		return(0);
++	}
++
+ 	if (srv_startup_is_before_trx_rollback_phase) {
+ 	        /* No read-ahead to avoid thread deadlocks */
+ 	        return(0);
+diff -r 2fdaeb546d25 innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h	Mon Sep 08 16:39:06 2008 -0700
++++ b/innobase/include/srv0srv.h	Mon Sep 08 16:40:14 2008 -0700
+@@ -131,6 +131,12 @@
+ extern ulong	srv_max_purge_lag;
+ extern ibool	srv_use_awe;
+ extern ibool	srv_use_adaptive_hash_indexes;
++
++extern ulint	srv_read_ahead;
++extern ulint	srv_ibuf_contract_const;
++extern ulint	srv_ibuf_contract_burst;
++extern ulint	srv_buf_flush_const;
++extern ulint	srv_buf_flush_burst;
+ /*-------------------------------------------*/
+ 
+ extern ulint	srv_n_rows_inserted;
+diff -r 2fdaeb546d25 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c	Mon Sep 08 16:39:06 2008 -0700
++++ b/innobase/srv/srv0srv.c	Mon Sep 08 16:40:14 2008 -0700
+@@ -322,6 +322,11 @@
+ ibool	srv_use_awe			= FALSE;
+ ibool	srv_use_adaptive_hash_indexes 	= TRUE;
+ 
++ulint	srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
++ulint	srv_ibuf_contract_const	= 5;
++ulint	srv_ibuf_contract_burst = 20;
++ulint	srv_buf_flush_const = 10;
++ulint	srv_buf_flush_burst = 100;
+ /*-------------------------------------------*/
+ ulong	srv_n_spin_wait_rounds	= 20;
+ ulong	srv_n_free_tickets_to_enter = 500;
+@@ -2298,7 +2303,7 @@
+ 						+ buf_pool->n_pages_written;
+ 		if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
+ 			srv_main_thread_op_info = "doing insert buffer merge";
+-			ibuf_contract_for_n_pages(TRUE, 5);
++			ibuf_contract_for_n_pages(TRUE, srv_ibuf_contract_burst);
+ 
+ 			srv_main_thread_op_info = "flushing log";
+ 
+@@ -2311,7 +2316,7 @@
+ 			/* Try to keep the number of modified pages in the
+ 			buffer pool under the limit wished by the user */
+ 			
+-			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst,
+ 							  ut_dulint_max);
+ 
+ 		        /* If we had to do the flush, it may have taken
+@@ -2349,7 +2354,7 @@
+ 	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+ 
+ 		srv_main_thread_op_info = "flushing buffer pool pages";
+-		buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++		buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst, ut_dulint_max);
+ 
+ 		srv_main_thread_op_info = "flushing log";
+ 		log_buffer_flush_to_disk();
+@@ -2359,7 +2364,7 @@
+ 	even if the server were active */
+ 
+ 	srv_main_thread_op_info = "doing insert buffer merge";
+-	ibuf_contract_for_n_pages(TRUE, 5);
++	ibuf_contract_for_n_pages(TRUE, srv_ibuf_contract_const);
+ 
+ 	srv_main_thread_op_info = "flushing log";
+ 	log_buffer_flush_to_disk();
+@@ -2401,14 +2406,14 @@
+ 		(> 70 %), we assume we can afford reserving the disk(s) for
+ 		the time it requires to flush 100 pages */
+ 
+-	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst,
+ 							ut_dulint_max);
+ 	} else {
+ 	        /* Otherwise, we only flush a small number of pages so that
+ 		we do not unnecessarily use much disk i/o capacity from
+ 		other work */
+ 
+-	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
++	        n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_const,
+ 							ut_dulint_max);
+ 	}
+ 
+@@ -2497,7 +2502,7 @@
+ 	if (srv_fast_shutdown && srv_shutdown_state > 0) {
+ 	        n_bytes_merged = 0;
+ 	} else {
+-	        n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
++	        n_bytes_merged = ibuf_contract_for_n_pages(TRUE, srv_ibuf_contract_burst);
+ 	}
+ 
+ 	srv_main_thread_op_info = "reserving kernel mutex";
+@@ -2514,7 +2519,7 @@
+ 
+ 	if (srv_fast_shutdown < 2) {
+ 		n_pages_flushed =
+-			buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++			buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst, ut_dulint_max);
+ 	} else {
+ 		/* In the fastest shutdown we do not flush the buffer pool
+ 		to data files: we set n_pages_flushed to 0 artificially. */
+diff -r 2fdaeb546d25 patch_info/control_flush_and_merge_and_read.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/control_flush_and_merge_and_read.info	Mon Sep 08 16:40:14 2008 -0700
+@@ -0,0 +1,6 @@
++File=control_flush_and_merge_and_read.patch
++Name=InnoDB patch to control insert buffer and flushing
++Version=1.0
++Author=Yasufumi Kinoshita
++License=BSD
++Comment=
+diff -r 2fdaeb546d25 sql/ha_innodb.h
+--- a/sql/ha_innodb.h	Mon Sep 08 16:39:06 2008 -0700
++++ b/sql/ha_innodb.h	Mon Sep 08 16:40:14 2008 -0700
+@@ -234,6 +234,11 @@
+ extern ulong srv_thread_concurrency;
+ extern ulong srv_commit_concurrency;
+ extern ulong srv_flush_log_at_trx_commit;
++extern ulong srv_read_ahead;
++extern ulong srv_ibuf_contract_const;
++extern ulong srv_ibuf_contract_burst;
++extern ulong srv_buf_flush_const;
++extern ulong srv_buf_flush_burst;
+ }
+ 
+ bool innobase_init(void);
+diff -r 2fdaeb546d25 sql/mysqld.cc
+--- a/sql/mysqld.cc	Mon Sep 08 16:39:06 2008 -0700
++++ b/sql/mysqld.cc	Mon Sep 08 16:40:14 2008 -0700
+@@ -5014,7 +5014,10 @@
+   OPT_SECURE_FILE_PRIV,
+   OPT_KEEP_FILES_ON_CREATE,
+   OPT_INNODB_ADAPTIVE_HASH_INDEX,
+-  OPT_FEDERATED
++  OPT_FEDERATED,
++  OPT_INNODB_READ_AHEAD,
++  OPT_INNODB_IBUF_CONTRACT_CONST, OPT_INNODB_IBUF_CONTRACT_BURST,
++  OPT_INNODB_BUF_FLUSH_CONST, OPT_INNODB_BUF_FLUSH_BURST
+ };
+ 
+ 
+@@ -5321,6 +5324,26 @@
+    (gptr*) &global_system_variables.innodb_table_locks,
+    (gptr*) &global_system_variables.innodb_table_locks,
+    0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
++  {"innodb_read_ahead", OPT_INNODB_READ_AHEAD,
++   "Enable/Diasable read aheads bit0:random bit1:linear",
++   (gptr*) &srv_read_ahead, (gptr*) &srv_read_ahead,
++   0, GET_ULONG, REQUIRED_ARG, 3, 0, 3, 0, 0, 0},
++  {"innodb_ibuf_contract_const", OPT_INNODB_IBUF_CONTRACT_CONST,
++   "Const activity of merging insert buffer",
++   (gptr*) &srv_ibuf_contract_const, (gptr*) &srv_ibuf_contract_const,
++   0, GET_ULONG, REQUIRED_ARG, 5, 1, 50000, 0, 0, 0},
++  {"innodb_ibuf_contract_burst", OPT_INNODB_IBUF_CONTRACT_BURST,
++   "Burst activity of merging insert buffer",
++   (gptr*) &srv_ibuf_contract_burst, (gptr*) &srv_ibuf_contract_burst,
++   0, GET_ULONG, REQUIRED_ARG, 20, 1, 50000, 0, 0, 0},
++  {"innodb_buf_flush_const", OPT_INNODB_BUF_FLUSH_CONST,
++   "Const activity of flushing buffer pool",
++   (gptr*) &srv_buf_flush_const, (gptr*) &srv_buf_flush_const,
++   0, GET_ULONG, REQUIRED_ARG, 10, 1, 50000, 0, 0, 0},
++  {"innodb_buf_flush_burst", OPT_INNODB_BUF_FLUSH_BURST,
++   "Burst activity of flushing buffer pool",
++   (gptr*) &srv_buf_flush_burst, (gptr*) &srv_buf_flush_burst,
++   0, GET_ULONG, REQUIRED_ARG, 100, 1, 50000, 0, 0, 0},
+ #endif /* End HAVE_INNOBASE_DB */
+   {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
+    (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
+diff -r 2fdaeb546d25 sql/set_var.cc
+--- a/sql/set_var.cc	Mon Sep 08 16:39:06 2008 -0700
++++ b/sql/set_var.cc	Mon Sep 08 16:40:14 2008 -0700
+@@ -476,6 +476,16 @@
+ sys_var_long_ptr  sys_innodb_flush_log_at_trx_commit(
+                                         "innodb_flush_log_at_trx_commit",
+                                         &srv_flush_log_at_trx_commit);
++sys_var_long_ptr	sys_innodb_read_ahead("innodb_read_ahead",
++                                              &srv_read_ahead);
++sys_var_long_ptr	sys_innodb_ibuf_contract_const("innodb_ibuf_contract_const",
++                                                       &srv_ibuf_contract_const);
++sys_var_long_ptr	sys_innodb_ibuf_contract_burst("innodb_ibuf_contract_burst",
++                                                       &srv_ibuf_contract_burst);
++sys_var_long_ptr	sys_innodb_buf_flush_const("innodb_buf_flush_const",
++                                                   &srv_buf_flush_const);
++sys_var_long_ptr	sys_innodb_buf_flush_burst("innodb_buf_flush_burst",
++                                                   &srv_buf_flush_burst);
+ #endif
+ 
+ /* Condition pushdown to storage engine */
+@@ -818,6 +828,11 @@
+   &sys_innodb_thread_concurrency,
+   &sys_innodb_commit_concurrency,
+   &sys_innodb_flush_log_at_trx_commit,
++  &sys_innodb_read_ahead,
++  &sys_innodb_ibuf_contract_const,
++  &sys_innodb_ibuf_contract_burst,
++  &sys_innodb_buf_flush_const,
++  &sys_innodb_buf_flush_burst,
+ #endif
+   &sys_trust_routine_creators,
+   &sys_trust_function_creators,
+@@ -953,6 +968,11 @@
+   {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
+   {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
+   {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
++  {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS},
++  {sys_innodb_ibuf_contract_const.name, (char*) &sys_innodb_ibuf_contract_const, SHOW_SYS},
++  {sys_innodb_ibuf_contract_burst.name, (char*) &sys_innodb_ibuf_contract_burst, SHOW_SYS},
++  {sys_innodb_buf_flush_const.name, (char*) &sys_innodb_buf_flush_const, SHOW_SYS},
++  {sys_innodb_buf_flush_burst.name, (char*) &sys_innodb_buf_flush_burst, SHOW_SYS},
+ #endif
+   {sys_interactive_timeout.name,(char*) &sys_interactive_timeout,   SHOW_SYS},
+   {sys_join_buffer_size.name,   (char*) &sys_join_buffer_size,	    SHOW_SYS},
diff --git a/mysql-control_io-threads.patch b/mysql-control_io-threads.patch
new file mode 100644
index 0000000..7f155b1
--- /dev/null
+++ b/mysql-control_io-threads.patch
@@ -0,0 +1,69 @@
+diff -r 4dca80df8ee3 innobase/os/os0file.c
+--- a/innobase/os/os0file.c	Mon Sep 08 16:40:14 2008 -0700
++++ b/innobase/os/os0file.c	Mon Sep 08 16:40:20 2008 -0700
+@@ -3180,6 +3180,13 @@
+ 	struct aiocb*	control;
+ #endif
+ 	ulint		i;
++	ulint		prim_segment;
++	ulint		n;
++
++	n = array->n_slots / array->n_segments;
++	/* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */
++	prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
++
+ loop:
+ 	os_mutex_enter(array->mutex);
+ 
+@@ -3198,11 +3205,22 @@
+ 		goto loop;
+ 	}
+ 
+-	for (i = 0;; i++) {
++	for (i = prim_segment * n; i < array->n_slots; i++) {
+ 		slot = os_aio_array_get_nth_slot(array, i);
+ 
+ 		if (slot->reserved == FALSE) {
+ 			break;
++		}
++	}
++
++	if (slot->reserved == TRUE){
++		/* Not found after the intended segment. So we should search before. */
++		for (i = 0;; i++) {
++			slot = os_aio_array_get_nth_slot(array, i);
++
++			if (slot->reserved == FALSE) {
++				break;
++			}
+ 		}
+ 	}
+ 
+diff -r 4dca80df8ee3 innobase/srv/srv0start.c
+--- a/innobase/srv/srv0start.c	Mon Sep 08 16:40:14 2008 -0700
++++ b/innobase/srv/srv0start.c	Mon Sep 08 16:40:20 2008 -0700
+@@ -1213,12 +1213,12 @@
+ 
+ 	if (!os_aio_use_native_aio) {
+  		/* In simulated aio we currently have use only for 4 threads */
+-		srv_n_file_io_threads = 4;
++		/*srv_n_file_io_threads = 4;*/
+ 
+ 		os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
+ 						* srv_n_file_io_threads,
+ 					srv_n_file_io_threads,
+-					SRV_MAX_N_PENDING_SYNC_IOS);
++					SRV_MAX_N_PENDING_SYNC_IOS * srv_n_file_io_threads / 4);
+ 	} else {
+ 		os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
+ 						* srv_n_file_io_threads,
+diff -r 4dca80df8ee3 patch_info/control_io-threads.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/control_io-threads.info	Mon Sep 08 16:40:20 2008 -0700
+@@ -0,0 +1,6 @@
++File=control_io-threads.patch
++Name=InnoDB patch to control count of IO threads
++Version=1.0
++Author=Yasufumi Kinoshita
++License=BSD
++Comment=
diff --git a/mysql-microslow_innodb.patch b/mysql-microslow_innodb.patch
new file mode 100644
index 0000000..b173cb1
--- /dev/null
+++ b/mysql-microslow_innodb.patch
@@ -0,0 +1,2333 @@
+diff -r bb81fcdd7db2 include/my_time.h
+--- a/include/my_time.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/include/my_time.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -140,7 +140,7 @@
+ int my_date_to_str(const MYSQL_TIME *l_time, char *to);
+ int my_datetime_to_str(const MYSQL_TIME *l_time, char *to);
+ int my_TIME_to_str(const MYSQL_TIME *l_time, char *to);
+-
++ulonglong my_timer(ulonglong *ltime, ulonglong frequency);
+ C_MODE_END
+ 
+ #endif /* _my_time_h_ */
+diff -r bb81fcdd7db2 innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/buf/buf0buf.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -37,6 +37,7 @@
+ #include "log0log.h"
+ #include "trx0undo.h"
+ #include "srv0srv.h"
++#include "thr0loc.h"
+ 
+ /*
+ 		IMPLEMENTATION OF THE BUFFER POOL
+@@ -1086,6 +1087,31 @@
+ 	return(block);
+ }
+ 
++inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
++{
++	ulint           block_hash;
++	ulint           block_hash_byte;
++	byte            block_hash_offset;
++
++	ut_ad(block);
++
++	if (!trx || !trx->distinct_page_access_hash)
++		return;
++
++        block_hash = ut_hash_ulint((block->space << 20) + block->space +
++					block->offset, DPAH_SIZE << 3);
++	block_hash_byte = block_hash >> 3;
++	block_hash_offset = (byte) block_hash & 0x07;
++	if (block_hash_byte < 0 || block_hash_byte >= DPAH_SIZE)
++		fprintf(stderr, "!!! block_hash_byte = %lu  block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset);
++	if (block_hash_offset < 0 || block_hash_offset > 7)
++		fprintf(stderr, "!!! block_hash_byte = %lu  block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset);
++	if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0)
++		trx->distinct_page_access++;
++	trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset;
++	return;
++}
++
+ /************************************************************************
+ This is the general function used to get access to a database page. */
+ 
+@@ -1108,6 +1134,11 @@
+ 	ulint		fix_type;
+ 	ibool		success;
+ 	ibool		must_read;
++	trx_t*          trx;
++	ulint           sec;
++	ulint           ms;
++	ib_longlong     start_time;
++	ib_longlong     finish_time;
+ 	
+ 	ut_ad(mtr);
+ 	ut_ad((rw_latch == RW_S_LATCH)
+@@ -1119,6 +1150,7 @@
+ #ifndef UNIV_LOG_DEBUG
+ 	ut_ad(!ibuf_inside() || ibuf_page(space, offset));
+ #endif
++	trx = thr_local_get_trx(os_thread_get_curr_id());
+ 	buf_pool->n_page_gets++;
+ loop:
+ 	block = NULL;
+@@ -1148,7 +1180,7 @@
+ 			return(NULL);
+ 		}
+ 
+-		buf_read_page(space, offset);
++		buf_read_page(space, offset, trx);
+ 
+ #ifdef UNIV_DEBUG
+ 		buf_dbg_counter++;
+@@ -1261,6 +1293,11 @@
+ 		        /* Let us wait until the read operation
+ 			completes */
+ 
++			if (trx)
++			{
++				ut_usectime(&sec, &ms);
++				start_time = (ib_longlong)sec * 1000000 + ms;
++			}
+ 		        for (;;) {
+ 				mutex_enter(&block->mutex);
+ 
+@@ -1275,6 +1312,12 @@
+ 
+ 				       break;
+ 				}
++			}
++                	if (trx)
++			{
++				ut_usectime(&sec, &ms);
++        	        	finish_time = (ib_longlong)sec * 1000000 + ms;
++                		trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
+ 			}
+ 		}
+ 
+@@ -1296,12 +1339,15 @@
+ 		/* In the case of a first access, try to apply linear
+ 		read-ahead */
+ 
+-		buf_read_ahead_linear(space, offset);
++		buf_read_ahead_linear(space, offset, trx);
+ 	}
+ 
+ #ifdef UNIV_IBUF_DEBUG
+ 	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ #endif
++
++	_increment_page_get_statistics(block, trx);
++	
+ 	return(block->frame);		
+ }
+ 
+@@ -1326,6 +1372,7 @@
+ 	ibool		accessed;
+ 	ibool		success;
+ 	ulint		fix_type;
++	trx_t*          trx;
+ 
+ 	ut_ad(mtr && block);
+ 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+@@ -1440,13 +1487,16 @@
+ 		read-ahead */
+ 
+ 		buf_read_ahead_linear(buf_frame_get_space_id(guess),
+-					buf_frame_get_page_no(guess));
++					buf_frame_get_page_no(guess), trx);
+ 	}
+ 
+ #ifdef UNIV_IBUF_DEBUG
+ 	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ #endif
+ 	buf_pool->n_page_gets++;
++
++	trx = thr_local_get_trx(os_thread_get_curr_id());
++	_increment_page_get_statistics(block, trx);
+ 
+ 	return(TRUE);
+ }
+@@ -1470,6 +1520,7 @@
+ 	buf_block_t*	block;
+ 	ibool		success;
+ 	ulint		fix_type;
++	trx_t*		trx;
+ 
+ 	ut_ad(mtr);
+ 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+@@ -1558,6 +1609,9 @@
+ 		|| (ibuf_count_get(block->space, block->offset) == 0));
+ #endif
+ 	buf_pool->n_page_gets++;
++
++	trx = thr_local_get_trx(os_thread_get_curr_id());
++	_increment_page_get_statistics(block, trx);
+ 
+ 	return(TRUE);
+ }
+diff -r bb81fcdd7db2 innobase/buf/buf0rea.c
+--- a/innobase/buf/buf0rea.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/buf/buf0rea.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -70,7 +70,8 @@
+ 			treat the tablespace as dropped; this is a timestamp we
+ 			use to stop dangling page reads from a tablespace
+ 			which we have DISCARDed + IMPORTed back */
+-	ulint	offset)	/* in: page number */
++	ulint	offset,	/* in: page number */
++	trx_t*  trx)
+ {
+ 	buf_block_t*	block;
+ 	ulint		wake_later;
+@@ -140,10 +141,10 @@
+ 
+ 	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ 
+-	*err = fil_io(OS_FILE_READ | wake_later,
++	*err = _fil_io(OS_FILE_READ | wake_later,
+ 			sync, space,
+ 			offset, 0, UNIV_PAGE_SIZE,
+-			(void*)block->frame, (void*)block);
++			(void*)block->frame, (void*)block, trx);
+ 	ut_a(*err == DB_SUCCESS);
+ 
+ 	if (sync) {
+@@ -174,8 +175,9 @@
+ 			the page at the given page number does not get
+ 			read even if we return a value > 0! */
+ 	ulint	space,	/* in: space id */
+-	ulint	offset)	/* in: page number of a page which the current thread
++	ulint	offset,	/* in: page number of a page which the current thread
+ 			wants to access */
++	trx_t*  trx)
+ {
+ 	ib_longlong	tablespace_version;
+ 	buf_block_t*	block;
+@@ -270,7 +272,7 @@
+ 		if (!ibuf_bitmap_page(i)) {
+ 			count += buf_read_page_low(&err, FALSE, ibuf_mode
+ 					| OS_AIO_SIMULATED_WAKE_LATER,
+-				        space, tablespace_version, i);
++				        space, tablespace_version, i, trx);
+ 			if (err == DB_TABLESPACE_DELETED) {
+ 				ut_print_timestamp(stderr);
+ 				fprintf(stderr,
+@@ -314,7 +316,8 @@
+ 			/* out: number of page read requests issued: this can
+ 			be > 1 if read-ahead occurred */
+ 	ulint	space,	/* in: space id */
+-	ulint	offset)	/* in: page number */
++	ulint	offset,	/* in: page number */
++	trx_t*  trx)
+ {
+ 	ib_longlong	tablespace_version;
+ 	ulint		count;
+@@ -323,13 +326,13 @@
+ 
+ 	tablespace_version = fil_space_get_version(space);
+ 
+-	count = buf_read_ahead_random(space, offset);
++	count = buf_read_ahead_random(space, offset, trx);
+ 
+ 	/* We do the i/o in the synchronous aio mode to save thread
+ 	switches: hence TRUE */
+ 
+ 	count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+-					tablespace_version, offset);
++					tablespace_version, offset, trx);
+         srv_buf_pool_reads+= count2;
+ 	if (err == DB_TABLESPACE_DELETED) {
+ 	        ut_print_timestamp(stderr);
+@@ -374,8 +377,9 @@
+ /*==================*/
+ 			/* out: number of page read requests issued */
+ 	ulint	space,	/* in: space id */
+-	ulint	offset)	/* in: page number of a page; NOTE: the current thread
++	ulint	offset,	/* in: page number of a page; NOTE: the current thread
+ 			must want access to this page (see NOTE 3 above) */
++	trx_t*  trx)
+ {
+ 	ib_longlong	tablespace_version;
+ 	buf_block_t*	block;
+@@ -556,7 +560,7 @@
+ 		if (!ibuf_bitmap_page(i)) {
+ 			count += buf_read_page_low(&err, FALSE, ibuf_mode
+ 					| OS_AIO_SIMULATED_WAKE_LATER,
+-					space, 	tablespace_version, i);
++					space, 	tablespace_version, i, trx);
+ 			if (err == DB_TABLESPACE_DELETED) {
+ 				ut_print_timestamp(stderr);
+ 				fprintf(stderr,
+@@ -625,10 +629,10 @@
+ 	for (i = 0; i < n_stored; i++) {
+ 		if ((i + 1 == n_stored) && sync) {
+ 			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
+-				space_ids[i], space_versions[i], page_nos[i]);
++				space_ids[i], space_versions[i], page_nos[i], NULL);
+ 		} else {
+ 			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE,
+-				space_ids[i], space_versions[i], page_nos[i]);
++				space_ids[i], space_versions[i], page_nos[i], NULL);
+ 		}
+ 
+ 		if (err == DB_TABLESPACE_DELETED) {
+@@ -704,11 +708,11 @@
+ 
+ 		if ((i + 1 == n_stored) && sync) {
+ 			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+-					tablespace_version, page_nos[i]);
++					tablespace_version, page_nos[i], NULL);
+ 		} else {
+ 			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
+ 					| OS_AIO_SIMULATED_WAKE_LATER,
+-				       space, tablespace_version, page_nos[i]);
++				       space, tablespace_version, page_nos[i], NULL);
+ 		}
+ 	}
+ 	
+diff -r bb81fcdd7db2 innobase/fil/fil0fil.c
+--- a/innobase/fil/fil0fil.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/fil/fil0fil.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -3527,7 +3527,7 @@
+ 			node->name, node->handle, buf,
+ 			offset_low, offset_high,
+ 			UNIV_PAGE_SIZE * n_pages,
+-			NULL, NULL);
++			NULL, NULL, NULL);
+ #endif
+ 		if (success) {
+ 			node->size += n_pages;
+@@ -3851,7 +3851,7 @@
+ Reads or writes data. This operation is asynchronous (aio). */
+ 
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+ 				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+ 				if we are trying to do i/o on a tablespace
+@@ -3877,8 +3877,9 @@
+ 	void*	buf,		/* in/out: buffer where to store read data
+ 				or from where to write; in aio this must be
+ 				appropriately aligned */
+-	void*	message)	/* in: message for aio handler if non-sync
++	void*	message,	/* in: message for aio handler if non-sync
+ 				aio used, else ignored */
++	trx_t*  trx)
+ {
+ 	fil_system_t*	system		= fil_system;
+ 	ulint		mode;
+@@ -4018,7 +4019,7 @@
+ #else
+ 	/* Queue the aio request */
+ 	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
+-				offset_low, offset_high, len, node, message);
++				offset_low, offset_high, len, node, message, trx);
+ #endif
+ 	ut_a(ret);
+ 
+diff -r bb81fcdd7db2 innobase/include/buf0rea.h
+--- a/innobase/include/buf0rea.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/buf0rea.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -10,6 +10,7 @@
+ #define buf0rea_h
+ 
+ #include "univ.i"
++#include "trx0types.h"
+ #include "buf0types.h"
+ 
+ /************************************************************************
+@@ -25,7 +26,8 @@
+ 			/* out: number of page read requests issued: this can
+ 			be > 1 if read-ahead occurred */
+ 	ulint	space,	/* in: space id */
+-	ulint	offset);/* in: page number */
++	ulint	offset,	/* in: page number */
++	trx_t*  trx);
+ /************************************************************************
+ Applies linear read-ahead if in the buf_pool the page is a border page of
+ a linear read-ahead area and all the pages in the area have been accessed.
+@@ -55,8 +57,9 @@
+ /*==================*/
+ 			/* out: number of page read requests issued */
+ 	ulint	space,	/* in: space id */
+-	ulint	offset);/* in: page number of a page; NOTE: the current thread
++	ulint	offset,	/* in: page number of a page; NOTE: the current thread
+ 			must want access to this page (see NOTE 3 above) */
++	trx_t*  trx);
+ /************************************************************************
+ Issues read requests for pages which the ibuf module wants to read in, in
+ order to contract the insert buffer tree. Technically, this function is like
+diff -r bb81fcdd7db2 innobase/include/fil0fil.h
+--- a/innobase/include/fil0fil.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/fil0fil.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -534,8 +534,11 @@
+ /************************************************************************
+ Reads or writes data. This operation is asynchronous (aio). */
+ 
++#define fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message) \
++	_fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message, NULL)
++
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+ 				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+ 				if we are trying to do i/o on a tablespace
+@@ -561,8 +564,9 @@
+ 	void*	buf,		/* in/out: buffer where to store read data
+ 				or from where to write; in aio this must be
+ 				appropriately aligned */
+-	void*	message);	/* in: message for aio handler if non-sync
++	void*	message,	/* in: message for aio handler if non-sync
+ 				aio used, else ignored */
++	trx_t*  trx);
+ /************************************************************************
+ Reads data from a space to a buffer. Remember that the possible incomplete
+ blocks at the end of file are ignored: they are not taken into account when
+diff -r bb81fcdd7db2 innobase/include/os0file.h
+--- a/innobase/include/os0file.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/os0file.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -10,6 +10,8 @@
+ #define os0file_h
+ 
+ #include "univ.i"
++
++#include "trx0types.h"
+ 
+ #ifndef __WIN__
+ #include <dirent.h>
+@@ -421,8 +423,11 @@
+ /***********************************************************************
+ Requests a synchronous read operation. */
+ 
++#define os_file_read(file, buf, offset, offset_high, n)         \
++		_os_file_read(file, buf, offset, offset_high, n, NULL)
++
+ ibool
+-os_file_read(
++_os_file_read(
+ /*=========*/
+ 				/* out: TRUE if request was
+ 				successful, FALSE if fail */
+@@ -432,7 +437,8 @@
+ 				offset where to read */
+ 	ulint		offset_high,/* in: most significant 32 bits of
+ 				offset */
+-	ulint		n);	/* in: number of bytes to read */	
++	ulint		n,	/* in: number of bytes to read */
++	trx_t*		trx);
+ /***********************************************************************
+ Rewind file to its start, read at most size - 1 bytes from it to str, and
+ NUL-terminate str. All errors are silently ignored. This function is
+@@ -584,7 +590,8 @@
+ 				can be used to identify a completed aio
+ 				operation); if mode is OS_AIO_SYNC, these
+ 				are ignored */
+-	void*		message2);
++	void*		message2,
++	trx_t*          trx);
+ /****************************************************************************
+ Wakes up all async i/o threads so that they know to exit themselves in
+ shutdown. */
+diff -r bb81fcdd7db2 innobase/include/thr0loc.h
+--- a/innobase/include/thr0loc.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/thr0loc.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -15,6 +15,7 @@
+ 
+ #include "univ.i"
+ #include "os0thread.h"
++#include "trx0trx.h"
+ 
+ /********************************************************************
+ Initializes the thread local storage module. */
+@@ -36,6 +37,14 @@
+ /*===========*/
+ 	os_thread_id_t	id);	/* in: thread id */
+ /***********************************************************************
++Gets the trx stored in the local storage of a thread. */
++
++trx_t*
++thr_local_get_trx(
++/*==================*/
++				/* out: trx stored for the thread */
++	os_thread_id_t	id);	/* in: thread id of the thread */
++/***********************************************************************
+ Gets the slot number in the thread table of a thread. */
+ 
+ ulint
+@@ -43,6 +52,14 @@
+ /*==================*/
+ 				/* out: slot number */
+ 	os_thread_id_t	id);	/* in: thread id of the thread */
++/***********************************************************************
++Sets in the local storage the trx associated with a thread. */
++
++void
++thr_local_set_trx(
++/*==================*/
++	os_thread_id_t	id,	/* in: thread id of the thread */
++	trx_t*		trx);	/* in: trx to store for the thread */
+ /***********************************************************************
+ Sets in the local storage the slot number in the thread table of a thread. */
+ 
+diff -r bb81fcdd7db2 innobase/include/trx0trx.h
+--- a/innobase/include/trx0trx.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/trx0trx.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -668,6 +668,17 @@
+ 	/*------------------------------*/
+ 	char detailed_error[256];	/* detailed error message for last
+ 					error, or empty. */
++	/*------------------------------*/
++	os_thread_id_t	trx_thread_id;	/* id of the thread that created the trx */
++	ulint		io_reads;	/* number of read requests accounted to the trx */
++	ib_longlong     io_read;	/* number of bytes requested by those reads */
++	ulint		io_reads_wait_timer;	/* microseconds spent in synchronous reads */
++	ib_longlong     lock_que_wait_ustarted;	/* start of the current lock wait, in microseconds */
++	ulint           lock_que_wait_timer;	/* microseconds spent in lock waits */
++	ulint           innodb_que_wait_timer;	/* time spent sleeping or waiting to enter InnoDB */
++	ulint           distinct_page_access;	/* reported as InnoDB_pages_distinct in the slow log */
++#define	DPAH_SIZE	8192
++	byte*		distinct_page_access_hash;	/* DPAH_SIZE-byte buffer, zero-filled when allocated */
+ };
+ 
+ #define TRX_MAX_N_THREADS	32	/* maximum number of concurrent
+diff -r bb81fcdd7db2 innobase/lock/lock0lock.c
+--- a/innobase/lock/lock0lock.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/lock/lock0lock.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -1806,6 +1806,8 @@
+ {
+ 	lock_t*	lock;
+ 	trx_t*	trx;
++	ulint   sec;
++	ulint   ms;
+ 	
+ #ifdef UNIV_SYNC_DEBUG
+ 	ut_ad(mutex_own(&kernel_mutex));
+@@ -1861,6 +1863,8 @@
+ 	trx->que_state = TRX_QUE_LOCK_WAIT;
+ 	trx->was_chosen_as_deadlock_victim = FALSE;
+ 	trx->wait_started = time(NULL);
++	ut_usectime(&sec, &ms);
++	trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms;
+ 
+ 	ut_a(que_thr_stop(thr));
+ 
+@@ -3514,7 +3518,9 @@
+ {
+ 	lock_t*	lock;
+ 	trx_t*	trx;
+-	
++	ulint   sec;
++	ulint   ms;
++
+ #ifdef UNIV_SYNC_DEBUG
+ 	ut_ad(mutex_own(&kernel_mutex));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -3563,7 +3569,10 @@
+ 	
+ 		return(DB_SUCCESS);
+ 	}
+-	
++
++	trx->wait_started = time(NULL);
++	ut_usectime(&sec, &ms);
++	trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms;
+ 	trx->que_state = TRX_QUE_LOCK_WAIT;
+ 	trx->was_chosen_as_deadlock_victim = FALSE;
+ 	trx->wait_started = time(NULL);
+@@ -4289,7 +4298,7 @@
+ 	ulint	i;
+ 	mtr_t	mtr;
+ 	trx_t*	trx;
+-
++	
+ 	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
+ 
+ 	/* First print info on non-active transactions */
+diff -r bb81fcdd7db2 innobase/os/os0file.c
+--- a/innobase/os/os0file.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/os/os0file.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -14,6 +14,7 @@
+ #include "srv0start.h"
+ #include "fil0fil.h"
+ #include "buf0buf.h"
++#include "trx0sys.h"
+ 
+ #if defined(UNIV_HOTBACKUP) && defined(__WIN__)
+ /* Add includes for the _stat() call to compile on Windows */
+@@ -101,6 +102,7 @@
+ 	struct aiocb	control;	/* Posix control block for aio
+ 					request */
+ #endif
++        trx_t*		trx;
+ };
+ 
+ /* The aio array structure */
+@@ -1903,9 +1905,13 @@
+ #ifndef __WIN__
+ /***********************************************************************
+ Does a synchronous read operation in Posix. */
++
++#define os_file_pread(file, buf, n, offset, offset_high)        \
++		_os_file_pread(file, buf, n, offset, offset_high, NULL) /* NULL trx: read is not accounted; no ';' so it stays an expression */
++
+ static
+ ssize_t
+-os_file_pread(
++_os_file_pread(
+ /*==========*/
+ 				/* out: number of bytes read, -1 if error */
+ 	os_file_t	file,	/* in: handle to a file */
+@@ -1913,12 +1919,17 @@
+ 	ulint		n,	/* in: number of bytes to read */	
+ 	ulint		offset,	/* in: least significant 32 bits of file
+ 				offset from where to read */
+-	ulint		offset_high) /* in: most significant 32 bits of
+-				offset */
++	ulint		offset_high, /* in: most significant 32 bits of
++				offset */
++        trx_t*		trx)
+ {
+         off_t	offs;
+ 	ssize_t	n_bytes;
+-
++	ulint           sec;
++	ulint           ms;
++	ib_longlong     start_time;
++	ib_longlong     finish_time;
++	
+ 	ut_a((offset & 0xFFFFFFFFUL) == offset);
+         
+         /* If off_t is > 4 bytes in size, then we assume we can pass a
+@@ -1937,7 +1948,13 @@
+         }
+ 
+ 	os_n_file_reads++;
+-
++	if (trx)
++	{
++	        trx->io_reads++;
++		trx->io_read += n;
++		ut_usectime(&sec, &ms);
++		start_time = (ib_longlong)sec * 1000000 + ms;
++	}
+ #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
+         os_mutex_enter(os_file_count_mutex);
+ 	os_file_n_pending_preads++;
+@@ -1951,6 +1968,13 @@
+ 	os_n_pending_reads--;
+         os_mutex_exit(os_file_count_mutex);
+ 
++        if (trx)
++        {
++		ut_usectime(&sec, &ms);
++        	finish_time = (ib_longlong)sec * 1000000 + ms;
++                trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++	}
++
+ 	return(n_bytes);
+ #else
+ 	{
+@@ -1980,6 +2004,13 @@
+         os_mutex_enter(os_file_count_mutex);
+ 	os_n_pending_reads--;
+         os_mutex_exit(os_file_count_mutex);
++
++        if (trx)
++        {
++		ut_usectime(&sec, &ms);
++        	finish_time = (ib_longlong)sec * 1000000 + ms;
++                trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++	}
+ 
+ 	return(ret);
+ 	}
+@@ -2103,7 +2134,7 @@
+ Requests a synchronous positioned read operation. */
+ 
+ ibool
+-os_file_read(
++_os_file_read(
+ /*=========*/
+ 				/* out: TRUE if request was
+ 				successful, FALSE if fail */
+@@ -2113,7 +2144,8 @@
+ 				offset where to read */
+ 	ulint		offset_high, /* in: most significant 32 bits of
+ 				offset */
+-	ulint		n)	/* in: number of bytes to read */	
++	ulint		n,	/* in: number of bytes to read */
++        trx_t*		trx)
+ {
+ #ifdef __WIN__
+ 	BOOL		ret;
+@@ -2128,8 +2160,7 @@
+ 
+ 	os_n_file_reads++;
+ 	os_bytes_read_since_printout += n;
+-
+-try_again:	
++try_again:
+ 	ut_ad(file);
+ 	ut_ad(buf);
+ 	ut_ad(n > 0);
+@@ -2177,7 +2208,7 @@
+ 	os_bytes_read_since_printout += n;
+ 
+ try_again:
+-	ret = os_file_pread(file, buf, n, offset, offset_high);
++	ret = _os_file_pread(file, buf, n, offset, offset_high, trx);
+ 
+ 	if ((ulint)ret == n) {
+ 
+@@ -3137,7 +3168,8 @@
+ 				offset */
+ 	ulint		offset_high, /* in: most significant 32 bits of
+ 				offset */
+-	ulint		len)	/* in: length of the block to read or write */
++	ulint		len,	/* in: length of the block to read or write */
++	trx_t*          trx)
+ {
+ 	os_aio_slot_t*	slot;
+ #ifdef WIN_ASYNC_IO
+@@ -3196,7 +3228,7 @@
+ 	slot->offset   = offset;
+ 	slot->offset_high = offset_high;
+ 	slot->io_already_done = FALSE;
+-	
++
+ #ifdef WIN_ASYNC_IO		
+ 	control = &(slot->control);
+ 	control->Offset = (DWORD)offset;
+@@ -3390,7 +3422,8 @@
+ 				can be used to identify a completed aio
+ 				operation); if mode is OS_AIO_SYNC, these
+ 				are ignored */
+-	void*		message2)
++	void*		message2,
++	trx_t*          trx)
+ {
+ 	os_aio_array_t*	array;
+ 	os_aio_slot_t*	slot;
+@@ -3429,8 +3462,8 @@
+ 		wait in the Windows case. */
+ 
+ 		if (type == OS_FILE_READ) {
+-			return(os_file_read(file, buf, offset,
+-							offset_high, n));
++			return(_os_file_read(file, buf, offset,
++							offset_high, n, trx));
+ 		}
+ 
+ 		ut_a(type == OS_FILE_WRITE);
+@@ -3463,14 +3496,19 @@
+ 		ut_error;
+ 	}
+ 	
++	if (trx && type == OS_FILE_READ)
++	{
++		trx->io_reads++;
++		trx->io_read += n;
++	}
+ 	slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+-					name, buf, offset, offset_high, n);
++					name, buf, offset, offset_high, n, trx);
+ 	if (type == OS_FILE_READ) {
+ 		if (os_aio_use_native_aio) {
+ #ifdef WIN_ASYNC_IO
+ 			os_n_file_reads++;
+ 			os_bytes_read_since_printout += len;
+-			
++
+ 			ret = ReadFile(file, buf, (DWORD)n, &len,
+ 							&(slot->control));
+ #elif defined(POSIX_ASYNC_IO)
+@@ -4038,7 +4076,7 @@
+ 
+ 			ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, 
+ 						consecutive_ios[i]->len);
+-			offs += consecutive_ios[i]->len;
++			offs += consecutive_ios[i]->len;			
+ 		}
+ 	}
+ 
+@@ -4050,9 +4088,8 @@
+ 
+ 	/* Mark the i/os done in slots */
+ 
+-	for (i = 0; i < n_consecutive; i++) {
++	for (i = 0; i < n_consecutive; i++) 
+ 		consecutive_ios[i]->io_already_done = TRUE;
+-	}
+ 
+ 	/* We return the messages for the first slot now, and if there were
+ 	several slots, the messages will be returned with subsequent calls
+diff -r bb81fcdd7db2 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/srv/srv0srv.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -996,6 +996,10 @@
+ 	ibool			has_slept = FALSE;
+ 	srv_conc_slot_t*	slot	  = NULL;
+ 	ulint			i;
++	ib_longlong             start_time = 0L;
++	ib_longlong             finish_time = 0L;
++	ulint                   sec;
++	ulint                   ms;
+ 
+ 	/* If trx has 'free tickets' to enter the engine left, then use one
+ 	such ticket */
+@@ -1054,6 +1058,7 @@
+     if (SRV_THREAD_SLEEP_DELAY > 0)
+     {
+       os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
++      trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY;
+     }
+ 
+ 		trx->op_info = "";
+@@ -1109,11 +1114,18 @@
+ 	/* Go to wait for the event; when a thread leaves InnoDB it will
+ 	release this thread */
+ 
++	ut_usectime(&sec, &ms);
++	start_time = (ib_longlong)sec * 1000000 + ms;
++
+ 	trx->op_info = "waiting in InnoDB queue";
+ 
+ 	os_event_wait(slot->event);
+ 
+ 	trx->op_info = "";
++
++	ut_usectime(&sec, &ms);
++	finish_time = (ib_longlong)sec * 1000000 + ms;
++	trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
+ 
+ 	os_fast_mutex_lock(&srv_conc_mutex);
+ 
+diff -r bb81fcdd7db2 innobase/thr/thr0loc.c
+--- a/innobase/thr/thr0loc.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/thr/thr0loc.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -45,6 +45,7 @@
+ 				for this thread */
+ 	ibool		in_ibuf;/* TRUE if the the thread is doing an ibuf
+ 				operation */
++	trx_t*          trx;
+ 	hash_node_t	hash;	/* hash chain node */
+ 	ulint		magic_n;
+ };
+@@ -113,6 +114,29 @@
+ }
+ 
+ /***********************************************************************
++Gets the trx stored in the local storage of a thread. */
++
++trx_t*
++thr_local_get_trx(
++/*==================*/
++				/* out: trx stored for the thread */
++	os_thread_id_t	id)	/* in: thread id of the thread */
++{
++	trx_t*          trx;
++	thr_local_t*	local;
++
++	mutex_enter(&thr_local_mutex);	/* lookup and read are done under thr_local_mutex */
++
++	local = thr_local_get(id);
++
++	trx = local->trx;	/* copy out before the mutex is released */
++
++	mutex_exit(&thr_local_mutex);
++
++	return(trx);
++}
++
++/***********************************************************************
+ Sets the slot number in the thread table of a thread. */
+ 
+ void
+@@ -124,11 +148,31 @@
+ 	thr_local_t*	local;
+ 
+ 	mutex_enter(&thr_local_mutex);
+-	
++
+ 	local = thr_local_get(id);
+ 
+ 	local->slot_no = slot_no;
+-	
++
++	mutex_exit(&thr_local_mutex);
++}
++
++/***********************************************************************
++Sets the trx stored in the local storage of a thread. */
++
++void
++thr_local_set_trx(
++/*==================*/
++	os_thread_id_t	id,	/* in: thread id of the thread */
++	trx_t*		trx)	/* in: trx to store; NULL is also passed by callers */
++{
++	thr_local_t*	local;
++
++	mutex_enter(&thr_local_mutex);	/* lookup and write are done under thr_local_mutex */
++
++	local = thr_local_get(id);
++
++	local->trx = trx;
++
+ 	mutex_exit(&thr_local_mutex);
+ }
+ 
+@@ -172,6 +216,7 @@
+ 	local->magic_n = THR_LOCAL_MAGIC_N;
+ 
+  	local->in_ibuf = FALSE;
++ 	local->trx = NULL;
+ 	
+ 	mutex_enter(&thr_local_mutex);
+ 
+diff -r bb81fcdd7db2 innobase/trx/trx0trx.c
+--- a/innobase/trx/trx0trx.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/trx/trx0trx.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -190,6 +190,16 @@
+ 	trx->global_read_view_heap = mem_heap_create(256);
+ 	trx->global_read_view = NULL;
+ 	trx->read_view = NULL;
++	
++	trx->io_reads = 0;
++	trx->io_read = 0;
++	trx->io_reads_wait_timer = 0;
++	trx->lock_que_wait_timer = 0;
++	trx->innodb_que_wait_timer = 0;
++	trx->distinct_page_access = 0;
++	trx->distinct_page_access_hash = NULL;
++	trx->trx_thread_id = os_thread_get_curr_id();
++	thr_local_set_trx(trx->trx_thread_id, NULL);
+ 
+ 	/* Set X/Open XA transaction identification to NULL */
+ 	memset(&trx->xid, 0, sizeof(trx->xid));
+@@ -230,6 +240,10 @@
+ 
+ 	trx->mysql_process_no = os_proc_get_number();
+ 	
++	trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
++	memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++	thr_local_set_trx(trx->mysql_thread_id, trx);
++
+ 	return(trx);
+ }
+ 
+@@ -355,6 +369,8 @@
+ 
+ 	ut_a(trx->read_view == NULL);
+ 	
++	thr_local_free(trx->trx_thread_id);
++	
+ 	mem_free(trx);
+ }
+ 
+@@ -366,6 +382,12 @@
+ /*===============*/
+ 	trx_t*	trx)	/* in, own: trx object */
+ {
++	if (trx->distinct_page_access_hash)
++	{
++		mem_free(trx->distinct_page_access_hash);
++		trx->distinct_page_access_hash= NULL;
++	}
++
+ 	thr_local_free(trx->mysql_thread_id);
+ 
+ 	mutex_enter(&kernel_mutex);
+@@ -1064,7 +1086,10 @@
+ 	trx_t*	trx)	/* in: transaction */
+ {
+ 	que_thr_t*	thr;
+-
++	ulint           sec;
++	ulint           ms;
++	ib_longlong     now;
++	
+ #ifdef UNIV_SYNC_DEBUG
+ 	ut_ad(mutex_own(&kernel_mutex));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -1080,6 +1105,9 @@
+ 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ 	}
+ 
++	ut_usectime(&sec, &ms);
++	now = (ib_longlong)sec * 1000000 + ms;
++	trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
+ 	trx->que_state = TRX_QUE_RUNNING;
+ }
+ 
+@@ -1093,6 +1121,9 @@
+ 	trx_t*	trx)	/* in: transaction in the TRX_QUE_LOCK_WAIT state */
+ {
+ 	que_thr_t*	thr;
++	ulint           sec;
++	ulint           ms;
++	ib_longlong     now;
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 	ut_ad(mutex_own(&kernel_mutex));
+@@ -1109,6 +1140,9 @@
+ 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ 	}
+ 
++	ut_usectime(&sec, &ms);
++	now = (ib_longlong)sec * 1000000 + ms;
++	trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
+ 	trx->que_state = TRX_QUE_RUNNING;
+ }
+ 
+diff -r bb81fcdd7db2 patch_info/microslow_innodb.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/microslow_innodb.info	Mon Sep 08 16:38:46 2008 -0700
+@@ -0,0 +1,6 @@
++File=microslow_innodb.patch
++Name=Extended statistics in slow.log
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=
+diff -r bb81fcdd7db2 scripts/mysqldumpslow.sh
+--- a/scripts/mysqldumpslow.sh	Mon Sep 08 16:38:33 2008 -0700
++++ b/scripts/mysqldumpslow.sh	Mon Sep 08 16:38:46 2008 -0700
+@@ -83,8 +83,8 @@
+     s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//;
+     my ($user,$host) = s/^#? User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('','');
+ 
+-    s/^# Query_time: (\d+)  Lock_time: (\d+)  Rows_sent: (\d+).*\n//;
+-    my ($t, $l, $r) = ($1, $2, $3);
++    s/^# Query_time: (\d+(\.\d+)?)  Lock_time: (\d+(\.\d+)?)  Rows_sent: (\d+(\.\d+)?).*\n//;
++    my ($t, $l, $r) = ($1, $3, $5);
+     $t -= $l unless $opt{l};
+ 
+     # remove fluff that mysqld writes to log when it (re)starts:
+diff -r bb81fcdd7db2 sql-common/my_time.c
+--- a/sql-common/my_time.c	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql-common/my_time.c	Mon Sep 08 16:38:46 2008 -0700
+@@ -1252,3 +1252,37 @@
+   return 0;
+ }
+ 
++/*
++ ulonglong my_timer(ulonglong *ltime, ulonglong frequency)
++
++ For performance measurement this function returns the number
++ of microseconds since the epoch (SVr4, BSD 4.3, POSIX 1003.1-2001)
++ or system start (Windows platforms). The value is also stored in
++ *ltime when ltime is not NULL.
++
++ On Windows, frequency is the QueryPerformanceFrequency value (the
++ global one is set in mysqld.cc); ticks are scaled to microseconds
++ as ticks * 1000000 / frequency, split to avoid 64-bit overflow.
++ If frequency is 0 (no performance counter), GetTickCount is used,
++ which has millisecond resolution only.
++*/
++
++ulonglong my_timer(ulonglong *ltime, ulonglong frequency)
++{
++  ulonglong newtime= 0;
++#ifdef __WIN__
++  if (frequency)
++  {
++    QueryPerformanceCounter((LARGE_INTEGER *)&newtime);
++    newtime= (newtime / frequency) * 1000000 + (newtime % frequency) * 1000000 / frequency;
++  } else
++    newtime= (ulonglong) GetTickCount() * 1000; /* widen first: GetTickCount returns 32-bit milliseconds */
++#else
++  struct timeval t;
++  gettimeofday(&t, NULL);
++  newtime= (ulonglong)t.tv_sec * 1000000 + t.tv_usec;
++#endif
++  if (ltime)
++    *ltime= newtime;
++  return newtime;
++}
+diff -r bb81fcdd7db2 sql/filesort.cc
+--- a/sql/filesort.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/filesort.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -180,6 +180,7 @@
+   {
+     statistic_increment(thd->status_var.filesort_scan_count, &LOCK_status);
+   }
++  thd->query_plan_flags|= QPLAN_FILESORT;
+ #ifdef CAN_TRUST_RANGE
+   if (select && select->quick && select->quick->records > 0L)
+   {
+@@ -245,6 +246,7 @@
+   }
+   else
+   {
++    thd->query_plan_flags|= QPLAN_FILESORT_DISK;
+     if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
+     {
+       x_free(table_sort.buffpek);
+@@ -1116,6 +1118,7 @@
+ 
+   statistic_increment(current_thd->status_var.filesort_merge_passes,
+ 		      &LOCK_status);
++  current_thd->query_plan_fsort_passes++;
+   if (param->not_killable)
+   {
+     killed= &not_killable;
+diff -r bb81fcdd7db2 sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/ha_innodb.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -1,3 +1,4 @@
++
+ /* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
+ 
+    This program is free software; you can redistribute it and/or modify
+@@ -6098,6 +6099,7 @@
+ {
+ 	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ 	trx_t*		trx;
++	int i;
+ 
+   	DBUG_ENTER("ha_innobase::external_lock");
+ 	DBUG_PRINT("enter",("lock_type: %d", lock_type));
+@@ -6221,7 +6223,24 @@
+ 
+ 	if (trx->n_mysql_tables_in_use == 0) {
+ 
+-	        trx->mysql_n_tables_locked = 0;
++		current_thd->innodb_was_used = TRUE;
++		current_thd->innodb_io_reads += trx->io_reads;
++		current_thd->innodb_io_read += trx->io_read;
++		current_thd->innodb_io_reads_wait_timer += trx->io_reads_wait_timer;
++		current_thd->innodb_lock_que_wait_timer += trx->lock_que_wait_timer;
++		current_thd->innodb_innodb_que_wait_timer += trx->innodb_que_wait_timer;
++                current_thd->innodb_page_access += trx->distinct_page_access;
++
++		trx->io_reads = 0;
++		trx->io_read = 0;
++		trx->io_reads_wait_timer = 0;
++		trx->lock_que_wait_timer = 0;
++		trx->innodb_que_wait_timer = 0;
++		trx->distinct_page_access = 0;
++		if (trx->distinct_page_access_hash)
++			memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++
++    		trx->mysql_n_tables_locked = 0;
+ 		prebuilt->used_in_HANDLER = FALSE;
+ 
+ 		if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+diff -r bb81fcdd7db2 sql/log.cc
+--- a/sql/log.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/log.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -2229,10 +2229,11 @@
+ */
+ 
+ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
+-		      time_t query_start_arg)
++		      time_t query_start_arg, ulonglong query_start_timer)
+ {
+   bool error=0;
+   time_t current_time;
++  ulonglong current_timer;
+   if (!is_open())
+     return 0;
+   DBUG_ENTER("MYSQL_LOG::write");
+@@ -2243,7 +2244,8 @@
+     int tmp_errno=0;
+     char buff[80],*end;
+     end=buff;
+-    if (!(thd->options & OPTION_UPDATE_LOG))
++    if (!(thd->options & OPTION_UPDATE_LOG) &&
++        !(thd->slave_thread && opt_log_slow_slave_statements))
+     {
+       VOID(pthread_mutex_unlock(&LOCK_log));
+       DBUG_RETURN(0);
+@@ -2273,22 +2275,69 @@
+       if (my_b_printf(&log_file, "# User at Host: %s[%s] @ %s [%s]\n",
+                       sctx->priv_user ?
+                       sctx->priv_user : "",
+-                      sctx->user ? sctx->user : "",
++                      sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""),
+                       sctx->host ? sctx->host : "",
+                       sctx->ip ? sctx->ip : "") ==
+           (uint) -1)
+         tmp_errno=errno;
+     }
+-    if (query_start_arg)
++    if (query_start_timer)
+     {
++      char buf[5][20];
++      ulonglong current_timer= my_timer(&current_timer, frequency);
++      sprintf(buf[0], "%.6f", (current_timer - query_start_timer) / 1000000.0);
++      sprintf(buf[1], "%.6f", (thd->timer_after_lock - query_start_timer) / 1000000.0);
++      if (!query_length)
++      {
++        thd->sent_row_count= thd->examined_row_count= 0;
++        thd->innodb_was_used= FALSE;
++        thd->query_plan_flags= QPLAN_NONE;
++        thd->query_plan_fsort_passes= 0;
++      }
++
+       /* For slow query log */
+       if (my_b_printf(&log_file,
+-                      "# Query_time: %lu  Lock_time: %lu  Rows_sent: %lu  Rows_examined: %lu\n",
+-                      (ulong) (current_time - query_start_arg),
+-                      (ulong) (thd->time_after_lock - query_start_arg),
++                      "# Thread_id: %lu  Schema: %s\n" \
++                      "# Query_time: %s  Lock_time: %s  Rows_sent: %lu  Rows_examined: %lu\n", 
++                      (ulong) thd->thread_id, (thd->db ? thd->db : ""),
++                      buf[0], buf[1],
+                       (ulong) thd->sent_row_count,
+                       (ulong) thd->examined_row_count) == (uint) -1)
+         tmp_errno=errno;
++      if ((thd->variables.log_slow_verbosity & SLOG_V_QUERY_PLAN) &&
++           my_b_printf(&log_file,
++                      "# QC_Hit: %s  Full_scan: %s  Full_join: %s  Tmp_table: %s  Tmp_table_on_disk: %s\n" \
++                      "# Filesort: %s  Filesort_on_disk: %s  Merge_passes: %lu\n",
++                      ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
++                      ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
++                      ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
++                      ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"),
++                      ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"),
++                      ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
++                      ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"),
++                      thd->query_plan_fsort_passes) == (uint) -1)
++        tmp_errno=errno;
++      if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && thd->innodb_was_used)
++      {
++        sprintf(buf[2], "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0);
++        sprintf(buf[3], "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0);
++        sprintf(buf[4], "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0);
++        if (my_b_printf(&log_file,
++                        "#   InnoDB_IO_r_ops: %lu  InnoDB_IO_r_bytes: %lu  InnoDB_IO_r_wait: %s\n" \
++                        "#   InnoDB_rec_lock_wait: %s  InnoDB_queue_wait: %s\n" \
++                        "#   InnoDB_pages_distinct: %lu\n",
++                        (ulong) thd->innodb_io_reads,
++                        (ulong) thd->innodb_io_read,
++                        buf[2], buf[3], buf[4],
++                        (ulong) thd->innodb_page_access) == (uint) -1)
++          tmp_errno=errno;
++      } 
++      else
++      {
++        if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) &&
++            my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1)
++          tmp_errno=errno;
++      }
+     }
+     if (thd->db && strcmp(thd->db,db))
+     {						// Database changed
+diff -r bb81fcdd7db2 sql/log_event.cc
+--- a/sql/log_event.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/log_event.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -2039,6 +2039,7 @@
+       /* Execute the query (note that we bypass dispatch_command()) */
+       const char* found_semicolon= NULL;
+       mysql_parse(thd, thd->query, thd->query_length, &found_semicolon);
++      log_slow_statement(thd);
+ 
+     }
+     else
+diff -r bb81fcdd7db2 sql/mysql_priv.h
+--- a/sql/mysql_priv.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/mysql_priv.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -484,6 +484,78 @@
+ #define WEEK_FIRST_WEEKDAY   4
+ 
+ #define STRING_BUFFER_USUAL_SIZE 80
++
++/* Slow log */
++
++struct msl_opts
++{
++  ulong val;
++  const char *name;
++};
++
++#define SLOG_V_MICROTIME      (1UL << 0)
++#define SLOG_V_QUERY_PLAN     (1UL << 1)
++#define SLOG_V_INNODB         (1UL << 2)
++/* ... further bits unused; values are parenthesized so that "x == SLOG_V_INVALID" compares the whole value */
++#define SLOG_V_INVALID        (1UL << 31)
++#define SLOG_V_NONE           SLOG_V_MICROTIME
++
++static const struct msl_opts slog_verb[]= 
++{
++  /* Basic flags */
++
++  { SLOG_V_MICROTIME, "microtime" },
++  { SLOG_V_QUERY_PLAN, "query_plan" },
++  { SLOG_V_INNODB, "innodb" },
++
++  /* End of basic flags */
++
++  { 0, "" },
++
++  /* Complex flags */
++
++  { SLOG_V_MICROTIME, "minimal" },
++  { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN, "standard" },
++  { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN|SLOG_V_INNODB, "full" },
++
++  /* End of complex flags */
++
++  { SLOG_V_INVALID, (char *)0 }
++};
++
++#define QPLAN_NONE            0
++#define QPLAN_QC              (1UL << 0)
++#define QPLAN_QC_NO           (1UL << 1)
++#define QPLAN_FULL_SCAN       (1UL << 2)
++#define QPLAN_FULL_JOIN       (1UL << 3)
++#define QPLAN_TMP_TABLE       (1UL << 4)
++#define QPLAN_TMP_DISK        (1UL << 5)
++#define QPLAN_FILESORT        (1UL << 6)
++#define QPLAN_FILESORT_DISK   (1UL << 7)
++/* ... further bits unused; values are parenthesized and unsigned so they are safe in any expression */
++#define QPLAN_MAX             (1UL << 31)
++
++#define SLOG_F_QC_NO          QPLAN_QC_NO
++#define SLOG_F_FULL_SCAN      QPLAN_FULL_SCAN
++#define SLOG_F_FULL_JOIN      QPLAN_FULL_JOIN
++#define SLOG_F_TMP_TABLE      QPLAN_TMP_TABLE
++#define SLOG_F_TMP_DISK       QPLAN_TMP_DISK
++#define SLOG_F_FILESORT       QPLAN_FILESORT
++#define SLOG_F_FILESORT_DISK  QPLAN_FILESORT_DISK
++#define SLOG_F_INVALID        (1UL << 31)
++#define SLOG_F_NONE           0
++
++static const struct msl_opts slog_filter[]= 
++{
++  { SLOG_F_QC_NO,         "qc_miss" },
++  { SLOG_F_FULL_SCAN,     "full_scan" },
++  { SLOG_F_FULL_JOIN,     "full_join" },
++  { SLOG_F_TMP_TABLE,     "tmp_table" },
++  { SLOG_F_TMP_DISK,      "tmp_table_on_disk" },
++  { SLOG_F_FILESORT,      "filesort" },
++  { SLOG_F_FILESORT_DISK, "filesort_on_disk" },
++  { SLOG_F_INVALID,       (char *)0 }
++};
+ 
+ enum enum_parsing_place
+ {
+@@ -1333,7 +1405,7 @@
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+ extern char* opt_secure_file_priv;
+-extern my_bool opt_log_slow_admin_statements;
++extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements;
+ extern my_bool sp_automatic_privileges, opt_noacl;
+ extern my_bool opt_old_style_user_limits, trust_function_creators;
+ extern uint opt_crash_binlog_innodb;
+diff -r bb81fcdd7db2 sql/mysqld.cc
+--- a/sql/mysqld.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/mysqld.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -175,7 +175,6 @@
+ static void getvolumename();
+ static void getvolumeID(BYTE *volumeName);
+ #endif /* __NETWARE__ */
+-
+ 
+ #ifdef _AIX41
+ int initgroups(const char *,unsigned int);
+@@ -406,6 +405,7 @@
+ my_bool opt_secure_auth= 0;
+ char* opt_secure_file_priv= 0;
+ my_bool opt_log_slow_admin_statements= 0;
++my_bool opt_log_slow_slave_statements= 0;
+ my_bool lower_case_file_system= 0;
+ my_bool opt_large_pages= 0;
+ uint    opt_large_page_size= 0;
+@@ -503,6 +503,7 @@
+ Ge_creator ge_creator;
+ Le_creator le_creator;
+ 
++ulonglong frequency= 0;
+ 
+ FILE *bootstrap_file;
+ int bootstrap_error;
+@@ -3649,6 +3650,8 @@
+       unireg_abort(1);
+     }
+   }
++  if (!QueryPerformanceFrequency((LARGE_INTEGER *)&frequency))
++    frequency= 0;
+ #endif /* __WIN__ */
+ 
+   if (init_common_variables(MYSQL_CONFIG_NAME,
+@@ -4892,7 +4895,7 @@
+   OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
+   OPT_KEY_BUFFER_SIZE, OPT_KEY_CACHE_BLOCK_SIZE,
+   OPT_KEY_CACHE_DIVISION_LIMIT, OPT_KEY_CACHE_AGE_THRESHOLD,
+-  OPT_LONG_QUERY_TIME,
++  OPT_LONG_QUERY_TIME, OPT_MIN_EXAMINED_ROW_LIMIT,
+   OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET,
+   OPT_MAX_BINLOG_CACHE_SIZE, OPT_MAX_BINLOG_SIZE,
+   OPT_MAX_CONNECTIONS, OPT_MAX_CONNECT_ERRORS,
+@@ -4983,6 +4986,10 @@
+   OPT_TIMED_MUTEXES,
+   OPT_OLD_STYLE_USER_LIMITS,
+   OPT_LOG_SLOW_ADMIN_STATEMENTS,
++  OPT_LOG_SLOW_SLAVE_STATEMENTS,
++  OPT_LOG_SLOW_RATE_LIMIT,
++  OPT_LOG_SLOW_VERBOSITY,
++  OPT_LOG_SLOW_FILTER,
+   OPT_TABLE_LOCK_WAIT_TIMEOUT,
+   OPT_PORT_OPEN_TIMEOUT,
+   OPT_MERGE,
+@@ -5374,6 +5381,11 @@
+    "Log slow OPTIMIZE, ANALYZE, ALTER and other administrative statements to the slow log if it is open.",
+    (gptr*) &opt_log_slow_admin_statements,
+    (gptr*) &opt_log_slow_admin_statements,
++   0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
++  {"log-slow-slave-statements", OPT_LOG_SLOW_SLAVE_STATEMENTS,
++   "Log slow replicated statements to the slow log if it is open.",
++   (gptr*) &opt_log_slow_slave_statements,
++   (gptr*) &opt_log_slow_slave_statements,
+    0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+   {"log-slow-queries", OPT_SLOW_QUERY_LOG,
+     "Log slow queries to this log file. Defaults logging to hostname-slow.log file. Must be enabled to activate other slow log options.",
+@@ -6038,11 +6050,27 @@
+    (gptr*) 0,
+    0, (GET_ULONG | GET_ASK_ADDR) , REQUIRED_ARG, 100,
+    1, 100, 0, 1, 0},
++  {"log_slow_filter", OPT_LOG_SLOW_FILTER,
++    "Log only the queries that followed certain execution plan. Multiple flags allowed in a comma-separated string. [qc_miss, full_scan, full_join, tmp_table, tmp_table_on_disk, filesort, filesort_on_disk]",
++    0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_F_NONE, 0, 0},
++  {"log_slow_rate_limit", OPT_LOG_SLOW_RATE_LIMIT,
++    "Rate limit statement writes to slow log to only those from every (1/log_slow_rate_limit) session.",
++    (gptr*) &global_system_variables.log_slow_rate_limit,
++    (gptr*) &max_system_variables.log_slow_rate_limit, 0, GET_ULONG,
++    REQUIRED_ARG, 1, 1, ~0L, 0, 1L, 0},
++  {"log_slow_verbosity", OPT_LOG_SLOW_VERBOSITY,
++    "Choose how verbose the messages to your slow log will be. Multiple flags allowed in a comma-separated string. [microtime, query_plan, innodb]",
++    0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_V_MICROTIME, 0, 0},
+   {"long_query_time", OPT_LONG_QUERY_TIME,
+    "Log all queries that have taken more than long_query_time seconds to execute to file.",
+    (gptr*) &global_system_variables.long_query_time,
+    (gptr*) &max_system_variables.long_query_time, 0, GET_ULONG,
+-   REQUIRED_ARG, 10, 1, LONG_TIMEOUT, 0, 1, 0},
++    REQUIRED_ARG, 10000000, 0, LONG_TIMEOUT * 1000000, 0, 1, 0},
++  {"min_examined_row_limit", OPT_MIN_EXAMINED_ROW_LIMIT,
++    "Don't log queries which examine less than min_examined_row_limit rows to file.",
++    (gptr*) &global_system_variables.min_examined_row_limit,
++    (gptr*) &max_system_variables.min_examined_row_limit, 0, GET_ULONG,
++    REQUIRED_ARG, 0, 0, ~0L, 0, 1L, 0},
+   {"lower_case_table_names", OPT_LOWER_CASE_TABLE_NAMES,
+    "If set to 1 table names are stored in lowercase on disk and table names will be case-insensitive.  Should be set to 2 if you are using a case insensitive file system",
+    (gptr*) &lower_case_table_names,
+@@ -6810,7 +6838,9 @@
+   global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR;
+   max_system_variables.max_join_size=   (ulonglong) HA_POS_ERROR;
+   global_system_variables.old_passwords= 0;
+-
++  global_system_variables.log_slow_verbosity= SLOG_V_MICROTIME;
++  global_system_variables.log_slow_filter= SLOG_F_NONE;
++  
+   /*
+     Default behavior for 4.1 and 5.0 is to treat NULL values as unequal
+     when collecting index statistics for MyISAM tables.
+@@ -7271,6 +7301,24 @@
+   case OPT_BOOTSTRAP:
+     opt_noacl=opt_bootstrap=1;
+     break;
++  case OPT_LOG_SLOW_FILTER:
++    if ((global_system_variables.log_slow_filter= 
++          msl_flag_resolve_by_name(slog_filter, argument,
++                                   SLOG_F_NONE, SLOG_F_INVALID)) == SLOG_F_INVALID)
++    {
++      fprintf(stderr,"Invalid argument to log_slow_filter\n");
++      exit(1);
++    }
++    break;
++  case OPT_LOG_SLOW_VERBOSITY:
++    if ((global_system_variables.log_slow_verbosity= 
++         msl_flag_resolve_by_name(slog_verb, argument,
++                                  SLOG_V_NONE, SLOG_V_INVALID)) == SLOG_V_INVALID)
++    {
++      fprintf(stderr,"Invalid argument to log_slow_verbosity\n");
++      exit(1);
++    }
++    break;
+   case OPT_STORAGE_ENGINE:
+   {
+     if ((enum db_type)((global_system_variables.table_type=
+@@ -7603,10 +7651,14 @@
+   if (opt_bdb)
+     sql_print_warning("this binary does not contain BDB storage engine");
+ #endif
+-  if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes) &&
++  if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes ||
++        opt_log_slow_slave_statements) &&
+       !opt_slow_log)
+-    sql_print_warning("options --log-slow-admin-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set");
+-
++  {
++    sql_print_warning("options --log-slow-admin-statements, --log-slow-slave-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set");
++    opt_log_slow_slave_statements= FALSE;
++  }
++  
+   if (argc > 0)
+   {
+     fprintf(stderr, "%s: Too many arguments (first extra is '%s').\nUse --help to get a list of available options\n", my_progname, *argv);
+diff -r bb81fcdd7db2 sql/set_var.cc
+--- a/sql/set_var.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/set_var.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -215,7 +215,7 @@
+   sys_log_queries_not_using_indexes("log_queries_not_using_indexes",
+                                     &opt_log_queries_not_using_indexes);
+ sys_var_thd_ulong	sys_log_warnings("log_warnings", &SV::log_warnings);
+-sys_var_thd_ulong	sys_long_query_time("long_query_time",
++sys_var_thd_ulonglong  sys_long_query_time("long_query_time",
+ 					     &SV::long_query_time);
+ sys_var_bool_const_ptr sys_log_slow("log_slow_queries", &opt_slow_log);
+ sys_var_thd_bool	sys_low_priority_updates("low_priority_updates",
+@@ -281,6 +281,8 @@
+ 					   &SV::max_tmp_tables);
+ sys_var_long_ptr	sys_max_write_lock_count("max_write_lock_count",
+ 						 &max_write_lock_count);
++sys_var_thd_ulong	sys_min_examined_row_limit("min_examined_row_limit",
++					     &SV::min_examined_row_limit);
+ sys_var_thd_ulong       sys_multi_range_count("multi_range_count",
+                                               &SV::multi_range_count);
+ sys_var_long_ptr	sys_myisam_data_pointer_size("myisam_data_pointer_size",
+@@ -324,6 +326,20 @@
+ sys_var_bool_ptr	sys_relay_log_purge("relay_log_purge",
+                                             &relay_log_purge);
+ #endif
++sys_var_thd_ulong	sys_log_slow_rate_limit("log_slow_rate_limit",
++					     &SV::log_slow_rate_limit);
++sys_var_thd_msl_flag	sys_log_slow_filter("log_slow_filter",
++				       &SV::log_slow_filter,
++                                       SLOG_F_NONE,
++                                       SLOG_F_NONE,
++                                       SLOG_F_INVALID,
++                                       slog_filter);
++sys_var_thd_msl_flag	sys_log_slow_verbosity("log_slow_verbosity",
++				       &SV::log_slow_verbosity,
++                                       SLOG_V_NONE,
++                                       SLOG_V_MICROTIME,
++                                       SLOG_V_INVALID,
++                                       slog_verb);
+ sys_var_long_ptr	sys_rpl_recovery_rank("rpl_recovery_rank",
+ 					      &rpl_recovery_rank);
+ sys_var_long_ptr	sys_query_cache_size("query_cache_size",
+@@ -675,6 +691,9 @@
+   &sys_log_off,
+   &sys_log_queries_not_using_indexes,
+   &sys_log_slow,
++  &sys_log_slow_filter,
++  &sys_log_slow_rate_limit,
++  &sys_log_slow_verbosity,
+   &sys_log_update,
+   &sys_log_warnings,
+   &sys_long_query_time,
+@@ -698,6 +717,7 @@
+   &sys_max_tmp_tables,
+   &sys_max_user_connections,
+   &sys_max_write_lock_count,
++  &sys_min_examined_row_limit,
+   &sys_multi_range_count,
+   &sys_myisam_data_pointer_size,
+   &sys_myisam_max_sort_file_size,
+@@ -963,6 +983,8 @@
+   {"log_slave_updates",       (char*) &opt_log_slave_updates,       SHOW_MY_BOOL},
+ #endif
+   {sys_log_slow.name,         (char*) &sys_log_slow,                SHOW_SYS},
++  {sys_log_slow_filter.name, (char*) &sys_log_slow_filter, SHOW_SYS},
++  {sys_log_slow_verbosity.name, (char*) &sys_log_slow_verbosity, SHOW_SYS},
+   {sys_log_warnings.name,     (char*) &sys_log_warnings,	    SHOW_SYS},
+   {sys_long_query_time.name,  (char*) &sys_long_query_time, 	    SHOW_SYS},
+   {sys_low_priority_updates.name, (char*) &sys_low_priority_updates, SHOW_SYS},
+@@ -991,6 +1013,7 @@
+   {sys_max_tmp_tables.name,	(char*) &sys_max_tmp_tables,	    SHOW_SYS},
+   {sys_max_user_connections.name,(char*) &sys_max_user_connections, SHOW_SYS},
+   {sys_max_write_lock_count.name, (char*) &sys_max_write_lock_count,SHOW_SYS},
++  {sys_min_examined_row_limit.name, (char*) &sys_min_examined_row_limit, SHOW_SYS},
+   {sys_multi_range_count.name,  (char*) &sys_multi_range_count,     SHOW_SYS},
+   {sys_myisam_data_pointer_size.name, (char*) &sys_myisam_data_pointer_size, SHOW_SYS},
+   {sys_myisam_max_sort_file_size.name, (char*) &sys_myisam_max_sort_file_size,
+@@ -1043,6 +1066,7 @@
+   {sys_query_prealloc_size.name, (char*) &sys_query_prealloc_size,  SHOW_SYS},
+   {sys_range_alloc_block_size.name, (char*) &sys_range_alloc_block_size,
+    SHOW_SYS},
++  {sys_log_slow_rate_limit.name, (char*) &sys_log_slow_rate_limit, SHOW_SYS},
+   {sys_read_buff_size.name,   (char*) &sys_read_buff_size,	    SHOW_SYS},
+   {sys_readonly.name,         (char*) &sys_readonly,                SHOW_SYS},
+   {sys_read_rnd_buff_size.name,(char*) &sys_read_rnd_buff_size,	    SHOW_SYS},
+@@ -1639,6 +1663,57 @@
+   return (byte*) &(thd->variables.*offset);
+ }
+ 
++void sys_var_thd_microtime::set_default(THD *thd, enum_var_type type)
++{ /* GLOBAL resets to the option's built-in default; any other scope re-copies the global. */
++  pthread_mutex_lock(&LOCK_global_system_variables);  /* held for the global read as well */
++  if (type == OPT_GLOBAL) global_system_variables.*offset= (ulonglong) option_limits->def_value; else thd->variables.*offset= global_system_variables.*offset;
++  pthread_mutex_unlock(&LOCK_global_system_variables);
++}
++
++bool sys_var_thd_microtime::check(THD *thd, set_var *var)
++{
++  /* Save the SET value scaled by 1000000; only DECIMAL input goes through val_real(). */
++  var->save_result.ulonglong_value= (var->value->result_type() == DECIMAL_RESULT) ?
++    (ulonglong) (var->value->val_real() * 1000000) :
++    (ulonglong) var->value->val_int() * 1000000;
++  return 0;
++}
++
++byte *sys_var_thd_microtime::value_ptr(THD *thd, enum_var_type type,
++				   LEX_STRING *base)
++{
++  /* Hand back the address of the global field or of this connection's field. */
++  SV *scope= (type == OPT_GLOBAL) ? &global_system_variables : &thd->variables;
++  return (byte*) &(scope->*offset);
++}
++
++bool sys_var_thd_microtime::update(THD *thd,  set_var *var)
++{ /* Clamp the value saved by check() and store it in the requested scope. */
++  bool fixed= FALSE;
++  ulonglong tmp= var->save_result.ulonglong_value;
++  /* First cap at the configured maximum for this variable. */
++  if (tmp > max_system_variables.*offset)
++    tmp= max_system_variables.*offset;
++
++  if (option_limits)
++    tmp= getopt_ull_limit_value(tmp, option_limits, &fixed);
++
++  if (fixed)  /* presumably set by getopt_ull_limit_value() when it adjusted tmp — TODO confirm */
++    throw_bounds_warning(thd, option_limits->name, tmp);
++  
++  /* Write to the global copy under its mutex, or to this connection's copy. */
++  if (var->type == OPT_GLOBAL)
++  {
++    /* Lock is needed to make things safe on 32 bit systems */
++    pthread_mutex_lock(&LOCK_global_system_variables);
++    global_system_variables.*offset= tmp;
++    pthread_mutex_unlock(&LOCK_global_system_variables);
++  }
++  else
++    thd->variables.*offset= (ulonglong) tmp;
++
++  return 0;
++}
+ 
+ bool sys_var_thd_ha_rows::update(THD *thd, set_var *var)
+ {
+@@ -3483,6 +3558,191 @@
+ #endif
+ }
+ 
++/* Slow log stuff */
++
++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len)
++{
++  /* Walk the table up to its NULL-named end entry; stop early on a match. */
++  const struct msl_opts *entry= opts;
++
++  while (entry->name &&
++         my_strnncoll(&my_charset_latin1,
++                      (const uchar *)name, len,
++                      (const uchar *)entry->name, strlen(entry->name)))
++    entry++;
++  /* Either the matching entry or the end entry; its val is returned as-is. */
++  return entry->val;
++}
++
++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list, 
++                               const ulong none_val, const ulong invalid_val)
++{ /* ORs together the val of every comma-separated name found in names_list. */
++  const char *p, *e;
++  ulong val= none_val;
++  /* An empty list contributes nothing: the result is none_val. */
++  if (!*names_list)
++    return val;
++  /* p marks the start of the current name; e scans ahead for its end. */
++  for (p= e= names_list; ; e++)
++  {
++    ulong i;
++    /* Keep scanning until e sits on a ',' or on the terminating '\0'. */
++    if (*e != ',' && *e)
++      continue;
++    for (i=0; opts[i].name; i++)
++    {
++      if (!my_strnncoll(&my_charset_latin1,
++                        (const uchar *)p, e - p,
++                        (const uchar *)opts[i].name, strlen(opts[i].name)))
++      {
++        val= val | opts[i].val;
++        break;
++      }
++    }
++    if (opts[i].val == invalid_val)  /* i is the matched entry, or the NULL-named end entry if none matched */
++      return invalid_val;
++    if (!*e)  /* that was the last name in the list */
++      break;
++    p= e + 1;  /* next name starts right after the comma */
++  }
++  return val;
++}
++
++const char *msl_option_get_name(const struct msl_opts *opts, ulong val)
++{
++  /* Reverse lookup: first entry whose val equals val, before any NULL or empty name. */
++  const struct msl_opts *entry;
++  for (entry= opts; entry->name && entry->name[0]; entry++)
++    if (entry->val == val)
++      return entry->name;
++  return "*INVALID*";
++}
++
++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val)
++{
++  uint offset= 0;
++  /* buf receives a comma-separated list of the names whose val shares a bit with val. */
++  *buf= '\0';
++  /* snprintf() returns the untruncated length, so stop once the bound is reached. */
++  for (ulong i=0; opts[i].name && opts[i].name[0] &&
++                  offset < STRING_BUFFER_USUAL_SIZE - 1; i++)
++    if (opts[i].val & val)
++      offset+= snprintf(buf+offset, STRING_BUFFER_USUAL_SIZE - offset - 1,
++                        "%s%s", (offset ? "," : ""), opts[i].name);
++  return buf;
++}
++
++/****************************************************************************
++ Functions to handle log_slow_verbosity
++****************************************************************************/
++
++/* Based upon sys_var::check_enum() */
++
++bool sys_var_thd_msl_option::check(THD *thd, set_var *var)
++{
++  /* Resolve the option name given to SET; reject non-strings and unknown names. */
++  char buff[STRING_BUFFER_USUAL_SIZE];
++  String str(buff, sizeof(buff), &my_charset_latin1);
++  String *res= NULL;
++
++  if (var->value->result_type() == STRING_RESULT &&
++      (res= var->value->val_str(&str)))
++  {
++    var->save_result.ulong_value=
++      msl_option_resolve_by_name(this->opts, res->ptr(), res->length());
++    if (var->save_result.ulong_value != this->invalid_val)
++      return 0;
++  }
++
++  my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name);
++  return 1;
++}
++
++byte *sys_var_thd_msl_option::value_ptr(THD *thd, enum_var_type type,
++					LEX_STRING *base)
++{
++  /* Returns the option's name from the opts table, not the numeric value. */
++  const SV *scope= (type == OPT_GLOBAL) ? &global_system_variables :
++                                          &thd->variables;
++  const ulong current= scope->*offset;
++  return (byte *) msl_option_get_name(this->opts, current);
++}
++
++
++void sys_var_thd_msl_option::set_default(THD *thd, enum_var_type type)
++{ /* GLOBAL scope is reset to default_val; any other scope copies the global. */
++  if (type != OPT_GLOBAL)
++    thd->variables.*offset= global_system_variables.*offset;
++  else
++    global_system_variables.*offset= this->default_val;
++}
++
++
++bool sys_var_thd_msl_option::update(THD *thd, set_var *var)
++{
++  /* Copy the value saved by check() into the scope the SET addressed. */
++  SV &scope= (var->type == OPT_GLOBAL) ? global_system_variables :
++                                         thd->variables;
++  scope.*offset= var->save_result.ulong_value;
++  return 0;
++}
++
++/****************************************************************************
++ Functions to handle log_slow_filter
++****************************************************************************/
++  
++/* Based upon sys_var::check_enum() */
++
++bool sys_var_thd_msl_flag::check(THD *thd, set_var *var)
++{
++  char buff[2 * STRING_BUFFER_USUAL_SIZE];
++  String str(buff, sizeof(buff), &my_charset_latin1), *res;
++  /* Parse the comma-separated flag list given to SET into a bit mask. */
++  if (var->value->result_type() == STRING_RESULT)
++  {
++    /* c_ptr_safe(): the resolver scans to '\0', which ptr() does not guarantee. */
++    if (!(res=var->value->val_str(&str)) ||
++        (var->save_result.ulong_value=
++         msl_flag_resolve_by_name(this->flags, res->c_ptr_safe(), this->none_val,
++                                  this->invalid_val)) == this->invalid_val)
++      goto err;
++    return 0;
++  }
++
++err:
++  my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name);
++  return 1;
++}
++
++byte *sys_var_thd_msl_flag::value_ptr(THD *thd, enum_var_type type,
++					LEX_STRING *base)
++{ /* Renders the current flag mask as a comma-separated list of names. */
++  ulong val;
++  val= ((type == OPT_GLOBAL) ? global_system_variables.*offset :
++        thd->variables.*offset);
++  msl_flag_get_name(this->flags, this->flags_string, val);  /* NOTE(review): flags_string is a member, so all callers share one buffer — confirm callers are serialized */
++  return (byte *) this->flags_string;
++}
++
++
++void sys_var_thd_msl_flag::set_default(THD *thd, enum_var_type type)
++{ /* DEFAULT for GLOBAL is default_val; for any other scope it is the current global. */
++  ulong &target= (type == OPT_GLOBAL) ? global_system_variables.*offset :
++                                        thd->variables.*offset;
++  target= (type == OPT_GLOBAL) ? (ulong) this->default_val :
++                                 global_system_variables.*offset;
++}
++
++
++bool sys_var_thd_msl_flag::update(THD *thd, set_var *var)
++{
++  /* check() already left the parsed bit mask in save_result. */
++  ulong *const slot= (var->type == OPT_GLOBAL) ?
++    &(global_system_variables.*offset) : &(thd->variables.*offset);
++  *slot= var->save_result.ulong_value;
++  return 0;
++}
++
+ /****************************************************************************
+  Functions to handle table_type
+ ****************************************************************************/
+diff -r bb81fcdd7db2 sql/set_var.h
+--- a/sql/set_var.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/set_var.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -123,6 +123,7 @@
+ };
+ 
+ 
++
+ class sys_var_ulonglong_ptr :public sys_var
+ {
+ public:
+@@ -309,7 +310,6 @@
+   }
+ };
+ 
+-
+ class sys_var_thd_ulong :public sys_var_thd
+ {
+   sys_check_func check_func;
+@@ -329,6 +329,23 @@
+   byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
+ };
+ 
++class sys_var_thd_microtime :public sys_var_thd
++{ /* Per-connection ulonglong variable that check() scales by 1000000 on SET. */
++public:
++  ulonglong SV::*offset;  /* member of SV that holds the value */
++  sys_var_thd_microtime(const char *name_arg, ulonglong SV::*offset_arg)
++    :sys_var_thd(name_arg), offset(offset_arg)
++  {}
++  bool update(THD *thd, set_var *var);
++  void set_default(THD *thd, enum_var_type type);
++  SHOW_TYPE show_type() { return SHOW_MICROTIME; }  /* named show_type() like the sibling classes in this header */
++  byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
++  bool check(THD *thd, set_var *var);
++  bool check_update_type(Item_result type)
++  { /* a true result is returned for every type other than INT and DECIMAL */
++    return type != INT_RESULT && type != DECIMAL_RESULT;
++  }
++};
+ 
+ class sys_var_thd_ha_rows :public sys_var_thd
+ {
+@@ -346,7 +363,6 @@
+   SHOW_TYPE show_type() { return SHOW_HA_ROWS; }
+   byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
+ };
+-
+ 
+ class sys_var_thd_ulonglong :public sys_var_thd
+ {
+@@ -376,7 +392,6 @@
+   }
+ };
+ 
+-
+ class sys_var_thd_bool :public sys_var_thd
+ {
+ public:
+@@ -446,6 +461,66 @@
+                                             ulong *length);
+ };
+ 
++
++class sys_var_thd_msl_option :public sys_var_thd
++{ /* Variable set by a single option name; names map to ulong values through opts. */
++protected:
++  ulong SV::*offset;  /* member of SV that holds the value */
++  const ulong none_val;  /* NOTE(review): none of check/value_ptr/set_default/update reads this */
++  const ulong default_val;  /* value set_default() stores in the global copy */
++  const ulong invalid_val;  /* lookup result that check() rejects */
++  const struct msl_opts *opts;  /* name/value table used for both lookup directions */
++public:
++  sys_var_thd_msl_option(const char *name_arg, ulong SV::*offset_arg,
++                         const ulong none_val_arg,
++                         const ulong default_val_arg,
++                         const ulong invalid_val_arg,
++                         const struct msl_opts *opts_arg)
++    :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg),
++     default_val(default_val_arg), invalid_val(invalid_val_arg), 
++     opts(opts_arg)
++  {}
++  bool check(THD *thd, set_var *var);
++  SHOW_TYPE show_type() { return SHOW_CHAR; }  /* value_ptr() returns a name string */
++  bool check_update_type(Item_result type)
++  {
++    return type != STRING_RESULT;		/* Only accept strings */
++  }
++  void set_default(THD *thd, enum_var_type type);
++  bool update(THD *thd, set_var *var);
++  byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
++};
++
++
++class sys_var_thd_msl_flag :public sys_var_thd
++{ /* Variable set by a comma-separated list of flag names, stored as an OR-ed ulong. */
++protected:
++  char flags_string[2 * STRING_BUFFER_USUAL_SIZE];  /* buffer value_ptr() fills and returns */
++  ulong SV::*offset;  /* member of SV that holds the mask */
++  const ulong none_val;  /* starting mask handed to msl_flag_resolve_by_name() */
++  const ulong default_val;  /* value set_default() stores in the global copy */
++  const ulong invalid_val;  /* parse result that check() rejects */
++  const struct msl_opts *flags;  /* name/value table of the known flags */
++public:
++  sys_var_thd_msl_flag(const char *name_arg, ulong SV::*offset_arg, 
++                       const ulong none_val_arg, 
++                       const ulong default_val_arg, 
++                       const ulong invalid_val_arg,
++                       const struct msl_opts *flags_arg)
++    :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg),
++     default_val(default_val_arg), invalid_val(invalid_val_arg), 
++     flags(flags_arg)
++  {}
++  bool check(THD *thd, set_var *var);
++  SHOW_TYPE show_type() { return SHOW_CHAR; }  /* value_ptr() returns the rendered name list */
++  bool check_update_type(Item_result type)
++  {
++    return type != STRING_RESULT;		/* Only accept strings */
++  }
++  void set_default(THD *thd, enum_var_type type);
++  bool update(THD *thd, set_var *var);
++  byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
++};
+ 
+ class sys_var_thd_storage_engine :public sys_var_thd
+ {
+@@ -1042,3 +1117,11 @@
+ bool process_key_caches(int (* func) (const char *name, KEY_CACHE *));
+ void delete_elements(I_List<NAMED_LIST> *list,
+ 		     void (*free_element)(const char*, gptr));
++
++/* Slow log functions */
++
++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len);
++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list, 
++                               const ulong none_val, const ulong invalid_val);
++const char *msl_option_get_name(const struct msl_opts *opts, ulong val);
++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val);
+diff -r bb81fcdd7db2 sql/slave.cc
+--- a/sql/slave.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/slave.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -2925,6 +2925,12 @@
+     + MAX_LOG_EVENT_HEADER;  /* note, incr over the global not session var */
+   thd->slave_thread = 1;
+   set_slave_thread_options(thd);
++  if (opt_log_slow_slave_statements)
++  {
++    thd->enable_slow_log= TRUE;
++    /* Slave thread is excluded from rate limiting the slow log writes. */
++    thd->write_to_slow_log= TRUE;
++  }
+   thd->client_capabilities = CLIENT_LOCAL_FILES;
+   thd->real_id=pthread_self();
+   pthread_mutex_lock(&LOCK_thread_count);
+diff -r bb81fcdd7db2 sql/sql_cache.cc
+--- a/sql/sql_cache.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_cache.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -1334,6 +1334,7 @@
+ 
+   thd->limit_found_rows = query->found_rows();
+   thd->status_var.last_query_cost= 0.0;
++  thd->query_plan_flags|= QPLAN_QC;
+ 
+   BLOCK_UNLOCK_RD(query_block);
+   DBUG_RETURN(1);				// Result sent to client
+@@ -1341,6 +1342,7 @@
+ err_unlock:
+   STRUCT_UNLOCK(&structure_guard_mutex);
+ err:
++  thd->query_plan_flags|= QPLAN_QC_NO;
+   DBUG_RETURN(0);				// Query was not cached
+ }
+ 
+diff -r bb81fcdd7db2 sql/sql_class.cc
+--- a/sql/sql_class.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_class.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -174,7 +174,7 @@
+    lock_id(&main_lock_id),
+    user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0),
+    transaction_rollback_request(0), is_fatal_sub_stmt_error(0),
+-   rand_used(0), time_zone_used(0),
++   rand_used(0), time_zone_used(0), user_timer(0),
+    last_insert_id_used(0), last_insert_id_used_bin_log(0), insert_id_used(0),
+    clear_next_insert_id(0), in_lock_tables(0), bootstrap(0),
+    derived_tables_processing(FALSE), spcont(NULL),
+@@ -2198,6 +2198,12 @@
+   backup->cuted_fields=     cuted_fields;
+   backup->client_capabilities= client_capabilities;
+   backup->savepoints= transaction.savepoints;
++  backup->innodb_io_reads= innodb_io_reads;
++  backup->innodb_io_read= innodb_io_read;
++  backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer;
++  backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer;
++  backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer;
++  backup->innodb_page_access= innodb_page_access;
+ 
+   if (!lex->requires_prelocking() || is_update_query(lex->sql_command))
+     options&= ~OPTION_BIN_LOG;
+@@ -2214,7 +2220,13 @@
+   sent_row_count= 0;
+   cuted_fields= 0;
+   transaction.savepoints= 0;
+-
++  innodb_io_reads= 0;
++  innodb_io_read= 0;
++  innodb_io_reads_wait_timer= 0;
++  innodb_lock_que_wait_timer= 0;
++  innodb_innodb_que_wait_timer= 0;
++  innodb_page_access= 0;
++  
+   /* Surpress OK packets in case if we will execute statements */
+   net.no_send_ok= TRUE;
+ }
+@@ -2267,6 +2279,12 @@
+   */
+   examined_row_count+= backup->examined_row_count;
+   cuted_fields+=       backup->cuted_fields;
++  innodb_io_reads+= backup->innodb_io_reads;
++  innodb_io_read+= backup->innodb_io_read;
++  innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer;
++  innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer;
++  innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer;
++  innodb_page_access+= backup->innodb_page_access;
+ }
+ 
+ 
+diff -r bb81fcdd7db2 sql/sql_class.h
+--- a/sql/sql_class.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_class.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -43,6 +43,13 @@
+ extern char internal_table_name[2];
+ extern char empty_c_string[1];
+ extern const char **errmesg;
++#ifdef __cplusplus
++__BEGIN_DECLS
++#endif
++extern ulonglong frequency;
++#ifdef __cplusplus
++__END_DECLS
++#endif
+ 
+ #define TC_LOG_PAGE_SIZE   8192
+ #define TC_LOG_MIN_SIZE    (3*TC_LOG_PAGE_SIZE)
+@@ -314,7 +321,7 @@
+   bool write(THD *thd, enum enum_server_command command,
+ 	     const char *format, ...) ATTRIBUTE_FORMAT(printf, 4, 5);
+   bool write(THD *thd, const char *query, uint query_length,
+-	     time_t query_start=0);
++	     time_t query_start=0, ulonglong query_start_timer=0);
+   bool write(Log_event* event_info); // binary log write
+   bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
+ 
+@@ -520,13 +527,14 @@
+   ulong auto_increment_increment, auto_increment_offset;
+   ulong bulk_insert_buff_size;
+   ulong join_buff_size;
+-  ulong long_query_time;
++  ulonglong long_query_time;
+   ulong max_allowed_packet;
+   ulong max_error_count;
+   ulong max_length_for_sort_data;
+   ulong max_sort_length;
+   ulong max_tmp_tables;
+   ulong max_insert_delayed_threads;
++  ulong min_examined_row_limit;
+   ulong multi_range_count;
+   ulong myisam_repair_threads;
+   ulong myisam_sort_buff_size;
+@@ -541,10 +549,13 @@
+   ulong optimizer_search_depth;
+   ulong preload_buff_size;
+   ulong query_cache_type;
++  ulong log_slow_rate_limit;
+   ulong read_buff_size;
+   ulong read_rnd_buff_size;
+   ulong div_precincrement;
+   ulong sortbuff_size;
++  ulong log_slow_filter;
++  ulong log_slow_verbosity;
+   ulong table_type;
+   ulong tx_isolation;
+   ulong completion_type;
+@@ -1111,6 +1122,12 @@
+   uint in_sub_stmt;
+   bool enable_slow_log, insert_id_used, clear_next_insert_id;
+   bool last_insert_id_used;
++  ulong      innodb_io_reads;
++  ulonglong  innodb_io_read;
++  ulong      innodb_io_reads_wait_timer;
++  ulong      innodb_lock_que_wait_timer;
++  ulong      innodb_innodb_que_wait_timer;
++  ulong      innodb_page_access;
+   my_bool no_send_ok;
+   SAVEPOINT *savepoints;
+ };
+@@ -1167,6 +1184,11 @@
+ class THD :public Statement,
+            public Open_tables_state
+ {
++private:
++  inline ulonglong query_start_timer() { return start_timer; }
++  inline void set_timer()    { if (user_timer) start_timer=timer_after_lock=user_timer; else timer_after_lock=my_timer(&start_timer, frequency); }
++  inline void end_timer()    { my_timer(&start_timer, frequency); }
++  inline void lock_timer()   { my_timer(&timer_after_lock, frequency); }
+ public:
+   /*
+     Constant for THD::where initialization in the beginning of every query.
+@@ -1272,10 +1294,24 @@
+   */
+   const char *where;
+   time_t     start_time,time_after_lock,user_time;
++  ulonglong start_timer,timer_after_lock, user_timer;
+   time_t     connect_time,thr_create_time; // track down slow pthread_create
+   thr_lock_type update_lock_default;
+   Delayed_insert *di;
+ 
++  bool       write_to_slow_log;
++
++  bool       innodb_was_used;
++  ulong      innodb_io_reads;
++  ulonglong  innodb_io_read;
++  ulong      innodb_io_reads_wait_timer;
++  ulong      innodb_lock_que_wait_timer;
++  ulong      innodb_innodb_que_wait_timer;
++  ulong      innodb_page_access;
++
++  ulong      query_plan_flags;
++  ulong      query_plan_fsort_passes;
++  
+   /* <> 0 if we are inside of trigger or stored function. */
+   uint in_sub_stmt;
+ 
+@@ -1661,11 +1697,11 @@
+       sql_print_information("time() failed with %d", errno);
+   }
+ 
+-  inline time_t query_start() { query_start_used=1; return start_time; }
+-  inline void	set_time()    { if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }}
+-  inline void	end_time()    { safe_time(&start_time); }
+-  inline void	set_time(time_t t) { time_after_lock=start_time=user_time=t; }
+-  inline void	lock_time()   { safe_time(&time_after_lock); }
++  inline time_t query_start() { query_start_timer(); query_start_used=1; return start_time; }
++  inline void set_time()    { set_timer(); if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }}
++  inline void	end_time()    { end_timer(); safe_time(&start_time); }
++  inline void	set_time(time_t t) { set_timer(); time_after_lock=start_time=user_time=t; }
++  inline void	lock_time()   { lock_timer(); safe_time(&time_after_lock); }
+   inline void	insert_id(ulonglong id_arg)
+   {
+     last_insert_id= id_arg;
+diff -r bb81fcdd7db2 sql/sql_parse.cc
+--- a/sql/sql_parse.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_parse.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -20,6 +20,7 @@
+ #include <m_ctype.h>
+ #include <myisam.h>
+ #include <my_dir.h>
++#include <my_time.h>
+ 
+ #ifdef HAVE_INNOBASE_DB
+ #include "ha_innodb.h"
+@@ -1180,6 +1181,15 @@
+     my_net_set_read_timeout(net, thd->variables.net_read_timeout);
+     my_net_set_write_timeout(net, thd->variables.net_write_timeout);
+ 
++    /*
++      If rate limiting of slow log writes is enabled, decide whether to log this
++      new thread's queries or not. Uses extremely simple algorithm. :)
++    */
++    thd->write_to_slow_log= FALSE;
++    if (thd->variables.log_slow_rate_limit <= 1 || 
++        (thd->thread_id % thd->variables.log_slow_rate_limit) == 0)
++         thd->write_to_slow_log= TRUE;
++
+     while (!net->error && net->vio != 0 &&
+            !(thd->killed == THD::KILL_CONNECTION))
+     {
+@@ -2255,26 +2265,52 @@
+     return;                                     // Don't set time for sub stmt
+ 
+   start_of_query= thd->start_time;
++  ulonglong start_of_query_timer= thd->start_timer;
+   thd->end_time();				// Set start time
++
++
++  /* Follow the slow log filter configuration. */
++  if (thd->variables.log_slow_filter != SLOG_F_NONE && 
++      (!(thd->variables.log_slow_filter & thd->query_plan_flags) ||
++       ((thd->variables.log_slow_filter & SLOG_F_QC_NO) && 
++        (thd->query_plan_flags & QPLAN_QC))))
++    return;
++
++  /*
++    Low long_query_time value most likely means user is debugging stuff and even 
++    though some thread's queries are not supposed to be logged b/c of the rate 
++    limit, if one of them takes long enough (>= 1 second) it will be sensible 
++    to make an exception and write to slow log anyway.
++  */
++  if (thd->write_to_slow_log != TRUE && thd->variables.long_query_time < 1000000 &&
++      (ulong) (thd->start_timer - thd->timer_after_lock) >= 1000000)
++    thd->write_to_slow_log= TRUE;
++
++  /* Do not log this thread's queries due to rate limiting. */
++  if (thd->write_to_slow_log != TRUE)
++    return;
+ 
+   /*
+     Do not log administrative statements unless the appropriate option is
+     set; do not log into slow log if reading from backup.
+   */
+-  if (thd->enable_slow_log && !thd->user_time)
++  if (thd->enable_slow_log &&
++      (!thd->user_time || (thd->slave_thread && opt_log_slow_slave_statements))
++     )
+   {
+     thd->proc_info="logging slow query";
+ 
+-    if ((ulong) (thd->start_time - thd->time_after_lock) >
+-	thd->variables.long_query_time ||
+-        (thd->server_status &
+-	  (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
+-        opt_log_queries_not_using_indexes &&
+-        /* == SQLCOM_END unless this is a SHOW command */
+-        thd->lex->orig_sql_command == SQLCOM_END)
++    if (((ulong) (thd->start_timer - thd->timer_after_lock) >=
++         thd->variables.long_query_time ||
++         (thd->server_status &
++              (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
++         opt_log_queries_not_using_indexes &&
++         /* == SQLCOM_END unless this is a SHOW command */
++         thd->lex->orig_sql_command == SQLCOM_END) &&
++        thd->examined_row_count >= thd->variables.min_examined_row_limit)
+     {
+       thd->status_var.long_query_count++;
+-      mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query);
++      mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query, start_of_query_timer);
+     }
+   }
+ }
+@@ -5949,6 +5985,15 @@
+     thd->total_warn_count=0;			// Warnings for this query
+     thd->rand_used= 0;
+     thd->sent_row_count= thd->examined_row_count= 0;
++    thd->innodb_was_used= FALSE;
++    thd->innodb_io_reads= 0;
++    thd->innodb_io_read= 0;
++    thd->innodb_io_reads_wait_timer= 0;
++    thd->innodb_lock_que_wait_timer= 0;
++    thd->innodb_innodb_que_wait_timer= 0;
++    thd->innodb_page_access= 0;
++    thd->query_plan_flags= QPLAN_NONE;
++    thd->query_plan_fsort_passes= 0;
+   }
+   DBUG_VOID_RETURN;
+ }
+diff -r bb81fcdd7db2 sql/sql_select.cc
+--- a/sql/sql_select.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_select.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -6198,8 +6198,11 @@
+ 	  {
+ 	    join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+ 	    if (statistics)
++	    {
+ 	      statistic_increment(join->thd->status_var.select_scan_count,
+ 				  &LOCK_status);
++				join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
++      }
+ 	  }
+ 	}
+ 	else
+@@ -6214,8 +6217,11 @@
+ 	  {
+ 	    join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+ 	    if (statistics)
++	    {
+ 	      statistic_increment(join->thd->status_var.select_full_join_count,
+ 				  &LOCK_status);
++				join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
++      }
+ 	  }
+ 	}
+ 	if (!table->no_keyread)
+@@ -9265,6 +9271,7 @@
+ 		      (ulong) rows_limit,test(group)));
+ 
+   statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status);
++  thd->query_plan_flags|= QPLAN_TMP_TABLE;
+ 
+   if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
+     temp_pool_slot = bitmap_set_next(&temp_pool);
+@@ -10125,6 +10132,7 @@
+   }
+   statistic_increment(table->in_use->status_var.created_tmp_disk_tables,
+ 		      &LOCK_status);
++	table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
+   table->s->db_record_offset= 1;
+   DBUG_RETURN(0);
+  err:
+diff -r bb81fcdd7db2 sql/sql_show.cc
+--- a/sql/sql_show.cc	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_show.cc	Mon Sep 08 16:38:46 2008 -0700
+@@ -1531,6 +1531,9 @@
+           value= ((char *) status_var + (ulonglong) value);
+         case SHOW_LONGLONG:
+           end= longlong10_to_str(*(longlong*) value, buff, 10);
++          break;
++        case SHOW_MICROTIME:
++          end= buff + sprintf(buff, "%.6f", (*(ulonglong*)value) / 1000000.0);
+           break;
+         case SHOW_HA_ROWS:
+           end= longlong10_to_str((longlong) *(ha_rows*) value, buff, 10);
+diff -r bb81fcdd7db2 sql/structs.h
+--- a/sql/structs.h	Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/structs.h	Mon Sep 08 16:38:46 2008 -0700
+@@ -168,8 +168,8 @@
+ enum SHOW_TYPE
+ {
+   SHOW_UNDEF,
+-  SHOW_LONG, SHOW_LONGLONG, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR, 
+-  SHOW_DOUBLE_STATUS,
++  SHOW_LONG, SHOW_LONGLONG, SHOW_MICROTIME, SHOW_INT, SHOW_CHAR, 
++  SHOW_CHAR_PTR, SHOW_DOUBLE_STATUS, 
+   SHOW_BOOL, SHOW_MY_BOOL, SHOW_OPENTABLES, SHOW_STARTTIME, SHOW_QUESTION,
+   SHOW_LONG_CONST, SHOW_INT_CONST, SHOW_HAVE, SHOW_SYS, SHOW_HA_ROWS,
+   SHOW_VARS,
diff --git a/mysql-show_patches.patch b/mysql-show_patches.patch
new file mode 100644
index 0000000..1e63c12
--- /dev/null
+++ b/mysql-show_patches.patch
@@ -0,0 +1,294 @@
+diff -r a36b98c5e2e3 patch_info/show_patches.info
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/show_patches.info	Mon Sep 08 16:38:33 2008 -0700
+@@ -0,0 +1,6 @@
++File=show_patches.patch
++Name=SHOW PATCHES
++Version=1.0
++Author=Jeremy Cole
++License=N/A
++Comment
+diff -r a36b98c5e2e3 sql/Makefile.am
+--- a/sql/Makefile.am	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/Makefile.am	Mon Sep 08 16:38:33 2008 -0700
+@@ -116,7 +116,7 @@
+ 			-DSHAREDIR="\"$(MYSQLSHAREdir)\"" \
+ 			@DEFS@
+ 
+-BUILT_SOURCES =		sql_yacc.cc sql_yacc.h lex_hash.h
++BUILT_SOURCES =		sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h
+ EXTRA_DIST =		$(BUILT_SOURCES) nt_servc.cc nt_servc.h \
+ 			message.mc examples/CMakeLists.txt CMakeLists.txt \
+ 			udf_example.c udf_example.def
+@@ -172,6 +172,8 @@
+ udf_example_la_SOURCES= udf_example.c
+ udf_example_la_LDFLAGS= -module -rpath $(pkglibdir)
+ 
++patch_info.h: patch_info.h.pl
++	$(PERL) $< > $@
+ 
+ # Don't update the files from bitkeeper
+ %::SCCS/s.%
+diff -r a36b98c5e2e3 sql/Makefile.in
+--- a/sql/Makefile.in	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/Makefile.in	Mon Sep 08 16:38:33 2008 -0700
+@@ -556,7 +556,7 @@
+ gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS)
+ mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc
+ mysql_tzinfo_to_sql_LDADD = @MYSQLD_EXTRA_LDFLAGS@ $(LDADD) $(CXXLDFLAGS)
+-BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h
++BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h
+ EXTRA_DIST = $(BUILT_SOURCES) nt_servc.cc nt_servc.h \
+ 			message.mc examples/CMakeLists.txt CMakeLists.txt \
+ 			udf_example.c udf_example.def
+@@ -1230,6 +1230,9 @@
+ 		./gen_lex_hash$(EXEEXT) > $@-t
+ 		$(MV) $@-t $@
+ 
++patch_info.h: patch_info.h.pl
++	$(PERL) $< > $@
++
+ # Don't update the files from bitkeeper
+ %::SCCS/s.%
+ # Tell versions [3.59,3.63) of GNU make to not export all variables.
+diff -r a36b98c5e2e3 sql/lex.h
+--- a/sql/lex.h	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/lex.h	Mon Sep 08 16:38:33 2008 -0700
+@@ -359,6 +359,7 @@
+   { "PACK_KEYS",	SYM(PACK_KEYS_SYM)},
+   { "PARTIAL",		SYM(PARTIAL)},
+   { "PASSWORD",		SYM(PASSWORD)},
++  { "PATCHES",		SYM(PATCHES)},
+   { "PHASE",            SYM(PHASE_SYM)},
+   { "POINT",		SYM(POINT_SYM)},
+   { "POLYGON",		SYM(POLYGON)},
+diff -r a36b98c5e2e3 sql/mysql_priv.h
+--- a/sql/mysql_priv.h	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/mysql_priv.h	Mon Sep 08 16:38:33 2008 -0700
+@@ -948,6 +948,7 @@
+ int mysqld_show_status(THD *thd);
+ int mysqld_show_variables(THD *thd,const char *wild);
+ bool mysqld_show_storage_engines(THD *thd);
++bool mysqld_show_patches(THD *thd);
+ bool mysqld_show_privileges(THD *thd);
+ bool mysqld_show_column_types(THD *thd);
+ bool mysqld_help (THD *thd, const char *text);
+diff -r a36b98c5e2e3 sql/patch_info.h.pl
+--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
++++ b/sql/patch_info.h.pl	Mon Sep 08 16:38:33 2008 -0700
+@@ -0,0 +1,65 @@
++# Generates patch_info.h: one C initializer row per file found in ../patch_info.
++use strict;
++
++my $patch_info_path = '../patch_info';
++my $output = '';
++
++if (opendir(PATCH_DIR, $patch_info_path))
++{
++	# Skip dot entries before opening them; sort so the generated table is stable.
++	foreach my $file (sort grep { !/^\./ } readdir(PATCH_DIR))
++	{
++		open(PATCH_FILE, "<$patch_info_path/$file") || die("Unable to open $patch_info_path/$file ($!)");
++		my %fields;
++
++		while (<PATCH_FILE>)
++		{
++			chomp;
++			# Limit of 2 keeps any '=' inside the value; a bare key gets ''.
++			my ($key, $value) = split(/\s*=\s*/, $_, 2);
++			next unless (defined($key) && length($key));
++			$value = '' unless defined($value);
++			$value =~ s/(["\\])/\\$1/g;	# keep the emitted C string literal valid
++			$fields{lc($key)} = $value;
++		}
++		close(PATCH_FILE);
++
++		$output .= "{\"$fields{'file'}\", \"$fields{'name'}\", \"$fields{'version'}\", \"$fields{'author'}\", \"$fields{'license'}\",\"$fields{'comment'}\"},\n";
++	}
++	closedir(PATCH_DIR);
++}
++
++print <<HEADER;
++
++/* Copyright (C) 2002-2006 MySQL AB
++
++   This program is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; version 2 of the License.
++
++   This program is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++   GNU General Public License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with this program; if not, write to the Free Software
++   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
++
++#ifdef USE_PRAGMA_INTERFACE
++#pragma interface			/* gcc class implementation */
++#endif
++
++struct patch {
++	const char *file;
++	const char *name;
++	const char *version;
++	const char *author;
++	const char *license;
++	const char *comment;
++}patches[] = {
++$output
++{NULL, NULL, NULL, NULL, NULL, NULL}
++};	
++
++HEADER
+diff -r a36b98c5e2e3 sql/sp_head.cc
+--- a/sql/sp_head.cc	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sp_head.cc	Mon Sep 08 16:38:33 2008 -0700
+@@ -188,6 +188,7 @@
+   case SQLCOM_SHOW_MUTEX_STATUS:
+   case SQLCOM_SHOW_NEW_MASTER:
+   case SQLCOM_SHOW_OPEN_TABLES:
++  case SQLCOM_SHOW_PATCHES:
+   case SQLCOM_SHOW_PRIVILEGES:
+   case SQLCOM_SHOW_PROCESSLIST:
+   case SQLCOM_SHOW_SLAVE_HOSTS:
+diff -r a36b98c5e2e3 sql/sql_lex.h
+--- a/sql/sql_lex.h	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_lex.h	Mon Sep 08 16:38:33 2008 -0700
+@@ -81,9 +81,9 @@
+   SQLCOM_SHOW_SLAVE_HOSTS, SQLCOM_DELETE_MULTI, SQLCOM_UPDATE_MULTI,
+   SQLCOM_SHOW_BINLOG_EVENTS, SQLCOM_SHOW_NEW_MASTER, SQLCOM_DO,
+   SQLCOM_SHOW_WARNS, SQLCOM_EMPTY_QUERY, SQLCOM_SHOW_ERRORS,
+-  SQLCOM_SHOW_COLUMN_TYPES, SQLCOM_SHOW_STORAGE_ENGINES, SQLCOM_SHOW_PRIVILEGES,
+-  SQLCOM_HELP, SQLCOM_CREATE_USER, SQLCOM_DROP_USER, SQLCOM_RENAME_USER,
+-  SQLCOM_REVOKE_ALL, SQLCOM_CHECKSUM,
++  SQLCOM_SHOW_COLUMN_TYPES, SQLCOM_SHOW_PATCHES, SQLCOM_SHOW_STORAGE_ENGINES, 
++	SQLCOM_SHOW_PRIVILEGES, SQLCOM_HELP, SQLCOM_CREATE_USER, SQLCOM_DROP_USER, 
++	SQLCOM_RENAME_USER, SQLCOM_REVOKE_ALL, SQLCOM_CHECKSUM,
+   SQLCOM_CREATE_PROCEDURE, SQLCOM_CREATE_SPFUNCTION, SQLCOM_CALL,
+   SQLCOM_DROP_PROCEDURE, SQLCOM_ALTER_PROCEDURE,SQLCOM_ALTER_FUNCTION,
+   SQLCOM_SHOW_CREATE_PROC, SQLCOM_SHOW_CREATE_FUNC,
+diff -r a36b98c5e2e3 sql/sql_parse.cc
+--- a/sql/sql_parse.cc	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_parse.cc	Mon Sep 08 16:38:33 2008 -0700
+@@ -3826,6 +3826,9 @@
+     break;
+   case SQLCOM_SHOW_STORAGE_ENGINES:
+     res= mysqld_show_storage_engines(thd);
++    break;
++  case SQLCOM_SHOW_PATCHES:
++    res= mysqld_show_patches(thd);
+     break;
+   case SQLCOM_SHOW_PRIVILEGES:
+     res= mysqld_show_privileges(thd);
+diff -r a36b98c5e2e3 sql/sql_prepare.cc
+--- a/sql/sql_prepare.cc	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_prepare.cc	Mon Sep 08 16:38:33 2008 -0700
+@@ -1790,6 +1790,7 @@
+   case SQLCOM_SHOW_DATABASES:
+   case SQLCOM_SHOW_PROCESSLIST:
+   case SQLCOM_SHOW_STORAGE_ENGINES:
++  case SQLCOM_SHOW_PATCHES:
+   case SQLCOM_SHOW_PRIVILEGES:
+   case SQLCOM_SHOW_COLUMN_TYPES:
+   case SQLCOM_SHOW_STATUS:
+diff -r a36b98c5e2e3 sql/sql_show.cc
+--- a/sql/sql_show.cc	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_show.cc	Mon Sep 08 16:38:33 2008 -0700
+@@ -22,6 +22,7 @@
+ #include "sp.h"
+ #include "sp_head.h"
+ #include "sql_trigger.h"
++#include "patch_info.h"
+ #include <my_dir.h>
+ 
+ #ifdef HAVE_BERKELEY_DB
+@@ -45,6 +46,48 @@
+ static int
+ view_store_create_info(THD *thd, TABLE_LIST *table, String *buff);
+ static bool schema_table_store_record(THD *thd, TABLE *table);
++
++
++/***************************************************************************
++** List patches built into this release
++***************************************************************************/
++
++bool mysqld_show_patches(THD *thd)
++{
++  /* Sends one result row per entry of the patches[] table from patch_info.h. */
++  Protocol *const proto= thd->protocol;
++  List<Item> columns;
++  DBUG_ENTER("mysqld_show_patches");
++
++  /* Result-set column headers. */
++  columns.push_back(new Item_empty_string("File", 255));
++  columns.push_back(new Item_empty_string("Name", 50));
++  columns.push_back(new Item_empty_string("Version", 10));
++  columns.push_back(new Item_empty_string("Author", 50));
++  columns.push_back(new Item_empty_string("License", 50));
++  columns.push_back(new Item_empty_string("Comment", 32));
++
++  if (proto->send_fields(&columns,
++                         Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
++    DBUG_RETURN(TRUE);
++
++  /* Iteration stops at the first entry whose file pointer is NULL. */
++  for (const struct patch *row= patches; row->file; row++)
++  {
++    proto->prepare_for_resend();
++    proto->store(row->file, system_charset_info);
++    proto->store(row->name, system_charset_info);
++    proto->store(row->version, system_charset_info);
++    proto->store(row->author, system_charset_info);
++    proto->store(row->license, system_charset_info);
++    proto->store(row->comment, system_charset_info);
++    if (proto->write())
++      DBUG_RETURN(TRUE);
++  }
++
++  send_eof(thd);
++  DBUG_RETURN(FALSE);
++}
+ 
+ 
+ /***************************************************************************
+diff -r a36b98c5e2e3 sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy	Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_yacc.yy	Mon Sep 08 16:38:33 2008 -0700
+@@ -816,6 +816,7 @@
+ %token  PACK_KEYS_SYM
+ %token  PARTIAL
+ %token  PASSWORD
++%token  PATCHES
+ %token  PARAM_MARKER
+ %token  PHASE_SYM
+ %token  POINTFROMTEXT
+@@ -7948,7 +7949,7 @@
+ 	;
+ 
+ show_param:
+-         DATABASES wild_and_where
++        DATABASES wild_and_where
+          {
+            LEX *lex= Lex;
+            lex->sql_command= SQLCOM_SELECT;
+@@ -8048,6 +8049,10 @@
+ 	    LEX *lex=Lex;
+ 	    lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES;
+ 	    WARN_DEPRECATED("SHOW TABLE TYPES", "SHOW [STORAGE] ENGINES");
++	  }
++	| PATCHES
++	  {
++	    Lex->sql_command= SQLCOM_SHOW_PATCHES;
+ 	  }
+ 	| opt_storage ENGINES_SYM
+ 	  {
+@@ -9466,6 +9471,7 @@
+ 	| PACK_KEYS_SYM		{}
+ 	| PARTIAL		{}
+ 	| PASSWORD		{}
++	| PATCHES {}
+         | PHASE_SYM             {}
+ 	| POINT_SYM		{}
+ 	| POLYGON		{}
diff --git a/mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch b/mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch
new file mode 100644
index 0000000..b20ad8f
--- /dev/null
+++ b/mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch
@@ -0,0 +1,1302 @@
+diff -r 72a897774060 innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c	Mon Sep 08 16:40:20 2008 -0700
++++ b/innobase/buf/buf0buf.c	Mon Sep 08 16:40:27 2008 -0700
+@@ -546,6 +546,19 @@
+ 	mutex_create(&(buf_pool->mutex));
+ 	mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL);
+ 
++	mutex_create(&(buf_pool->flush_list_mutex));
++	mutex_create(&(buf_pool->LRU_mutex));
++	mutex_create(&(buf_pool->free_mutex));
++	mutex_create(&(buf_pool->hash_mutex));
++	mutex_set_level(&(buf_pool->flush_list_mutex), SYNC_NO_ORDER_CHECK);
++	mutex_set_level(&(buf_pool->LRU_mutex), SYNC_NO_ORDER_CHECK);
++	mutex_set_level(&(buf_pool->free_mutex), SYNC_NO_ORDER_CHECK);
++	mutex_set_level(&(buf_pool->hash_mutex), SYNC_NO_ORDER_CHECK);
++
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->flush_list_mutex));
++	mutex_enter(&(buf_pool->free_mutex));
++	mutex_enter(&(buf_pool->hash_mutex));
+ 	mutex_enter(&(buf_pool->mutex));
+ 
+ 	if (srv_use_awe) {
+@@ -718,6 +731,10 @@
+ 		block->in_free_list = TRUE;
+ 	}
+ 
++	mutex_exit(&(buf_pool->LRU_mutex));
++	mutex_exit(&(buf_pool->flush_list_mutex));
++	mutex_exit(&(buf_pool->free_mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 	mutex_exit(&(buf_pool->mutex));
+ 
+ 	if (srv_use_adaptive_hash_indexes) {
+@@ -854,12 +871,12 @@
+ 	if (buf_pool->freed_page_clock >= block->freed_page_clock 
+ 				+ 1 + (buf_pool->curr_size / 4)) {
+ 
+-		mutex_enter(&buf_pool->mutex);
++		mutex_enter(&(buf_pool->LRU_mutex));
+ 		/* There has been freeing activity in the LRU list:
+ 		best to move to the head of the LRU list */
+ 
+ 		buf_LRU_make_block_young(block);
+-		mutex_exit(&buf_pool->mutex);
++		mutex_exit(&(buf_pool->LRU_mutex));
+ 	}
+ }
+ 
+@@ -875,7 +892,7 @@
+ {
+ 	buf_block_t*	block;
+ 	
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
+ 
+ 	block = buf_block_align(frame);
+ 
+@@ -883,7 +900,7 @@
+ 
+ 	buf_LRU_make_block_young(block);
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
+ }
+ 
+ /************************************************************************
+@@ -894,7 +911,7 @@
+ /*===========*/
+ 	buf_block_t*	block)	/* in, own: block to be freed */
+ {
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->free_mutex));
+ 
+ 	mutex_enter(&block->mutex);
+ 
+@@ -904,7 +921,7 @@
+ 
+ 	mutex_exit(&block->mutex);
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->free_mutex));
+ }
+ 
+ /*************************************************************************
+@@ -945,11 +962,11 @@
+ {
+ 	buf_block_t*	block;
+ 
+-	mutex_enter_fast(&(buf_pool->mutex));
++	mutex_enter_fast(&(buf_pool->hash_mutex));
+ 
+ 	block = buf_page_hash_get(space, offset);
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	return(block);
+ }
+@@ -966,7 +983,7 @@
+ {
+ 	buf_block_t*	block;
+ 
+-	mutex_enter_fast(&(buf_pool->mutex));
++	mutex_enter_fast(&(buf_pool->hash_mutex));
+ 
+ 	block = buf_page_hash_get(space, offset);
+ 
+@@ -974,7 +991,7 @@
+ 		block->check_index_page_at_flush = FALSE;
+ 	}
+ 	
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ }
+ 
+ /************************************************************************
+@@ -993,7 +1010,7 @@
+ 	buf_block_t*	block;
+ 	ibool		is_hashed;
+ 
+-	mutex_enter_fast(&(buf_pool->mutex));
++	mutex_enter_fast(&(buf_pool->hash_mutex));
+ 
+ 	block = buf_page_hash_get(space, offset);
+ 
+@@ -1003,7 +1020,7 @@
+ 		is_hashed = block->is_hashed;
+ 	}
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	return(is_hashed);
+ }
+@@ -1045,7 +1062,7 @@
+ {
+ 	buf_block_t*	block;
+ 
+-	mutex_enter_fast(&(buf_pool->mutex));
++	mutex_enter_fast(&(buf_pool->hash_mutex));
+ 
+ 	block = buf_page_hash_get(space, offset);
+ 
+@@ -1053,7 +1070,7 @@
+ 		block->file_page_was_freed = TRUE;
+ 	}
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	return(block);
+ }
+@@ -1074,7 +1091,7 @@
+ {
+ 	buf_block_t*	block;
+ 
+-	mutex_enter_fast(&(buf_pool->mutex));
++	mutex_enter_fast(&(buf_pool->hash_mutex));
+ 
+ 	block = buf_page_hash_get(space, offset);
+ 
+@@ -1082,7 +1099,7 @@
+ 		block->file_page_was_freed = FALSE;
+ 	}
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	return(block);
+ }
+@@ -1154,26 +1171,33 @@
+ 	buf_pool->n_page_gets++;
+ loop:
+ 	block = NULL;
+-	mutex_enter_fast(&(buf_pool->mutex));
++	// mutex_enter_fast(&(buf_pool->mutex));
+ 	
+ 	if (guess) {
+ 		block = buf_block_align(guess);
+ 
++		mutex_enter(&block->mutex);
+ 		if ((offset != block->offset) || (space != block->space)
+ 				|| (block->state != BUF_BLOCK_FILE_PAGE)) {
+ 
++			mutex_exit(&block->mutex);
+ 			block = NULL;
+ 		}
+ 	}
+ 
+ 	if (block == NULL) {
++		mutex_enter_fast(&(buf_pool->hash_mutex));
+ 		block = buf_page_hash_get(space, offset);
++		if(block) {
++			mutex_enter(&block->mutex);
++		}
++		mutex_exit(&(buf_pool->hash_mutex));
+ 	}
+ 
+ 	if (block == NULL) {
+ 		/* Page not in buf_pool: needs to be read from file */
+ 
+-		mutex_exit(&(buf_pool->mutex));
++		// mutex_exit(&(buf_pool->mutex));
+ 
+ 		if (mode == BUF_GET_IF_IN_POOL) {
+ 
+@@ -1192,7 +1216,7 @@
+ 		goto loop;
+ 	}
+ 
+-	mutex_enter(&block->mutex);
++	// mutex_enter(&block->mutex);
+ 
+ 	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ 
+@@ -1204,7 +1228,7 @@
+ 
+ 		if (mode == BUF_GET_IF_IN_POOL) {
+ 			/* The page is only being read to buffer */
+-			mutex_exit(&buf_pool->mutex);
++			// mutex_exit(&buf_pool->mutex);
+ 			mutex_exit(&block->mutex);
+ 
+ 			return(NULL);
+@@ -1221,7 +1245,9 @@
+ 		LRU list and we must put it to awe_LRU_free_mapped list once
+ 		mapped to a frame */
+ 		
++		mutex_enter_fast(&(buf_pool->mutex));
+ 		buf_awe_map_page_to_frame(block, TRUE);
++		mutex_exit(&buf_pool->mutex);
+ 	}
+ 	
+ #ifdef UNIV_SYNC_DEBUG
+@@ -1229,7 +1255,7 @@
+ #else
+ 	buf_block_buf_fix_inc(block);
+ #endif
+-	mutex_exit(&buf_pool->mutex);
++	// mutex_exit(&buf_pool->mutex);
+ 
+ 	/* Check if this is the first access to the page */
+ 
+@@ -1773,7 +1799,8 @@
+ 
+ 	ut_a(block);
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->hash_mutex));
+ 	mutex_enter(&block->mutex);
+ 
+ 	if (fil_tablespace_deleted_or_being_deleted_in_mem(space,
+@@ -1788,7 +1815,8 @@
+ 		being deleted, or the page is already in buf_pool, return */
+ 
+ 		mutex_exit(&block->mutex);
+-		mutex_exit(&(buf_pool->mutex));
++		mutex_exit(&(buf_pool->LRU_mutex));
++		mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 		buf_block_free(block);
+ 
+@@ -1803,10 +1831,14 @@
+ 	ut_ad(block);
+ 	
+ 	buf_page_init(space, offset, block);
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	/* The block must be put to the LRU list, to the old blocks */
+ 
+ 	buf_LRU_add_block(block, TRUE); 	/* TRUE == to old blocks */
++	mutex_exit(&(buf_pool->LRU_mutex));
++
++	mutex_enter(&(buf_pool->mutex)); /* for consistency about aio */
+ 	
+ 	block->io_fix = BUF_IO_READ;
+ 
+@@ -1855,7 +1887,8 @@
+ 
+ 	free_block = buf_LRU_get_free_block();
+ 	
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->hash_mutex));
+ 
+ 	block = buf_page_hash_get(space, offset);
+ 
+@@ -1866,7 +1899,8 @@
+ 		block->file_page_was_freed = FALSE;
+ 
+ 		/* Page can be found in buf_pool */
+-		mutex_exit(&(buf_pool->mutex));
++		mutex_exit(&(buf_pool->LRU_mutex));
++		mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 		buf_block_free(free_block);
+ 
+@@ -1889,6 +1923,7 @@
+ 	mutex_enter(&block->mutex);
+ 
+ 	buf_page_init(space, offset, block);
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	/* The block must be put to the LRU list */
+ 	buf_LRU_add_block(block, FALSE);
+@@ -1900,7 +1935,7 @@
+ #endif
+ 	buf_pool->n_pages_created++;
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
+ 
+ 	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+ 
+@@ -1914,7 +1949,7 @@
+ 	ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
+ 
+ 	/* Flush pages from the end of the LRU list if necessary */
+-	buf_flush_free_margin();
++	buf_flush_free_margin(FALSE);
+ 
+ 	frame = block->frame;
+ 
+@@ -1950,6 +1985,7 @@
+ {
+ 	ulint		io_type;
+ 	ulint		read_page_no;
++	ulint		flush_type;
+ 	
+ 	ut_ad(block);
+ 
+@@ -2029,9 +2065,6 @@
+ 		}
+ 	}
+ 	
+-	mutex_enter(&(buf_pool->mutex));
+-	mutex_enter(&block->mutex);
+-
+ #ifdef UNIV_IBUF_DEBUG
+ 	ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ #endif
+@@ -2040,9 +2073,12 @@
+ 	removes the newest lock debug record, without checking the thread
+ 	id. */
+ 
+-	block->io_fix = 0;
+-	
+ 	if (io_type == BUF_IO_READ) {
++		mutex_enter(&block->mutex);
++		mutex_enter(&(buf_pool->mutex));
++
++		block->io_fix = 0;
++
+ 		/* NOTE that the call to ibuf may have moved the ownership of
+ 		the x-latch to this OS thread: do not let this confuse you in
+ 		debugging! */		
+@@ -2053,6 +2089,8 @@
+ 
+ 		rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
+ 
++		mutex_exit(&(buf_pool->mutex));
++		mutex_exit(&block->mutex);
+ #ifdef UNIV_DEBUG
+ 		if (buf_debug_prints) {
+ 			fputs("Has read ", stderr);
+@@ -2061,14 +2099,32 @@
+ 	} else {
+ 		ut_ad(io_type == BUF_IO_WRITE);
+ 
++		flush_type = block->flush_type;
++		if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++			mutex_enter(&(buf_pool->LRU_mutex));
++		}
++		mutex_enter(&(buf_pool->flush_list_mutex));
++		mutex_enter(&block->mutex);
++		mutex_enter(&(buf_pool->mutex));
++
++		block->io_fix = 0;
++
+ 		/* Write means a flush operation: call the completion
+ 		routine in the flush system */
+ 
+ 		buf_flush_write_complete(block);
+ 
++		mutex_exit(&(buf_pool->flush_list_mutex));
++		if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++			mutex_exit(&(buf_pool->LRU_mutex));
++		}
++
+ 		rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
+ 
+ 		buf_pool->n_pages_written++;
++
++		mutex_exit(&(buf_pool->mutex));
++		mutex_exit(&block->mutex);
+ 
+ #ifdef UNIV_DEBUG
+ 		if (buf_debug_prints) {
+@@ -2077,9 +2133,6 @@
+ #endif /* UNIV_DEBUG */
+ 	}
+ 	
+-	mutex_exit(&block->mutex);
+-	mutex_exit(&(buf_pool->mutex));
+-
+ #ifdef UNIV_DEBUG
+ 	if (buf_debug_prints) {
+ 		fprintf(stderr, "page space %lu page no %lu\n",
+@@ -2107,11 +2160,11 @@
+ 		freed = buf_LRU_search_and_free_block(100);
+ 	}
+ 	
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
+ 
+ 	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
+ }
+ 
+ /*************************************************************************
+@@ -2130,10 +2183,22 @@
+ 	ulint		n_flush		= 0;
+ 	ulint		n_free		= 0;
+ 	ulint		n_page		= 0;
++	ulint		n_single_flush_tmp	= 0;
++	ulint		n_lru_flush_tmp		= 0;
++	ulint		n_list_flush_tmp	= 0;
+ 	
+ 	ut_ad(buf_pool);
+ 
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->flush_list_mutex));
++	mutex_enter(&(buf_pool->free_mutex));
++	mutex_enter(&(buf_pool->hash_mutex));
++
+ 	mutex_enter(&(buf_pool->mutex));
++	n_single_flush_tmp = buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
++	n_list_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LIST];
++	n_lru_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LRU];
++	mutex_exit(&(buf_pool->mutex));
+ 
+ 	for (i = 0; i < buf_pool->curr_size; i++) {
+ 
+@@ -2201,11 +2266,14 @@
+ 	}
+ 	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
+ 
+-	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+-	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+-	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
++	ut_a(n_single_flush_tmp == n_single_flush);
++	ut_a(n_list_flush_tmp == n_list_flush);
++	ut_a(n_lru_flush_tmp == n_lru_flush);
+ 	
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
++	mutex_exit(&(buf_pool->flush_list_mutex));
++	mutex_exit(&(buf_pool->free_mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	ut_a(buf_LRU_validate());
+ 	ut_a(buf_flush_validate());
+@@ -2237,7 +2305,9 @@
+ 	index_ids = mem_alloc(sizeof(dulint) * size);
+ 	counts = mem_alloc(sizeof(ulint) * size);
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->flush_list_mutex));
++	mutex_enter(&(buf_pool->free_mutex));
+ 	
+ 	fprintf(stderr,
+ 		"buf_pool size %lu\n"
+@@ -2290,7 +2360,9 @@
+ 		}
+ 	}
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
++	mutex_exit(&(buf_pool->flush_list_mutex));
++	mutex_exit(&(buf_pool->free_mutex));
+ 
+ 	for (i = 0; i < n_found; i++) {
+ 		index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -2325,8 +2397,6 @@
+         ulint i;
+         ulint fixed_pages_number = 0;
+ 
+-        mutex_enter(&(buf_pool->mutex));
+-
+         for (i = 0; i < buf_pool->curr_size; i++) {
+ 
+ 		block = buf_pool_get_nth_block(buf_pool, i);
+@@ -2342,7 +2412,6 @@
+ 		}
+         }
+ 
+-        mutex_exit(&(buf_pool->mutex));
+         return fixed_pages_number;
+ }
+ #endif /* UNIV_DEBUG */
+@@ -2370,7 +2439,9 @@
+ {
+ 	ulint	ratio;
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->flush_list_mutex));
++	mutex_enter(&(buf_pool->free_mutex));
+ 
+ 	ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
+ 		     / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
+@@ -2378,7 +2449,9 @@
+ 
+ 		       /* 1 + is there to avoid division by zero */   
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
++	mutex_exit(&(buf_pool->flush_list_mutex));
++	mutex_exit(&(buf_pool->free_mutex));
+ 
+ 	return(ratio);
+ }
+@@ -2398,6 +2471,9 @@
+ 	ut_ad(buf_pool);
+ 	size = buf_pool->curr_size;
+ 
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->flush_list_mutex));
++	mutex_enter(&(buf_pool->free_mutex));
+ 	mutex_enter(&(buf_pool->mutex));
+ 	
+ 	if (srv_use_awe) {
+@@ -2469,6 +2545,9 @@
+ 	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+ 	buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
+ 
++	mutex_exit(&(buf_pool->LRU_mutex));
++	mutex_exit(&(buf_pool->flush_list_mutex));
++	mutex_exit(&(buf_pool->free_mutex));
+ 	mutex_exit(&(buf_pool->mutex));
+ }
+ 
+@@ -2499,8 +2578,6 @@
+ 	
+ 	ut_ad(buf_pool);
+ 
+-	mutex_enter(&(buf_pool->mutex));
+-
+ 	for (i = 0; i < buf_pool->curr_size; i++) {
+ 
+ 		block = buf_pool_get_nth_block(buf_pool, i);
+@@ -2521,8 +2598,6 @@
+ 
+ 		mutex_exit(&block->mutex);
+  	}
+-
+-	mutex_exit(&(buf_pool->mutex));
+ 
+ 	return(TRUE);
+ }	
+@@ -2562,11 +2637,11 @@
+ {
+ 	ulint	len;
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->free_mutex));
+ 
+ 	len = UT_LIST_GET_LEN(buf_pool->free);
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->free_mutex));
+ 
+ 	return(len);
+ }
+diff -r 72a897774060 innobase/buf/buf0flu.c
+--- a/innobase/buf/buf0flu.c	Mon Sep 08 16:40:20 2008 -0700
++++ b/innobase/buf/buf0flu.c	Mon Sep 08 16:40:27 2008 -0700
+@@ -117,12 +117,14 @@
+ 	ut_ad(mutex_own(&block->mutex));
+ #endif /* UNIV_SYNC_DEBUG */
+ 	if (block->state != BUF_BLOCK_FILE_PAGE) {
++		/* Callers are now permitted not to own LRU_mutex, so do not report this. */
++/*
+ 		ut_print_timestamp(stderr);
+ 		fprintf(stderr,
+ "  InnoDB: Error: buffer block state %lu in the LRU list!\n",
+ 			(ulong)block->state);
+ 		ut_print_buf(stderr, (byte*)block, sizeof(buf_block_t));
+-
++*/
+ 		return(FALSE);
+ 	}
+ 
+@@ -535,18 +537,20 @@
+ 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
+ 				|| flush_type == BUF_FLUSH_SINGLE_PAGE);
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->hash_mutex));
+ 
+ 	block = buf_page_hash_get(space, offset);
+ 
+ 	ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
+ 
+ 	if (!block) {
+-		mutex_exit(&(buf_pool->mutex));
++		mutex_exit(&(buf_pool->hash_mutex));
+ 		return(0);
+ 	}
+ 
+ 	mutex_enter(&block->mutex);
++	mutex_enter(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	if (flush_type == BUF_FLUSH_LIST
+ 	    && buf_flush_ready_for_flush(block, flush_type)) {
+@@ -743,7 +747,7 @@
+ 		high = fil_space_get_size(space);
+ 	}
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->hash_mutex));
+ 
+ 	for (i = low; i < high; i++) {
+ 
+@@ -777,7 +781,7 @@
+ 
+ 				mutex_exit(&block->mutex);
+ 
+-				mutex_exit(&(buf_pool->mutex));
++				mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 				/* Note: as we release the buf_pool mutex
+ 				above, in buf_flush_try_page we cannot be sure
+@@ -788,14 +792,14 @@
+ 				count += buf_flush_try_page(space, i,
+ 							    flush_type);
+ 
+-				mutex_enter(&(buf_pool->mutex));
++				mutex_enter(&(buf_pool->hash_mutex));
+ 			} else {
+ 				mutex_exit(&block->mutex);
+ 			}
+ 		}
+ 	}
+ 				
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 	return(count);
+ }
+@@ -848,7 +852,14 @@
+ 	}
+ 
+ 	(buf_pool->init_flush)[flush_type] = TRUE;
++
++	mutex_exit(&(buf_pool->mutex));
+ 	
++	if (flush_type == BUF_FLUSH_LRU) {
++		mutex_enter(&(buf_pool->LRU_mutex));
++	}
++	mutex_enter(&(buf_pool->flush_list_mutex));
++
+ 	for (;;) {
+ 		/* If we have flushed enough, leave the loop */
+ 		if (page_count >= min_n) {
+@@ -894,7 +905,10 @@
+ 				offset = block->offset;
+ 	    
+ 				mutex_exit(&block->mutex);
+-				mutex_exit(&(buf_pool->mutex));
++				if (flush_type == BUF_FLUSH_LRU) {
++					mutex_exit(&(buf_pool->LRU_mutex));
++				}
++				mutex_exit(&(buf_pool->flush_list_mutex));
+ 
+ 				old_page_count = page_count;
+ 				
+@@ -907,7 +921,10 @@
+ 				flush_type, offset,
+ 				page_count - old_page_count); */
+ 
+-				mutex_enter(&(buf_pool->mutex));
++				if (flush_type == BUF_FLUSH_LRU) {
++					mutex_enter(&(buf_pool->LRU_mutex));
++				}
++				mutex_enter(&(buf_pool->flush_list_mutex));
+ 
+ 			} else if (flush_type == BUF_FLUSH_LRU) {
+ 
+@@ -929,6 +946,13 @@
+ 	    		break;
+ 	    	}
+ 	}
++
++	if (flush_type == BUF_FLUSH_LRU) {
++		mutex_exit(&(buf_pool->LRU_mutex));
++	}
++	mutex_exit(&(buf_pool->flush_list_mutex));
++
++	mutex_enter(&(buf_pool->mutex));
+ 
+ 	(buf_pool->init_flush)[flush_type] = FALSE;
+ 
+@@ -988,10 +1012,14 @@
+ 	buf_block_t*	block;
+ 	ulint		n_replaceable;
+ 	ulint		distance	= 0;
+-	
+-	mutex_enter(&(buf_pool->mutex));
++
++	/* optimistic search... */
++	//mutex_enter(&(buf_pool->LRU_mutex));
++	//mutex_enter(&(buf_pool->free_mutex));
+ 
+ 	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
++
++	//mutex_exit(&(buf_pool->free_mutex));
+ 
+ 	block = UT_LIST_GET_LAST(buf_pool->LRU);
+ 
+@@ -1013,7 +1041,7 @@
+ 		block = UT_LIST_GET_PREV(LRU, block);
+ 	}
+ 	
+-	mutex_exit(&(buf_pool->mutex));
++	//mutex_exit(&(buf_pool->LRU_mutex));
+ 
+ 	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+ 
+@@ -1032,8 +1060,9 @@
+ immediately, without waiting. */ 
+ 
+ void
+-buf_flush_free_margin(void)
++buf_flush_free_margin(
+ /*=======================*/
++	ibool	wait)
+ {
+ 	ulint	n_to_flush;
+ 	ulint	n_flushed;
+@@ -1043,7 +1072,7 @@
+ 	if (n_to_flush > 0) {
+ 		n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
+ 							ut_dulint_zero);
+-		if (n_flushed == ULINT_UNDEFINED) {
++		if (wait && n_flushed == ULINT_UNDEFINED) {
+ 			/* There was an LRU type flush batch already running;
+ 			let us wait for it to end */
+ 		   
+@@ -1093,11 +1122,11 @@
+ {
+ 	ibool	ret;
+ 	
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->flush_list_mutex));
+ 
+ 	ret = buf_flush_validate_low();
+ 	
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->flush_list_mutex));
+ 
+ 	return(ret);
+ }
+diff -r 72a897774060 innobase/buf/buf0lru.c
+--- a/innobase/buf/buf0lru.c	Mon Sep 08 16:40:20 2008 -0700
++++ b/innobase/buf/buf0lru.c	Mon Sep 08 16:40:27 2008 -0700
+@@ -79,7 +79,10 @@
+ 	ibool		all_freed;
+ 
+ scan_again:
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
++	mutex_enter(&(buf_pool->flush_list_mutex));
++	mutex_enter(&(buf_pool->free_mutex));
++	mutex_enter(&(buf_pool->hash_mutex));
+ 	
+ 	all_freed = TRUE;
+ 	
+@@ -117,7 +120,10 @@
+ 			
+ 				mutex_exit(&block->mutex);
+ 
+-				mutex_exit(&(buf_pool->mutex));
++				mutex_exit(&(buf_pool->LRU_mutex));
++				mutex_exit(&(buf_pool->flush_list_mutex));
++				mutex_exit(&(buf_pool->free_mutex));
++				mutex_exit(&(buf_pool->hash_mutex));
+ 
+ 				/* Note that the following call will acquire
+ 				an S-latch on the page */
+@@ -147,7 +153,10 @@
+ 		block = UT_LIST_GET_PREV(LRU, block);
+ 	}
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
++	mutex_exit(&(buf_pool->flush_list_mutex));
++	mutex_exit(&(buf_pool->free_mutex));
++	mutex_exit(&(buf_pool->hash_mutex));
+ 	
+ 	if (!all_freed) {
+ 		os_thread_sleep(20000);
+@@ -170,14 +179,14 @@
+ 	ulint		len;
+ 	ulint		limit;
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	mutex_enter(&(buf_pool->LRU_mutex));
+ 
+ 	len = UT_LIST_GET_LEN(buf_pool->LRU);
+ 
+ 	if (len < BUF_LRU_OLD_MIN_LEN) {
+ 		/* The LRU list is too short to do read-ahead */
+ 
+-		mutex_exit(&(buf_pool->mutex));
++		mutex_exit(&(buf_pool->LRU_mutex));
+ 
+ 		return(0);
+ 	}
+@@ -186,7 +195,7 @@
+ 
+ 	limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
+ 
+-	mutex_exit(&(buf_pool->mutex));
++	mutex_exit(&(buf_pool->LRU_mutex));
+ 
+ 	return(limit);
+ }
+@@ -210,13 +219,15 @@
+ 	ulint		distance = 0;
+ 	ibool		freed;
+ 
+-	mutex_enter(&(buf_pool->mutex));
++	/* optimistic search... */
++	//mutex_enter(&(buf_pool->LRU_mutex));
+ 	
++retry:
+ 	freed = FALSE;
+ 	block = UT_LIST_GET_LAST(buf_pool->LRU);
+ 
+ 	while (block != NULL) {
+-	        ut_a(block->in_LRU_list);
++	        //ut_a(block->in_LRU_list); /* optimistic */
+ 
+ 		mutex_enter(&block->mutex);
+ 
+@@ -231,9 +242,17 @@
+ 			}
+ #endif /* UNIV_DEBUG */
+ 
++			mutex_exit(&block->mutex);
++
++			mutex_enter(&(buf_pool->LRU_mutex));/* optimistic */
++
++			mutex_enter(&(buf_pool->hash_mutex));
++			mutex_enter(&block->mutex);
++			if(block->in_LRU_list && buf_flush_ready_for_replace(block)) {
+ 			buf_LRU_block_remove_hashed_page(block);
++			mutex_exit(&(buf_pool->hash_mutex));
+ 
+-			mutex_exit(&(buf_pool->mutex));
++			mutex_exit(&(buf_pool->LRU_mutex));
+ 			mutex_exit(&block->mutex);
+ 
+ 			/* Remove possible adaptive hash index built on the
+@@ -246,14 +265,25 @@
+ 
+ 			ut_a(block->buf_fix_count == 0);
+ 
<Skipped 2113 lines>
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/percona-server.git/commitdiff/431f68fe79a66d5dfdd53f2655709e6c925fbc22



More information about the pld-cvs-commit mailing list