[packages/percona-server/v5.0.x: 77/202] - from http://www.percona.com/mysql/5.0.68/patches/
glen
glen at pld-linux.org
Wed Oct 21 16:15:30 CEST 2015
commit 833f26399d2f12a1a80df4076aee97f40af0e57b
Author: Elan Ruusamäe <glen at pld-linux.org>
Date: Wed Sep 17 14:22:43 2008 +0000
- from http://www.percona.com/mysql/5.0.68/patches/
Changed files:
mysql-acc-pslist.patch -> 1.1.2.1
mysql-control_flush_and_merge_and_read.patch -> 1.1.2.1
mysql-control_io-threads.patch -> 1.1.2.1
mysql-microslow_innodb.patch -> 1.1.2.1
mysql-show_patches.patch -> 1.1.2.1
mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch -> 1.1.2.1
mysql-userstats-testsuite.patch -> 1.1.2.1
mysql-userstats.patch -> 1.1.2.1
mysql-acc-pslist.patch | 115 +
mysql-control_flush_and_merge_and_read.patch | 238 ++
mysql-control_io-threads.patch | 69 +
mysql-microslow_innodb.patch | 2333 ++++++++++++++++++++
mysql-show_patches.patch | 294 +++
...plit_buf_pool_mutex_fixed_optimistic_safe.patch | 1302 +++++++++++
mysql-userstats-testsuite.patch | 222 ++
mysql-userstats.patch | 1453 ++++++++++++
8 files changed, 6026 insertions(+)
---
diff --git a/mysql-acc-pslist.patch b/mysql-acc-pslist.patch
new file mode 100644
index 0000000..f54950e
--- /dev/null
+++ b/mysql-acc-pslist.patch
@@ -0,0 +1,115 @@
+diff -r 174803e7e869 mysql-test/r/create.result
+--- a/mysql-test/r/create.result Thu Sep 04 12:17:56 2008 -0700
++++ b/mysql-test/r/create.result Thu Sep 04 12:20:19 2008 -0700
+@@ -1720,7 +1720,8 @@
+ `COMMAND` varchar(16) NOT NULL DEFAULT '',
+ `TIME` bigint(7) NOT NULL DEFAULT '0',
+ `STATE` varchar(64) DEFAULT NULL,
+- `INFO` longtext
++ `INFO` longtext,
++ `TIME_MS` decimal(22,3) NOT NULL DEFAULT '0.000'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create temporary table t1 like information_schema.processlist;
+@@ -1734,7 +1735,8 @@
+ `COMMAND` varchar(16) NOT NULL DEFAULT '',
+ `TIME` bigint(7) NOT NULL DEFAULT '0',
+ `STATE` varchar(64) DEFAULT NULL,
+- `INFO` longtext
++ `INFO` longtext,
++ `TIME_MS` decimal(22,3) NOT NULL DEFAULT '0.000'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create table t1 like information_schema.character_sets;
+diff -r 174803e7e869 mysql-test/r/not_embedded_server.result
+--- a/mysql-test/r/not_embedded_server.result Thu Sep 04 12:17:56 2008 -0700
++++ b/mysql-test/r/not_embedded_server.result Thu Sep 04 12:20:19 2008 -0700
+@@ -1,7 +1,7 @@
+ prepare stmt1 from ' SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!=\'Daemon\' ';
+ execute stmt1;
+-ID USER HOST DB COMMAND TIME STATE INFO
+-number root localhost test Query time executing SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!='Daemon'
++ID USER HOST DB COMMAND TIME STATE INFO TIME_MS
++number root localhost test Query time executing SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!='Daemon' time_ms
+ deallocate prepare stmt1;
+ FLUSH STATUS;
+ SHOW GLOBAL STATUS LIKE 'com_select';
+diff -r 174803e7e869 mysql-test/t/not_embedded_server.test
+--- a/mysql-test/t/not_embedded_server.test Thu Sep 04 12:17:56 2008 -0700
++++ b/mysql-test/t/not_embedded_server.test Thu Sep 04 12:20:19 2008 -0700
+@@ -16,7 +16,7 @@
+ # End of 4.1 tests
+
+ prepare stmt1 from ' SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST WHERE COMMAND!=\'Daemon\' ';
+---replace_column 1 number 6 time 3 localhost
++--replace_column 1 number 6 time 3 localhost 9 time_ms
+ execute stmt1;
+ deallocate prepare stmt1;
+
+diff -r 174803e7e869 patch_info/acc-pslist.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/acc-pslist.info Thu Sep 04 12:20:19 2008 -0700
+@@ -0,0 +1,6 @@
++File=acc-pslist.patch
++Name=Milliseconds in PROCESSLIST
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=
+diff -r 174803e7e869 sql/sql_show.cc
+--- a/sql/sql_show.cc Thu Sep 04 12:17:56 2008 -0700
++++ b/sql/sql_show.cc Thu Sep 04 12:20:19 2008 -0700
+@@ -1803,7 +1803,7 @@
+ TABLE *table= tables->table;
+ CHARSET_INFO *cs= system_charset_info;
+ char *user;
+- time_t now= my_time(0);
++ ulonglong unow= my_micro_time();
+ DBUG_ENTER("fill_process_list");
+
+ user= thd->security_ctx->master_access & PROCESS_ACL ?
+@@ -1861,8 +1861,8 @@
+ table->field[4]->store(command_name[tmp->command].str,
+ command_name[tmp->command].length, cs);
+ /* MYSQL_TIME */
+- table->field[5]->store((uint32)(tmp->start_time ?
+- now - tmp->start_time : 0), TRUE);
++ const ulonglong utime= tmp->start_utime ? unow - tmp->start_utime : 0;
++ table->field[5]->store(utime / 1000000, TRUE);
+ /* STATE */
+ #ifndef EMBEDDED_LIBRARY
+ val= (char*) (tmp->locked ? "Locked" :
+@@ -1896,11 +1896,15 @@
+ table->field[7]->set_notnull();
+ }
+
++ /* TIME_MS */
++ table->field[8]->store((double)(utime / 1000.0));
++
+ if (schema_table_store_record(thd, table))
+ {
+ VOID(pthread_mutex_unlock(&LOCK_thread_count));
+ DBUG_RETURN(1);
+ }
++
+ }
+ }
+
+@@ -5532,7 +5536,7 @@
+ into it two numbers, based on modulus of base-10 numbers. In the ones
+ position is the number of decimals. Tens position is unused. In the
+ hundreds and thousands position is a two-digit decimal number representing
+- length. Encode this value with (decimals*100)+length , where
++ length. Encode this value with (length*100)+decimals , where
+ 0<decimals<10 and 0<=length<100 .
+
+ @param
+@@ -6540,6 +6544,8 @@
+ {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
+ {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
+ SKIP_OPEN_TABLE},
++ {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL,
++ 0, 0, "Time_ms", SKIP_OPEN_TABLE},
+ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+ };
+
diff --git a/mysql-control_flush_and_merge_and_read.patch b/mysql-control_flush_and_merge_and_read.patch
new file mode 100644
index 0000000..aa87a0d
--- /dev/null
+++ b/mysql-control_flush_and_merge_and_read.patch
@@ -0,0 +1,238 @@
+diff -r 2fdaeb546d25 innobase/buf/buf0rea.c
+--- a/innobase/buf/buf0rea.c Mon Sep 08 16:39:06 2008 -0700
++++ b/innobase/buf/buf0rea.c Mon Sep 08 16:40:14 2008 -0700
+@@ -188,6 +188,10 @@
+ ulint low, high;
+ ulint err;
+ ulint i;
++
++ if (!(srv_read_ahead & 1)) {
++ return(0);
++ }
+
+ if (srv_startup_is_before_trx_rollback_phase) {
+ /* No read-ahead to avoid thread deadlocks */
+@@ -396,6 +400,10 @@
+ ulint err;
+ ulint i;
+
++ if (!(srv_read_ahead & 2)) {
++ return(0);
++ }
++
+ if (srv_startup_is_before_trx_rollback_phase) {
+ /* No read-ahead to avoid thread deadlocks */
+ return(0);
+diff -r 2fdaeb546d25 innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h Mon Sep 08 16:39:06 2008 -0700
++++ b/innobase/include/srv0srv.h Mon Sep 08 16:40:14 2008 -0700
+@@ -131,6 +131,12 @@
+ extern ulong srv_max_purge_lag;
+ extern ibool srv_use_awe;
+ extern ibool srv_use_adaptive_hash_indexes;
++
++extern ulint srv_read_ahead;
++extern ulint srv_ibuf_contract_const;
++extern ulint srv_ibuf_contract_burst;
++extern ulint srv_buf_flush_const;
++extern ulint srv_buf_flush_burst;
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+diff -r 2fdaeb546d25 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c Mon Sep 08 16:39:06 2008 -0700
++++ b/innobase/srv/srv0srv.c Mon Sep 08 16:40:14 2008 -0700
+@@ -322,6 +322,11 @@
+ ibool srv_use_awe = FALSE;
+ ibool srv_use_adaptive_hash_indexes = TRUE;
+
++ulint srv_read_ahead = 3; /* bitmask: 0: none 1: random 2: linear 3: both */
++ulint srv_ibuf_contract_const = 5;
++ulint srv_ibuf_contract_burst = 20;
++ulint srv_buf_flush_const = 10;
++ulint srv_buf_flush_burst = 100;
+ /*-------------------------------------------*/
+ ulong srv_n_spin_wait_rounds = 20;
+ ulong srv_n_free_tickets_to_enter = 500;
+@@ -2298,7 +2303,7 @@
+ + buf_pool->n_pages_written;
+ if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
+ srv_main_thread_op_info = "doing insert buffer merge";
+- ibuf_contract_for_n_pages(TRUE, 5);
++ ibuf_contract_for_n_pages(TRUE, srv_ibuf_contract_burst);
+
+ srv_main_thread_op_info = "flushing log";
+
+@@ -2311,7 +2316,7 @@
+ /* Try to keep the number of modified pages in the
+ buffer pool under the limit wished by the user */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst,
+ ut_dulint_max);
+
+ /* If we had to do the flush, it may have taken
+@@ -2349,7 +2354,7 @@
+ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+
+ srv_main_thread_op_info = "flushing buffer pool pages";
+- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++ buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst, ut_dulint_max);
+
+ srv_main_thread_op_info = "flushing log";
+ log_buffer_flush_to_disk();
+@@ -2359,7 +2364,7 @@
+ even if the server were active */
+
+ srv_main_thread_op_info = "doing insert buffer merge";
+- ibuf_contract_for_n_pages(TRUE, 5);
++ ibuf_contract_for_n_pages(TRUE, srv_ibuf_contract_const);
+
+ srv_main_thread_op_info = "flushing log";
+ log_buffer_flush_to_disk();
+@@ -2401,14 +2406,14 @@
+ (> 70 %), we assume we can afford reserving the disk(s) for
+ the time it requires to flush 100 pages */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst,
+ ut_dulint_max);
+ } else {
+ /* Otherwise, we only flush a small number of pages so that
+ we do not unnecessarily use much disk i/o capacity from
+ other work */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_const,
+ ut_dulint_max);
+ }
+
+@@ -2497,7 +2502,7 @@
+ if (srv_fast_shutdown && srv_shutdown_state > 0) {
+ n_bytes_merged = 0;
+ } else {
+- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
++ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, srv_ibuf_contract_burst);
+ }
+
+ srv_main_thread_op_info = "reserving kernel mutex";
+@@ -2514,7 +2519,7 @@
+
+ if (srv_fast_shutdown < 2) {
+ n_pages_flushed =
+- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++ buf_flush_batch(BUF_FLUSH_LIST, srv_buf_flush_burst, ut_dulint_max);
+ } else {
+ /* In the fastest shutdown we do not flush the buffer pool
+ to data files: we set n_pages_flushed to 0 artificially. */
+diff -r 2fdaeb546d25 patch_info/control_flush_and_merge_and_read.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/control_flush_and_merge_and_read.info Mon Sep 08 16:40:14 2008 -0700
+@@ -0,0 +1,6 @@
++File=control_flush_and_merge_and_read.patch
++Name=InnoDB patch to control insert buffer and flushing
++Version=1.0
++Author=Yasufumi Kinoshita
++License=BSD
++Comment=
+diff -r 2fdaeb546d25 sql/ha_innodb.h
+--- a/sql/ha_innodb.h Mon Sep 08 16:39:06 2008 -0700
++++ b/sql/ha_innodb.h Mon Sep 08 16:40:14 2008 -0700
+@@ -234,6 +234,11 @@
+ extern ulong srv_thread_concurrency;
+ extern ulong srv_commit_concurrency;
+ extern ulong srv_flush_log_at_trx_commit;
++extern ulong srv_read_ahead;
++extern ulong srv_ibuf_contract_const;
++extern ulong srv_ibuf_contract_burst;
++extern ulong srv_buf_flush_const;
++extern ulong srv_buf_flush_burst;
+ }
+
+ bool innobase_init(void);
+diff -r 2fdaeb546d25 sql/mysqld.cc
+--- a/sql/mysqld.cc Mon Sep 08 16:39:06 2008 -0700
++++ b/sql/mysqld.cc Mon Sep 08 16:40:14 2008 -0700
+@@ -5014,7 +5014,10 @@
+ OPT_SECURE_FILE_PRIV,
+ OPT_KEEP_FILES_ON_CREATE,
+ OPT_INNODB_ADAPTIVE_HASH_INDEX,
+- OPT_FEDERATED
++ OPT_FEDERATED,
++ OPT_INNODB_READ_AHEAD,
++ OPT_INNODB_IBUF_CONTRACT_CONST, OPT_INNODB_IBUF_CONTRACT_BURST,
++ OPT_INNODB_BUF_FLUSH_CONST, OPT_INNODB_BUF_FLUSH_BURST
+ };
+
+
+@@ -5321,6 +5324,26 @@
+ (gptr*) &global_system_variables.innodb_table_locks,
+ (gptr*) &global_system_variables.innodb_table_locks,
+ 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
++ {"innodb_read_ahead", OPT_INNODB_READ_AHEAD,
++ "Enable/Disable read aheads bit0:random bit1:linear",
++ (gptr*) &srv_read_ahead, (gptr*) &srv_read_ahead,
++ 0, GET_ULONG, REQUIRED_ARG, 3, 0, 3, 0, 0, 0},
++ {"innodb_ibuf_contract_const", OPT_INNODB_IBUF_CONTRACT_CONST,
++ "Const activity of merging insert buffer",
++ (gptr*) &srv_ibuf_contract_const, (gptr*) &srv_ibuf_contract_const,
++ 0, GET_ULONG, REQUIRED_ARG, 5, 1, 50000, 0, 0, 0},
++ {"innodb_ibuf_contract_burst", OPT_INNODB_IBUF_CONTRACT_BURST,
++ "Burst activity of merging insert buffer",
++ (gptr*) &srv_ibuf_contract_burst, (gptr*) &srv_ibuf_contract_burst,
++ 0, GET_ULONG, REQUIRED_ARG, 20, 1, 50000, 0, 0, 0},
++ {"innodb_buf_flush_const", OPT_INNODB_BUF_FLUSH_CONST,
++ "Const activity of flushing buffer pool",
++ (gptr*) &srv_buf_flush_const, (gptr*) &srv_buf_flush_const,
++ 0, GET_ULONG, REQUIRED_ARG, 10, 1, 50000, 0, 0, 0},
++ {"innodb_buf_flush_burst", OPT_INNODB_BUF_FLUSH_BURST,
++ "Burst activity of flushing buffer pool",
++ (gptr*) &srv_buf_flush_burst, (gptr*) &srv_buf_flush_burst,
++ 0, GET_ULONG, REQUIRED_ARG, 100, 1, 50000, 0, 0, 0},
+ #endif /* End HAVE_INNOBASE_DB */
+ {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
+ (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
+diff -r 2fdaeb546d25 sql/set_var.cc
+--- a/sql/set_var.cc Mon Sep 08 16:39:06 2008 -0700
++++ b/sql/set_var.cc Mon Sep 08 16:40:14 2008 -0700
+@@ -476,6 +476,16 @@
+ sys_var_long_ptr sys_innodb_flush_log_at_trx_commit(
+ "innodb_flush_log_at_trx_commit",
+ &srv_flush_log_at_trx_commit);
++sys_var_long_ptr sys_innodb_read_ahead("innodb_read_ahead",
++ &srv_read_ahead);
++sys_var_long_ptr sys_innodb_ibuf_contract_const("innodb_ibuf_contract_const",
++ &srv_ibuf_contract_const);
++sys_var_long_ptr sys_innodb_ibuf_contract_burst("innodb_ibuf_contract_burst",
++ &srv_ibuf_contract_burst);
++sys_var_long_ptr sys_innodb_buf_flush_const("innodb_buf_flush_const",
++ &srv_buf_flush_const);
++sys_var_long_ptr sys_innodb_buf_flush_burst("innodb_buf_flush_burst",
++ &srv_buf_flush_burst);
+ #endif
+
+ /* Condition pushdown to storage engine */
+@@ -818,6 +828,11 @@
+ &sys_innodb_thread_concurrency,
+ &sys_innodb_commit_concurrency,
+ &sys_innodb_flush_log_at_trx_commit,
++ &sys_innodb_read_ahead,
++ &sys_innodb_ibuf_contract_const,
++ &sys_innodb_ibuf_contract_burst,
++ &sys_innodb_buf_flush_const,
++ &sys_innodb_buf_flush_burst,
+ #endif
+ &sys_trust_routine_creators,
+ &sys_trust_function_creators,
+@@ -953,6 +968,11 @@
+ {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
+ {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
+ {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
++ {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS},
++ {sys_innodb_ibuf_contract_const.name, (char*) &sys_innodb_ibuf_contract_const, SHOW_SYS},
++ {sys_innodb_ibuf_contract_burst.name, (char*) &sys_innodb_ibuf_contract_burst, SHOW_SYS},
++ {sys_innodb_buf_flush_const.name, (char*) &sys_innodb_buf_flush_const, SHOW_SYS},
++ {sys_innodb_buf_flush_burst.name, (char*) &sys_innodb_buf_flush_burst, SHOW_SYS},
+ #endif
+ {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS},
+ {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS},
diff --git a/mysql-control_io-threads.patch b/mysql-control_io-threads.patch
new file mode 100644
index 0000000..7f155b1
--- /dev/null
+++ b/mysql-control_io-threads.patch
@@ -0,0 +1,69 @@
+diff -r 4dca80df8ee3 innobase/os/os0file.c
+--- a/innobase/os/os0file.c Mon Sep 08 16:40:14 2008 -0700
++++ b/innobase/os/os0file.c Mon Sep 08 16:40:20 2008 -0700
+@@ -3180,6 +3180,13 @@
+ struct aiocb* control;
+ #endif
+ ulint i;
++ ulint prim_segment;
++ ulint n;
++
++ n = array->n_slots / array->n_segments;
++ /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */
++ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
++
+ loop:
+ os_mutex_enter(array->mutex);
+
+@@ -3198,11 +3205,22 @@
+ goto loop;
+ }
+
+- for (i = 0;; i++) {
++ for (i = prim_segment * n; i < array->n_slots; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved == FALSE) {
+ break;
++ }
++ }
++
++ if (slot->reserved == TRUE){
++ /* Not found after the intended segment. So we should search before. */
++ for (i = 0;; i++) {
++ slot = os_aio_array_get_nth_slot(array, i);
++
++ if (slot->reserved == FALSE) {
++ break;
++ }
+ }
+ }
+
+diff -r 4dca80df8ee3 innobase/srv/srv0start.c
+--- a/innobase/srv/srv0start.c Mon Sep 08 16:40:14 2008 -0700
++++ b/innobase/srv/srv0start.c Mon Sep 08 16:40:20 2008 -0700
+@@ -1213,12 +1213,12 @@
+
+ if (!os_aio_use_native_aio) {
+ /* In simulated aio we currently have use only for 4 threads */
+- srv_n_file_io_threads = 4;
++ /*srv_n_file_io_threads = 4;*/
+
+ os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+ srv_n_file_io_threads,
+- SRV_MAX_N_PENDING_SYNC_IOS);
++ SRV_MAX_N_PENDING_SYNC_IOS * srv_n_file_io_threads / 4);
+ } else {
+ os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+diff -r 4dca80df8ee3 patch_info/control_io-threads.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/control_io-threads.info Mon Sep 08 16:40:20 2008 -0700
+@@ -0,0 +1,6 @@
++File=control_io-threads.patch
++Name=InnoDB patch to control count of IO threads
++Version=1.0
++Author=Yasufumi Kinoshita
++License=BSD
++Comment=
diff --git a/mysql-microslow_innodb.patch b/mysql-microslow_innodb.patch
new file mode 100644
index 0000000..b173cb1
--- /dev/null
+++ b/mysql-microslow_innodb.patch
@@ -0,0 +1,2333 @@
+diff -r bb81fcdd7db2 include/my_time.h
+--- a/include/my_time.h Mon Sep 08 16:38:33 2008 -0700
++++ b/include/my_time.h Mon Sep 08 16:38:46 2008 -0700
+@@ -140,7 +140,7 @@
+ int my_date_to_str(const MYSQL_TIME *l_time, char *to);
+ int my_datetime_to_str(const MYSQL_TIME *l_time, char *to);
+ int my_TIME_to_str(const MYSQL_TIME *l_time, char *to);
+-
++ulonglong my_timer(ulonglong *ltime, ulonglong frequency);
+ C_MODE_END
+
+ #endif /* _my_time_h_ */
+diff -r bb81fcdd7db2 innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/buf/buf0buf.c Mon Sep 08 16:38:46 2008 -0700
+@@ -37,6 +37,7 @@
+ #include "log0log.h"
+ #include "trx0undo.h"
+ #include "srv0srv.h"
++#include "thr0loc.h"
+
+ /*
+ IMPLEMENTATION OF THE BUFFER POOL
+@@ -1086,6 +1087,31 @@
+ return(block);
+ }
+
++/* Counts distinct pages touched by trx in a bitmap; pages whose hashes collide are counted once. */
++inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
++{
++	ulint	block_hash;
++	ulint	block_hash_byte;
++	byte	block_hash_bit;
++	ut_ad(block);
++	if (!trx || !trx->distinct_page_access_hash)
++		return; /* no transaction or no bitmap attached: nothing to record */
++
++	/* Map the page id onto one bit; assumes the bitmap holds DPAH_SIZE bytes - TODO confirm at the allocation site. */
++	block_hash = ut_hash_ulint((block->space << 20) + block->space +
++					block->offset, DPAH_SIZE << 3);
++	block_hash_byte = block_hash >> 3;
++	block_hash_bit = (byte) (0x01 << (block_hash & 0x07));
++	if (block_hash_byte >= DPAH_SIZE) {
++		/* Defensive: report and bail out rather than index past DPAH_SIZE. */
++		fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", (ulong) block_hash_byte, (ulong) (block_hash & 0x07));
++		return;
++	}
++	if ((trx->distinct_page_access_hash[block_hash_byte] & block_hash_bit) == 0)
++		trx->distinct_page_access++; /* bit not set yet: first access to this page (or hash bucket) */
++	trx->distinct_page_access_hash[block_hash_byte] |= block_hash_bit;
++}
++
+ /************************************************************************
+ This is the general function used to get access to a database page. */
+
+@@ -1108,6 +1134,11 @@
+ ulint fix_type;
+ ibool success;
+ ibool must_read;
++ trx_t* trx;
++ ulint sec;
++ ulint ms;
++ ib_longlong start_time;
++ ib_longlong finish_time;
+
+ ut_ad(mtr);
+ ut_ad((rw_latch == RW_S_LATCH)
+@@ -1119,6 +1150,7 @@
+ #ifndef UNIV_LOG_DEBUG
+ ut_ad(!ibuf_inside() || ibuf_page(space, offset));
+ #endif
++ trx = thr_local_get_trx(os_thread_get_curr_id());
+ buf_pool->n_page_gets++;
+ loop:
+ block = NULL;
+@@ -1148,7 +1180,7 @@
+ return(NULL);
+ }
+
+- buf_read_page(space, offset);
++ buf_read_page(space, offset, trx);
+
+ #ifdef UNIV_DEBUG
+ buf_dbg_counter++;
+@@ -1261,6 +1293,11 @@
+ /* Let us wait until the read operation
+ completes */
+
++ if (trx)
++ {
++ ut_usectime(&sec, &ms);
++ start_time = (ib_longlong)sec * 1000000 + ms;
++ }
+ for (;;) {
+ mutex_enter(&block->mutex);
+
+@@ -1275,6 +1312,12 @@
+
+ break;
+ }
++ }
++ if (trx)
++ {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_longlong)sec * 1000000 + ms;
++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
+ }
+ }
+
+@@ -1296,12 +1339,15 @@
+ /* In the case of a first access, try to apply linear
+ read-ahead */
+
+- buf_read_ahead_linear(space, offset);
++ buf_read_ahead_linear(space, offset, trx);
+ }
+
+ #ifdef UNIV_IBUF_DEBUG
+ ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ #endif
++
++ _increment_page_get_statistics(block, trx);
++
+ return(block->frame);
+ }
+
+@@ -1326,6 +1372,7 @@
+ ibool accessed;
+ ibool success;
+ ulint fix_type;
++ trx_t* trx;
+
+ ut_ad(mtr && block);
+ ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+@@ -1440,13 +1487,16 @@
+ read-ahead */
+
+ buf_read_ahead_linear(buf_frame_get_space_id(guess),
+- buf_frame_get_page_no(guess));
++ buf_frame_get_page_no(guess), trx);
+ }
+
+ #ifdef UNIV_IBUF_DEBUG
+ ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ #endif
+ buf_pool->n_page_gets++;
++
++ trx = thr_local_get_trx(os_thread_get_curr_id());
++ _increment_page_get_statistics(block, trx);
+
+ return(TRUE);
+ }
+@@ -1470,6 +1520,7 @@
+ buf_block_t* block;
+ ibool success;
+ ulint fix_type;
++ trx_t* trx;
+
+ ut_ad(mtr);
+ ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+@@ -1558,6 +1609,9 @@
+ || (ibuf_count_get(block->space, block->offset) == 0));
+ #endif
+ buf_pool->n_page_gets++;
++
++ trx = thr_local_get_trx(os_thread_get_curr_id());
++ _increment_page_get_statistics(block, trx);
+
+ return(TRUE);
+ }
+diff -r bb81fcdd7db2 innobase/buf/buf0rea.c
+--- a/innobase/buf/buf0rea.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/buf/buf0rea.c Mon Sep 08 16:38:46 2008 -0700
+@@ -70,7 +70,8 @@
+ treat the tablespace as dropped; this is a timestamp we
+ use to stop dangling page reads from a tablespace
+ which we have DISCARDed + IMPORTed back */
+- ulint offset) /* in: page number */
++ ulint offset, /* in: page number */
++ trx_t* trx)
+ {
+ buf_block_t* block;
+ ulint wake_later;
+@@ -140,10 +141,10 @@
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+- *err = fil_io(OS_FILE_READ | wake_later,
++ *err = _fil_io(OS_FILE_READ | wake_later,
+ sync, space,
+ offset, 0, UNIV_PAGE_SIZE,
+- (void*)block->frame, (void*)block);
++ (void*)block->frame, (void*)block, trx);
+ ut_a(*err == DB_SUCCESS);
+
+ if (sync) {
+@@ -174,8 +175,9 @@
+ the page at the given page number does not get
+ read even if we return a value > 0! */
+ ulint space, /* in: space id */
+- ulint offset) /* in: page number of a page which the current thread
++ ulint offset, /* in: page number of a page which the current thread
+ wants to access */
++ trx_t* trx)
+ {
+ ib_longlong tablespace_version;
+ buf_block_t* block;
+@@ -270,7 +272,7 @@
+ if (!ibuf_bitmap_page(i)) {
+ count += buf_read_page_low(&err, FALSE, ibuf_mode
+ | OS_AIO_SIMULATED_WAKE_LATER,
+- space, tablespace_version, i);
++ space, tablespace_version, i, trx);
+ if (err == DB_TABLESPACE_DELETED) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+@@ -314,7 +316,8 @@
+ /* out: number of page read requests issued: this can
+ be > 1 if read-ahead occurred */
+ ulint space, /* in: space id */
+- ulint offset) /* in: page number */
++ ulint offset, /* in: page number */
++ trx_t* trx)
+ {
+ ib_longlong tablespace_version;
+ ulint count;
+@@ -323,13 +326,13 @@
+
+ tablespace_version = fil_space_get_version(space);
+
+- count = buf_read_ahead_random(space, offset);
++ count = buf_read_ahead_random(space, offset, trx);
+
+ /* We do the i/o in the synchronous aio mode to save thread
+ switches: hence TRUE */
+
+ count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+- tablespace_version, offset);
++ tablespace_version, offset, trx);
+ srv_buf_pool_reads+= count2;
+ if (err == DB_TABLESPACE_DELETED) {
+ ut_print_timestamp(stderr);
+@@ -374,8 +377,9 @@
+ /*==================*/
+ /* out: number of page read requests issued */
+ ulint space, /* in: space id */
+- ulint offset) /* in: page number of a page; NOTE: the current thread
++ ulint offset, /* in: page number of a page; NOTE: the current thread
+ must want access to this page (see NOTE 3 above) */
++ trx_t* trx)
+ {
+ ib_longlong tablespace_version;
+ buf_block_t* block;
+@@ -556,7 +560,7 @@
+ if (!ibuf_bitmap_page(i)) {
+ count += buf_read_page_low(&err, FALSE, ibuf_mode
+ | OS_AIO_SIMULATED_WAKE_LATER,
+- space, tablespace_version, i);
++ space, tablespace_version, i, trx);
+ if (err == DB_TABLESPACE_DELETED) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+@@ -625,10 +629,10 @@
+ for (i = 0; i < n_stored; i++) {
+ if ((i + 1 == n_stored) && sync) {
+ buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
+- space_ids[i], space_versions[i], page_nos[i]);
++ space_ids[i], space_versions[i], page_nos[i], NULL);
+ } else {
+ buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE,
+- space_ids[i], space_versions[i], page_nos[i]);
++ space_ids[i], space_versions[i], page_nos[i], NULL);
+ }
+
+ if (err == DB_TABLESPACE_DELETED) {
+@@ -704,11 +708,11 @@
+
+ if ((i + 1 == n_stored) && sync) {
+ buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+- tablespace_version, page_nos[i]);
++ tablespace_version, page_nos[i], NULL);
+ } else {
+ buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
+ | OS_AIO_SIMULATED_WAKE_LATER,
+- space, tablespace_version, page_nos[i]);
++ space, tablespace_version, page_nos[i], NULL);
+ }
+ }
+
+diff -r bb81fcdd7db2 innobase/fil/fil0fil.c
+--- a/innobase/fil/fil0fil.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/fil/fil0fil.c Mon Sep 08 16:38:46 2008 -0700
+@@ -3527,7 +3527,7 @@
+ node->name, node->handle, buf,
+ offset_low, offset_high,
+ UNIV_PAGE_SIZE * n_pages,
+- NULL, NULL);
++ NULL, NULL, NULL);
+ #endif
+ if (success) {
+ node->size += n_pages;
+@@ -3851,7 +3851,7 @@
+ Reads or writes data. This operation is asynchronous (aio). */
+
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+ if we are trying to do i/o on a tablespace
+@@ -3877,8 +3877,9 @@
+ void* buf, /* in/out: buffer where to store read data
+ or from where to write; in aio this must be
+ appropriately aligned */
+- void* message) /* in: message for aio handler if non-sync
++ void* message, /* in: message for aio handler if non-sync
+ aio used, else ignored */
++ trx_t* trx)
+ {
+ fil_system_t* system = fil_system;
+ ulint mode;
+@@ -4018,7 +4019,7 @@
+ #else
+ /* Queue the aio request */
+ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
+- offset_low, offset_high, len, node, message);
++ offset_low, offset_high, len, node, message, trx);
+ #endif
+ ut_a(ret);
+
+diff -r bb81fcdd7db2 innobase/include/buf0rea.h
+--- a/innobase/include/buf0rea.h Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/buf0rea.h Mon Sep 08 16:38:46 2008 -0700
+@@ -10,6 +10,7 @@
+ #define buf0rea_h
+
+ #include "univ.i"
++#include "trx0types.h"
+ #include "buf0types.h"
+
+ /************************************************************************
+@@ -25,7 +26,8 @@
+ /* out: number of page read requests issued: this can
+ be > 1 if read-ahead occurred */
+ ulint space, /* in: space id */
+- ulint offset);/* in: page number */
++ ulint offset, /* in: page number */
++ trx_t* trx);
+ /************************************************************************
+ Applies linear read-ahead if in the buf_pool the page is a border page of
+ a linear read-ahead area and all the pages in the area have been accessed.
+@@ -55,8 +57,9 @@
+ /*==================*/
+ /* out: number of page read requests issued */
+ ulint space, /* in: space id */
+- ulint offset);/* in: page number of a page; NOTE: the current thread
++ ulint offset, /* in: page number of a page; NOTE: the current thread
+ must want access to this page (see NOTE 3 above) */
++ trx_t* trx);
+ /************************************************************************
+ Issues read requests for pages which the ibuf module wants to read in, in
+ order to contract the insert buffer tree. Technically, this function is like
+diff -r bb81fcdd7db2 innobase/include/fil0fil.h
+--- a/innobase/include/fil0fil.h Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/fil0fil.h Mon Sep 08 16:38:46 2008 -0700
+@@ -534,8 +534,11 @@
+ /************************************************************************
+ Reads or writes data. This operation is asynchronous (aio). */
+
++#define fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message) \
++ _fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message, NULL)
++
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+ if we are trying to do i/o on a tablespace
+@@ -561,8 +564,9 @@
+ void* buf, /* in/out: buffer where to store read data
+ or from where to write; in aio this must be
+ appropriately aligned */
+- void* message); /* in: message for aio handler if non-sync
++ void* message, /* in: message for aio handler if non-sync
+ aio used, else ignored */
++ trx_t* trx);
+ /************************************************************************
+ Reads data from a space to a buffer. Remember that the possible incomplete
+ blocks at the end of file are ignored: they are not taken into account when
+diff -r bb81fcdd7db2 innobase/include/os0file.h
+--- a/innobase/include/os0file.h Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/os0file.h Mon Sep 08 16:38:46 2008 -0700
+@@ -10,6 +10,8 @@
+ #define os0file_h
+
+ #include "univ.i"
++
++#include "trx0types.h"
+
+ #ifndef __WIN__
+ #include <dirent.h>
+@@ -421,8 +423,11 @@
+ /***********************************************************************
+ Requests a synchronous read operation. */
+
++#define os_file_read(file, buf, offset, offset_high, n) \
++ _os_file_read(file, buf, offset, offset_high, n, NULL)
++
+ ibool
+-os_file_read(
++_os_file_read(
+ /*=========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+@@ -432,7 +437,8 @@
+ offset where to read */
+ ulint offset_high,/* in: most significant 32 bits of
+ offset */
+- ulint n); /* in: number of bytes to read */
++ ulint n, /* in: number of bytes to read */
++ trx_t* trx);
+ /***********************************************************************
+ Rewind file to its start, read at most size - 1 bytes from it to str, and
+ NUL-terminate str. All errors are silently ignored. This function is
+@@ -584,7 +590,8 @@
+ can be used to identify a completed aio
+ operation); if mode is OS_AIO_SYNC, these
+ are ignored */
+- void* message2);
++ void* message2,
++ trx_t* trx);
+ /****************************************************************************
+ Wakes up all async i/o threads so that they know to exit themselves in
+ shutdown. */
+diff -r bb81fcdd7db2 innobase/include/thr0loc.h
+--- a/innobase/include/thr0loc.h Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/thr0loc.h Mon Sep 08 16:38:46 2008 -0700
+@@ -15,6 +15,7 @@
+
+ #include "univ.i"
+ #include "os0thread.h"
++#include "trx0trx.h"
+
+ /********************************************************************
+ Initializes the thread local storage module. */
+@@ -36,6 +37,14 @@
+ /*===========*/
+ os_thread_id_t id); /* in: thread id */
+ /***********************************************************************
++Gets trx */
++
++trx_t*
++thr_local_get_trx(
++/*==================*/
++ /* out: trx for mysql */
++ os_thread_id_t id); /* in: thread id of the thread */
++/***********************************************************************
+ Gets the slot number in the thread table of a thread. */
+
+ ulint
+@@ -43,6 +52,14 @@
+ /*==================*/
+ /* out: slot number */
+ os_thread_id_t id); /* in: thread id of the thread */
++/***********************************************************************
++Sets in the local storage the trx associated with a thread. */
++
++void
++thr_local_set_trx(
++/*==================*/
++ os_thread_id_t id, /* in: thread id of the thread */
++ trx_t* trx); /* in: trx to store for the thread */
+ /***********************************************************************
+ Sets in the local storage the slot number in the thread table of a thread. */
+
+diff -r bb81fcdd7db2 innobase/include/trx0trx.h
+--- a/innobase/include/trx0trx.h Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/include/trx0trx.h Mon Sep 08 16:38:46 2008 -0700
+@@ -668,6 +668,17 @@
+ /*------------------------------*/
+ char detailed_error[256]; /* detailed error message for last
+ error, or empty. */
++ /*------------------------------*/
++ os_thread_id_t trx_thread_id;
++ ulint io_reads;
++ ib_longlong io_read;
++ ulint io_reads_wait_timer;
++ ib_longlong lock_que_wait_ustarted;
++ ulint lock_que_wait_timer;
++ ulint innodb_que_wait_timer;
++ ulint distinct_page_access;
++#define DPAH_SIZE 8192
++ byte* distinct_page_access_hash;
+ };
+
+ #define TRX_MAX_N_THREADS 32 /* maximum number of concurrent
+diff -r bb81fcdd7db2 innobase/lock/lock0lock.c
+--- a/innobase/lock/lock0lock.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/lock/lock0lock.c Mon Sep 08 16:38:46 2008 -0700
+@@ -1806,6 +1806,8 @@
+ {
+ lock_t* lock;
+ trx_t* trx;
++ ulint sec;
++ ulint ms;
+
+ #ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+@@ -1861,6 +1863,8 @@
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ trx->wait_started = time(NULL);
++ ut_usectime(&sec, &ms);
++ trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms;
+
+ ut_a(que_thr_stop(thr));
+
+@@ -3514,7 +3518,9 @@
+ {
+ lock_t* lock;
+ trx_t* trx;
+-
++ ulint sec;
++ ulint ms;
++
+ #ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -3563,7 +3569,10 @@
+
+ return(DB_SUCCESS);
+ }
+-
++
++ trx->wait_started = time(NULL);
++ ut_usectime(&sec, &ms);
++ trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms;
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ trx->wait_started = time(NULL);
+@@ -4289,7 +4298,7 @@
+ ulint i;
+ mtr_t mtr;
+ trx_t* trx;
+-
++
+ fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
+
+ /* First print info on non-active transactions */
+diff -r bb81fcdd7db2 innobase/os/os0file.c
+--- a/innobase/os/os0file.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/os/os0file.c Mon Sep 08 16:38:46 2008 -0700
+@@ -14,6 +14,7 @@
+ #include "srv0start.h"
+ #include "fil0fil.h"
+ #include "buf0buf.h"
++#include "trx0sys.h"
+
+ #if defined(UNIV_HOTBACKUP) && defined(__WIN__)
+ /* Add includes for the _stat() call to compile on Windows */
+@@ -101,6 +102,7 @@
+ struct aiocb control; /* Posix control block for aio
+ request */
+ #endif
++ trx_t* trx;
+ };
+
+ /* The aio array structure */
+@@ -1903,9 +1905,13 @@
+ #ifndef __WIN__
+ /***********************************************************************
+ Does a synchronous read operation in Posix. */
++
++#define os_file_pread(file, buf, n, offset, offset_high) \
++ _os_file_pread(file, buf, n, offset, offset_high, NULL) /* NULL trx: no per-trx I/O accounting */
++
+ static
+ ssize_t
+-os_file_pread(
++_os_file_pread(
+ /*==========*/
+ /* out: number of bytes read, -1 if error */
+ os_file_t file, /* in: handle to a file */
+@@ -1913,12 +1919,17 @@
+ ulint n, /* in: number of bytes to read */
+ ulint offset, /* in: least significant 32 bits of file
+ offset from where to read */
+- ulint offset_high) /* in: most significant 32 bits of
+- offset */
++ ulint offset_high, /* in: most significant 32 bits of
++ offset */
++ trx_t* trx)
+ {
+ off_t offs;
+ ssize_t n_bytes;
+-
++ ulint sec;
++ ulint ms;
++ ib_longlong start_time;
++ ib_longlong finish_time;
++
+ ut_a((offset & 0xFFFFFFFFUL) == offset);
+
+ /* If off_t is > 4 bytes in size, then we assume we can pass a
+@@ -1937,7 +1948,13 @@
+ }
+
+ os_n_file_reads++;
+-
++ if (trx)
++ {
++ trx->io_reads++;
++ trx->io_read += n;
++ ut_usectime(&sec, &ms);
++ start_time = (ib_longlong)sec * 1000000 + ms;
++ }
+ #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
+ os_mutex_enter(os_file_count_mutex);
+ os_file_n_pending_preads++;
+@@ -1951,6 +1968,13 @@
+ os_n_pending_reads--;
+ os_mutex_exit(os_file_count_mutex);
+
++ if (trx)
++ {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_longlong)sec * 1000000 + ms;
++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++ }
++
+ return(n_bytes);
+ #else
+ {
+@@ -1980,6 +2004,13 @@
+ os_mutex_enter(os_file_count_mutex);
+ os_n_pending_reads--;
+ os_mutex_exit(os_file_count_mutex);
++
++ if (trx)
++ {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_longlong)sec * 1000000 + ms;
++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++ }
+
+ return(ret);
+ }
+@@ -2103,7 +2134,7 @@
+ Requests a synchronous positioned read operation. */
+
+ ibool
+-os_file_read(
++_os_file_read(
+ /*=========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+@@ -2113,7 +2144,8 @@
+ offset where to read */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+- ulint n) /* in: number of bytes to read */
++ ulint n, /* in: number of bytes to read */
++ trx_t* trx)
+ {
+ #ifdef __WIN__
+ BOOL ret;
+@@ -2128,8 +2160,7 @@
+
+ os_n_file_reads++;
+ os_bytes_read_since_printout += n;
+-
+-try_again:
++try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+@@ -2177,7 +2208,7 @@
+ os_bytes_read_since_printout += n;
+
+ try_again:
+- ret = os_file_pread(file, buf, n, offset, offset_high);
++ ret = _os_file_pread(file, buf, n, offset, offset_high, trx);
+
+ if ((ulint)ret == n) {
+
+@@ -3137,7 +3168,8 @@
+ offset */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+- ulint len) /* in: length of the block to read or write */
++ ulint len, /* in: length of the block to read or write */
++ trx_t* trx)
+ {
+ os_aio_slot_t* slot;
+ #ifdef WIN_ASYNC_IO
+@@ -3196,7 +3228,7 @@
+ slot->offset = offset;
+ slot->offset_high = offset_high;
+ slot->io_already_done = FALSE;
+-
++
+ #ifdef WIN_ASYNC_IO
+ control = &(slot->control);
+ control->Offset = (DWORD)offset;
+@@ -3390,7 +3422,8 @@
+ can be used to identify a completed aio
+ operation); if mode is OS_AIO_SYNC, these
+ are ignored */
+- void* message2)
++ void* message2,
++ trx_t* trx)
+ {
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+@@ -3429,8 +3462,8 @@
+ wait in the Windows case. */
+
+ if (type == OS_FILE_READ) {
+- return(os_file_read(file, buf, offset,
+- offset_high, n));
++ return(_os_file_read(file, buf, offset,
++ offset_high, n, trx));
+ }
+
+ ut_a(type == OS_FILE_WRITE);
+@@ -3463,14 +3496,19 @@
+ ut_error;
+ }
+
++ if (trx && type == OS_FILE_READ)
++ {
++ trx->io_reads++;
++ trx->io_read += n;
++ }
+ slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+- name, buf, offset, offset_high, n);
++ name, buf, offset, offset_high, n, trx);
+ if (type == OS_FILE_READ) {
+ if (os_aio_use_native_aio) {
+ #ifdef WIN_ASYNC_IO
+ os_n_file_reads++;
+ os_bytes_read_since_printout += len;
+-
++
+ ret = ReadFile(file, buf, (DWORD)n, &len,
+ &(slot->control));
+ #elif defined(POSIX_ASYNC_IO)
+@@ -4038,7 +4076,7 @@
+
+ ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
+ consecutive_ios[i]->len);
+- offs += consecutive_ios[i]->len;
++ offs += consecutive_ios[i]->len;
+ }
+ }
+
+@@ -4050,9 +4088,8 @@
+
+ /* Mark the i/os done in slots */
+
+- for (i = 0; i < n_consecutive; i++) {
++ for (i = 0; i < n_consecutive; i++)
+ consecutive_ios[i]->io_already_done = TRUE;
+- }
+
+ /* We return the messages for the first slot now, and if there were
+ several slots, the messages will be returned with subsequent calls
+diff -r bb81fcdd7db2 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/srv/srv0srv.c Mon Sep 08 16:38:46 2008 -0700
+@@ -996,6 +996,10 @@
+ ibool has_slept = FALSE;
+ srv_conc_slot_t* slot = NULL;
+ ulint i;
++ ib_longlong start_time = 0L;
++ ib_longlong finish_time = 0L;
++ ulint sec;
++ ulint ms;
+
+ /* If trx has 'free tickets' to enter the engine left, then use one
+ such ticket */
+@@ -1054,6 +1058,7 @@
+ if (SRV_THREAD_SLEEP_DELAY > 0)
+ {
+ os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
++ trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY;
+ }
+
+ trx->op_info = "";
+@@ -1109,11 +1114,18 @@
+ /* Go to wait for the event; when a thread leaves InnoDB it will
+ release this thread */
+
++ ut_usectime(&sec, &ms);
++ start_time = (ib_longlong)sec * 1000000 + ms;
++
+ trx->op_info = "waiting in InnoDB queue";
+
+ os_event_wait(slot->event);
+
+ trx->op_info = "";
++
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_longlong)sec * 1000000 + ms;
++ trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
+
+ os_fast_mutex_lock(&srv_conc_mutex);
+
+diff -r bb81fcdd7db2 innobase/thr/thr0loc.c
+--- a/innobase/thr/thr0loc.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/thr/thr0loc.c Mon Sep 08 16:38:46 2008 -0700
+@@ -45,6 +45,7 @@
+ for this thread */
+ ibool in_ibuf;/* TRUE if the the thread is doing an ibuf
+ operation */
++ trx_t* trx;
+ hash_node_t hash; /* hash chain node */
+ ulint magic_n;
+ };
+@@ -113,6 +114,29 @@
+ }
+
+ /***********************************************************************
++Gets trx */
++
++trx_t*
++thr_local_get_trx(
++/*==================*/
++ /* out: trx for mysql */
++ os_thread_id_t id) /* in: thread id of the thread */
++{
++ trx_t* trx;
++ thr_local_t* local;
++
++ mutex_enter(&thr_local_mutex);
++
++ local = thr_local_get(id);
++
++ trx = local->trx;
++
++ mutex_exit(&thr_local_mutex);
++
++ return(trx);
++}
++
++/***********************************************************************
+ Sets the slot number in the thread table of a thread. */
+
+ void
+@@ -124,11 +148,31 @@
+ thr_local_t* local;
+
+ mutex_enter(&thr_local_mutex);
+-
++
+ local = thr_local_get(id);
+
+ local->slot_no = slot_no;
+-
++
++ mutex_exit(&thr_local_mutex);
++}
++
++/***********************************************************************
++Sets trx */
++
++void
++thr_local_set_trx(
++/*==================*/
++ os_thread_id_t id, /* in: thread id of the thread */
++ trx_t* trx) /* in: trx */
++{
++ thr_local_t* local;
++
++ mutex_enter(&thr_local_mutex);
++
++ local = thr_local_get(id);
++
++ local->trx = trx;
++
+ mutex_exit(&thr_local_mutex);
+ }
+
+@@ -172,6 +216,7 @@
+ local->magic_n = THR_LOCAL_MAGIC_N;
+
+ local->in_ibuf = FALSE;
++ local->trx = NULL;
+
+ mutex_enter(&thr_local_mutex);
+
+diff -r bb81fcdd7db2 innobase/trx/trx0trx.c
+--- a/innobase/trx/trx0trx.c Mon Sep 08 16:38:33 2008 -0700
++++ b/innobase/trx/trx0trx.c Mon Sep 08 16:38:46 2008 -0700
+@@ -190,6 +190,16 @@
+ trx->global_read_view_heap = mem_heap_create(256);
+ trx->global_read_view = NULL;
+ trx->read_view = NULL;
++
++ trx->io_reads = 0;
++ trx->io_read = 0;
++ trx->io_reads_wait_timer = 0;
++ trx->lock_que_wait_timer = 0;
++ trx->innodb_que_wait_timer = 0;
++ trx->distinct_page_access = 0;
++ trx->distinct_page_access_hash = NULL;
++ trx->trx_thread_id = os_thread_get_curr_id();
++ thr_local_set_trx(trx->trx_thread_id, NULL);
+
+ /* Set X/Open XA transaction identification to NULL */
+ memset(&trx->xid, 0, sizeof(trx->xid));
+@@ -230,6 +240,10 @@
+
+ trx->mysql_process_no = os_proc_get_number();
+
++ trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++ thr_local_set_trx(trx->mysql_thread_id, trx);
++
+ return(trx);
+ }
+
+@@ -355,6 +369,8 @@
+
+ ut_a(trx->read_view == NULL);
+
++ thr_local_free(trx->trx_thread_id);
++
+ mem_free(trx);
+ }
+
+@@ -366,6 +382,12 @@
+ /*===============*/
+ trx_t* trx) /* in, own: trx object */
+ {
++ if (trx->distinct_page_access_hash)
++ {
++ mem_free(trx->distinct_page_access_hash);
++ trx->distinct_page_access_hash= NULL;
++ }
++
+ thr_local_free(trx->mysql_thread_id);
+
+ mutex_enter(&kernel_mutex);
+@@ -1064,7 +1086,10 @@
+ trx_t* trx) /* in: transaction */
+ {
+ que_thr_t* thr;
+-
++ ulint sec;
++ ulint ms;
++ ib_longlong now;
++
+ #ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -1080,6 +1105,9 @@
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ }
+
++ ut_usectime(&sec, &ms);
++ now = (ib_longlong)sec * 1000000 + ms;
++ trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
+ trx->que_state = TRX_QUE_RUNNING;
+ }
+
+@@ -1093,6 +1121,9 @@
+ trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */
+ {
+ que_thr_t* thr;
++ ulint sec;
++ ulint ms;
++ ib_longlong now;
+
+ #ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+@@ -1109,6 +1140,9 @@
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ }
+
++ ut_usectime(&sec, &ms);
++ now = (ib_longlong)sec * 1000000 + ms;
++ trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
+ trx->que_state = TRX_QUE_RUNNING;
+ }
+
+diff -r bb81fcdd7db2 patch_info/microslow_innodb.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/microslow_innodb.info Mon Sep 08 16:38:46 2008 -0700
+@@ -0,0 +1,6 @@
++File=microslow_innodb.patch
++Name=Extended statistics in slow.log
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=
+diff -r bb81fcdd7db2 scripts/mysqldumpslow.sh
+--- a/scripts/mysqldumpslow.sh Mon Sep 08 16:38:33 2008 -0700
++++ b/scripts/mysqldumpslow.sh Mon Sep 08 16:38:46 2008 -0700
+@@ -83,8 +83,8 @@
+ s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//;
+ my ($user,$host) = s/^#? User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('','');
+
+- s/^# Query_time: (\d+) Lock_time: (\d+) Rows_sent: (\d+).*\n//;
+- my ($t, $l, $r) = ($1, $2, $3);
++ s/^# Query_time: (\d+(\.\d+)?) Lock_time: (\d+(\.\d+)?) Rows_sent: (\d+(\.\d+)?).*\n//;
++ my ($t, $l, $r) = ($1, $3, $5);
+ $t -= $l unless $opt{l};
+
+ # remove fluff that mysqld writes to log when it (re)starts:
+diff -r bb81fcdd7db2 sql-common/my_time.c
+--- a/sql-common/my_time.c Mon Sep 08 16:38:33 2008 -0700
++++ b/sql-common/my_time.c Mon Sep 08 16:38:46 2008 -0700
+@@ -1252,3 +1252,37 @@
+ return 0;
+ }
+
++/*
++ ulonglong my_timer(ulonglong *ltime, ulonglong frequency)
++
++ For performance measurement this function returns (and stores in *ltime,
++ if ltime is not NULL) the number of microseconds since the epoch (SVr4,
++ BSD 4.3, POSIX 1003.1-2001) or system start (Windows platforms).
++
++ For windows platforms frequency value (obtained via
++ QueryPerformanceFrequency) has to be specified. The global frequency
++ value is set in mysqld.cc.
++
++ If frequency is 0 (QueryPerformanceFrequency not supported) the time is
++ obtained via GetTickCount, which has millisecond resolution only.
++*/
++
++ulonglong my_timer(ulonglong *ltime, ulonglong frequency)
++{
++ ulonglong newtime= 0;
++#ifdef __WIN__
++ if (frequency)
++ {
++ QueryPerformanceCounter((LARGE_INTEGER *)&newtime); /* ticks; scaled to usec below, split to avoid overflow */
++ newtime= (newtime / frequency) * 1000000 + (newtime % frequency) * 1000000 / frequency;
++ } else
++ newtime= (ulonglong) GetTickCount() * 1000; /* GetTickCount only returns milliseconds; widen before scaling */
++#else
++ struct timeval t;
++ gettimeofday(&t, NULL);
++ newtime= (ulonglong)t.tv_sec * 1000000 + t.tv_usec;
++#endif
++ if (ltime)
++ *ltime= newtime;
++ return newtime;
++}
+diff -r bb81fcdd7db2 sql/filesort.cc
+--- a/sql/filesort.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/filesort.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -180,6 +180,7 @@
+ {
+ statistic_increment(thd->status_var.filesort_scan_count, &LOCK_status);
+ }
++ thd->query_plan_flags|= QPLAN_FILESORT;
+ #ifdef CAN_TRUST_RANGE
+ if (select && select->quick && select->quick->records > 0L)
+ {
+@@ -245,6 +246,7 @@
+ }
+ else
+ {
++ thd->query_plan_flags|= QPLAN_FILESORT_DISK;
+ if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
+ {
+ x_free(table_sort.buffpek);
+@@ -1116,6 +1118,7 @@
+
+ statistic_increment(current_thd->status_var.filesort_merge_passes,
+ &LOCK_status);
++ current_thd->query_plan_fsort_passes++;
+ if (param->not_killable)
+ {
+ killed= &not_killable;
+diff -r bb81fcdd7db2 sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/ha_innodb.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -1,3 +1,4 @@
++
+ /* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
+
+ This program is free software; you can redistribute it and/or modify
+@@ -6098,6 +6099,7 @@
+ {
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+ trx_t* trx;
++ int i;
+
+ DBUG_ENTER("ha_innobase::external_lock");
+ DBUG_PRINT("enter",("lock_type: %d", lock_type));
+@@ -6221,7 +6223,24 @@
+
+ if (trx->n_mysql_tables_in_use == 0) {
+
+- trx->mysql_n_tables_locked = 0;
++ current_thd->innodb_was_used = TRUE;
++ current_thd->innodb_io_reads += trx->io_reads;
++ current_thd->innodb_io_read += trx->io_read;
++ current_thd->innodb_io_reads_wait_timer += trx->io_reads_wait_timer;
++ current_thd->innodb_lock_que_wait_timer += trx->lock_que_wait_timer;
++ current_thd->innodb_innodb_que_wait_timer += trx->innodb_que_wait_timer;
++ current_thd->innodb_page_access += trx->distinct_page_access;
++
++ trx->io_reads = 0;
++ trx->io_read = 0;
++ trx->io_reads_wait_timer = 0;
++ trx->lock_que_wait_timer = 0;
++ trx->innodb_que_wait_timer = 0;
++ trx->distinct_page_access = 0;
++ if (trx->distinct_page_access_hash)
++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++
++ trx->mysql_n_tables_locked = 0;
+ prebuilt->used_in_HANDLER = FALSE;
+
+ if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
+diff -r bb81fcdd7db2 sql/log.cc
+--- a/sql/log.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/log.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -2229,10 +2229,11 @@
+ */
+
+ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
+- time_t query_start_arg)
++ time_t query_start_arg, ulonglong query_start_timer)
+ {
+ bool error=0;
+ time_t current_time;
++ ulonglong current_timer;
+ if (!is_open())
+ return 0;
+ DBUG_ENTER("MYSQL_LOG::write");
+@@ -2243,7 +2244,8 @@
+ int tmp_errno=0;
+ char buff[80],*end;
+ end=buff;
+- if (!(thd->options & OPTION_UPDATE_LOG))
++ if (!(thd->options & OPTION_UPDATE_LOG) &&
++ !(thd->slave_thread && opt_log_slow_slave_statements))
+ {
+ VOID(pthread_mutex_unlock(&LOCK_log));
+ DBUG_RETURN(0);
+@@ -2273,22 +2275,69 @@
+ if (my_b_printf(&log_file, "# User@Host: %s[%s] @ %s [%s]\n",
+ sctx->priv_user ?
+ sctx->priv_user : "",
+- sctx->user ? sctx->user : "",
++ sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""),
+ sctx->host ? sctx->host : "",
+ sctx->ip ? sctx->ip : "") ==
+ (uint) -1)
+ tmp_errno=errno;
+ }
+- if (query_start_arg)
++ if (query_start_timer)
+ {
++ char buf[5][20];
++ ulonglong current_timer= my_timer(&current_timer, frequency);
++ sprintf(buf[0], "%.6f", (current_timer - query_start_timer) / 1000000.0);
++ sprintf(buf[1], "%.6f", (thd->timer_after_lock - query_start_timer) / 1000000.0);
++ if (!query_length)
++ {
++ thd->sent_row_count= thd->examined_row_count= 0;
++ thd->innodb_was_used= FALSE;
++ thd->query_plan_flags= QPLAN_NONE;
++ thd->query_plan_fsort_passes= 0;
++ }
++
+ /* For slow query log */
+ if (my_b_printf(&log_file,
+- "# Query_time: %lu Lock_time: %lu Rows_sent: %lu Rows_examined: %lu\n",
+- (ulong) (current_time - query_start_arg),
+- (ulong) (thd->time_after_lock - query_start_arg),
++ "# Thread_id: %lu Schema: %s\n" \
++ "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu\n",
++ (ulong) thd->thread_id, (thd->db ? thd->db : ""),
++ buf[0], buf[1],
+ (ulong) thd->sent_row_count,
+ (ulong) thd->examined_row_count) == (uint) -1)
+ tmp_errno=errno;
++ if ((thd->variables.log_slow_verbosity & SLOG_V_QUERY_PLAN) &&
++ my_b_printf(&log_file,
++ "# QC_Hit: %s Full_scan: %s Full_join: %s Tmp_table: %s Tmp_table_on_disk: %s\n" \
++ "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu\n",
++ ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"),
++ thd->query_plan_fsort_passes) == (uint) -1)
++ tmp_errno=errno;
++ if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && thd->innodb_was_used)
++ {
++ sprintf(buf[2], "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0);
++ sprintf(buf[3], "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0);
++ sprintf(buf[4], "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0);
++ if (my_b_printf(&log_file,
++ "# InnoDB_IO_r_ops: %lu InnoDB_IO_r_bytes: %lu InnoDB_IO_r_wait: %s\n" \
++ "# InnoDB_rec_lock_wait: %s InnoDB_queue_wait: %s\n" \
++ "# InnoDB_pages_distinct: %lu\n",
++ (ulong) thd->innodb_io_reads,
++ (ulong) thd->innodb_io_read,
++ buf[2], buf[3], buf[4],
++ (ulong) thd->innodb_page_access) == (uint) -1)
++ tmp_errno=errno;
++ }
++ else
++ {
++ if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) &&
++ my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1)
++ tmp_errno=errno;
++ }
+ }
+ if (thd->db && strcmp(thd->db,db))
+ { // Database changed
+diff -r bb81fcdd7db2 sql/log_event.cc
+--- a/sql/log_event.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/log_event.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -2039,6 +2039,7 @@
+ /* Execute the query (note that we bypass dispatch_command()) */
+ const char* found_semicolon= NULL;
+ mysql_parse(thd, thd->query, thd->query_length, &found_semicolon);
++ log_slow_statement(thd);
+
+ }
+ else
+diff -r bb81fcdd7db2 sql/mysql_priv.h
+--- a/sql/mysql_priv.h Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/mysql_priv.h Mon Sep 08 16:38:46 2008 -0700
+@@ -484,6 +484,78 @@
+ #define WEEK_FIRST_WEEKDAY 4
+
+ #define STRING_BUFFER_USUAL_SIZE 80
++
++/* Slow log */
++
++struct msl_opts
++{
++ ulong val;
++ const char *name;
++};
++
++#define SLOG_V_MICROTIME (1UL << 0) /* parenthesized so the flag is safe inside any expression */
++#define SLOG_V_QUERY_PLAN (1UL << 1)
++#define SLOG_V_INNODB (1UL << 2)
++/* ... */
++#define SLOG_V_INVALID (1UL << 31) /* unsigned: 1 << 31 overflows a signed int */
++#define SLOG_V_NONE SLOG_V_MICROTIME
++
++static const struct msl_opts slog_verb[]=
++{
++ /* Basic flags */
++
++ { SLOG_V_MICROTIME, "microtime" },
++ { SLOG_V_QUERY_PLAN, "query_plan" },
++ { SLOG_V_INNODB, "innodb" },
++
++ /* End of basic flags */
++
++ { 0, "" },
++
++ /* Complex flags */
++
++ { SLOG_V_MICROTIME, "minimal" },
++ { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN, "standard" },
++ { SLOG_V_MICROTIME|SLOG_V_QUERY_PLAN|SLOG_V_INNODB, "full" },
++
++ /* End of complex flags */
++
++ { SLOG_V_INVALID, (char *)0 }
++};
++
++#define QPLAN_NONE 0
++#define QPLAN_QC (1UL << 0) /* parenthesized so each flag is safe inside any expression */
++#define QPLAN_QC_NO (1UL << 1)
++#define QPLAN_FULL_SCAN (1UL << 2)
++#define QPLAN_FULL_JOIN (1UL << 3)
++#define QPLAN_TMP_TABLE (1UL << 4)
++#define QPLAN_TMP_DISK (1UL << 5)
++#define QPLAN_FILESORT (1UL << 6)
++#define QPLAN_FILESORT_DISK (1UL << 7)
++/* ... */
++#define QPLAN_MAX (1UL << 31) /* unsigned: 1 << 31 overflows a signed int */
++
++#define SLOG_F_QC_NO QPLAN_QC_NO
++#define SLOG_F_FULL_SCAN QPLAN_FULL_SCAN
++#define SLOG_F_FULL_JOIN QPLAN_FULL_JOIN
++#define SLOG_F_TMP_TABLE QPLAN_TMP_TABLE
++#define SLOG_F_TMP_DISK QPLAN_TMP_DISK
++#define SLOG_F_FILESORT QPLAN_FILESORT
++#define SLOG_F_FILESORT_DISK QPLAN_FILESORT_DISK
++#define SLOG_F_INVALID (1UL << 31) /* unsigned: 1 << 31 overflows a signed int */
++#define SLOG_F_NONE 0
++
++static const struct msl_opts slog_filter[]=
++{
++ { SLOG_F_QC_NO, "qc_miss" },
++ { SLOG_F_FULL_SCAN, "full_scan" },
++ { SLOG_F_FULL_JOIN, "full_join" },
++ { SLOG_F_TMP_TABLE, "tmp_table" },
++ { SLOG_F_TMP_DISK, "tmp_table_on_disk" },
++ { SLOG_F_FILESORT, "filesort" },
++ { SLOG_F_FILESORT_DISK, "filesort_on_disk" },
++ { SLOG_F_INVALID, (char *)0 }
++};
+
+ enum enum_parsing_place
+ {
+@@ -1333,7 +1405,7 @@
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+ extern char* opt_secure_file_priv;
+-extern my_bool opt_log_slow_admin_statements;
++extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements;
+ extern my_bool sp_automatic_privileges, opt_noacl;
+ extern my_bool opt_old_style_user_limits, trust_function_creators;
+ extern uint opt_crash_binlog_innodb;
+diff -r bb81fcdd7db2 sql/mysqld.cc
+--- a/sql/mysqld.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/mysqld.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -175,7 +175,6 @@
+ static void getvolumename();
+ static void getvolumeID(BYTE *volumeName);
+ #endif /* __NETWARE__ */
+-
+
+ #ifdef _AIX41
+ int initgroups(const char *,unsigned int);
+@@ -406,6 +405,7 @@
+ my_bool opt_secure_auth= 0;
+ char* opt_secure_file_priv= 0;
+ my_bool opt_log_slow_admin_statements= 0;
++my_bool opt_log_slow_slave_statements= 0;
+ my_bool lower_case_file_system= 0;
+ my_bool opt_large_pages= 0;
+ uint opt_large_page_size= 0;
+@@ -503,6 +503,7 @@
+ Ge_creator ge_creator;
+ Le_creator le_creator;
+
++ulonglong frequency= 0;
+
+ FILE *bootstrap_file;
+ int bootstrap_error;
+@@ -3649,6 +3650,8 @@
+ unireg_abort(1);
+ }
+ }
++ if (!QueryPerformanceFrequency((LARGE_INTEGER *)&frequency))
++ frequency= 0;
+ #endif /* __WIN__ */
+
+ if (init_common_variables(MYSQL_CONFIG_NAME,
+@@ -4892,7 +4895,7 @@
+ OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
+ OPT_KEY_BUFFER_SIZE, OPT_KEY_CACHE_BLOCK_SIZE,
+ OPT_KEY_CACHE_DIVISION_LIMIT, OPT_KEY_CACHE_AGE_THRESHOLD,
+- OPT_LONG_QUERY_TIME,
++ OPT_LONG_QUERY_TIME, OPT_MIN_EXAMINED_ROW_LIMIT,
+ OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET,
+ OPT_MAX_BINLOG_CACHE_SIZE, OPT_MAX_BINLOG_SIZE,
+ OPT_MAX_CONNECTIONS, OPT_MAX_CONNECT_ERRORS,
+@@ -4983,6 +4986,10 @@
+ OPT_TIMED_MUTEXES,
+ OPT_OLD_STYLE_USER_LIMITS,
+ OPT_LOG_SLOW_ADMIN_STATEMENTS,
++ OPT_LOG_SLOW_SLAVE_STATEMENTS,
++ OPT_LOG_SLOW_RATE_LIMIT,
++ OPT_LOG_SLOW_VERBOSITY,
++ OPT_LOG_SLOW_FILTER,
+ OPT_TABLE_LOCK_WAIT_TIMEOUT,
+ OPT_PORT_OPEN_TIMEOUT,
+ OPT_MERGE,
+@@ -5374,6 +5381,11 @@
+ "Log slow OPTIMIZE, ANALYZE, ALTER and other administrative statements to the slow log if it is open.",
+ (gptr*) &opt_log_slow_admin_statements,
+ (gptr*) &opt_log_slow_admin_statements,
++ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
++ {"log-slow-slave-statements", OPT_LOG_SLOW_SLAVE_STATEMENTS,
++ "Log slow replicated statements to the slow log if it is open.",
++ (gptr*) &opt_log_slow_slave_statements,
++ (gptr*) &opt_log_slow_slave_statements,
+ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"log-slow-queries", OPT_SLOW_QUERY_LOG,
+ "Log slow queries to this log file. Defaults logging to hostname-slow.log file. Must be enabled to activate other slow log options.",
+@@ -6038,11 +6050,27 @@
+ (gptr*) 0,
+ 0, (GET_ULONG | GET_ASK_ADDR) , REQUIRED_ARG, 100,
+ 1, 100, 0, 1, 0},
++ {"log_slow_filter", OPT_LOG_SLOW_FILTER,
++ "Log only the queries that followed certain execution plan. Multiple flags allowed in a comma-separated string. [qc_miss, full_scan, full_join, tmp_table, tmp_table_on_disk, filesort, filesort_on_disk]",
++ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_F_NONE, 0, 0},
++ {"log_slow_rate_limit", OPT_LOG_SLOW_RATE_LIMIT,
++ "Rate limit statement writes to slow log to only those from every (1/log_slow_rate_limit) session.",
++ (gptr*) &global_system_variables.log_slow_rate_limit,
++ (gptr*) &max_system_variables.log_slow_rate_limit, 0, GET_ULONG,
++ REQUIRED_ARG, 1, 1, ~0L, 0, 1L, 0},
++ {"log_slow_verbosity", OPT_LOG_SLOW_VERBOSITY,
++ "Choose how verbose the messages to your slow log will be. Multiple flags allowed in a comma-separated string. [microtime, query_plan, innodb]",
++ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, SLOG_V_MICROTIME, 0, 0},
+ {"long_query_time", OPT_LONG_QUERY_TIME,
+ "Log all queries that have taken more than long_query_time seconds to execute to file.",
+ (gptr*) &global_system_variables.long_query_time,
+ (gptr*) &max_system_variables.long_query_time, 0, GET_ULONG,
+- REQUIRED_ARG, 10, 1, LONG_TIMEOUT, 0, 1, 0},
++ REQUIRED_ARG, 10000000, 0, LONG_TIMEOUT * 1000000, 0, 1, 0},
++ {"min_examined_row_limit", OPT_MIN_EXAMINED_ROW_LIMIT,
++ "Don't log queries which examine less than min_examined_row_limit rows to file.",
++ (gptr*) &global_system_variables.min_examined_row_limit,
++ (gptr*) &max_system_variables.min_examined_row_limit, 0, GET_ULONG,
++ REQUIRED_ARG, 0, 0, ~0L, 0, 1L, 0},
+ {"lower_case_table_names", OPT_LOWER_CASE_TABLE_NAMES,
+ "If set to 1 table names are stored in lowercase on disk and table names will be case-insensitive. Should be set to 2 if you are using a case insensitive file system",
+ (gptr*) &lower_case_table_names,
+@@ -6810,7 +6838,9 @@
+ global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR;
+ max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR;
+ global_system_variables.old_passwords= 0;
+-
++ global_system_variables.log_slow_verbosity= SLOG_V_MICROTIME;
++ global_system_variables.log_slow_filter= SLOG_F_NONE;
++
+ /*
+ Default behavior for 4.1 and 5.0 is to treat NULL values as unequal
+ when collecting index statistics for MyISAM tables.
+@@ -7271,6 +7301,24 @@
+ case OPT_BOOTSTRAP:
+ opt_noacl=opt_bootstrap=1;
+ break;
++ case OPT_LOG_SLOW_FILTER:
++ if ((global_system_variables.log_slow_filter=
++ msl_flag_resolve_by_name(slog_filter, argument,
++ SLOG_F_NONE, SLOG_F_INVALID)) == SLOG_F_INVALID)
++ {
++ fprintf(stderr,"Invalid argument to log_slow_filter\n");
++ exit(1);
++ }
++ break;
++ case OPT_LOG_SLOW_VERBOSITY:
++ if ((global_system_variables.log_slow_verbosity=
++ msl_flag_resolve_by_name(slog_verb, argument,
++ SLOG_V_NONE, SLOG_V_INVALID)) == SLOG_V_INVALID)
++ {
++ fprintf(stderr,"Invalid argument to log_slow_verbosity\n");
++ exit(1);
++ }
++ break;
+ case OPT_STORAGE_ENGINE:
+ {
+ if ((enum db_type)((global_system_variables.table_type=
+@@ -7603,10 +7651,14 @@
+ if (opt_bdb)
+ sql_print_warning("this binary does not contain BDB storage engine");
+ #endif
+- if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes) &&
++ if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes ||
++ opt_log_slow_slave_statements) &&
+ !opt_slow_log)
+- sql_print_warning("options --log-slow-admin-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set");
+-
++ {
++ sql_print_warning("options --log-slow-admin-statements, --log-slow-slave-statements and --log-queries-not-using-indexes have no effect if --log-slow-queries is not set");
++ opt_log_slow_slave_statements= FALSE;
++ }
++
+ if (argc > 0)
+ {
+ fprintf(stderr, "%s: Too many arguments (first extra is '%s').\nUse --help to get a list of available options\n", my_progname, *argv);
+diff -r bb81fcdd7db2 sql/set_var.cc
+--- a/sql/set_var.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/set_var.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -215,7 +215,7 @@
+ sys_log_queries_not_using_indexes("log_queries_not_using_indexes",
+ &opt_log_queries_not_using_indexes);
+ sys_var_thd_ulong sys_log_warnings("log_warnings", &SV::log_warnings);
+-sys_var_thd_ulong sys_long_query_time("long_query_time",
++sys_var_thd_ulonglong sys_long_query_time("long_query_time",
+ &SV::long_query_time);
+ sys_var_bool_const_ptr sys_log_slow("log_slow_queries", &opt_slow_log);
+ sys_var_thd_bool sys_low_priority_updates("low_priority_updates",
+@@ -281,6 +281,8 @@
+ &SV::max_tmp_tables);
+ sys_var_long_ptr sys_max_write_lock_count("max_write_lock_count",
+ &max_write_lock_count);
++sys_var_thd_ulong sys_min_examined_row_limit("min_examined_row_limit",
++ &SV::min_examined_row_limit);
+ sys_var_thd_ulong sys_multi_range_count("multi_range_count",
+ &SV::multi_range_count);
+ sys_var_long_ptr sys_myisam_data_pointer_size("myisam_data_pointer_size",
+@@ -324,6 +326,20 @@
+ sys_var_bool_ptr sys_relay_log_purge("relay_log_purge",
+ &relay_log_purge);
+ #endif
++sys_var_thd_ulong sys_log_slow_rate_limit("log_slow_rate_limit",
++ &SV::log_slow_rate_limit);
++sys_var_thd_msl_flag sys_log_slow_filter("log_slow_filter",
++ &SV::log_slow_filter,
++ SLOG_F_NONE,
++ SLOG_F_NONE,
++ SLOG_F_INVALID,
++ slog_filter);
++sys_var_thd_msl_flag sys_log_slow_verbosity("log_slow_verbosity",
++ &SV::log_slow_verbosity,
++ SLOG_V_NONE,
++ SLOG_V_MICROTIME,
++ SLOG_V_INVALID,
++ slog_verb);
+ sys_var_long_ptr sys_rpl_recovery_rank("rpl_recovery_rank",
+ &rpl_recovery_rank);
+ sys_var_long_ptr sys_query_cache_size("query_cache_size",
+@@ -675,6 +691,9 @@
+ &sys_log_off,
+ &sys_log_queries_not_using_indexes,
+ &sys_log_slow,
++ &sys_log_slow_filter,
++ &sys_log_slow_rate_limit,
++ &sys_log_slow_verbosity,
+ &sys_log_update,
+ &sys_log_warnings,
+ &sys_long_query_time,
+@@ -698,6 +717,7 @@
+ &sys_max_tmp_tables,
+ &sys_max_user_connections,
+ &sys_max_write_lock_count,
++ &sys_min_examined_row_limit,
+ &sys_multi_range_count,
+ &sys_myisam_data_pointer_size,
+ &sys_myisam_max_sort_file_size,
+@@ -963,6 +983,8 @@
+ {"log_slave_updates", (char*) &opt_log_slave_updates, SHOW_MY_BOOL},
+ #endif
+ {sys_log_slow.name, (char*) &sys_log_slow, SHOW_SYS},
++ {sys_log_slow_filter.name, (char*) &sys_log_slow_filter, SHOW_SYS},
++ {sys_log_slow_verbosity.name, (char*) &sys_log_slow_verbosity, SHOW_SYS},
+ {sys_log_warnings.name, (char*) &sys_log_warnings, SHOW_SYS},
+ {sys_long_query_time.name, (char*) &sys_long_query_time, SHOW_SYS},
+ {sys_low_priority_updates.name, (char*) &sys_low_priority_updates, SHOW_SYS},
+@@ -991,6 +1013,7 @@
+ {sys_max_tmp_tables.name, (char*) &sys_max_tmp_tables, SHOW_SYS},
+ {sys_max_user_connections.name,(char*) &sys_max_user_connections, SHOW_SYS},
+ {sys_max_write_lock_count.name, (char*) &sys_max_write_lock_count,SHOW_SYS},
++ {sys_min_examined_row_limit.name, (char*) &sys_min_examined_row_limit, SHOW_SYS},
+ {sys_multi_range_count.name, (char*) &sys_multi_range_count, SHOW_SYS},
+ {sys_myisam_data_pointer_size.name, (char*) &sys_myisam_data_pointer_size, SHOW_SYS},
+ {sys_myisam_max_sort_file_size.name, (char*) &sys_myisam_max_sort_file_size,
+@@ -1043,6 +1066,7 @@
+ {sys_query_prealloc_size.name, (char*) &sys_query_prealloc_size, SHOW_SYS},
+ {sys_range_alloc_block_size.name, (char*) &sys_range_alloc_block_size,
+ SHOW_SYS},
++ {sys_log_slow_rate_limit.name, (char*) &sys_log_slow_rate_limit, SHOW_SYS},
+ {sys_read_buff_size.name, (char*) &sys_read_buff_size, SHOW_SYS},
+ {sys_readonly.name, (char*) &sys_readonly, SHOW_SYS},
+ {sys_read_rnd_buff_size.name,(char*) &sys_read_rnd_buff_size, SHOW_SYS},
+@@ -1639,6 +1663,57 @@
+ return (byte*) &(thd->variables.*offset);
+ }
+
++void sys_var_thd_microtime::set_default(THD *thd, enum_var_type type)
++{ /* NOTE(review): `type` is never consulted; the global value is reset unconditionally */
++ pthread_mutex_lock(&LOCK_global_system_variables);
++ global_system_variables.*offset= (ulonglong) option_limits->def_value; /* NOTE(review): option_limits is not NULL-checked here, unlike in update() */
++ pthread_mutex_unlock(&LOCK_global_system_variables);
++}
++
++bool sys_var_thd_microtime::check(THD *thd, set_var *var)
++{
++ if (var->value->result_type() == DECIMAL_RESULT)
++ var->save_result.ulonglong_value= (ulonglong)(var->value->val_real() * 1000000);
++ else
++ var->save_result.ulonglong_value= (ulonglong)var->value->val_int() * 1000000;
++ return 0;
++}
++
++byte *sys_var_thd_microtime::value_ptr(THD *thd, enum_var_type type,
++ LEX_STRING *base)
++{
++ if (type == OPT_GLOBAL)
++ return (byte*) &(global_system_variables.*offset);
++ return (byte*) &(thd->variables.*offset);
++}
++
++bool sys_var_thd_microtime::update(THD *thd, set_var *var)
++{
++ bool fixed= FALSE;
++ ulonglong tmp= var->save_result.ulonglong_value;
++
++ if (tmp > max_system_variables.*offset)
++ tmp= max_system_variables.*offset;
++
++ if (option_limits)
++ tmp= getopt_ull_limit_value(tmp, option_limits, &fixed);
++
++ if (fixed)
++ throw_bounds_warning(thd, option_limits->name, tmp);
++
++ /* Store the limited value in either the global or the session copy */
++ if (var->type == OPT_GLOBAL)
++ {
++ /* Lock is needed to make things safe on 32 bit systems */
++ pthread_mutex_lock(&LOCK_global_system_variables);
++ global_system_variables.*offset= tmp;
++ pthread_mutex_unlock(&LOCK_global_system_variables);
++ }
++ else
++ thd->variables.*offset= (ulonglong) tmp;
++
++ return 0;
++}
+
+ bool sys_var_thd_ha_rows::update(THD *thd, set_var *var)
+ {
+@@ -3483,6 +3558,191 @@
+ #endif
+ }
+
++/* Slow log stuff */
++
++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len)
++{
++ ulong i;
++
++ for (i=0; opts[i].name; i++)
++ {
++ if (!my_strnncoll(&my_charset_latin1,
++ (const uchar *)name, len,
++ (const uchar *)opts[i].name, strlen(opts[i].name)))
++ return opts[i].val;
++ }
++ return opts[i].val;
++}
++
++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list,
++ const ulong none_val, const ulong invalid_val)
++{
++ const char *p, *e;
++ ulong val= none_val;
++
++ if (!*names_list)
++ return val;
++
++ for (p= e= names_list; ; e++)
++ {
++ ulong i;
++
++ if (*e != ',' && *e)
++ continue;
++ for (i=0; opts[i].name; i++)
++ {
++ if (!my_strnncoll(&my_charset_latin1,
++ (const uchar *)p, e - p,
++ (const uchar *)opts[i].name, strlen(opts[i].name)))
++ {
++ val= val | opts[i].val;
++ break;
++ }
++ }
++ if (opts[i].val == invalid_val)
++ return invalid_val;
++ if (!*e)
++ break;
++ p= e + 1;
++ }
++ return val;
++}
++
++const char *msl_option_get_name(const struct msl_opts *opts, ulong val)
++{
++ for (ulong i=0; opts[i].name && opts[i].name[0]; i++)
++ {
++ if (opts[i].val == val)
++ return opts[i].name;
++ }
++ return "*INVALID*";
++}
++
++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val)
++{ /* Writes the comma-separated names of all opts entries whose bits are set in val into buf; returns buf */
++ uint offset= 0; /* would-be length so far: snprintf() may report more than it actually wrote */
++
++ *buf= '\0';
++ for (ulong i=0; opts[i].name && opts[i].name[0] && offset < STRING_BUFFER_USUAL_SIZE - 1; i++)
++ { /* stop once the bound is reached, otherwise the unsigned size argument below wraps around */
++ if (opts[i].val & val)
++ offset+= snprintf(buf+offset, STRING_BUFFER_USUAL_SIZE - offset - 1,
++ "%s%s", (offset ? "," : ""), opts[i].name);
++ }
++ return buf;
++}
++
++/****************************************************************************
++ Functions to handle single-valued slow log options (sys_var_thd_msl_option)
++****************************************************************************/
++
++/* Based upon sys_var::check_enum() */
++
++bool sys_var_thd_msl_option::check(THD *thd, set_var *var)
++{
++ char buff[STRING_BUFFER_USUAL_SIZE];
++ String str(buff, sizeof(buff), &my_charset_latin1), *res;
++
++ if (var->value->result_type() == STRING_RESULT)
++ {
++ ulong verb= this->invalid_val;
++ if (!(res=var->value->val_str(&str)) ||
++ (var->save_result.ulong_value=
++ (ulong) (verb= msl_option_resolve_by_name(this->opts, res->ptr(), res->length()))) == this->invalid_val)
++ goto err;
++ return 0;
++ }
++
++err:
++ my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name);
++ return 1;
++}
++
++byte *sys_var_thd_msl_option::value_ptr(THD *thd, enum_var_type type,
++ LEX_STRING *base)
++{
++ ulong val;
++ val= ((type == OPT_GLOBAL) ? global_system_variables.*offset :
++ thd->variables.*offset);
++ const char *verbosity= msl_option_get_name(this->opts, val);
++ return (byte *) verbosity;
++}
++
++
++void sys_var_thd_msl_option::set_default(THD *thd, enum_var_type type)
++{
++ if (type == OPT_GLOBAL)
++ global_system_variables.*offset= (ulong) this->default_val;
++ else
++ thd->variables.*offset= (ulong) (global_system_variables.*offset);
++}
++
++
++bool sys_var_thd_msl_option::update(THD *thd, set_var *var)
++{
++ if (var->type == OPT_GLOBAL)
++ global_system_variables.*offset= var->save_result.ulong_value;
++ else
++ thd->variables.*offset= var->save_result.ulong_value;
++ return 0;
++}
++
++/****************************************************************************
++ Functions to handle slow log flag lists (log_slow_filter, log_slow_verbosity)
++****************************************************************************/
++
++/* Based upon sys_var::check_enum() */
++
++bool sys_var_thd_msl_flag::check(THD *thd, set_var *var)
++{
++ char buff[2 * STRING_BUFFER_USUAL_SIZE];
++ String str(buff, sizeof(buff), &my_charset_latin1), *res;
++
++ if (var->value->result_type() == STRING_RESULT)
++ {
++ ulong filter= this->none_val;
++ if (!(res=var->value->val_str(&str)) ||
++ (var->save_result.ulong_value=
++ (ulong) (filter= msl_flag_resolve_by_name(this->flags, res->ptr(), this->none_val,
++ this->invalid_val))) == this->invalid_val)
++ goto err;
++ return 0;
++ }
++
++err:
++ my_error(ER_WRONG_ARGUMENTS, MYF(0), var->var->name);
++ return 1;
++}
++
++byte *sys_var_thd_msl_flag::value_ptr(THD *thd, enum_var_type type,
++ LEX_STRING *base)
++{
++ ulong val;
++ val= ((type == OPT_GLOBAL) ? global_system_variables.*offset :
++ thd->variables.*offset);
++ msl_flag_get_name(this->flags, this->flags_string, val);
++ return (byte *) this->flags_string;
++}
++
++
++void sys_var_thd_msl_flag::set_default(THD *thd, enum_var_type type)
++{
++ if (type == OPT_GLOBAL)
++ global_system_variables.*offset= (ulong) this->default_val;
++ else
++ thd->variables.*offset= (ulong) (global_system_variables.*offset);
++}
++
++
++bool sys_var_thd_msl_flag::update(THD *thd, set_var *var)
++{
++ if (var->type == OPT_GLOBAL)
++ global_system_variables.*offset= var->save_result.ulong_value;
++ else
++ thd->variables.*offset= var->save_result.ulong_value;
++ return 0;
++}
++
+ /****************************************************************************
+ Functions to handle table_type
+ ****************************************************************************/
+diff -r bb81fcdd7db2 sql/set_var.h
+--- a/sql/set_var.h Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/set_var.h Mon Sep 08 16:38:46 2008 -0700
+@@ -123,6 +123,7 @@
+ };
+
+
++
+ class sys_var_ulonglong_ptr :public sys_var
+ {
+ public:
+@@ -309,7 +310,6 @@
+ }
+ };
+
+-
+ class sys_var_thd_ulong :public sys_var_thd
+ {
+ sys_check_func check_func;
+@@ -329,6 +329,23 @@
+ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
+ };
+
++class sys_var_thd_microtime :public sys_var_thd
++{ /* Per-session/global variable held as a ulonglong and reported as SHOW_MICROTIME */
++public:
++ ulonglong SV::*offset;
++ sys_var_thd_microtime(const char *name_arg, ulonglong SV::*offset_arg)
++ :sys_var_thd(name_arg), offset(offset_arg)
++ {}
++ bool update(THD *thd, set_var *var);
++ void set_default(THD *thd, enum_var_type type);
++ SHOW_TYPE show_type() { return SHOW_MICROTIME; } /* same accessor name as the sibling sys_var_thd_* classes */
++ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
++ bool check(THD *thd, set_var *var);
++ bool check_update_type(Item_result type)
++ {
++ return type != INT_RESULT && type != DECIMAL_RESULT; /* Only accept integer or decimal values */
++ }
++};
+
+ class sys_var_thd_ha_rows :public sys_var_thd
+ {
+@@ -346,7 +363,6 @@
+ SHOW_TYPE show_type() { return SHOW_HA_ROWS; }
+ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
+ };
+-
+
+ class sys_var_thd_ulonglong :public sys_var_thd
+ {
+@@ -376,7 +392,6 @@
+ }
+ };
+
+-
+ class sys_var_thd_bool :public sys_var_thd
+ {
+ public:
+@@ -446,6 +461,66 @@
+ ulong *length);
+ };
+
++
++class sys_var_thd_msl_option :public sys_var_thd
++{
++protected:
++ ulong SV::*offset;
++ const ulong none_val;
++ const ulong default_val;
++ const ulong invalid_val;
++ const struct msl_opts *opts;
++public:
++ sys_var_thd_msl_option(const char *name_arg, ulong SV::*offset_arg,
++ const ulong none_val_arg,
++ const ulong default_val_arg,
++ const ulong invalid_val_arg,
++ const struct msl_opts *opts_arg)
++ :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg),
++ default_val(default_val_arg), invalid_val(invalid_val_arg),
++ opts(opts_arg)
++ {}
++ bool check(THD *thd, set_var *var);
++ SHOW_TYPE show_type() { return SHOW_CHAR; }
++ bool check_update_type(Item_result type)
++ {
++ return type != STRING_RESULT; /* Only accept strings */
++ }
++ void set_default(THD *thd, enum_var_type type);
++ bool update(THD *thd, set_var *var);
++ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
++};
++
++
++class sys_var_thd_msl_flag :public sys_var_thd
++{
++protected:
++ char flags_string[2 * STRING_BUFFER_USUAL_SIZE];
++ ulong SV::*offset;
++ const ulong none_val;
++ const ulong default_val;
++ const ulong invalid_val;
++ const struct msl_opts *flags;
++public:
++ sys_var_thd_msl_flag(const char *name_arg, ulong SV::*offset_arg,
++ const ulong none_val_arg,
++ const ulong default_val_arg,
++ const ulong invalid_val_arg,
++ const struct msl_opts *flags_arg)
++ :sys_var_thd(name_arg), offset(offset_arg), none_val(none_val_arg),
++ default_val(default_val_arg), invalid_val(invalid_val_arg),
++ flags(flags_arg)
++ {}
++ bool check(THD *thd, set_var *var);
++ SHOW_TYPE show_type() { return SHOW_CHAR; }
++ bool check_update_type(Item_result type)
++ {
++ return type != STRING_RESULT; /* Only accept strings */
++ }
++ void set_default(THD *thd, enum_var_type type);
++ bool update(THD *thd, set_var *var);
++ byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base);
++};
+
+ class sys_var_thd_storage_engine :public sys_var_thd
+ {
+@@ -1042,3 +1117,11 @@
+ bool process_key_caches(int (* func) (const char *name, KEY_CACHE *));
+ void delete_elements(I_List<NAMED_LIST> *list,
+ void (*free_element)(const char*, gptr));
++
++/* Slow log functions */
++
++ulong msl_option_resolve_by_name(const struct msl_opts *opts, const char *name, ulong len);
++ulong msl_flag_resolve_by_name(const struct msl_opts *opts, const char *names_list,
++ const ulong none_val, const ulong invalid_val);
++const char *msl_option_get_name(const struct msl_opts *opts, ulong val);
++char *msl_flag_get_name(const struct msl_opts *opts, char *buf, ulong val);
+diff -r bb81fcdd7db2 sql/slave.cc
+--- a/sql/slave.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/slave.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -2925,6 +2925,12 @@
+ + MAX_LOG_EVENT_HEADER; /* note, incr over the global not session var */
+ thd->slave_thread = 1;
+ set_slave_thread_options(thd);
++ if (opt_log_slow_slave_statements)
++ {
++ thd->enable_slow_log= TRUE;
++ /* Slave thread is excluded from rate limiting the slow log writes. */
++ thd->write_to_slow_log= TRUE;
++ }
+ thd->client_capabilities = CLIENT_LOCAL_FILES;
+ thd->real_id=pthread_self();
+ pthread_mutex_lock(&LOCK_thread_count);
+diff -r bb81fcdd7db2 sql/sql_cache.cc
+--- a/sql/sql_cache.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_cache.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -1334,6 +1334,7 @@
+
+ thd->limit_found_rows = query->found_rows();
+ thd->status_var.last_query_cost= 0.0;
++ thd->query_plan_flags|= QPLAN_QC;
+
+ BLOCK_UNLOCK_RD(query_block);
+ DBUG_RETURN(1); // Result sent to client
+@@ -1341,6 +1342,7 @@
+ err_unlock:
+ STRUCT_UNLOCK(&structure_guard_mutex);
+ err:
++ thd->query_plan_flags|= QPLAN_QC_NO;
+ DBUG_RETURN(0); // Query was not cached
+ }
+
+diff -r bb81fcdd7db2 sql/sql_class.cc
+--- a/sql/sql_class.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_class.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -174,7 +174,7 @@
+ lock_id(&main_lock_id),
+ user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0),
+ transaction_rollback_request(0), is_fatal_sub_stmt_error(0),
+- rand_used(0), time_zone_used(0),
++ rand_used(0), time_zone_used(0), user_timer(0),
+ last_insert_id_used(0), last_insert_id_used_bin_log(0), insert_id_used(0),
+ clear_next_insert_id(0), in_lock_tables(0), bootstrap(0),
+ derived_tables_processing(FALSE), spcont(NULL),
+@@ -2198,6 +2198,12 @@
+ backup->cuted_fields= cuted_fields;
+ backup->client_capabilities= client_capabilities;
+ backup->savepoints= transaction.savepoints;
++ backup->innodb_io_reads= innodb_io_reads;
++ backup->innodb_io_read= innodb_io_read;
++ backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer;
++ backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer;
++ backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer;
++ backup->innodb_page_access= innodb_page_access;
+
+ if (!lex->requires_prelocking() || is_update_query(lex->sql_command))
+ options&= ~OPTION_BIN_LOG;
+@@ -2214,7 +2220,13 @@
+ sent_row_count= 0;
+ cuted_fields= 0;
+ transaction.savepoints= 0;
+-
++ innodb_io_reads= 0;
++ innodb_io_read= 0;
++ innodb_io_reads_wait_timer= 0;
++ innodb_lock_que_wait_timer= 0;
++ innodb_innodb_que_wait_timer= 0;
++ innodb_page_access= 0;
++
+ /* Surpress OK packets in case if we will execute statements */
+ net.no_send_ok= TRUE;
+ }
+@@ -2267,6 +2279,12 @@
+ */
+ examined_row_count+= backup->examined_row_count;
+ cuted_fields+= backup->cuted_fields;
++ innodb_io_reads+= backup->innodb_io_reads;
++ innodb_io_read+= backup->innodb_io_read;
++ innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer;
++ innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer;
++ innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer;
++ innodb_page_access+= backup->innodb_page_access;
+ }
+
+
+diff -r bb81fcdd7db2 sql/sql_class.h
+--- a/sql/sql_class.h Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_class.h Mon Sep 08 16:38:46 2008 -0700
+@@ -43,6 +43,13 @@
+ extern char internal_table_name[2];
+ extern char empty_c_string[1];
+ extern const char **errmesg;
++#ifdef __cplusplus
++__BEGIN_DECLS
++#endif
++extern ulonglong frequency;
++#ifdef __cplusplus
++__END_DECLS
++#endif
+
+ #define TC_LOG_PAGE_SIZE 8192
+ #define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE)
+@@ -314,7 +321,7 @@
+ bool write(THD *thd, enum enum_server_command command,
+ const char *format, ...) ATTRIBUTE_FORMAT(printf, 4, 5);
+ bool write(THD *thd, const char *query, uint query_length,
+- time_t query_start=0);
++ time_t query_start=0, ulonglong query_start_timer=0);
+ bool write(Log_event* event_info); // binary log write
+ bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
+
+@@ -520,13 +527,14 @@
+ ulong auto_increment_increment, auto_increment_offset;
+ ulong bulk_insert_buff_size;
+ ulong join_buff_size;
+- ulong long_query_time;
++ ulonglong long_query_time;
+ ulong max_allowed_packet;
+ ulong max_error_count;
+ ulong max_length_for_sort_data;
+ ulong max_sort_length;
+ ulong max_tmp_tables;
+ ulong max_insert_delayed_threads;
++ ulong min_examined_row_limit;
+ ulong multi_range_count;
+ ulong myisam_repair_threads;
+ ulong myisam_sort_buff_size;
+@@ -541,10 +549,13 @@
+ ulong optimizer_search_depth;
+ ulong preload_buff_size;
+ ulong query_cache_type;
++ ulong log_slow_rate_limit;
+ ulong read_buff_size;
+ ulong read_rnd_buff_size;
+ ulong div_precincrement;
+ ulong sortbuff_size;
++ ulong log_slow_filter;
++ ulong log_slow_verbosity;
+ ulong table_type;
+ ulong tx_isolation;
+ ulong completion_type;
+@@ -1111,6 +1122,12 @@
+ uint in_sub_stmt;
+ bool enable_slow_log, insert_id_used, clear_next_insert_id;
+ bool last_insert_id_used;
++ ulong innodb_io_reads;
++ ulonglong innodb_io_read;
++ ulong innodb_io_reads_wait_timer;
++ ulong innodb_lock_que_wait_timer;
++ ulong innodb_innodb_que_wait_timer;
++ ulong innodb_page_access;
+ my_bool no_send_ok;
+ SAVEPOINT *savepoints;
+ };
+@@ -1167,6 +1184,11 @@
+ class THD :public Statement,
+ public Open_tables_state
+ {
++private:
++ inline ulonglong query_start_timer() { return start_timer; }
++ inline void set_timer() { if (user_timer) start_timer=timer_after_lock=user_timer; else timer_after_lock=my_timer(&start_timer, frequency); }
++ inline void end_timer() { my_timer(&start_timer, frequency); }
++ inline void lock_timer() { my_timer(&timer_after_lock, frequency); }
+ public:
+ /*
+ Constant for THD::where initialization in the beginning of every query.
+@@ -1272,10 +1294,24 @@
+ */
+ const char *where;
+ time_t start_time,time_after_lock,user_time;
++ ulonglong start_timer,timer_after_lock, user_timer;
+ time_t connect_time,thr_create_time; // track down slow pthread_create
+ thr_lock_type update_lock_default;
+ Delayed_insert *di;
+
++ bool write_to_slow_log;
++
++ bool innodb_was_used;
++ ulong innodb_io_reads;
++ ulonglong innodb_io_read;
++ ulong innodb_io_reads_wait_timer;
++ ulong innodb_lock_que_wait_timer;
++ ulong innodb_innodb_que_wait_timer;
++ ulong innodb_page_access;
++
++ ulong query_plan_flags;
++ ulong query_plan_fsort_passes;
++
+ /* <> 0 if we are inside of trigger or stored function. */
+ uint in_sub_stmt;
+
+@@ -1661,11 +1697,11 @@
+ sql_print_information("time() failed with %d", errno);
+ }
+
+- inline time_t query_start() { query_start_used=1; return start_time; }
+- inline void set_time() { if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }}
+- inline void end_time() { safe_time(&start_time); }
+- inline void set_time(time_t t) { time_after_lock=start_time=user_time=t; }
+- inline void lock_time() { safe_time(&time_after_lock); }
++ inline time_t query_start() { query_start_timer(); query_start_used=1; return start_time; }
++ inline void set_time() { set_timer(); if (user_time) start_time=time_after_lock=user_time; else { safe_time(&start_time); time_after_lock= start_time; }}
++ inline void end_time() { end_timer(); safe_time(&start_time); }
++ inline void set_time(time_t t) { set_timer(); time_after_lock=start_time=user_time=t; }
++ inline void lock_time() { lock_timer(); safe_time(&time_after_lock); }
+ inline void insert_id(ulonglong id_arg)
+ {
+ last_insert_id= id_arg;
+diff -r bb81fcdd7db2 sql/sql_parse.cc
+--- a/sql/sql_parse.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_parse.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -20,6 +20,7 @@
+ #include <m_ctype.h>
+ #include <myisam.h>
+ #include <my_dir.h>
++#include <my_time.h>
+
+ #ifdef HAVE_INNOBASE_DB
+ #include "ha_innodb.h"
+@@ -1180,6 +1181,15 @@
+ my_net_set_read_timeout(net, thd->variables.net_read_timeout);
+ my_net_set_write_timeout(net, thd->variables.net_write_timeout);
+
++ /*
++ If rate limiting of slow log writes is enabled, decide whether to log this
++ new thread's queries or not. Uses extremely simple algorithm. :)
++ */
++ thd->write_to_slow_log= FALSE;
++ if (thd->variables.log_slow_rate_limit <= 1 ||
++ (thd->thread_id % thd->variables.log_slow_rate_limit) == 0)
++ thd->write_to_slow_log= TRUE;
++
+ while (!net->error && net->vio != 0 &&
+ !(thd->killed == THD::KILL_CONNECTION))
+ {
+@@ -2255,26 +2265,52 @@
+ return; // Don't set time for sub stmt
+
+ start_of_query= thd->start_time;
++ ulonglong start_of_query_timer= thd->start_timer;
+ thd->end_time(); // Set start time
++
++
++ /* Follow the slow log filter configuration. */
++ if (thd->variables.log_slow_filter != SLOG_F_NONE &&
++ (!(thd->variables.log_slow_filter & thd->query_plan_flags) ||
++ ((thd->variables.log_slow_filter & SLOG_F_QC_NO) &&
++ (thd->query_plan_flags & QPLAN_QC))))
++ return;
++
++ /*
++ Low long_query_time value most likely means user is debugging stuff and even
++ though some thread's queries are not supposed to be logged b/c of the rate
++ limit, if one of them takes long enough (>= 1 second) it will be sensible
++ to make an exception and write to slow log anyway.
++ */
++ if (thd->write_to_slow_log != TRUE && thd->variables.long_query_time < 1000000 &&
++ (ulonglong) (thd->start_timer - thd->timer_after_lock) >= 1000000) /* keep all 64 bits: ulong may be only 32 bits wide */
++ thd->write_to_slow_log= TRUE;
++
++ /* Do not log this thread's queries due to rate limiting. */
++ if (thd->write_to_slow_log != TRUE)
++ return;
+
+ /*
+ Do not log administrative statements unless the appropriate option is
+ set; do not log into slow log if reading from backup.
+ */
+- if (thd->enable_slow_log && !thd->user_time)
++ if (thd->enable_slow_log &&
++ (!thd->user_time || (thd->slave_thread && opt_log_slow_slave_statements))
++ )
+ {
+ thd->proc_info="logging slow query";
+
+- if ((ulong) (thd->start_time - thd->time_after_lock) >
+- thd->variables.long_query_time ||
+- (thd->server_status &
+- (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
+- opt_log_queries_not_using_indexes &&
+- /* == SQLCOM_END unless this is a SHOW command */
+- thd->lex->orig_sql_command == SQLCOM_END)
++ if (((ulonglong) (thd->start_timer - thd->timer_after_lock) >= /* 64-bit compare: both operands are ulonglong */
++ thd->variables.long_query_time ||
++ (thd->server_status &
++ (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
++ opt_log_queries_not_using_indexes &&
++ /* == SQLCOM_END unless this is a SHOW command */
++ thd->lex->orig_sql_command == SQLCOM_END) &&
++ thd->examined_row_count >= thd->variables.min_examined_row_limit)
+ {
+ thd->status_var.long_query_count++;
+- mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query);
++ mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query, start_of_query_timer);
+ }
+ }
+ }
+@@ -5949,6 +5985,15 @@
+ thd->total_warn_count=0; // Warnings for this query
+ thd->rand_used= 0;
+ thd->sent_row_count= thd->examined_row_count= 0;
++ thd->innodb_was_used= FALSE;
++ thd->innodb_io_reads= 0;
++ thd->innodb_io_read= 0;
++ thd->innodb_io_reads_wait_timer= 0;
++ thd->innodb_lock_que_wait_timer= 0;
++ thd->innodb_innodb_que_wait_timer= 0;
++ thd->innodb_page_access= 0;
++ thd->query_plan_flags= QPLAN_NONE;
++ thd->query_plan_fsort_passes= 0;
+ }
+ DBUG_VOID_RETURN;
+ }
+diff -r bb81fcdd7db2 sql/sql_select.cc
+--- a/sql/sql_select.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_select.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -6198,8 +6198,11 @@
+ {
+ join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+ if (statistics)
++ {
+ statistic_increment(join->thd->status_var.select_scan_count,
+ &LOCK_status);
++ join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
++ }
+ }
+ }
+ else
+@@ -6214,8 +6217,11 @@
+ {
+ join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+ if (statistics)
++ {
+ statistic_increment(join->thd->status_var.select_full_join_count,
+ &LOCK_status);
++ join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
++ }
+ }
+ }
+ if (!table->no_keyread)
+@@ -9265,6 +9271,7 @@
+ (ulong) rows_limit,test(group)));
+
+ statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status);
++ thd->query_plan_flags|= QPLAN_TMP_TABLE;
+
+ if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
+ temp_pool_slot = bitmap_set_next(&temp_pool);
+@@ -10125,6 +10132,7 @@
+ }
+ statistic_increment(table->in_use->status_var.created_tmp_disk_tables,
+ &LOCK_status);
++ table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
+ table->s->db_record_offset= 1;
+ DBUG_RETURN(0);
+ err:
+diff -r bb81fcdd7db2 sql/sql_show.cc
+--- a/sql/sql_show.cc Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/sql_show.cc Mon Sep 08 16:38:46 2008 -0700
+@@ -1531,6 +1531,9 @@
+ value= ((char *) status_var + (ulonglong) value);
+ case SHOW_LONGLONG:
+ end= longlong10_to_str(*(longlong*) value, buff, 10);
++ break;
++ case SHOW_MICROTIME:
++ end= buff + sprintf(buff, "%.6f", (*(ulonglong*)value) / 1000000.0);
+ break;
+ case SHOW_HA_ROWS:
+ end= longlong10_to_str((longlong) *(ha_rows*) value, buff, 10);
+diff -r bb81fcdd7db2 sql/structs.h
+--- a/sql/structs.h Mon Sep 08 16:38:33 2008 -0700
++++ b/sql/structs.h Mon Sep 08 16:38:46 2008 -0700
+@@ -168,8 +168,8 @@
+ enum SHOW_TYPE
+ {
+ SHOW_UNDEF,
+- SHOW_LONG, SHOW_LONGLONG, SHOW_INT, SHOW_CHAR, SHOW_CHAR_PTR,
+- SHOW_DOUBLE_STATUS,
++ SHOW_LONG, SHOW_LONGLONG, SHOW_MICROTIME, SHOW_INT, SHOW_CHAR,
++ SHOW_CHAR_PTR, SHOW_DOUBLE_STATUS,
+ SHOW_BOOL, SHOW_MY_BOOL, SHOW_OPENTABLES, SHOW_STARTTIME, SHOW_QUESTION,
+ SHOW_LONG_CONST, SHOW_INT_CONST, SHOW_HAVE, SHOW_SYS, SHOW_HA_ROWS,
+ SHOW_VARS,
diff --git a/mysql-show_patches.patch b/mysql-show_patches.patch
new file mode 100644
index 0000000..1e63c12
--- /dev/null
+++ b/mysql-show_patches.patch
@@ -0,0 +1,294 @@
+diff -r a36b98c5e2e3 patch_info/show_patches.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/show_patches.info Mon Sep 08 16:38:33 2008 -0700
+@@ -0,0 +1,6 @@
++File=show_patches.patch
++Name=SHOW PATCHES
++Version=1.0
++Author=Jeremy Cole
++License=N/A
++Comment
+diff -r a36b98c5e2e3 sql/Makefile.am
+--- a/sql/Makefile.am Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/Makefile.am Mon Sep 08 16:38:33 2008 -0700
+@@ -116,7 +116,7 @@
+ -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \
+ @DEFS@
+
+-BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h
++BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h
+ EXTRA_DIST = $(BUILT_SOURCES) nt_servc.cc nt_servc.h \
+ message.mc examples/CMakeLists.txt CMakeLists.txt \
+ udf_example.c udf_example.def
+@@ -172,6 +172,8 @@
+ udf_example_la_SOURCES= udf_example.c
+ udf_example_la_LDFLAGS= -module -rpath $(pkglibdir)
+
++patch_info.h: patch_info.h.pl
++ $(PERL) $< > $@
+
+ # Don't update the files from bitkeeper
+ %::SCCS/s.%
+diff -r a36b98c5e2e3 sql/Makefile.in
+--- a/sql/Makefile.in Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/Makefile.in Mon Sep 08 16:38:33 2008 -0700
+@@ -556,7 +556,7 @@
+ gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS)
+ mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc
+ mysql_tzinfo_to_sql_LDADD = @MYSQLD_EXTRA_LDFLAGS@ $(LDADD) $(CXXLDFLAGS)
+-BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h
++BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h patch_info.h
+ EXTRA_DIST = $(BUILT_SOURCES) nt_servc.cc nt_servc.h \
+ message.mc examples/CMakeLists.txt CMakeLists.txt \
+ udf_example.c udf_example.def
+@@ -1230,6 +1230,9 @@
+ ./gen_lex_hash$(EXEEXT) > $@-t
+ $(MV) $@-t $@
+
++patch_info.h: patch_info.h.pl
++ $(PERL) $< > $@
++
+ # Don't update the files from bitkeeper
+ %::SCCS/s.%
+ # Tell versions [3.59,3.63) of GNU make to not export all variables.
+diff -r a36b98c5e2e3 sql/lex.h
+--- a/sql/lex.h Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/lex.h Mon Sep 08 16:38:33 2008 -0700
+@@ -359,6 +359,7 @@
+ { "PACK_KEYS", SYM(PACK_KEYS_SYM)},
+ { "PARTIAL", SYM(PARTIAL)},
+ { "PASSWORD", SYM(PASSWORD)},
++ { "PATCHES", SYM(PATCHES)},
+ { "PHASE", SYM(PHASE_SYM)},
+ { "POINT", SYM(POINT_SYM)},
+ { "POLYGON", SYM(POLYGON)},
+diff -r a36b98c5e2e3 sql/mysql_priv.h
+--- a/sql/mysql_priv.h Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/mysql_priv.h Mon Sep 08 16:38:33 2008 -0700
+@@ -948,6 +948,7 @@
+ int mysqld_show_status(THD *thd);
+ int mysqld_show_variables(THD *thd,const char *wild);
+ bool mysqld_show_storage_engines(THD *thd);
++bool mysqld_show_patches(THD *thd);
+ bool mysqld_show_privileges(THD *thd);
+ bool mysqld_show_column_types(THD *thd);
+ bool mysqld_help (THD *thd, const char *text);
+diff -r a36b98c5e2e3 sql/patch_info.h.pl
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/sql/patch_info.h.pl Mon Sep 08 16:38:33 2008 -0700
+@@ -0,0 +1,65 @@
++use strict;
++
++my $patch_info_path = '../patch_info';
++my $file = '';
++my $output = '';
++
++
++if (opendir(PATCH_DIR, $patch_info_path))
++{
++ while (defined(my $file = readdir(PATCH_DIR)))
++ {
++ if ($file =~ /^\./) # skip ".", ".." and hidden entries before trying to open anything
++ {
++ next;
++ }
++
++ open(PATCH_FILE, "<$patch_info_path/$file") || die("Unable to open $patch_info_path/$file ($!)");
++ my %fields;
++
++ while (<PATCH_FILE>)
++ {
++ chomp;
++ # Split on the first '=' only, so that a value may itself contain '='.
++ my ($key, $value) = split(/\s*=\s*/, $_, 2);
++ $fields{lc($key)} = $value;
++ }
++
++ $output .= "{\"$fields{'file'}\", \"$fields{'name'}\", \"$fields{'version'}\", \"$fields{'author'}\", \"$fields{'license'}\",\"$fields{'comment'}\"},\n"
++ }
++}
++
++print <<HEADER;
++
++/* Copyright (C) 2002-2006 MySQL AB
++
++ This program is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; version 2 of the License.
++
++ This program is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
++
++#ifdef USE_PRAGMA_INTERFACE
++#pragma interface /* gcc class implementation */
++#endif
++
++struct patch {
++ const char *file;
++ const char *name;
++ const char *version;
++ const char *author;
++ const char *license;
++ const char *comment;
++}patches[] = {
++$output
++{NULL, NULL, NULL, NULL}
++};
++
++HEADER
+diff -r a36b98c5e2e3 sql/sp_head.cc
+--- a/sql/sp_head.cc Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sp_head.cc Mon Sep 08 16:38:33 2008 -0700
+@@ -188,6 +188,7 @@
+ case SQLCOM_SHOW_MUTEX_STATUS:
+ case SQLCOM_SHOW_NEW_MASTER:
+ case SQLCOM_SHOW_OPEN_TABLES:
++ case SQLCOM_SHOW_PATCHES:
+ case SQLCOM_SHOW_PRIVILEGES:
+ case SQLCOM_SHOW_PROCESSLIST:
+ case SQLCOM_SHOW_SLAVE_HOSTS:
+diff -r a36b98c5e2e3 sql/sql_lex.h
+--- a/sql/sql_lex.h Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_lex.h Mon Sep 08 16:38:33 2008 -0700
+@@ -81,9 +81,9 @@
+ SQLCOM_SHOW_SLAVE_HOSTS, SQLCOM_DELETE_MULTI, SQLCOM_UPDATE_MULTI,
+ SQLCOM_SHOW_BINLOG_EVENTS, SQLCOM_SHOW_NEW_MASTER, SQLCOM_DO,
+ SQLCOM_SHOW_WARNS, SQLCOM_EMPTY_QUERY, SQLCOM_SHOW_ERRORS,
+- SQLCOM_SHOW_COLUMN_TYPES, SQLCOM_SHOW_STORAGE_ENGINES, SQLCOM_SHOW_PRIVILEGES,
+- SQLCOM_HELP, SQLCOM_CREATE_USER, SQLCOM_DROP_USER, SQLCOM_RENAME_USER,
+- SQLCOM_REVOKE_ALL, SQLCOM_CHECKSUM,
++ SQLCOM_SHOW_COLUMN_TYPES, SQLCOM_SHOW_PATCHES, SQLCOM_SHOW_STORAGE_ENGINES,
++ SQLCOM_SHOW_PRIVILEGES, SQLCOM_HELP, SQLCOM_CREATE_USER, SQLCOM_DROP_USER,
++ SQLCOM_RENAME_USER, SQLCOM_REVOKE_ALL, SQLCOM_CHECKSUM,
+ SQLCOM_CREATE_PROCEDURE, SQLCOM_CREATE_SPFUNCTION, SQLCOM_CALL,
+ SQLCOM_DROP_PROCEDURE, SQLCOM_ALTER_PROCEDURE,SQLCOM_ALTER_FUNCTION,
+ SQLCOM_SHOW_CREATE_PROC, SQLCOM_SHOW_CREATE_FUNC,
+diff -r a36b98c5e2e3 sql/sql_parse.cc
+--- a/sql/sql_parse.cc Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_parse.cc Mon Sep 08 16:38:33 2008 -0700
+@@ -3826,6 +3826,9 @@
+ break;
+ case SQLCOM_SHOW_STORAGE_ENGINES:
+ res= mysqld_show_storage_engines(thd);
++ break;
++ case SQLCOM_SHOW_PATCHES:
++ res= mysqld_show_patches(thd);
+ break;
+ case SQLCOM_SHOW_PRIVILEGES:
+ res= mysqld_show_privileges(thd);
+diff -r a36b98c5e2e3 sql/sql_prepare.cc
+--- a/sql/sql_prepare.cc Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_prepare.cc Mon Sep 08 16:38:33 2008 -0700
+@@ -1790,6 +1790,7 @@
+ case SQLCOM_SHOW_DATABASES:
+ case SQLCOM_SHOW_PROCESSLIST:
+ case SQLCOM_SHOW_STORAGE_ENGINES:
++ case SQLCOM_SHOW_PATCHES:
+ case SQLCOM_SHOW_PRIVILEGES:
+ case SQLCOM_SHOW_COLUMN_TYPES:
+ case SQLCOM_SHOW_STATUS:
+diff -r a36b98c5e2e3 sql/sql_show.cc
+--- a/sql/sql_show.cc Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_show.cc Mon Sep 08 16:38:33 2008 -0700
+@@ -22,6 +22,7 @@
+ #include "sp.h"
+ #include "sp_head.h"
+ #include "sql_trigger.h"
++#include "patch_info.h"
+ #include <my_dir.h>
+
+ #ifdef HAVE_BERKELEY_DB
+@@ -45,6 +46,48 @@
+ static int
+ view_store_create_info(THD *thd, TABLE_LIST *table, String *buff);
+ static bool schema_table_store_record(THD *thd, TABLE *table);
++
++
++/***************************************************************************
++** List patches built into this release (SHOW PATCHES): one row per patches[] entry
++***************************************************************************/
++
++bool mysqld_show_patches(THD *thd)
++{
++ List<Item> field_list;
++ int i = 0;
++ Protocol *protocol= thd->protocol;
++ DBUG_ENTER("mysqld_show_patches");
++ /* Describe the six string columns of the result set (column name, max length). */
++ field_list.push_back(new Item_empty_string("File", 255));
++ field_list.push_back(new Item_empty_string("Name", 50));
++ field_list.push_back(new Item_empty_string("Version", 10));
++ field_list.push_back(new Item_empty_string("Author", 50));
++ field_list.push_back(new Item_empty_string("License", 50));
++ field_list.push_back(new Item_empty_string("Comment", 32));
++ /* Send the column metadata first; a non-zero result makes this function return TRUE. */
++ if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
++ DBUG_RETURN(TRUE);
++ /* patches[] is walked until the first entry whose file pointer is NULL. */
++ for (i = 0; patches[i].file; i++)
++ {
++ protocol->prepare_for_resend();
++ protocol->store(patches[i].file, system_charset_info);
++ protocol->store(patches[i].name, system_charset_info);
++ protocol->store(patches[i].version, system_charset_info);
++ protocol->store(patches[i].author, system_charset_info);
++ protocol->store(patches[i].license, system_charset_info);
++ protocol->store(patches[i].comment, system_charset_info);
++ /* Emit the row assembled above; stop and return TRUE if the write fails. */
++ if (protocol->write())
++ DBUG_RETURN(TRUE);
++ }
++
++ /* All rows were written: send EOF for the result set and return FALSE. */
++ send_eof(thd);
++ DBUG_RETURN(FALSE);
++
++}
+
+
+ /***************************************************************************
+diff -r a36b98c5e2e3 sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy Mon Sep 08 16:37:00 2008 -0700
++++ b/sql/sql_yacc.yy Mon Sep 08 16:38:33 2008 -0700
+@@ -816,6 +816,7 @@
+ %token PACK_KEYS_SYM
+ %token PARTIAL
+ %token PASSWORD
++%token PATCHES
+ %token PARAM_MARKER
+ %token PHASE_SYM
+ %token POINTFROMTEXT
+@@ -7948,7 +7949,7 @@
+ ;
+
+ show_param:
+- DATABASES wild_and_where
++ DATABASES wild_and_where
+ {
+ LEX *lex= Lex;
+ lex->sql_command= SQLCOM_SELECT;
+@@ -8048,6 +8049,10 @@
+ LEX *lex=Lex;
+ lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES;
+ WARN_DEPRECATED("SHOW TABLE TYPES", "SHOW [STORAGE] ENGINES");
++ }
++ | PATCHES
++ {
++ Lex->sql_command= SQLCOM_SHOW_PATCHES;
+ }
+ | opt_storage ENGINES_SYM
+ {
+@@ -9466,6 +9471,7 @@
+ | PACK_KEYS_SYM {}
+ | PARTIAL {}
+ | PASSWORD {}
++ | PATCHES {}
+ | PHASE_SYM {}
+ | POINT_SYM {}
+ | POLYGON {}
diff --git a/mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch b/mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch
new file mode 100644
index 0000000..b20ad8f
--- /dev/null
+++ b/mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch
@@ -0,0 +1,1302 @@
+diff -r 72a897774060 innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c Mon Sep 08 16:40:20 2008 -0700
++++ b/innobase/buf/buf0buf.c Mon Sep 08 16:40:27 2008 -0700
+@@ -546,6 +546,19 @@
+ mutex_create(&(buf_pool->mutex));
+ mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL);
+
++ mutex_create(&(buf_pool->flush_list_mutex));
++ mutex_create(&(buf_pool->LRU_mutex));
++ mutex_create(&(buf_pool->free_mutex));
++ mutex_create(&(buf_pool->hash_mutex));
++ mutex_set_level(&(buf_pool->flush_list_mutex), SYNC_NO_ORDER_CHECK);
++ mutex_set_level(&(buf_pool->LRU_mutex), SYNC_NO_ORDER_CHECK);
++ mutex_set_level(&(buf_pool->free_mutex), SYNC_NO_ORDER_CHECK);
++ mutex_set_level(&(buf_pool->hash_mutex), SYNC_NO_ORDER_CHECK);
++
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->flush_list_mutex));
++ mutex_enter(&(buf_pool->free_mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
+ mutex_enter(&(buf_pool->mutex));
+
+ if (srv_use_awe) {
+@@ -718,6 +731,10 @@
+ block->in_free_list = TRUE;
+ }
+
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ mutex_exit(&(buf_pool->free_mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+ mutex_exit(&(buf_pool->mutex));
+
+ if (srv_use_adaptive_hash_indexes) {
+@@ -854,12 +871,12 @@
+ if (buf_pool->freed_page_clock >= block->freed_page_clock
+ + 1 + (buf_pool->curr_size / 4)) {
+
+- mutex_enter(&buf_pool->mutex);
++ mutex_enter(&(buf_pool->LRU_mutex));
+ /* There has been freeing activity in the LRU list:
+ best to move to the head of the LRU list */
+
+ buf_LRU_make_block_young(block);
+- mutex_exit(&buf_pool->mutex);
++ mutex_exit(&(buf_pool->LRU_mutex));
+ }
+ }
+
+@@ -875,7 +892,7 @@
+ {
+ buf_block_t* block;
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
+
+ block = buf_block_align(frame);
+
+@@ -883,7 +900,7 @@
+
+ buf_LRU_make_block_young(block);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
+ }
+
+ /************************************************************************
+@@ -894,7 +911,7 @@
+ /*===========*/
+ buf_block_t* block) /* in, own: block to be freed */
+ {
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->free_mutex));
+
+ mutex_enter(&block->mutex);
+
+@@ -904,7 +921,7 @@
+
+ mutex_exit(&block->mutex);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->free_mutex));
+ }
+
+ /*************************************************************************
+@@ -945,11 +962,11 @@
+ {
+ buf_block_t* block;
+
+- mutex_enter_fast(&(buf_pool->mutex));
++ mutex_enter_fast(&(buf_pool->hash_mutex));
+
+ block = buf_page_hash_get(space, offset);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ return(block);
+ }
+@@ -966,7 +983,7 @@
+ {
+ buf_block_t* block;
+
+- mutex_enter_fast(&(buf_pool->mutex));
++ mutex_enter_fast(&(buf_pool->hash_mutex));
+
+ block = buf_page_hash_get(space, offset);
+
+@@ -974,7 +991,7 @@
+ block->check_index_page_at_flush = FALSE;
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+ }
+
+ /************************************************************************
+@@ -993,7 +1010,7 @@
+ buf_block_t* block;
+ ibool is_hashed;
+
+- mutex_enter_fast(&(buf_pool->mutex));
++ mutex_enter_fast(&(buf_pool->hash_mutex));
+
+ block = buf_page_hash_get(space, offset);
+
+@@ -1003,7 +1020,7 @@
+ is_hashed = block->is_hashed;
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ return(is_hashed);
+ }
+@@ -1045,7 +1062,7 @@
+ {
+ buf_block_t* block;
+
+- mutex_enter_fast(&(buf_pool->mutex));
++ mutex_enter_fast(&(buf_pool->hash_mutex));
+
+ block = buf_page_hash_get(space, offset);
+
+@@ -1053,7 +1070,7 @@
+ block->file_page_was_freed = TRUE;
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ return(block);
+ }
+@@ -1074,7 +1091,7 @@
+ {
+ buf_block_t* block;
+
+- mutex_enter_fast(&(buf_pool->mutex));
++ mutex_enter_fast(&(buf_pool->hash_mutex));
+
+ block = buf_page_hash_get(space, offset);
+
+@@ -1082,7 +1099,7 @@
+ block->file_page_was_freed = FALSE;
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ return(block);
+ }
+@@ -1154,26 +1171,33 @@
+ buf_pool->n_page_gets++;
+ loop:
+ block = NULL;
+- mutex_enter_fast(&(buf_pool->mutex));
++ // mutex_enter_fast(&(buf_pool->mutex));
+
+ if (guess) {
+ block = buf_block_align(guess);
+
++ mutex_enter(&block->mutex);
+ if ((offset != block->offset) || (space != block->space)
+ || (block->state != BUF_BLOCK_FILE_PAGE)) {
+
++ mutex_exit(&block->mutex);
+ block = NULL;
+ }
+ }
+
+ if (block == NULL) {
++ mutex_enter_fast(&(buf_pool->hash_mutex));
+ block = buf_page_hash_get(space, offset);
++ if(block) {
++ mutex_enter(&block->mutex);
++ }
++ mutex_exit(&(buf_pool->hash_mutex));
+ }
+
+ if (block == NULL) {
+ /* Page not in buf_pool: needs to be read from file */
+
+- mutex_exit(&(buf_pool->mutex));
++ // mutex_exit(&(buf_pool->mutex));
+
+ if (mode == BUF_GET_IF_IN_POOL) {
+
+@@ -1192,7 +1216,7 @@
+ goto loop;
+ }
+
+- mutex_enter(&block->mutex);
++ // mutex_enter(&block->mutex);
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+@@ -1204,7 +1228,7 @@
+
+ if (mode == BUF_GET_IF_IN_POOL) {
+ /* The page is only being read to buffer */
+- mutex_exit(&buf_pool->mutex);
++ // mutex_exit(&buf_pool->mutex);
+ mutex_exit(&block->mutex);
+
+ return(NULL);
+@@ -1221,7 +1245,9 @@
+ LRU list and we must put it to awe_LRU_free_mapped list once
+ mapped to a frame */
+
++ mutex_enter_fast(&(buf_pool->mutex));
+ buf_awe_map_page_to_frame(block, TRUE);
++ mutex_exit(&buf_pool->mutex);
+ }
+
+ #ifdef UNIV_SYNC_DEBUG
+@@ -1229,7 +1255,7 @@
+ #else
+ buf_block_buf_fix_inc(block);
+ #endif
+- mutex_exit(&buf_pool->mutex);
++ // mutex_exit(&buf_pool->mutex);
+
+ /* Check if this is the first access to the page */
+
+@@ -1773,7 +1799,8 @@
+
+ ut_a(block);
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
+ mutex_enter(&block->mutex);
+
+ if (fil_tablespace_deleted_or_being_deleted_in_mem(space,
+@@ -1788,7 +1815,8 @@
+ being deleted, or the page is already in buf_pool, return */
+
+ mutex_exit(&block->mutex);
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ buf_block_free(block);
+
+@@ -1803,10 +1831,14 @@
+ ut_ad(block);
+
+ buf_page_init(space, offset, block);
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ /* The block must be put to the LRU list, to the old blocks */
+
+ buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */
++ mutex_exit(&(buf_pool->LRU_mutex));
++
++ mutex_enter(&(buf_pool->mutex)); /* for consistency about aio */
+
+ block->io_fix = BUF_IO_READ;
+
+@@ -1855,7 +1887,8 @@
+
+ free_block = buf_LRU_get_free_block();
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
+
+ block = buf_page_hash_get(space, offset);
+
+@@ -1866,7 +1899,8 @@
+ block->file_page_was_freed = FALSE;
+
+ /* Page can be found in buf_pool */
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ buf_block_free(free_block);
+
+@@ -1889,6 +1923,7 @@
+ mutex_enter(&block->mutex);
+
+ buf_page_init(space, offset, block);
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ /* The block must be put to the LRU list */
+ buf_LRU_add_block(block, FALSE);
+@@ -1900,7 +1935,7 @@
+ #endif
+ buf_pool->n_pages_created++;
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
+
+ mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+
+@@ -1914,7 +1949,7 @@
+ ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin();
++ buf_flush_free_margin(FALSE);
+
+ frame = block->frame;
+
+@@ -1950,6 +1985,7 @@
+ {
+ ulint io_type;
+ ulint read_page_no;
++ ulint flush_type;
+
+ ut_ad(block);
+
+@@ -2029,9 +2065,6 @@
+ }
+ }
+
+- mutex_enter(&(buf_pool->mutex));
+- mutex_enter(&block->mutex);
+-
+ #ifdef UNIV_IBUF_DEBUG
+ ut_a(ibuf_count_get(block->space, block->offset) == 0);
+ #endif
+@@ -2040,9 +2073,12 @@
+ removes the newest lock debug record, without checking the thread
+ id. */
+
+- block->io_fix = 0;
+-
+ if (io_type == BUF_IO_READ) {
++ mutex_enter(&block->mutex);
++ mutex_enter(&(buf_pool->mutex));
++
++ block->io_fix = 0;
++
+ /* NOTE that the call to ibuf may have moved the ownership of
+ the x-latch to this OS thread: do not let this confuse you in
+ debugging! */
+@@ -2053,6 +2089,8 @@
+
+ rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
+
++ mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&block->mutex);
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+ fputs("Has read ", stderr);
+@@ -2061,14 +2099,32 @@
+ } else {
+ ut_ad(io_type == BUF_IO_WRITE);
+
++ flush_type = block->flush_type;
++ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++ mutex_enter(&(buf_pool->LRU_mutex));
++ }
++ mutex_enter(&(buf_pool->flush_list_mutex));
++ mutex_enter(&block->mutex);
++ mutex_enter(&(buf_pool->mutex));
++
++ block->io_fix = 0;
++
+ /* Write means a flush operation: call the completion
+ routine in the flush system */
+
+ buf_flush_write_complete(block);
+
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
++ mutex_exit(&(buf_pool->LRU_mutex));
++ }
++
+ rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
+
+ buf_pool->n_pages_written++;
++
++ mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&block->mutex);
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+@@ -2077,9 +2133,6 @@
+ #endif /* UNIV_DEBUG */
+ }
+
+- mutex_exit(&block->mutex);
+- mutex_exit(&(buf_pool->mutex));
+-
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+ fprintf(stderr, "page space %lu page no %lu\n",
+@@ -2107,11 +2160,11 @@
+ freed = buf_LRU_search_and_free_block(100);
+ }
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
+
+ ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
+ }
+
+ /*************************************************************************
+@@ -2130,10 +2183,22 @@
+ ulint n_flush = 0;
+ ulint n_free = 0;
+ ulint n_page = 0;
++ ulint n_single_flush_tmp = 0;
++ ulint n_lru_flush_tmp = 0;
++ ulint n_list_flush_tmp = 0;
+
+ ut_ad(buf_pool);
+
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->flush_list_mutex));
++ mutex_enter(&(buf_pool->free_mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
++
+ mutex_enter(&(buf_pool->mutex));
++ n_single_flush_tmp = buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
++ n_list_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LIST];
++ n_lru_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LRU];
++ mutex_exit(&(buf_pool->mutex));
+
+ for (i = 0; i < buf_pool->curr_size; i++) {
+
+@@ -2201,11 +2266,14 @@
+ }
+ ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
+
+- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
++ ut_a(n_single_flush_tmp == n_single_flush);
++ ut_a(n_list_flush_tmp == n_list_flush);
++ ut_a(n_lru_flush_tmp == n_lru_flush);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ mutex_exit(&(buf_pool->free_mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ ut_a(buf_LRU_validate());
+ ut_a(buf_flush_validate());
+@@ -2237,7 +2305,9 @@
+ index_ids = mem_alloc(sizeof(dulint) * size);
+ counts = mem_alloc(sizeof(ulint) * size);
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->flush_list_mutex));
++ mutex_enter(&(buf_pool->free_mutex));
+
+ fprintf(stderr,
+ "buf_pool size %lu\n"
+@@ -2290,7 +2360,9 @@
+ }
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ mutex_exit(&(buf_pool->free_mutex));
+
+ for (i = 0; i < n_found; i++) {
+ index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -2325,8 +2397,6 @@
+ ulint i;
+ ulint fixed_pages_number = 0;
+
+- mutex_enter(&(buf_pool->mutex));
+-
+ for (i = 0; i < buf_pool->curr_size; i++) {
+
+ block = buf_pool_get_nth_block(buf_pool, i);
+@@ -2342,7 +2412,6 @@
+ }
+ }
+
+- mutex_exit(&(buf_pool->mutex));
+ return fixed_pages_number;
+ }
+ #endif /* UNIV_DEBUG */
+@@ -2370,7 +2439,9 @@
+ {
+ ulint ratio;
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->flush_list_mutex));
++ mutex_enter(&(buf_pool->free_mutex));
+
+ ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
+ / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
+@@ -2378,7 +2449,9 @@
+
+ /* 1 + is there to avoid division by zero */
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ mutex_exit(&(buf_pool->free_mutex));
+
+ return(ratio);
+ }
+@@ -2398,6 +2471,9 @@
+ ut_ad(buf_pool);
+ size = buf_pool->curr_size;
+
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->flush_list_mutex));
++ mutex_enter(&(buf_pool->free_mutex));
+ mutex_enter(&(buf_pool->mutex));
+
+ if (srv_use_awe) {
+@@ -2469,6 +2545,9 @@
+ buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+ buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
+
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ mutex_exit(&(buf_pool->free_mutex));
+ mutex_exit(&(buf_pool->mutex));
+ }
+
+@@ -2499,8 +2578,6 @@
+
+ ut_ad(buf_pool);
+
+- mutex_enter(&(buf_pool->mutex));
+-
+ for (i = 0; i < buf_pool->curr_size; i++) {
+
+ block = buf_pool_get_nth_block(buf_pool, i);
+@@ -2521,8 +2598,6 @@
+
+ mutex_exit(&block->mutex);
+ }
+-
+- mutex_exit(&(buf_pool->mutex));
+
+ return(TRUE);
+ }
+@@ -2562,11 +2637,11 @@
+ {
+ ulint len;
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->free_mutex));
+
+ len = UT_LIST_GET_LEN(buf_pool->free);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->free_mutex));
+
+ return(len);
+ }
+diff -r 72a897774060 innobase/buf/buf0flu.c
+--- a/innobase/buf/buf0flu.c Mon Sep 08 16:40:20 2008 -0700
++++ b/innobase/buf/buf0flu.c Mon Sep 08 16:40:27 2008 -0700
+@@ -117,12 +117,14 @@
+ ut_ad(mutex_own(&block->mutex));
+ #endif /* UNIV_SYNC_DEBUG */
+ if (block->state != BUF_BLOCK_FILE_PAGE) {
++ /* Callers are permitted not to own LRU_mutex, so this state is no longer reported as an error. */
++/*
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: buffer block state %lu in the LRU list!\n",
+ (ulong)block->state);
+ ut_print_buf(stderr, (byte*)block, sizeof(buf_block_t));
+-
++*/
+ return(FALSE);
+ }
+
+@@ -535,18 +537,20 @@
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
+ || flush_type == BUF_FLUSH_SINGLE_PAGE);
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
+
+ block = buf_page_hash_get(space, offset);
+
+ ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
+
+ if (!block) {
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+ return(0);
+ }
+
+ mutex_enter(&block->mutex);
++ mutex_enter(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ if (flush_type == BUF_FLUSH_LIST
+ && buf_flush_ready_for_flush(block, flush_type)) {
+@@ -743,7 +747,7 @@
+ high = fil_space_get_size(space);
+ }
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
+
+ for (i = low; i < high; i++) {
+
+@@ -777,7 +781,7 @@
+
+ mutex_exit(&block->mutex);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ /* Note: as we release the buf_pool mutex
+ above, in buf_flush_try_page we cannot be sure
+@@ -788,14 +792,14 @@
+ count += buf_flush_try_page(space, i,
+ flush_type);
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
+ } else {
+ mutex_exit(&block->mutex);
+ }
+ }
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ return(count);
+ }
+@@ -848,7 +852,14 @@
+ }
+
+ (buf_pool->init_flush)[flush_type] = TRUE;
++
++ mutex_exit(&(buf_pool->mutex));
+
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_enter(&(buf_pool->LRU_mutex));
++ }
++ mutex_enter(&(buf_pool->flush_list_mutex));
++
+ for (;;) {
+ /* If we have flushed enough, leave the loop */
+ if (page_count >= min_n) {
+@@ -894,7 +905,10 @@
+ offset = block->offset;
+
+ mutex_exit(&block->mutex);
+- mutex_exit(&(buf_pool->mutex));
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_exit(&(buf_pool->LRU_mutex));
++ }
++ mutex_exit(&(buf_pool->flush_list_mutex));
+
+ old_page_count = page_count;
+
+@@ -907,7 +921,10 @@
+ flush_type, offset,
+ page_count - old_page_count); */
+
+- mutex_enter(&(buf_pool->mutex));
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_enter(&(buf_pool->LRU_mutex));
++ }
++ mutex_enter(&(buf_pool->flush_list_mutex));
+
+ } else if (flush_type == BUF_FLUSH_LRU) {
+
+@@ -929,6 +946,13 @@
+ break;
+ }
+ }
++
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_exit(&(buf_pool->LRU_mutex));
++ }
++ mutex_exit(&(buf_pool->flush_list_mutex));
++
++ mutex_enter(&(buf_pool->mutex));
+
+ (buf_pool->init_flush)[flush_type] = FALSE;
+
+@@ -988,10 +1012,14 @@
+ buf_block_t* block;
+ ulint n_replaceable;
+ ulint distance = 0;
+-
+- mutex_enter(&(buf_pool->mutex));
++
++ /* optimistic search... */
++ //mutex_enter(&(buf_pool->LRU_mutex));
++ //mutex_enter(&(buf_pool->free_mutex));
+
+ n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
++
++ //mutex_exit(&(buf_pool->free_mutex));
+
+ block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+@@ -1013,7 +1041,7 @@
+ block = UT_LIST_GET_PREV(LRU, block);
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ //mutex_exit(&(buf_pool->LRU_mutex));
+
+ if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
+
+@@ -1032,8 +1060,9 @@
+ immediately, without waiting. */
+
+ void
+-buf_flush_free_margin(void)
++buf_flush_free_margin(
+ /*=======================*/
++ ibool wait)
+ {
+ ulint n_to_flush;
+ ulint n_flushed;
+@@ -1043,7 +1072,7 @@
+ if (n_to_flush > 0) {
+ n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
+ ut_dulint_zero);
+- if (n_flushed == ULINT_UNDEFINED) {
++ if (wait && n_flushed == ULINT_UNDEFINED) {
+ /* There was an LRU type flush batch already running;
+ let us wait for it to end */
+
+@@ -1093,11 +1122,11 @@
+ {
+ ibool ret;
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->flush_list_mutex));
+
+ ret = buf_flush_validate_low();
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
+
+ return(ret);
+ }
+diff -r 72a897774060 innobase/buf/buf0lru.c
+--- a/innobase/buf/buf0lru.c Mon Sep 08 16:40:20 2008 -0700
++++ b/innobase/buf/buf0lru.c Mon Sep 08 16:40:27 2008 -0700
+@@ -79,7 +79,10 @@
+ ibool all_freed;
+
+ scan_again:
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
++ mutex_enter(&(buf_pool->flush_list_mutex));
++ mutex_enter(&(buf_pool->free_mutex));
++ mutex_enter(&(buf_pool->hash_mutex));
+
+ all_freed = TRUE;
+
+@@ -117,7 +120,10 @@
+
+ mutex_exit(&block->mutex);
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ mutex_exit(&(buf_pool->free_mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ /* Note that the following call will acquire
+ an S-latch on the page */
+@@ -147,7 +153,10 @@
+ block = UT_LIST_GET_PREV(LRU, block);
+ }
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
++ mutex_exit(&(buf_pool->flush_list_mutex));
++ mutex_exit(&(buf_pool->free_mutex));
++ mutex_exit(&(buf_pool->hash_mutex));
+
+ if (!all_freed) {
+ os_thread_sleep(20000);
+@@ -170,14 +179,14 @@
+ ulint len;
+ ulint limit;
+
+- mutex_enter(&(buf_pool->mutex));
++ mutex_enter(&(buf_pool->LRU_mutex));
+
+ len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+ if (len < BUF_LRU_OLD_MIN_LEN) {
+ /* The LRU list is too short to do read-ahead */
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
+
+ return(0);
+ }
+@@ -186,7 +195,7 @@
+
+ limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
+
+ return(limit);
+ }
+@@ -210,13 +219,15 @@
+ ulint distance = 0;
+ ibool freed;
+
+- mutex_enter(&(buf_pool->mutex));
++ /* optimistic search... */
++ //mutex_enter(&(buf_pool->LRU_mutex));
+
++retry:
+ freed = FALSE;
+ block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ while (block != NULL) {
+- ut_a(block->in_LRU_list);
++ //ut_a(block->in_LRU_list); /* optimistic */
+
+ mutex_enter(&block->mutex);
+
+@@ -231,9 +242,17 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++ mutex_exit(&block->mutex);
++
++ mutex_enter(&(buf_pool->LRU_mutex));/* optimistic */
++
++ mutex_enter(&(buf_pool->hash_mutex));
++ mutex_enter(&block->mutex);
++ if(block->in_LRU_list && buf_flush_ready_for_replace(block)) {
+ buf_LRU_block_remove_hashed_page(block);
++ mutex_exit(&(buf_pool->hash_mutex));
+
+- mutex_exit(&(buf_pool->mutex));
++ mutex_exit(&(buf_pool->LRU_mutex));
+ mutex_exit(&block->mutex);
+
+ /* Remove possible adaptive hash index built on the
+@@ -246,14 +265,25 @@
+
+ ut_a(block->buf_fix_count == 0);
+
<Skipped 2113 lines>
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/percona-server.git/commitdiff/431f68fe79a66d5dfdd53f2655709e6c925fbc22
More information about the pld-cvs-commit
mailing list