[packages/percona-server/v5.0.x: 101/202] - for 5.0.75
glen
glen at pld-linux.org
Wed Oct 21 16:17:31 CEST 2015
commit eccb488f1f659d1774a04d96e1cd228b6aa6f200
Author: Elan Ruusamäe <glen at pld-linux.org>
Date: Fri Jan 9 11:04:53 2009 +0000
- for 5.0.75
Changed files:
mysql-innodb_check_fragmentation.patch -> 1.1.2.1
mysql-innodb_fsync_source.patch -> 1.1.2.1
mysql-innodb_io_patches.patch -> 1.1.2.1
mysql-innodb_io_pattern.patch -> 1.1.2.1
mysql-innodb_locks_held.patch -> 1.1.2.1
mysql-innodb_rw_lock.patch -> 1.1.2.1
mysql-innodb_show_bp.patch -> 1.1.2.1
mysql-innodb_show_hashed_memory.patch -> 1.1.2.1
mysql-microsec_process.patch -> 1.1.2.1
mysql-innodb_check_fragmentation.patch | 275 ++++++
mysql-innodb_fsync_source.patch | 594 +++++++++++++
mysql-innodb_io_patches.patch | 487 +++++++++++
mysql-innodb_io_pattern.patch | 688 +++++++++++++++
mysql-innodb_locks_held.patch | 168 ++++
mysql-innodb_rw_lock.patch | 1459 ++++++++++++++++++++++++++++++++
mysql-innodb_show_bp.patch | 447 ++++++++++
mysql-innodb_show_hashed_memory.patch | 275 ++++++
mysql-microsec_process.patch | 281 ++++++
9 files changed, 4674 insertions(+)
---
diff --git a/mysql-innodb_check_fragmentation.patch b/mysql-innodb_check_fragmentation.patch
new file mode 100644
index 0000000..4b16731
--- /dev/null
+++ b/mysql-innodb_check_fragmentation.patch
@@ -0,0 +1,275 @@
+diff -r 936d427a9a15 innobase/btr/btr0cur.c
+--- a/innobase/btr/btr0cur.c Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/btr/btr0cur.c Mon Dec 22 00:33:11 2008 -0800
+@@ -516,6 +516,14 @@
+ == index->table->comp);
+ }
+
++ if (level == 0) {
++ /* Initializes status counters */
++ innobase_mysql_thd_init_innodb_scan_cont();
++ innobase_mysql_thd_init_innodb_scan_jump();
++ innobase_mysql_thd_init_innodb_scan_data();
++ innobase_mysql_thd_init_innodb_scan_garbage();
++ }
++
+ break;
+ }
+
+@@ -663,6 +671,12 @@
+ btr_cur_add_path_info(cursor, height,
+ root_height);
+ }
++
++ /* Initializes status counters */
++ innobase_mysql_thd_init_innodb_scan_cont();
++ innobase_mysql_thd_init_innodb_scan_jump();
++ innobase_mysql_thd_init_innodb_scan_data();
++ innobase_mysql_thd_init_innodb_scan_garbage();
+
+ break;
+ }
+diff -r 936d427a9a15 innobase/btr/btr0pcur.c
+--- a/innobase/btr/btr0pcur.c Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/btr/btr0pcur.c Mon Dec 22 00:33:11 2008 -0800
+@@ -381,6 +381,7 @@
+ last record of the current page */
+ mtr_t* mtr) /* in: mtr */
+ {
++ ulint page_no;
+ ulint next_page_no;
+ ulint space;
+ page_t* page;
+@@ -393,11 +394,22 @@
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+ page = btr_pcur_get_page(cursor);
++ page_no = buf_frame_get_page_no(page);
+
+ next_page_no = btr_page_get_next(page, mtr);
+ space = buf_frame_get_space_id(page);
+
+ ut_ad(next_page_no != FIL_NULL);
++
++ if (next_page_no - page_no == 1) {
++ innobase_mysql_thd_increment_innodb_scan_cont(1);
++ } else {
++ innobase_mysql_thd_increment_innodb_scan_jump(1);
++ }
++ innobase_mysql_thd_increment_innodb_scan_data(
++ page_get_data_size(page));
++ innobase_mysql_thd_increment_innodb_scan_garbage(
++ page_header_get_field(page, PAGE_GARBAGE));
+
+ next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
+ ut_a(page_is_comp(next_page) == page_is_comp(page));
+@@ -427,6 +439,7 @@
+ record of the current page */
+ mtr_t* mtr) /* in: mtr */
+ {
++ ulint page_no;
+ ulint prev_page_no;
+ ulint space;
+ page_t* page;
+@@ -462,9 +475,20 @@
+ btr_pcur_restore_position(latch_mode2, cursor, mtr);
+
+ page = btr_pcur_get_page(cursor);
++ page_no = buf_frame_get_page_no(page);
+
+ prev_page_no = btr_page_get_prev(page, mtr);
+ space = buf_frame_get_space_id(page);
++
++ if (page_no - prev_page_no == 1) {
++ innobase_mysql_thd_increment_innodb_scan_cont(1);
++ } else {
++ innobase_mysql_thd_increment_innodb_scan_jump(1);
++ }
++ innobase_mysql_thd_increment_innodb_scan_data(
++ page_get_data_size(page));
++ innobase_mysql_thd_increment_innodb_scan_garbage(
++ page_header_get_field(page, PAGE_GARBAGE));
+
+ if (btr_pcur_is_before_first_on_page(cursor, mtr)
+ && (prev_page_no != FIL_NULL)) {
+diff -r 936d427a9a15 innobase/btr/btr0sea.c
+--- a/innobase/btr/btr0sea.c Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/btr/btr0sea.c Mon Dec 22 00:33:11 2008 -0800
+@@ -861,6 +861,12 @@
+
+ buf_pool->n_page_gets++;
+
++ /* Initializes status counters */
++ innobase_mysql_thd_init_innodb_scan_cont();
++ innobase_mysql_thd_init_innodb_scan_jump();
++ innobase_mysql_thd_init_innodb_scan_data();
++ innobase_mysql_thd_init_innodb_scan_garbage();
++
+ return(TRUE);
+
+ /*-------------------------------------------*/
+diff -r 936d427a9a15 innobase/include/btr0cur.h
+--- a/innobase/include/btr0cur.h Mon Dec 22 00:33:03 2008 -0800
++++ b/innobase/include/btr0cur.h Mon Dec 22 00:33:11 2008 -0800
+@@ -697,6 +697,17 @@
+ extern ulint btr_cur_n_non_sea_old;
+ extern ulint btr_cur_n_sea_old;
+
++/*--------------------------------------*/
++/* prototypes for new functions added to ha_innodb.cc */
++void innobase_mysql_thd_init_innodb_scan_cont();
++void innobase_mysql_thd_increment_innodb_scan_cont(ulong length);
++void innobase_mysql_thd_init_innodb_scan_jump();
++void innobase_mysql_thd_increment_innodb_scan_jump(ulong length);
++void innobase_mysql_thd_init_innodb_scan_data();
++void innobase_mysql_thd_increment_innodb_scan_data(ulong length);
++void innobase_mysql_thd_init_innodb_scan_garbage();
++void innobase_mysql_thd_increment_innodb_scan_garbage(ulong length);
++
+ #ifndef UNIV_NONINL
+ #include "btr0cur.ic"
+ #endif
+diff -r 936d427a9a15 patch_info/innodb_check_fragmentation.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_check_fragmentation.info Mon Dec 22 00:33:11 2008 -0800
+@@ -0,0 +1,6 @@
++File=innodb_check_fragmentation.patch
++Name=Session status to check fragmentation of the last InnoDB scan
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=The names are Innodb_scan_*
+diff -r 936d427a9a15 sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc Mon Dec 22 00:33:03 2008 -0800
++++ b/sql/ha_innodb.cc Mon Dec 22 00:33:11 2008 -0800
+@@ -760,6 +760,102 @@
+ }
+
+ /*************************************************************************
++Initializes Innodb_scan_pages_contiguous. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_cont()
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_cont = 0;
++ }
++}
++
++/*************************************************************************
++Increments Innodb_scan_pages_contiguous. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_cont(ulong length)
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_cont+= length;
++ }
++}
++
++/*************************************************************************
++Initializes Innodb_scan_pages_jumpy. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_jump()
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_jump = 0;
++ }
++}
++
++/*************************************************************************
++Increments Innodb_scan_pages_jumpy. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_jump(ulong length)
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_jump+= length;
++ }
++}
++
++/*************************************************************************
++Initializes Innodb_scan_data_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_data()
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_data = 0;
++ }
++}
++
++/*************************************************************************
++Increments Innodb_scan_data_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_data(ulong length)
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_data+= length;
++ }
++}
++
++/*************************************************************************
++Initializes Innodb_scan_garbages_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_init_innodb_scan_garbage()
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_garbage = 0;
++ }
++}
++
++/*************************************************************************
++Increments Innodb_scan_garbages_in_pages. */
++extern "C"
++void
++innobase_mysql_thd_increment_innodb_scan_garbage(ulong length)
++{
++ THD *thd=current_thd;
++ if (likely(thd != 0)) {
++ thd->status_var.innodb_scan_garbage+= length;
++ }
++}
++
++/*************************************************************************
+ Gets the InnoDB transaction handle for a MySQL handler object, creates
+ an InnoDB transaction struct if the corresponding MySQL thread struct still
+ lacks one. */
+diff -r 936d427a9a15 sql/mysqld.cc
+--- a/sql/mysqld.cc Mon Dec 22 00:33:03 2008 -0800
++++ b/sql/mysqld.cc Mon Dec 22 00:33:11 2008 -0800
+@@ -6673,6 +6673,10 @@
+ {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS},
+ #ifdef HAVE_INNOBASE_DB
+ {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS},
++ {"Innodb_scan_pages_contiguous",(char*) offsetof(STATUS_VAR, innodb_scan_cont), SHOW_LONGLONG_STATUS},
++ {"Innodb_scan_pages_jumpy", (char*) offsetof(STATUS_VAR, innodb_scan_jump), SHOW_LONGLONG_STATUS},
++ {"Innodb_scan_data_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_data), SHOW_LONGLONG_STATUS},
++ {"Innodb_scan_garbages_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_garbage), SHOW_LONGLONG_STATUS},
+ #endif /*HAVE_INNOBASE_DB*/
+ {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG},
+ {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG},
+diff -r 936d427a9a15 sql/sql_class.h
+--- a/sql/sql_class.h Mon Dec 22 00:33:03 2008 -0800
++++ b/sql/sql_class.h Mon Dec 22 00:33:11 2008 -0800
+@@ -729,6 +729,10 @@
+ sense to add to the /global/ status variable counter.
+ */
+ double last_query_cost;
++ ulonglong innodb_scan_cont;
++ ulonglong innodb_scan_jump;
++ ulonglong innodb_scan_data;
++ ulonglong innodb_scan_garbage;
+ } STATUS_VAR;
+
+ /*
diff --git a/mysql-innodb_fsync_source.patch b/mysql-innodb_fsync_source.patch
new file mode 100644
index 0000000..637a7d6
--- /dev/null
+++ b/mysql-innodb_fsync_source.patch
@@ -0,0 +1,594 @@
+diff -r 61031ebb48ce innobase/buf/buf0flu.c
+--- a/innobase/buf/buf0flu.c Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/buf/buf0flu.c Mon Nov 03 05:07:56 2008 -0800
+@@ -341,7 +341,7 @@
+
+ /* Now flush the doublewrite buffer data to disk */
+
+- fil_flush(TRX_SYS_SPACE);
++ fil_flush(TRX_SYS_SPACE, FLUSH_FROM_DIRTY_BUFFER);
+
+ /* We know that the writes have been flushed to disk now
+ and in recovery we will find them in the doublewrite buffer
+@@ -381,7 +381,7 @@
+
+ /* Now we flush the data to disk (for example, with fsync) */
+
+- fil_flush_file_spaces(FIL_TABLESPACE);
++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_DIRTY_BUFFER);
+
+ /* We can now reuse the doublewrite memory buffer: */
+
+@@ -501,7 +501,8 @@
+ }
+ #else
+ /* Force the log to the disk before writing the modified block */
+- log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
++ log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE,
++ LOG_WRITE_FROM_DIRTY_BUFFER);
+ #endif
+ buf_flush_init_for_writing(block->frame, block->newest_modification,
+ block->space, block->offset);
+diff -r 61031ebb48ce innobase/fil/fil0fil.c
+--- a/innobase/fil/fil0fil.c Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/fil/fil0fil.c Mon Nov 03 05:07:56 2008 -0800
+@@ -245,6 +245,7 @@
+ request */
+ UT_LIST_BASE_NODE_T(fil_space_t) space_list;
+ /* list of all file spaces */
++ ulint flush_types[FLUSH_FROM_NUMBER];/* calls to fil_flush by caller */
+ };
+
+ /* The tablespace memory cache. This variable is NULL before the module is
+@@ -849,7 +850,7 @@
+ /* Flush tablespaces so that we can close modified files in the LRU
+ list */
+
+- fil_flush_file_spaces(FIL_TABLESPACE);
++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
+
+ count++;
+
+@@ -1309,7 +1310,10 @@
+
+ UT_LIST_INIT(system->unflushed_spaces);
+ UT_LIST_INIT(system->space_list);
+-
++ {
++ int x;
++ for (x = 0; x < FLUSH_FROM_NUMBER; ++x) system->flush_types[x] = 0;
++ }
+ return(system);
+ }
+
+@@ -1437,6 +1441,23 @@
+ }
+
+ mutex_exit(&(system->mutex));
++}
++
++/********************************************************************
++Prints the number of fil_flush() calls made by each type of caller. */
++
++void
++fil_print(FILE *file)
++{
++ fprintf(file,
++ "fsync callers: %lu buffer pool, %lu other, %lu checkpoint, "
++ "%lu log aio, %lu log sync, %lu archive\n",
++ fil_system->flush_types[FLUSH_FROM_DIRTY_BUFFER],
++ fil_system->flush_types[FLUSH_FROM_OTHER],
++ fil_system->flush_types[FLUSH_FROM_CHECKPOINT],
++ fil_system->flush_types[FLUSH_FROM_LOG_IO_COMPLETE],
++ fil_system->flush_types[FLUSH_FROM_LOG_WRITE_UP_TO],
++ fil_system->flush_types[FLUSH_FROM_ARCHIVE]);
+ }
+
+ /********************************************************************
+@@ -2256,7 +2277,7 @@
+
+ os_thread_sleep(20000);
+
+- fil_flush(id);
++ fil_flush(id, FLUSH_FROM_OTHER);
+
+ goto retry;
+
+@@ -3574,7 +3595,7 @@
+ size_after_extend, *actual_size); */
+ mutex_exit(&(system->mutex));
+
+- fil_flush(space_id);
++ fil_flush(space_id, FLUSH_FROM_OTHER);
+
+ return(success);
+ }
+@@ -4166,8 +4187,9 @@
+ void
+ fil_flush(
+ /*======*/
+- ulint space_id) /* in: file space id (this can be a group of
++ ulint space_id, /* in: file space id (this can be a group of
+ log files or a tablespace of the database) */
++ flush_from_type flush_type)/* in: identifies the caller */
+ {
+ fil_system_t* system = fil_system;
+ fil_space_t* space;
+@@ -4176,7 +4198,7 @@
+ ib_longlong old_mod_counter;
+
+ mutex_enter(&(system->mutex));
+-
++ system->flush_types[flush_type]++;
+ HASH_SEARCH(hash, system->spaces, space_id, space,
+ space->id == space_id);
+ if (!space || space->is_being_deleted) {
+@@ -4281,7 +4303,8 @@
+ void
+ fil_flush_file_spaces(
+ /*==================*/
+- ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */
++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */
++ flush_from_type flush_type)/* in: identifies the caller */
+ {
+ fil_system_t* system = fil_system;
+ fil_space_t* space;
+@@ -4322,7 +4345,7 @@
+ a non-existing space id. */
+ for (i = 0; i < n_space_ids; i++) {
+
+- fil_flush(space_ids[i]);
++ fil_flush(space_ids[i], flush_type);
+ }
+
+ mem_free(space_ids);
+diff -r 61031ebb48ce innobase/include/fil0fil.h
+--- a/innobase/include/fil0fil.h Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/include/fil0fil.h Mon Nov 03 05:07:56 2008 -0800
+@@ -197,6 +197,13 @@
+ fil_init(
+ /*=====*/
+ ulint max_n_open); /* in: max number of open files */
++/********************************************************************
++ * Prints the number of fil_flush() calls made by each type of caller. */
++
++void
++fil_print(
++ /*=====*/
++ FILE* file); /* in: output stream */
+ /***********************************************************************
+ Opens all log files and system tablespace data files. They stay open until the
+ database server shutdown. This should be called at a server startup after the
+@@ -621,14 +628,26 @@
+ ulint segment); /* in: the number of the segment in the aio
+ array to wait for */
+ /**************************************************************************
++Identifies the caller of fil_flush. */
++typedef enum {
++ FLUSH_FROM_DIRTY_BUFFER,
++ FLUSH_FROM_OTHER,
++ FLUSH_FROM_CHECKPOINT,
++ FLUSH_FROM_LOG_IO_COMPLETE,
++ FLUSH_FROM_LOG_WRITE_UP_TO,
++ FLUSH_FROM_ARCHIVE,
++ FLUSH_FROM_NUMBER
++} flush_from_type;
++/**************************************************************************
+ Flushes to disk possible writes cached by the OS. If the space does not exist
+ or is being dropped, does not do anything. */
+
+ void
+ fil_flush(
+ /*======*/
+- ulint space_id); /* in: file space id (this can be a group of
++ ulint space_id, /* in: file space id (this can be a group of
+ log files or a tablespace of the database) */
++ flush_from_type flush_type);/* in: identifies the caller */
+ /**************************************************************************
+ Flushes to disk writes in file spaces of the given type possibly cached by
+ the OS. */
+@@ -636,7 +655,8 @@
+ void
+ fil_flush_file_spaces(
+ /*==================*/
+- ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */
++ flush_from_type flush_type);/* in: identifies the caller */
+ /**********************************************************************
+ Checks the consistency of the tablespace cache. */
+
+diff -r 61031ebb48ce innobase/include/log0log.h
+--- a/innobase/include/log0log.h Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/include/log0log.h Mon Nov 03 05:07:56 2008 -0800
+@@ -146,6 +146,22 @@
+ log_io_complete(
+ /*============*/
+ log_group_t* group); /* in: log group */
++
++/**********************************************************
++Describes the caller of log_write_up_to. */
++
++typedef enum {
++ LOG_WRITE_FROM_DIRTY_BUFFER,
++ LOG_WRITE_FROM_BACKGROUND_SYNC,
++ LOG_WRITE_FROM_BACKGROUND_ASYNC,
++ LOG_WRITE_FROM_INTERNAL,
++ LOG_WRITE_FROM_CHECKPOINT_SYNC,
++ LOG_WRITE_FROM_CHECKPOINT_ASYNC,
++ LOG_WRITE_FROM_LOG_ARCHIVE,
++ LOG_WRITE_FROM_COMMIT_SYNC,
++ LOG_WRITE_FROM_COMMIT_ASYNC,
++ LOG_WRITE_FROM_NUMBER
++} log_sync_type;
+ /**********************************************************
+ This function is called, e.g., when a transaction wants to commit. It checks
+ that the log has been written to the log file up to the last log entry written
+@@ -159,14 +175,21 @@
+ be written, ut_dulint_max if not specified */
+ ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+- ibool flush_to_disk);
+- /* in: TRUE if we want the written log also to be
+- flushed to disk */
++ ibool flush_to_disk,
++ /* in: TRUE if we want the written log also to be flushed to disk */
++ log_sync_type caller);/* in: identifies the caller */
+ /********************************************************************
+ Does a syncronous flush of the log buffer to disk. */
+
+ void
+ log_buffer_flush_to_disk(void);
++/*==========================*/
++/********************************************************************
++Flushes the log buffer. Forces it to disk depending on the value of
++the configuration parameter innodb_flush_log_at_trx_commit. */
++
++void
++log_buffer_flush_maybe_sync(void);
+ /*==========================*/
+ /********************************************************************
+ Advances the smallest lsn for which there are unflushed dirty blocks in the
+@@ -744,6 +767,12 @@
+ AND flushed to disk */
+ ulint n_pending_writes;/* number of currently pending flushes
+ or writes */
++ ulint log_sync_callers[LOG_WRITE_FROM_NUMBER];
++ /* counts calls to log_write_up_to */
++ ulint log_sync_syncers[LOG_WRITE_FROM_NUMBER];
++ /* counts calls to log_write_up_to when log file is sync'd */
++ ulint n_syncs; /* number of fsyncs done for log file */
++ ulint n_checkpoints; /* number of calls to log_checkpoint */
+ /* NOTE on the 'flush' in names of the fields below: starting from
+ 4.0.14, we separate the write of the log file and the actual fsync()
+ or other method to flush it to disk. The names below shhould really
+diff -r 61031ebb48ce innobase/log/log0log.c
+--- a/innobase/log/log0log.c Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/log/log0log.c Mon Nov 03 05:07:56 2008 -0800
+@@ -782,6 +782,15 @@
+ log_sys->written_to_all_lsn = log_sys->lsn;
+
+ log_sys->n_pending_writes = 0;
++ {
++ int x;
++ for (x = 0; x < LOG_WRITE_FROM_NUMBER; ++x) {
++ log_sys->log_sync_callers[x] = 0;
++ log_sys->log_sync_syncers[x] = 0;
++ }
++ }
++ log_sys->n_syncs = 0;
++ log_sys->n_checkpoints = 0;
+
+ log_sys->no_flush_event = os_event_create(NULL);
+
+@@ -1066,7 +1075,7 @@
+ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+
+- fil_flush(group->space_id);
++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE);
+ }
+
+ #ifdef UNIV_DEBUG
+@@ -1088,7 +1097,7 @@
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+ && srv_flush_log_at_trx_commit != 2) {
+
+- fil_flush(group->space_id);
++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE);
+ }
+
+ mutex_enter(&(log_sys->mutex));
+@@ -1303,9 +1312,10 @@
+ be written, ut_dulint_max if not specified */
+ ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+- ibool flush_to_disk)
++ ibool flush_to_disk,
+ /* in: TRUE if we want the written log also to be
+ flushed to disk */
++ log_sync_type caller) /* in: identifies caller */
+ {
+ log_group_t* group;
+ ulint start_offset;
+@@ -1315,6 +1325,7 @@
+ ulint loop_count;
+ ulint unlock;
+
++ log_sys->log_sync_callers[caller]++;
+ if (recv_no_ibuf_operations) {
+ /* Recovery is running and no operations on the log files are
+ allowed yet (the variable name .._no_ibuf_.. is misleading) */
+@@ -1465,13 +1476,17 @@
+ so we have also flushed to disk what we have written */
+
+ log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
++ log_sys->n_syncs++;
++ log_sys->log_sync_syncers[caller]++;
+
+ } else if (flush_to_disk) {
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+- fil_flush(group->space_id);
++ fil_flush(group->space_id, FLUSH_FROM_LOG_WRITE_UP_TO);
+ log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
++ log_sys->n_syncs++;
++ log_sys->log_sync_syncers[caller]++;
+ }
+
+ mutex_enter(&(log_sys->mutex));
+@@ -1520,7 +1535,8 @@
+
+ mutex_exit(&(log_sys->mutex));
+
+- log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
++ log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE,
++ LOG_WRITE_FROM_BACKGROUND_SYNC);
+ }
+
+ /********************************************************************
+@@ -1551,7 +1567,7 @@
+ mutex_exit(&(log->mutex));
+
+ if (do_flush) {
+- log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
++ log_write_up_to(lsn, LOG_NO_WAIT, FALSE, LOG_WRITE_FROM_INTERNAL);
+ }
+ }
+
+@@ -1921,11 +1937,11 @@
+ }
+
+ if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+- fil_flush_file_spaces(FIL_TABLESPACE);
++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_CHECKPOINT);
+ }
+
+ mutex_enter(&(log_sys->mutex));
+-
++ log_sys->n_checkpoints++;
+ oldest_lsn = log_buf_pool_get_oldest_modification();
+
+ mutex_exit(&(log_sys->mutex));
+@@ -1938,7 +1954,8 @@
+ write-ahead-logging algorithm ensures that the log has been flushed
+ up to oldest_lsn. */
+
+- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
++ log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE,
++ LOG_WRITE_FROM_CHECKPOINT_SYNC);
+
+ mutex_enter(&(log_sys->mutex));
+
+@@ -2566,7 +2583,7 @@
+
+ mutex_exit(&(log_sys->mutex));
+
+- fil_flush(group->archive_space_id);
++ fil_flush(group->archive_space_id, FLUSH_FROM_ARCHIVE);
+
+ mutex_enter(&(log_sys->mutex));
+
+@@ -2647,7 +2664,8 @@
+
+ mutex_exit(&(log_sys->mutex));
+
+- log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
++ log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE,
++ LOG_WRITE_FROM_LOG_ARCHIVE);
+
+ calc_new_limit = FALSE;
+
+@@ -3184,8 +3202,8 @@
+ }
+ mutex_exit(&kernel_mutex);
+
+- fil_flush_file_spaces(FIL_TABLESPACE);
+- fil_flush_file_spaces(FIL_LOG);
++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
++ fil_flush_file_spaces(FIL_LOG, FLUSH_FROM_OTHER);
+
+ /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
+ pool: therefore it is essential that the buffer pool has been
+@@ -3218,7 +3236,7 @@
+
+ fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
+
+- fil_flush_file_spaces(FIL_TABLESPACE);
++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
+
+ fil_close_all_files();
+
+@@ -3331,15 +3349,45 @@
+ time_elapsed = 0.001 + difftime(current_time,
+ log_sys->last_printout_time);
+ fprintf(file,
+- "%lu pending log writes, %lu pending chkp writes\n"
+- "%lu log i/o's done, %.2f log i/o's/second\n",
+- (ulong) log_sys->n_pending_writes,
+- (ulong) log_sys->n_pending_checkpoint_writes,
+- (ulong) log_sys->n_log_ios,
+- ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed));
++ "%lu pending log writes, %lu pending chkp writes\n"
++ "%lu log i/o's done, %.2f log i/o's/second, %lu syncs, %lu checkpoints\n",
++ (ulong) log_sys->n_pending_writes,
++ (ulong) log_sys->n_pending_checkpoint_writes,
++ (ulong) log_sys->n_log_ios,
++ (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed,
++ log_sys->n_syncs,
++ log_sys->n_checkpoints);
+
+ log_sys->n_log_ios_old = log_sys->n_log_ios;
+ log_sys->last_printout_time = current_time;
++
++ fprintf(file,
++ "log sync callers: %lu buffer pool, background %lu sync and %lu async, "
++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, "
++ "commit %lu sync and %lu async\n",
++ log_sys->log_sync_callers[LOG_WRITE_FROM_DIRTY_BUFFER],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_SYNC],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_ASYNC],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_INTERNAL],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_SYNC],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_ASYNC],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_LOG_ARCHIVE],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_SYNC],
++ log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_ASYNC]);
++
++ fprintf(file,
++ "log sync syncers: %lu buffer pool, background %lu sync and %lu async, "
++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, "
++ "commit %lu sync and %lu async\n",
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_DIRTY_BUFFER],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_SYNC],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_ASYNC],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_INTERNAL],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_SYNC],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_ASYNC],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_LOG_ARCHIVE],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_SYNC],
++ log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_ASYNC]);
+
+ mutex_exit(&(log_sys->mutex));
+ }
+diff -r 61031ebb48ce innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/srv/srv0srv.c Mon Nov 03 05:07:56 2008 -0800
+@@ -1638,6 +1638,12 @@
+ (ulong)time_elapsed);
+
+ fputs("----------\n"
++ "BACKGROUND THREAD\n"
++ "----------\n", file);
++ fil_print(file);
++
++
++ fputs("----------\n"
+ "SEMAPHORES\n"
+ "----------\n", file);
+ sync_print(file);
+diff -r 61031ebb48ce innobase/trx/trx0sys.c
+--- a/innobase/trx/trx0sys.c Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/trx/trx0sys.c Mon Nov 03 05:07:56 2008 -0800
+@@ -511,7 +511,7 @@
+ page += UNIV_PAGE_SIZE;
+ }
+
+- fil_flush_file_spaces(FIL_TABLESPACE);
++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
+
+ leave_func:
+ ut_free(unaligned_read_buf);
+diff -r 61031ebb48ce innobase/trx/trx0trx.c
+--- a/innobase/trx/trx0trx.c Mon Nov 03 05:07:46 2008 -0800
++++ b/innobase/trx/trx0trx.c Mon Nov 03 05:07:56 2008 -0800
+@@ -916,19 +916,21 @@
+ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+ /* Write the log but do not flush it to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+- FALSE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++ LOG_WRITE_FROM_COMMIT_ASYNC);
+ } else {
+ /* Write the log to the log files AND flush
+ them to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
++ LOG_WRITE_FROM_COMMIT_SYNC);
+ }
+ } else if (srv_flush_log_at_trx_commit == 2) {
+
+ /* Write the log but do not flush it to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++ LOG_WRITE_FROM_COMMIT_ASYNC);
+ } else {
+ ut_error;
+ }
+@@ -1659,18 +1661,21 @@
+ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+ /* Write the log but do not flush it to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++ LOG_WRITE_FROM_COMMIT_ASYNC);
+ } else {
+ /* Write the log to the log files AND flush them to
+ disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
++ LOG_WRITE_FROM_COMMIT_SYNC);
+ }
+ } else if (srv_flush_log_at_trx_commit == 2) {
+
+ /* Write the log but do not flush it to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++ LOG_WRITE_FROM_COMMIT_ASYNC);
+ } else {
+ ut_error;
+ }
+@@ -1906,19 +1911,21 @@
+ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+ /* Write the log but do not flush it to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+- FALSE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++ LOG_WRITE_FROM_COMMIT_ASYNC);
+ } else {
+ /* Write the log to the log files AND flush
+ them to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
++ LOG_WRITE_FROM_COMMIT_SYNC);
+ }
+ } else if (srv_flush_log_at_trx_commit == 2) {
+
+ /* Write the log but do not flush it to disk */
+
+- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
++ LOG_WRITE_FROM_COMMIT_ASYNC);
+ } else {
+ ut_error;
+ }
+diff -r 61031ebb48ce patch_info/innodb_fsync_source.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_fsync_source.info Mon Nov 03 05:07:56 2008 -0800
+@@ -0,0 +1,9 @@
++File=innodb_fsync_source.patch
++Name=Information of fsync callers in InnoDB
++Version=1.0
++Author=Google
++License=GPL
++Comment=
++ChangeLog=
++2008-11-01
++VT: Initial porting
diff --git a/mysql-innodb_io_patches.patch b/mysql-innodb_io_patches.patch
new file mode 100644
index 0000000..90af625
--- /dev/null
+++ b/mysql-innodb_io_patches.patch
@@ -0,0 +1,487 @@
+diff -r 45683461331d innobase/buf/buf0rea.c
+--- a/innobase/buf/buf0rea.c Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/buf/buf0rea.c Mon Dec 22 00:32:02 2008 -0800
+@@ -188,6 +188,10 @@
+ ulint low, high;
+ ulint err;
+ ulint i;
++
++ if (!(srv_read_ahead & 1)) {
++ return(0);
++ }
+
+ if (srv_startup_is_before_trx_rollback_phase) {
+ /* No read-ahead to avoid thread deadlocks */
+@@ -396,6 +400,10 @@
+ ulint err;
+ ulint i;
+
++ if (!(srv_read_ahead & 2)) {
++ return(0);
++ }
++
+ if (srv_startup_is_before_trx_rollback_phase) {
+ /* No read-ahead to avoid thread deadlocks */
+ return(0);
+diff -r 45683461331d innobase/include/os0file.h
+--- a/innobase/include/os0file.h Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/include/os0file.h Mon Dec 22 00:32:02 2008 -0800
+@@ -551,8 +551,10 @@
+ /*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+- ulint n_segments, /* in: combined number of segments in the four
+- first aio arrays; must be >= 4 */
++// ulint n_segments, /* in: combined number of segments in the four
++// first aio arrays; must be >= 4 */
++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads */
++ ulint n_write_threads, /**/
+ ulint n_slots_sync); /* in: number of slots in the sync aio array */
+ /***********************************************************************
+ Requests an asynchronous i/o operation. */
+diff -r 45683461331d innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/include/srv0srv.h Mon Dec 22 00:32:02 2008 -0800
+@@ -89,6 +89,8 @@
+ extern ulint srv_lock_table_size;
+
+ extern ulint srv_n_file_io_threads;
++extern ulint srv_n_read_io_threads;
++extern ulint srv_n_write_io_threads;
+
+ #ifdef UNIV_LOG_ARCHIVE
+ extern ibool srv_log_archive_on;
+@@ -133,6 +135,10 @@
+ extern ulong srv_max_purge_lag;
+ extern ibool srv_use_awe;
+ extern ibool srv_use_adaptive_hash_indexes;
++
++extern ulint srv_io_capacity;
++extern ulint srv_read_ahead;
++extern ulint srv_adaptive_checkpoint;
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+diff -r 45683461331d innobase/log/log0log.c
+--- a/innobase/log/log0log.c Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/log/log0log.c Mon Dec 22 00:32:02 2008 -0800
+@@ -3326,6 +3326,15 @@
+ (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
+ (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
+
++ fprintf(file,
++ "Max checkpoint age %lu\n"
++ "Modified age %lu\n"
++ "Checkpoint age %lu\n",
++ (ulong) log_sys->max_checkpoint_age,
++ (ulong) ut_dulint_minus(log_sys->lsn,
++ log_buf_pool_get_oldest_modification()),
++ (ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn));
++
+ current_time = time(NULL);
+
+ time_elapsed = 0.001 + difftime(current_time,
+diff -r 45683461331d innobase/os/os0file.c
+--- a/innobase/os/os0file.c Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/os/os0file.c Mon Dec 22 00:32:02 2008 -0800
+@@ -2877,8 +2877,10 @@
+ /*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+- ulint n_segments, /* in: combined number of segments in the four
+- first aio arrays; must be >= 4 */
++// ulint n_segments, /* in: combined number of segments in the four
++// first aio arrays; must be >= 4 */
++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads*/
++ ulint n_write_threads, /**/
+ ulint n_slots_sync) /* in: number of slots in the sync aio array */
+ {
+ ulint n_read_segs;
+@@ -2888,6 +2890,8 @@
+ #ifdef POSIX_ASYNC_IO
+ sigset_t sigset;
+ #endif
++ ulint n_segments = 2 + n_read_threads + n_write_threads;
++
+ ut_ad(n % n_segments == 0);
+ ut_ad(n_segments >= 4);
+
+@@ -2898,8 +2902,8 @@
+ }
+
+ n_per_seg = n / n_segments;
+- n_write_segs = (n_segments - 2) / 2;
+- n_read_segs = n_segments - 2 - n_write_segs;
++ n_write_segs = n_write_threads;
++ n_read_segs = n_read_threads;
+
+ /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
+
+@@ -3180,6 +3184,13 @@
+ struct aiocb* control;
+ #endif
+ ulint i;
++ ulint prim_segment;
++ ulint n;
++
++ n = array->n_slots / array->n_segments;
++ /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */
++ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
++
+ loop:
+ os_mutex_enter(array->mutex);
+
+@@ -3198,12 +3209,23 @@
+ goto loop;
+ }
+
++ for (i = prim_segment * n; i < array->n_slots; i++) {
++ slot = os_aio_array_get_nth_slot(array, i);
++
++ if (slot->reserved == FALSE) {
++ break;
++ }
++ }
++
++ if (slot->reserved == TRUE){
++ /* Not found after the intended segment. So we should search before. */
+ for (i = 0;; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved == FALSE) {
+ break;
+ }
++ }
+ }
+
+ array->n_reserved++;
+diff -r 45683461331d innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:32:02 2008 -0800
+@@ -167,6 +167,8 @@
+ ulint srv_lock_table_size = ULINT_MAX;
+
+ ulint srv_n_file_io_threads = ULINT_MAX;
++ulint srv_n_read_io_threads = 1;
++ulint srv_n_write_io_threads = 1;
+
+ #ifdef UNIV_LOG_ARCHIVE
+ ibool srv_log_archive_on = FALSE;
+@@ -324,6 +326,15 @@
+ ibool srv_use_awe = FALSE;
+ ibool srv_use_adaptive_hash_indexes = TRUE;
+
++ulint srv_io_capacity = 100;
++
++/* Returns the number of IO operations that is pct percent of srv_io_capacity,
++truncated to ulint: PCT_IO(5) is 5% of srv_io_capacity. The argument is
++parenthesized so an expression such as PCT_IO(a + b) scales the whole sum. */
++#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0)))
++
++ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
++ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
+ /*-------------------------------------------*/
+ ulong srv_n_spin_wait_rounds = 20;
+ ulong srv_n_free_tickets_to_enter = 500;
+@@ -2214,6 +2225,8 @@
+ ibool skip_sleep = FALSE;
+ ulint i;
+
++ dulint oldest_lsn;
++
+ #ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "Master thread starts, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+@@ -2302,9 +2315,9 @@
+ + log_sys->n_pending_writes;
+ n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
+- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
++ if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) {
+ srv_main_thread_op_info = "doing insert buffer merge";
+- ibuf_contract_for_n_pages(TRUE, 5);
++ ibuf_contract_for_n_pages(TRUE, PCT_IO(5));
+
+ srv_main_thread_op_info = "flushing log";
+
+@@ -2317,7 +2330,7 @@
+ /* Try to keep the number of modified pages in the
+ buffer pool under the limit wished by the user */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ ut_dulint_max);
+
+ /* If we had to do the flush, it may have taken
+@@ -2326,6 +2339,44 @@
+ iteration of this loop. */
+
+ skip_sleep = TRUE;
++ } else if (srv_adaptive_checkpoint) {
++
++ /* Try to keep modified age not to exceed
++ max_checkpoint_age * 7/8 line */
++
++ mutex_enter(&(log_sys->mutex));
++
++ oldest_lsn = buf_pool_get_oldest_modification();
++ if (ut_dulint_is_zero(oldest_lsn)) {
++
++ mutex_exit(&(log_sys->mutex));
++
++ } else {
++ if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) {
++
++ /* 2nd defence line (max_checkpoint_age * 3/4) */
++
++ mutex_exit(&(log_sys->mutex));
++
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
++ ut_dulint_max);
++ skip_sleep = TRUE;
++ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
++ > (log_sys->max_checkpoint_age)/2 ) {
++
++ /* 1st defence line (max_checkpoint_age * 1/2) */
++
++ mutex_exit(&(log_sys->mutex));
++
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
++ ut_dulint_max);
++ skip_sleep = TRUE;
++ } else {
++ mutex_exit(&(log_sys->mutex));
++ }
++ }
++
+ }
+
+ if (srv_activity_count == old_activity_count) {
+@@ -2352,10 +2403,10 @@
+ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
+ n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ + buf_pool->n_pages_written;
+- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
++ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
+
+ srv_main_thread_op_info = "flushing buffer pool pages";
+- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
+
+ srv_main_thread_op_info = "flushing log";
+ log_buffer_flush_to_disk();
+@@ -2365,7 +2416,7 @@
+ even if the server were active */
+
+ srv_main_thread_op_info = "doing insert buffer merge";
+- ibuf_contract_for_n_pages(TRUE, 5);
++ ibuf_contract_for_n_pages(TRUE, PCT_IO(5));
+
+ srv_main_thread_op_info = "flushing log";
+ log_buffer_flush_to_disk();
+@@ -2407,14 +2458,14 @@
+ (> 70 %), we assume we can afford reserving the disk(s) for
+ the time it requires to flush 100 pages */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
+ ut_dulint_max);
+ } else {
+ /* Otherwise, we only flush a small number of pages so that
+ we do not unnecessarily use much disk i/o capacity from
+ other work */
+
+- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
+ ut_dulint_max);
+ }
+
+@@ -2503,7 +2554,7 @@
+ if (srv_fast_shutdown && srv_shutdown_state > 0) {
+ n_bytes_merged = 0;
+ } else {
+- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
++ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
+ }
+
+ srv_main_thread_op_info = "reserving kernel mutex";
+@@ -2520,7 +2571,7 @@
+
+ if (srv_fast_shutdown < 2) {
+ n_pages_flushed =
+- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
+ } else {
+ /* In the fastest shutdown we do not flush the buffer pool
+ to data files: we set n_pages_flushed to 0 artificially. */
+diff -r 45683461331d innobase/srv/srv0start.c
+--- a/innobase/srv/srv0start.c Mon Dec 22 00:31:16 2008 -0800
++++ b/innobase/srv/srv0start.c Mon Dec 22 00:32:02 2008 -0800
+@@ -1205,24 +1205,28 @@
+ return(DB_ERROR);
+ }
+
++ /* over write innodb_file_io_threads */
++ srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads;
++
+ /* Restrict the maximum number of file i/o threads */
+ if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
+
+ srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
++ srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2;
+ }
+
+ if (!os_aio_use_native_aio) {
+ /* In simulated aio we currently have use only for 4 threads */
+- srv_n_file_io_threads = 4;
++ /*srv_n_file_io_threads = 4;*/
+
+ os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+- srv_n_file_io_threads,
+- SRV_MAX_N_PENDING_SYNC_IOS);
++ srv_n_read_io_threads, srv_n_write_io_threads,
++ SRV_MAX_N_PENDING_SYNC_IOS * 8);
+ } else {
+ os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
+ * srv_n_file_io_threads,
+- srv_n_file_io_threads,
++ srv_n_read_io_threads, srv_n_write_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS);
+ }
+
+diff -r 45683461331d patch_info/innodb_io_patches.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_io_patches.info Mon Dec 22 00:32:02 2008 -0800
+@@ -0,0 +1,9 @@
++File=innodb_io_patches.patch
++Name=Cluster of past InnoDB IO patches
++Version=1.0
++Author=Percona
++License=GPL
++Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush)
++ChangeLog=
++2008-11-06
++YK: Initial release
+diff -r 45683461331d sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/ha_innodb.cc Mon Dec 22 00:32:02 2008 -0800
+@@ -149,6 +149,7 @@
+ innobase_lock_wait_timeout, innobase_force_recovery,
+ innobase_open_files;
+
++long innobase_read_io_threads, innobase_write_io_threads;
+ longlong innobase_buffer_pool_size, innobase_log_file_size;
+
+ /* The default values for the following char* start-up parameters
+@@ -1403,6 +1404,8 @@
+ srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
+
+ srv_n_file_io_threads = (ulint) innobase_file_io_threads;
++ srv_n_read_io_threads = (ulint) innobase_read_io_threads;
++ srv_n_write_io_threads = (ulint) innobase_write_io_threads;
+
+ srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
+ srv_force_recovery = (ulint) innobase_force_recovery;
+diff -r 45683461331d sql/ha_innodb.h
+--- a/sql/ha_innodb.h Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/ha_innodb.h Mon Dec 22 00:32:02 2008 -0800
+@@ -204,6 +204,7 @@
+ extern long innobase_additional_mem_pool_size;
+ extern long innobase_buffer_pool_awe_mem_mb;
+ extern long innobase_file_io_threads, innobase_lock_wait_timeout;
++extern long innobase_read_io_threads, innobase_write_io_threads;
+ extern long innobase_force_recovery;
+ extern long innobase_open_files;
+ extern char *innobase_data_home_dir, *innobase_data_file_path;
+@@ -234,6 +235,9 @@
+ extern ulong srv_thread_concurrency;
+ extern ulong srv_commit_concurrency;
+ extern ulong srv_flush_log_at_trx_commit;
++extern ulong srv_io_capacity;
++extern ulong srv_read_ahead;
++extern ulong srv_adaptive_checkpoint;
+ }
+
+ bool innobase_init(void);
+diff -r 45683461331d sql/mysqld.cc
+--- a/sql/mysqld.cc Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/mysqld.cc Mon Dec 22 00:32:02 2008 -0800
+@@ -5036,6 +5036,11 @@
+ OPT_INNODB_ROLLBACK_ON_TIMEOUT,
+ OPT_SECURE_FILE_PRIV,
+ OPT_KEEP_FILES_ON_CREATE,
++ OPT_INNODB_IO_CAPACITY,
++ OPT_INNODB_READ_AHEAD,
++ OPT_INNODB_ADAPTIVE_CHECKPOINT,
++ OPT_INNODB_READ_IO_THREADS,
++ OPT_INNODB_WRITE_IO_THREADS,
+ OPT_INNODB_ADAPTIVE_HASH_INDEX,
+ OPT_FEDERATED
+ };
+@@ -5344,6 +5349,26 @@
+ (gptr*) &global_system_variables.innodb_table_locks,
+ (gptr*) &global_system_variables.innodb_table_locks,
+ 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
++ {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY,
++ "Number of IO operations per second the server can do. Tunes background IO rate.",
++ (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity,
++ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0},
++ {"innodb_read_ahead", OPT_INNODB_READ_AHEAD,
++ "Enable/Disable read aheads bit0:random bit1:linear",
++ (gptr*) &srv_read_ahead, (gptr*) &srv_read_ahead,
++ 0, GET_ULONG, REQUIRED_ARG, 3, 0, 3, 0, 0, 0},
++ {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT,
++ "Enable/Disable flushing along modified age 0:disable 1:enable",
++ (gptr*) &srv_adaptive_checkpoint, (gptr*) &srv_adaptive_checkpoint,
++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
++ {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS,
++ "Number of background read I/O threads in InnoDB.",
++ (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads,
++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
++ {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS,
++ "Number of background write I/O threads in InnoDB.",
++ (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads,
++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
+ #endif /* End HAVE_INNOBASE_DB */
+ {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
+ (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
+diff -r 45683461331d sql/set_var.cc
+--- a/sql/set_var.cc Mon Dec 22 00:31:16 2008 -0800
++++ b/sql/set_var.cc Mon Dec 22 00:32:02 2008 -0800
+@@ -484,6 +484,12 @@
+ sys_var_long_ptr sys_innodb_flush_log_at_trx_commit(
+ "innodb_flush_log_at_trx_commit",
+ &srv_flush_log_at_trx_commit);
++sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity",
++ &srv_io_capacity);
++sys_var_long_ptr sys_innodb_read_ahead("innodb_read_ahead",
++ &srv_read_ahead);
++sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint",
++ &srv_adaptive_checkpoint);
+ sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path",
+ &innobase_data_file_path);
+ sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir",
+@@ -847,6 +853,9 @@
+ &sys_innodb_thread_concurrency,
+ &sys_innodb_commit_concurrency,
+ &sys_innodb_flush_log_at_trx_commit,
++ &sys_innodb_io_capacity,
++ &sys_innodb_read_ahead,
++ &sys_innodb_adaptive_checkpoint,
+ #endif
+ &sys_trust_routine_creators,
+ &sys_trust_function_creators,
+@@ -982,6 +991,11 @@
+ {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
+ {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
+ {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
++ {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS},
++ {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS},
++ {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS},
++ {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG},
++ {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG},
+ #endif
+ {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS},
+ {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS},
diff --git a/mysql-innodb_io_pattern.patch b/mysql-innodb_io_pattern.patch
new file mode 100644
index 0000000..604404f
--- /dev/null
+++ b/mysql-innodb_io_pattern.patch
@@ -0,0 +1,688 @@
+diff -r 2bbfde0e0e70 include/mysql_com.h
+--- a/include/mysql_com.h Mon Dec 22 00:33:11 2008 -0800
++++ b/include/mysql_com.h Mon Dec 22 00:33:48 2008 -0800
+@@ -121,6 +121,9 @@
+ #define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */
+ #define REFRESH_DES_KEY_FILE 0x40000L
+ #define REFRESH_USER_RESOURCES 0x80000L
++
++/* TRUNCATE INFORMATION_SCHEMA.INNODB_IO_PATTERN */
++#define REFRESH_INNODB_IO_PATTERN 0x1000000L
+
+ #define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */
+ #define CLIENT_FOUND_ROWS 2 /* Found instead of affected rows */
+diff -r 2bbfde0e0e70 innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/buf/buf0buf.c Mon Dec 22 00:33:48 2008 -0800
+@@ -653,6 +653,9 @@
+ }
+
+ buf_pool->page_hash = hash_create(2 * max_size);
++ buf_pool->io_counter_hash = NULL;
++ buf_pool->io_counter_heap = NULL;
++ buf_pool->io_counters = 0;
+
+ buf_pool->n_pend_reads = 0;
+
+@@ -1966,6 +1969,9 @@
+ ulint io_type;
+ ulint read_page_no;
+
++ buf_io_counter_t* io_counter;
++ ulint fold;
++
+ ut_ad(block);
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+@@ -2067,6 +2073,26 @@
+ buf_pool->n_pages_read++;
+
+ rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
++ /* io_counter here */
++ if (srv_io_pattern && srv_io_pattern_trace_running) {
++ fold = buf_page_address_fold(block->space, block->offset);
++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter,
++ (io_counter->space == block->space) && (io_counter->offset == block->offset));
++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) {
++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t)));
++ io_counter->space = block->space;
++ io_counter->offset = block->offset;
++ io_counter->n_read = 0;
++ io_counter->n_write = 0;
++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash,
++ buf_page_address_fold(block->space, block->offset), io_counter);
++ buf_pool->io_counters++;
++ }
++ if (io_counter != NULL) {
++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block)));
++ io_counter->n_read++;
++ }
++ }
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+@@ -2082,6 +2108,26 @@
+ buf_flush_write_complete(block);
+
+ rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
++ /* io_counter here */
++ if (srv_io_pattern && srv_io_pattern_trace_running) {
++ fold = buf_page_address_fold(block->space, block->offset);
++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter,
++ (io_counter->space == block->space) && (io_counter->offset == block->offset));
++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) {
++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t)));
++ io_counter->space = block->space;
++ io_counter->offset = block->offset;
++ io_counter->n_read = 0;
++ io_counter->n_write = 0;
++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash,
++ buf_page_address_fold(block->space, block->offset), io_counter);
++ buf_pool->io_counters++;
++ }
++ if (io_counter != NULL) {
++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block)));
++ io_counter->n_write++;
++ }
++ }
+
+ buf_pool->n_pages_written++;
+
+@@ -2656,3 +2702,58 @@
+ return buf_pool_get_nth_block(buf_pool, i);
+
+ }
++
++/*************************************************************************
++Creates the hash table and heap used for IO pattern tracing when
++srv_io_pattern_trace is set, frees them when it is zero; else does nothing.*/
++
++void
++buf_io_counter_control(void)
++/*========================*/
++{
++ ulint n;
++
++ mutex_enter(&(buf_pool->mutex));
++ if (srv_io_pattern_trace) {
++ if (buf_pool->io_counter_hash == NULL) {
++ /* author's sizing estimate (buf_pool * 10), passed as 20 * max_size */
++ buf_pool->io_counter_hash = hash_create(20 * buf_pool->max_size);
++ buf_pool->io_counter_heap = mem_heap_create(4096 * 1024);
++ buf_pool->io_counters = 0;
++
++ srv_io_pattern = TRUE; /* set only after hash and heap exist */
++ }
++ } else {
++ if (buf_pool->io_counter_hash != NULL) {
++ srv_io_pattern = FALSE; /* cleared before the teardown below */
++
++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { /* unlink every cell's chain */
++ (buf_pool->io_counter_hash->array + n)->node = NULL;
++ }
++ mem_heap_free(buf_pool->io_counter_heap); /* counters were allocated from this heap */
++ buf_pool->io_counter_heap = NULL;
++ buf_pool->io_counters = 0;
++
++ hash_table_free(buf_pool->io_counter_hash);
++ buf_pool->io_counter_hash = NULL;
++ }
++ }
++ mutex_exit(&(buf_pool->mutex));
++}
++
++void
++buf_io_counter_clear(void)
++/*======================*/
++{
++ ulint n; /* index of the hash cell being reset */
++
++ mutex_enter(&(buf_pool->mutex));
++ if (buf_pool->io_counter_hash != NULL) { /* nothing to clear when no trace table exists */
++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { /* unlink every cell's chain */
++ (buf_pool->io_counter_hash->array + n)->node = NULL;
++ }
++ mem_heap_empty(buf_pool->io_counter_heap); /* heap and hash table pointers are kept */
++ buf_pool->io_counters = 0; /* counter total restarts from zero */
++ }
++ mutex_exit(&(buf_pool->mutex));
++}
+diff -r 2bbfde0e0e70 innobase/include/buf0buf.h
+--- a/innobase/include/buf0buf.h Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/include/buf0buf.h Mon Dec 22 00:33:48 2008 -0800
+@@ -709,6 +709,18 @@
+ void buf_pool_dump(void);
+ buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i);
+
++
++/*************************************************************************
++Controls the internal hash table for IO pattern tracing
++along innodb_io_pattern_trace value.*/
++
++void
++buf_io_counter_control(void);
++/*=========================*/
++
++void
++buf_io_counter_clear(void);
++/*=======================*/
+
+ /* The buffer control block structure */
+
+@@ -930,6 +942,9 @@
+ ulint curr_size; /* current pool size in pages;
+ currently always the same as
+ max_size */
++ hash_table_t* io_counter_hash;
++ mem_heap_t* io_counter_heap;
++ ulint io_counters;
+ hash_table_t* page_hash; /* hash table of the file pages */
+
+ ulint n_pend_reads; /* number of pending read operations */
+@@ -1015,6 +1030,15 @@
+ locki table, are not in this list */
+ };
+
++struct buf_io_counter_struct{ /* per-page IO counter stored in buf_pool->io_counter_hash */
++ ulint space; /* copied from block->space; part of the lookup key */
++ ulint offset; /* copied from block->offset; part of the lookup key */
++ buf_io_counter_t* hash; /* link field named in HASH_SEARCH / HASH_INSERT */
++ ulint index_id; /* ut_dulint_get_low() of the page's index id, set on each counted IO */
++ ulint n_read; /* incremented in the read-completion branch */
++ ulint n_write; /* incremented in the write-completion branch */
++};
++
+ /* States of a control block */
+ #define BUF_BLOCK_NOT_USED 211 /* is in the free list */
+ #define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns
+diff -r 2bbfde0e0e70 innobase/include/buf0types.h
+--- a/innobase/include/buf0types.h Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/include/buf0types.h Mon Dec 22 00:33:48 2008 -0800
+@@ -12,6 +12,8 @@
+ typedef struct buf_block_struct buf_block_t;
+ typedef struct buf_pool_struct buf_pool_t;
+
++typedef struct buf_io_counter_struct buf_io_counter_t;
++
+ /* The 'type' used of a buffer frame */
+ typedef byte buf_frame_t;
+
+diff -r 2bbfde0e0e70 innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/include/srv0srv.h Mon Dec 22 00:33:48 2008 -0800
+@@ -141,6 +141,11 @@
+ extern ulint srv_io_capacity;
+ extern ulint srv_read_ahead;
+ extern ulint srv_adaptive_checkpoint;
++
++extern volatile ibool srv_io_pattern;
++extern ulong srv_io_pattern_trace;
++extern ulong srv_io_pattern_trace_running;
++extern ulong srv_io_pattern_size_limit;
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+diff -r 2bbfde0e0e70 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c Mon Dec 22 00:33:11 2008 -0800
++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:33:48 2008 -0800
+@@ -337,6 +337,11 @@
+
+ ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
+ ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
++
++volatile ibool srv_io_pattern = FALSE;
++ulong srv_io_pattern_trace = 0; /* ulong, as in the extern declarations */
++ulong srv_io_pattern_trace_running = 0; /* tested on each page IO completion */
++ulong srv_io_pattern_size_limit = ULINT_MAX - (1024 * 1024); /* cap on io_counters */
+ /*-------------------------------------------*/
+ ulong srv_n_spin_wait_rounds = 20;
+ ulong srv_n_free_tickets_to_enter = 500;
+diff -r 2bbfde0e0e70 mysql-test/r/information_schema.result
+--- a/mysql-test/r/information_schema.result Mon Dec 22 00:33:11 2008 -0800
++++ b/mysql-test/r/information_schema.result Mon Dec 22 00:33:48 2008 -0800
+@@ -59,6 +59,7 @@
+ USER_PRIVILEGES
+ USER_STATISTICS
+ VIEWS
++INNODB_IO_PATTERN
+ columns_priv
+ db
+ func
+@@ -742,7 +743,7 @@
+ CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1;
+ CREATE VIEW a2 AS SELECT t_CRASHME FROM a1;
+ count(*)
+-108
++109
+ drop view a2, a1;
+ drop table t_crashme;
+ select table_schema,table_name, column_name from
+@@ -812,12 +813,13 @@
+ TABLE_PRIVILEGES TABLE_NAME select
+ TABLE_STATISTICS TABLE_NAME select
+ VIEWS TABLE_NAME select
++INNODB_IO_PATTERN TABLE_NAME select
+ delete from mysql.user where user='mysqltest_4';
+ delete from mysql.db where user='mysqltest_4';
+ flush privileges;
+ SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA;
+ table_schema count(*)
+-information_schema 23
++information_schema 24
+ mysql 17
+ create table t1 (i int, j int);
+ create trigger trg1 before insert on t1 for each row
+@@ -1225,6 +1227,7 @@
+ USER_PRIVILEGES GRANTEE
+ USER_STATISTICS USER
+ VIEWS TABLE_SCHEMA
++INNODB_IO_PATTERN SPACE
+ SELECT t.table_name, c1.column_name
+ FROM information_schema.tables t
+ INNER JOIN
+@@ -1263,6 +1266,7 @@
+ USER_PRIVILEGES GRANTEE
+ USER_STATISTICS USER
+ VIEWS TABLE_SCHEMA
++INNODB_IO_PATTERN SPACE
+ SELECT MAX(table_name) FROM information_schema.tables;
+ MAX(table_name)
+ VIEWS
+@@ -1337,6 +1341,7 @@
+ COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1
+ INDEX_STATISTICS information_schema.INDEX_STATISTICS 1
+ INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1
++INNODB_IO_PATTERN information_schema.INNODB_IO_PATTERN 1
+ KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1
+ PROCESSLIST information_schema.PROCESSLIST 1
+ PROFILING information_schema.PROFILING 1
+diff -r 2bbfde0e0e70 mysql-test/r/information_schema_db.result
+--- a/mysql-test/r/information_schema_db.result Mon Dec 22 00:33:11 2008 -0800
++++ b/mysql-test/r/information_schema_db.result Mon Dec 22 00:33:48 2008 -0800
+@@ -28,6 +28,7 @@
+ USER_PRIVILEGES
+ USER_STATISTICS
+ VIEWS
++INNODB_IO_PATTERN
+ show tables from INFORMATION_SCHEMA like 'T%';
+ Tables_in_information_schema (T%)
+ TABLES
+diff -r 2bbfde0e0e70 mysql-test/r/mysqlshow.result
+--- a/mysql-test/r/mysqlshow.result Mon Dec 22 00:33:11 2008 -0800
++++ b/mysql-test/r/mysqlshow.result Mon Dec 22 00:33:48 2008 -0800
+@@ -102,6 +102,7 @@
+ | USER_PRIVILEGES |
+ | USER_STATISTICS |
+ | VIEWS |
++| INNODB_IO_PATTERN |
+ +---------------------------------------+
+ Database: INFORMATION_SCHEMA
+ +---------------------------------------+
+@@ -130,6 +131,7 @@
+ | USER_PRIVILEGES |
+ | USER_STATISTICS |
+ | VIEWS |
++| INNODB_IO_PATTERN |
+ +---------------------------------------+
+ Wildcard: inf_rmation_schema
+ +--------------------+
+diff -r 2bbfde0e0e70 patch_info/innodb_io_pattern.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_io_pattern.info Mon Dec 22 00:33:48 2008 -0800
+@@ -0,0 +1,8 @@
++File=innodb_io_pattern.patch
++Name=Information schema table of InnoDB IO counts for each datafile pages
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=INFORMATION_SCHEMA.INNODB_IO_PATTERN
++2008-12-01
++YK: fix for mysql-test
+diff -r 2bbfde0e0e70 sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/ha_innodb.cc Mon Dec 22 00:33:48 2008 -0800
+@@ -1569,6 +1569,8 @@
+ pthread_cond_init(&commit_cond, NULL);
+ innodb_inited= 1;
+
++ buf_io_counter_control();
++
+ /* If this is a replication slave and we needed to do a crash recovery,
+ set the master binlog position to what InnoDB internally knew about
+ how far we got transactions durable inside InnoDB. There is a
+@@ -6527,6 +6529,28 @@
+ }
+
+ /****************************************************************************
++Controls the internal hash table for IO pattern tracing
++along innodb_io_pattern_trace value.*/
++
++void
++innodb_io_pattern_control(void)
++/*===========================*/
++{
++ if (innodb_inited) { /* skip the call while innodb_inited is unset */
++ buf_io_counter_control(); /* creates or frees the trace hash per srv_io_pattern_trace */
++ }
++}
++
++void
++innodb_io_pattern_clear(void)
++/*=========================*/
++{
++ if (innodb_inited) { /* skip the call while innodb_inited is unset */
++ buf_io_counter_clear(); /* resets the collected counters under buf_pool->mutex */
++ }
++}
++
++/****************************************************************************
+ Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
+ Monitor to the client. */
+
+diff -r 2bbfde0e0e70 sql/ha_innodb.h
+--- a/sql/ha_innodb.h Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/ha_innodb.h Mon Dec 22 00:33:48 2008 -0800
+@@ -240,6 +240,9 @@
+ extern ulong srv_adaptive_checkpoint;
+ extern ulong srv_show_locks_held;
+ extern ulong srv_show_verbose_locks;
++extern ulong srv_io_pattern_trace;
++extern ulong srv_io_pattern_trace_running;
++extern ulong srv_io_pattern_size_limit;
+ }
+
+ bool innobase_init(void);
+@@ -266,6 +269,9 @@
+ bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables);
+ bool innodb_mutex_show_status(THD* thd);
+ void innodb_export_status(void);
++
++void innodb_io_pattern_control(void);
++void innodb_io_pattern_clear(void);
+
+ void innobase_release_temporary_latches(THD *thd);
+
+diff -r 2bbfde0e0e70 sql/lex.h
+--- a/sql/lex.h Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/lex.h Mon Dec 22 00:33:48 2008 -0800
+@@ -244,6 +244,7 @@
+ { "INNER", SYM(INNER_SYM)},
+ { "INNOBASE", SYM(INNOBASE_SYM)},
+ { "INNODB", SYM(INNOBASE_SYM)},
++ { "INNODB_IO_PATTERN", SYM(INNODB_IO_PATTERN)},
+ { "INOUT", SYM(INOUT_SYM)},
+ { "INSENSITIVE", SYM(INSENSITIVE_SYM)},
+ { "INSERT", SYM(INSERT)},
+diff -r 2bbfde0e0e70 sql/mysqld.cc
+--- a/sql/mysqld.cc Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/mysqld.cc Mon Dec 22 00:33:48 2008 -0800
+@@ -4983,6 +4983,9 @@
+ OPT_INNODB_SYNC_SPIN_LOOPS,
+ OPT_INNODB_CONCURRENCY_TICKETS,
+ OPT_INNODB_THREAD_SLEEP_DELAY,
++ OPT_INNODB_IO_PATTERN_TRACE,
++ OPT_INNODB_IO_PATTERN_TRACE_RUNNING,
++ OPT_INNODB_IO_PATTERN_SIZE_LIMIT,
+ OPT_BDB_CACHE_SIZE,
+ OPT_BDB_LOG_BUFFER_SIZE,
+ OPT_BDB_MAX_LOCK,
+@@ -5382,6 +5385,18 @@
+ "Number of background write I/O threads in InnoDB.",
+ (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads,
+ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
++ {"innodb_io_pattern_trace", OPT_INNODB_IO_PATTERN_TRACE,
++ "Create/Drop the internal hash table for IO pattern tracing.",
++ (gptr*) &srv_io_pattern_trace, (gptr*) &srv_io_pattern_trace,
++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
++ {"innodb_io_pattern_trace_running", OPT_INNODB_IO_PATTERN_TRACE_RUNNING,
++ "Control IO pattern trace running or not.",
++ (gptr*) &srv_io_pattern_trace_running, (gptr*) &srv_io_pattern_trace_running,
++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
++ {"innodb_io_pattern_size_limit", OPT_INNODB_IO_PATTERN_SIZE_LIMIT,
++ "Set max number of counters per data pages. (0 = disable counting).",
++ (gptr*) &srv_io_pattern_size_limit, (gptr*) &srv_io_pattern_size_limit,
++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, ULONG_MAX - (1024 * 1024), 0, 0, 0},
+ #endif /* End HAVE_INNOBASE_DB */
+ {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
+ (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
+diff -r 2bbfde0e0e70 sql/set_var.cc
+--- a/sql/set_var.cc Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/set_var.cc Mon Dec 22 00:33:48 2008 -0800
+@@ -501,6 +501,12 @@
+ sys_var_long_ptr sys_innodb_show_verbose_locks(
+ "innodb_show_verbose_locks",
+ &srv_show_verbose_locks);
++sys_var_innodb_io_pattern_trace sys_innodb_io_pattern_trace("innodb_io_pattern_trace",
++ &srv_io_pattern_trace);
++sys_var_long_ptr sys_innodb_io_pattern_trace_running("innodb_io_pattern_trace_running",
++ &srv_io_pattern_trace_running);
++sys_var_long_ptr sys_innodb_io_pattern_size_limit("innodb_io_pattern_size_limit",
++ &srv_io_pattern_size_limit);
+ sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path",
+ &innobase_data_file_path);
+ sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir",
+@@ -870,6 +876,9 @@
+ &sys_innodb_adaptive_checkpoint,
+ &sys_innodb_show_locks_held,
+ &sys_innodb_show_verbose_locks,
++ &sys_innodb_io_pattern_trace,
++ &sys_innodb_io_pattern_trace_running,
++ &sys_innodb_io_pattern_size_limit,
+ #endif
+ &sys_trust_routine_creators,
+ &sys_trust_function_creators,
+@@ -1012,6 +1021,9 @@
+ {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS},
+ {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG},
+ {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG},
++ {sys_innodb_io_pattern_trace.name, (char*) &sys_innodb_io_pattern_trace, SHOW_SYS},
++ {sys_innodb_io_pattern_trace_running.name, (char*) &sys_innodb_io_pattern_trace_running, SHOW_SYS},
++ {sys_innodb_io_pattern_size_limit.name, (char*) &sys_innodb_io_pattern_size_limit, SHOW_SYS},
+ #endif
+ {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS},
+ {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS},
+@@ -3117,6 +3129,19 @@
+ thd->variables.lc_time_names= global_system_variables.lc_time_names;
+ }
+
++#ifdef HAVE_INNOBASE_DB
++bool sys_var_innodb_io_pattern_trace::update(THD *thd, set_var *var)
++{
++ bool ret;
++ /* Store the new value through the base-class update first ... */
++ ret = sys_var_long_ptr_global::update(thd, var);
++ /* ... then call innodb_io_pattern_control(); it runs whatever ret is. */
++ innodb_io_pattern_control();
++
++ return ret;
++}
++#endif /* HAVE_INNOBASE_DB */
++
+ /*
+ Functions to update thd->options bits
+ */
+diff -r 2bbfde0e0e70 sql/set_var.h
+--- a/sql/set_var.h Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/set_var.h Mon Dec 22 00:33:48 2008 -0800
+@@ -985,6 +985,17 @@
+ virtual void set_default(THD *thd, enum_var_type type);
+ };
+
++#ifdef HAVE_INNOBASE_DB
++/* sys_var_innodb_io_pattern_trace: a sys_var_long_ptr that declares its own update() */
++class sys_var_innodb_io_pattern_trace :public sys_var_long_ptr
++{
++public:
++ sys_var_innodb_io_pattern_trace(const char *name_arg, ulong *value_ptr_arg)
++ :sys_var_long_ptr(name_arg,value_ptr_arg) {}
++ bool update(THD *thd, set_var *var);
++};
++#endif /* HAVE_INNOBASE_DB */
++
+ /****************************************************************************
+ Classes for parsing of the SET command
+ ****************************************************************************/
+diff -r 2bbfde0e0e70 sql/sql_parse.cc
+--- a/sql/sql_parse.cc Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/sql_parse.cc Mon Dec 22 00:33:48 2008 -0800
+@@ -7998,6 +7998,13 @@
+ }
+ pthread_mutex_unlock(&LOCK_global_user_client_stats);
+ }
++#ifdef HAVE_INNOBASE_DB
++ if (options & REFRESH_INNODB_IO_PATTERN)
++ {
++ tmp_write_to_binlog= 0;
++ innodb_io_pattern_clear();
++ }
++#endif /* HAVE_INNOBASE_DB */
+ *write_to_binlog= tmp_write_to_binlog;
+ return result;
+ }
+diff -r 2bbfde0e0e70 sql/sql_show.cc
+--- a/sql/sql_show.cc Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/sql_show.cc Mon Dec 22 00:33:48 2008 -0800
+@@ -32,6 +32,17 @@
+ #ifdef HAVE_INNOBASE_DB
+ #include "ha_innodb.h"
+ #endif
++
++#ifdef HAVE_INNOBASE_DB
++#define INSIDE_HA_INNOBASE_CC
++extern "C" {
++#include "srv0srv.h"
++#include "buf0buf.h"
++#include "dict0dict.h"
++}
++/* We need to undef it in InnoDB */
++#undef byte
++#endif /* HAVE_INNOBASE_DB */
+
+ #ifndef NO_EMBEDDED_ACCESS_CHECKS
+ static const char *grant_names[]={
+@@ -4074,6 +4085,67 @@
+ DBUG_RETURN(res);
+ }
+
++int innodb_io_pattern_fill_table(THD *thd, TABLE_LIST *tables, COND *cond)
++{
++ TABLE *table= (TABLE *) tables->table;
++ /* Stores one row into `table` per counter found in buf_pool->io_counter_hash. */
++ buf_io_counter_t* io_counter;
++ dict_index_t* index;
++
++ DBUG_ENTER("innodb_io_pattern_fill_table");
++ int returnable= 0;
++
++ /* We cannot use inline functions of InnoDB here */
++
++ /* WARNING: for performance, this function is not protected by a mutex. */
++ /* Whenever possible, avoid running "DROP TABLE innodb_io_pattern" at the same time */
++ /* as queries on INFORMATION_SCHEMA.INNODB_IO_PATTERN. */
++
++ if (srv_io_pattern) {
++ for (ulint n=0; n < buf_pool->io_counter_hash->n_cells; n++) {
++ if (!srv_io_pattern) /* re-checked per cell; presumably it can be cleared concurrently -- TODO confirm */
++ goto end_func;
++ /* Walk the chain of counters that starts at hash cell n. */
++ io_counter = (buf_io_counter_t*)(buf_pool->io_counter_hash->array + n)->node;
++ while (io_counter) {
++ if (!srv_io_pattern)
++ goto end_func;
++ /* Look the index up by id so that table and index names can be reported. */
++ if (dict_sys != NULL) {
++ dulint id;
++ id.high = 0;
++ id.low = io_counter->index_id;
++ index = dict_index_find_on_id_low(id);
++ } else {
++ index = NULL;
++ }
++
++ table->field[0]->store(io_counter->space);
++ table->field[1]->store(io_counter->offset);
++ table->field[2]->store(io_counter->index_id);
++ if (index != NULL) {
++ table->field[3]->store(index->table_name,strlen(index->table_name),system_charset_info);
++ table->field[4]->store(index->name,strlen(index->name),system_charset_info);
++ } else {
++ table->field[3]->store("",0,system_charset_info);
++ table->field[4]->store("",0,system_charset_info);
++ }
++ table->field[5]->store(io_counter->n_read);
++ table->field[6]->store(io_counter->n_write);
++ if (schema_table_store_record(thd, table))
++ {
++ returnable= 1;
++ goto end_func;
++ }
++ io_counter = io_counter->hash; /* next counter in this cell's chain */
++ }
++ }
++ }
++ /* returnable is 0 unless storing a row failed, in which case it is 1. */
++ end_func:
++ DBUG_RETURN(returnable);
++}
++
+ /*
+ Find schema_tables elment by name
+
+@@ -4880,6 +4952,19 @@
+ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0}
+ };
+
++#ifdef HAVE_INNOBASE_DB
++ST_FIELD_INFO innodb_io_pattern_field_info[]=
++{
++ {"SPACE", 11, MYSQL_TYPE_LONG, 0, 0, "space_id"},
++ {"OFFSET", 11, MYSQL_TYPE_LONG, 0, 0, "offset"},
++ {"INDEX_ID", 11, MYSQL_TYPE_LONG, 0, 0, "index id"},
++ {"TABLE_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "table name"},
++ {"INDEX_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "index name"},
++ {"N_READ", 11, MYSQL_TYPE_LONG, 0, 0, "read ios"},
++ {"N_WRITE", 11, MYSQL_TYPE_LONG, 0, 0, "write ios"},
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0}
++};
++#endif
+
+ ST_FIELD_INFO variables_fields_info[]=
+ {
+@@ -5055,6 +5140,10 @@
+ make_old_format, 0, -1, -1, 1},
+ {"VIEWS", view_fields_info, create_schema_table,
+ get_all_tables, 0, get_schema_views_record, 1, 2, 0},
++#ifdef HAVE_INNOBASE_DB
++ {"INNODB_IO_PATTERN", innodb_io_pattern_field_info, create_schema_table,
++ innodb_io_pattern_fill_table, 0, 0, -1, -1, 0},
++#endif
+ {0, 0, 0, 0, 0, 0, 0, 0, 0}
+ };
+
+diff -r 2bbfde0e0e70 sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy Mon Dec 22 00:33:11 2008 -0800
++++ b/sql/sql_yacc.yy Mon Dec 22 00:33:48 2008 -0800
+@@ -685,6 +685,7 @@
+ %token INFILE
+ %token INNER_SYM
+ %token INNOBASE_SYM
++%token INNODB_IO_PATTERN
+ %token INOUT_SYM
+ %token INSENSITIVE_SYM
+ %token INSERT
+@@ -8541,6 +8542,7 @@
+ | MASTER_SYM { Lex->type|= REFRESH_MASTER; }
+ | DES_KEY_FILE { Lex->type|= REFRESH_DES_KEY_FILE; }
+ | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; }
++ | INNODB_IO_PATTERN { Lex->type|= REFRESH_INNODB_IO_PATTERN; }
+ | CLIENT_STATS_SYM { Lex->type|= REFRESH_CLIENT_STATS; }
+ | USER_STATS_SYM { Lex->type|= REFRESH_USER_STATS; }
+ | TABLE_STATS_SYM { Lex->type|= REFRESH_TABLE_STATS; }
+@@ -9594,6 +9596,7 @@
+ | ISOLATION {}
+ | ISSUER_SYM {}
+ | INNOBASE_SYM {}
++ | INNODB_IO_PATTERN {}
+ | INSERT_METHOD {}
+ | IO_SYM {}
+ | IPC_SYM {}
diff --git a/mysql-innodb_locks_held.patch b/mysql-innodb_locks_held.patch
new file mode 100644
index 0000000..416d50e
--- /dev/null
+++ b/mysql-innodb_locks_held.patch
@@ -0,0 +1,168 @@
+diff -r ae6708ab17e5 innobase/include/srv0srv.h
+--- a/innobase/include/srv0srv.h Mon Dec 22 00:32:07 2008 -0800
++++ b/innobase/include/srv0srv.h Mon Dec 22 00:32:58 2008 -0800
+@@ -80,6 +80,8 @@
+ extern ulint srv_log_file_size;
+ extern ulint srv_log_buffer_size;
+ extern ulong srv_flush_log_at_trx_commit;
++extern ulong srv_show_locks_held;
++extern ulong srv_show_verbose_locks;
+
+ extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
+ character set */
+diff -r ae6708ab17e5 innobase/lock/lock0lock.c
+--- a/innobase/lock/lock0lock.c Mon Dec 22 00:32:07 2008 -0800
++++ b/innobase/lock/lock0lock.c Mon Dec 22 00:32:58 2008 -0800
+@@ -4181,6 +4181,7 @@
+ #endif /* UNIV_SYNC_DEBUG */
+ }
+
++ if ( srv_show_verbose_locks ) {
+ for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+
+ if (lock_rec_get_nth_bit(lock, i)) {
+@@ -4198,6 +4199,7 @@
+ putc('\n', file);
+ }
+ }
++ } /* srv_show_verbose_locks */
+
+ mtr_commit(&mtr);
+ if (UNIV_LIKELY_NULL(heap)) {
+@@ -4369,7 +4371,7 @@
+ }
+ }
+
+- if (!srv_print_innodb_lock_monitor) {
++ if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) {
+ nth_trx++;
+ goto loop;
+ }
+@@ -4426,9 +4428,9 @@
+
+ nth_lock++;
+
+- if (nth_lock >= 10) {
++ if (nth_lock >= srv_show_locks_held) {
+ fputs(
+- "10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n",
++ "TOO MANY LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n",
+ file);
+
+ nth_trx++;
+diff -r ae6708ab17e5 innobase/srv/srv0srv.c
+--- a/innobase/srv/srv0srv.c Mon Dec 22 00:32:07 2008 -0800
++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:32:58 2008 -0800
+@@ -116,6 +116,8 @@
+ ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
+ ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
+ ulong srv_flush_log_at_trx_commit = 1;
++ulong srv_show_locks_held = 10; /* locks printed per trx in SHOW INNODB STATUS; ulong to match the extern declarations */
++ulong srv_show_verbose_locks = 0; /* nonzero: also print the locked records; ulong to match the extern declarations */
+
+ byte srv_latin1_ordering[256] /* The sort order table of the latin1
+ character set. The following table is
+diff -r ae6708ab17e5 libmysqld/set_var.cc
+--- a/libmysqld/set_var.cc Mon Dec 22 00:32:07 2008 -0800
++++ b/libmysqld/set_var.cc Mon Dec 22 00:32:58 2008 -0800
+@@ -821,6 +821,8 @@
+ &sys_innodb_thread_concurrency,
+ &sys_innodb_commit_concurrency,
+ &sys_innodb_flush_log_at_trx_commit,
++ &sys_innodb_show_locks_held,
++ &sys_innodb_show_verbose_locks,
+ #endif
+ &sys_trust_routine_creators,
+ &sys_trust_function_creators,
+@@ -936,6 +938,8 @@
+ {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
+ {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL},
+ {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS},
++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS },
++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS },
+ {"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR},
+ {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG },
+ {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG },
+diff -r ae6708ab17e5 patch_info/innodb_locks_held.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_locks_held.info Mon Dec 22 00:32:58 2008 -0800
+@@ -0,0 +1,6 @@
++File=innodb_locks_held.patch
++Name=Add locks held, remove locked records in SHOW INNODB STATUS
++Version=1.0
++Author=Baron Schwartz <baron at xaprb.com>
++License=GPL
++Comment=Bug #29126 fix
+diff -r ae6708ab17e5 sql/ha_innodb.h
+--- a/sql/ha_innodb.h Mon Dec 22 00:32:07 2008 -0800
++++ b/sql/ha_innodb.h Mon Dec 22 00:32:58 2008 -0800
+@@ -238,6 +238,8 @@
+ extern ulong srv_io_capacity;
+ extern ulong srv_read_ahead;
+ extern ulong srv_adaptive_checkpoint;
++extern ulong srv_show_locks_held;
++extern ulong srv_show_verbose_locks;
+ }
+
+ bool innobase_init(void);
+diff -r ae6708ab17e5 sql/mysqld.cc
+--- a/sql/mysqld.cc Mon Dec 22 00:32:07 2008 -0800
++++ b/sql/mysqld.cc Mon Dec 22 00:32:58 2008 -0800
+@@ -4969,6 +4969,8 @@
+ OPT_INNODB_MAX_PURGE_LAG,
+ OPT_INNODB_FILE_IO_THREADS,
+ OPT_INNODB_LOCK_WAIT_TIMEOUT,
++ OPT_INNODB_SHOW_LOCKS_HELD,
++ OPT_INNODB_SHOW_VERBOSE_LOCKS,
+ OPT_INNODB_THREAD_CONCURRENCY,
+ OPT_INNODB_COMMIT_CONCURRENCY,
+ OPT_INNODB_FORCE_RECOVERY,
+@@ -5308,6 +5310,14 @@
+ (gptr*) &srv_flush_log_at_trx_commit,
+ (gptr*) &srv_flush_log_at_trx_commit,
+ 0, GET_ULONG, OPT_ARG, 1, 0, 2, 0, 0, 0},
++ {"innodb_show_locks_held", OPT_INNODB_SHOW_LOCKS_HELD,
++ "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.",
++ (gptr*) &srv_show_locks_held, (gptr*) &srv_show_locks_held,
++ 0, GET_LONG, OPT_ARG, 10, 0, 1000, 0, 1, 0},
++ {"innodb_show_verbose_locks", OPT_INNODB_SHOW_VERBOSE_LOCKS,
++ "Whether to show records locked in SHOW INNODB STATUS.",
++ (gptr*) &srv_show_verbose_locks, (gptr*) &srv_show_verbose_locks,
++ 0, GET_LONG, OPT_ARG, 0, 0, 1, 0, 1, 0},
+ {"innodb_flush_method", OPT_INNODB_FLUSH_METHOD,
+ "With which method to flush data.", (gptr*) &innobase_unix_file_flush_method,
+ (gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
+diff -r ae6708ab17e5 sql/set_var.cc
+--- a/sql/set_var.cc Mon Dec 22 00:32:07 2008 -0800
++++ b/sql/set_var.cc Mon Dec 22 00:32:58 2008 -0800
+@@ -495,6 +495,12 @@
+ &srv_read_ahead);
+ sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint",
+ &srv_adaptive_checkpoint);
++sys_var_long_ptr sys_innodb_show_locks_held(
++ "innodb_show_locks_held",
++ &srv_show_locks_held);
++sys_var_long_ptr sys_innodb_show_verbose_locks(
++ "innodb_show_verbose_locks",
++ &srv_show_verbose_locks);
+ sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path",
+ &innobase_data_file_path);
+ sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir",
+@@ -862,6 +868,8 @@
+ &sys_innodb_io_capacity,
+ &sys_innodb_read_ahead,
+ &sys_innodb_adaptive_checkpoint,
++ &sys_innodb_show_locks_held,
++ &sys_innodb_show_verbose_locks,
+ #endif
+ &sys_trust_routine_creators,
+ &sys_trust_function_creators,
+@@ -977,6 +985,8 @@
+ {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
+ {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL},
+ {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS},
++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS },
++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS },
+ {"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR},
+ {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG },
+ {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG },
diff --git a/mysql-innodb_rw_lock.patch b/mysql-innodb_rw_lock.patch
new file mode 100644
index 0000000..3070bb0
--- /dev/null
+++ b/mysql-innodb_rw_lock.patch
@@ -0,0 +1,1459 @@
+diff -r 962aec0d731c innobase/configure
+--- a/innobase/configure Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/configure Thu Oct 09 08:30:28 2008 -0700
+@@ -20519,6 +20519,88 @@
+
+ fi
+ done
++
++
++# as http://lists.mysql.com/commits/40686 does
++{ echo "$as_me:$LINENO: checking whether the compiler provides atomic builtins" >&5
++echo $ECHO_N "checking whether the compiler provides atomic builtins... $ECHO_C" >&6; }
++if test "${mysql_cv_atomic_builtins+set}" = set; then
++ echo $ECHO_N "(cached) $ECHO_C" >&6
++else
++ if test "$cross_compiling" = yes; then
++ { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
++See \`config.log' for more details." >&5
++echo "$as_me: error: cannot run test program while cross compiling
++See \`config.log' for more details." >&2;}
++ { (exit 1); exit 1; }; }
++else
++ cat >conftest.$ac_ext <<_ACEOF
++/* confdefs.h. */
++_ACEOF
++cat confdefs.h >>conftest.$ac_ext
++cat >>conftest.$ac_ext <<_ACEOF
++/* end confdefs.h. */
++
++ int main()
++ {
++ int foo= -10; int bar= 10;
++ __sync_fetch_and_add(&foo, bar);
++ if (foo)
++ return -1;
++ bar= __sync_lock_test_and_set(&foo, bar);
++ if (bar || foo != 10)
++ return -1;
++ bar= __sync_val_compare_and_swap(&bar, foo, 15);
++ if (bar)
++ return -1;
++ return 0;
++ }
++
++_ACEOF
++rm -f conftest$ac_exeext
++if { (ac_try="$ac_link"
++case "(($ac_try" in
++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
++ *) ac_try_echo=$ac_try;;
++esac
++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
++ (eval "$ac_link") 2>&5
++ ac_status=$?
++ echo "$as_me:$LINENO: \$? = $ac_status" >&5
++ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
++ { (case "(($ac_try" in
++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
++ *) ac_try_echo=$ac_try;;
++esac
++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
++ (eval "$ac_try") 2>&5
++ ac_status=$?
++ echo "$as_me:$LINENO: \$? = $ac_status" >&5
++ (exit $ac_status); }; }; then
++ mysql_cv_atomic_builtins=yes
++else
++ echo "$as_me: program exited with status $ac_status" >&5
++echo "$as_me: failed program was:" >&5
++sed 's/^/| /' conftest.$ac_ext >&5
++
++( exit $ac_status )
++mysql_cv_atomic_builtins=no
++fi
++rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
++fi
++
++
++fi
++{ echo "$as_me:$LINENO: result: $mysql_cv_atomic_builtins" >&5
++echo "${ECHO_T}$mysql_cv_atomic_builtins" >&6; }
++
++if test "x$mysql_cv_atomic_builtins" = xyes; then
++
++cat >>confdefs.h <<\_ACEOF
++#define HAVE_ATOMIC_BUILTINS 1
++_ACEOF
++
++fi
+
+ #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args.
+ # Some versions of Unix only take 2 arguments.
+diff -r 962aec0d731c innobase/configure.in
+--- a/innobase/configure.in Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/configure.in Thu Oct 09 08:30:28 2008 -0700
+@@ -42,6 +42,31 @@
+ AC_CHECK_FUNCS(sched_yield)
+ AC_CHECK_FUNCS(fdatasync)
+ AC_CHECK_FUNCS(localtime_r)
++
++# as http://lists.mysql.com/commits/40686 does
++AC_CACHE_CHECK([whether the compiler provides atomic builtins],
++ [mysql_cv_atomic_builtins], [AC_TRY_RUN([
++ int main()
++ {
++ int foo= -10; int bar= 10;
++ __sync_fetch_and_add(&foo, bar);
++ if (foo)
++ return -1;
++ bar= __sync_lock_test_and_set(&foo, bar);
++ if (bar || foo != 10)
++ return -1;
++ bar= __sync_val_compare_and_swap(&bar, foo, 15);
++ if (bar)
++ return -1;
++ return 0;
++ }
++], [mysql_cv_atomic_builtins=yes], [mysql_cv_atomic_builtins=no])])
++
++if test "x$mysql_cv_atomic_builtins" = xyes; then
++ AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1,
++ [Define to 1 if compiler provides atomic builtins.])
++fi
++
+ #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args.
+ # Some versions of Unix only take 2 arguments.
+ #AC_C_INLINE Already checked in MySQL
+diff -r 962aec0d731c innobase/ib_config.h
+--- a/innobase/ib_config.h Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/ib_config.h Thu Oct 09 08:30:28 2008 -0700
+@@ -3,6 +3,9 @@
+
+ /* Define to 1 if you have the <aio.h> header file. */
+ #define HAVE_AIO_H 1
++
++/* Define to 1 if compiler provides atomic builtins. */
++#define HAVE_ATOMIC_BUILTINS 1
+
+ /* Define to 1 if you have the <dlfcn.h> header file. */
+ #define HAVE_DLFCN_H 1
+diff -r 962aec0d731c innobase/ib_config.h.in
+--- a/innobase/ib_config.h.in Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/ib_config.h.in Thu Oct 09 08:30:28 2008 -0700
+@@ -2,6 +2,9 @@
+
+ /* Define to 1 if you have the <aio.h> header file. */
+ #undef HAVE_AIO_H
++
++/* Define to 1 if compiler provides atomic builtins. */
++#undef HAVE_ATOMIC_BUILTINS
+
+ /* Define to 1 if you have the <dlfcn.h> header file. */
+ #undef HAVE_DLFCN_H
+diff -r 962aec0d731c innobase/include/sync0rw.h
+--- a/innobase/include/sync0rw.h Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/include/sync0rw.h Thu Oct 09 08:30:28 2008 -0700
+@@ -325,7 +325,17 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
++/*==================*/
++ rw_lock_t* lock);
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*==================*/
++ rw_lock_t* lock);
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
+ /*================*/
+ rw_lock_t* lock);
+ UNIV_INLINE
+@@ -408,6 +418,11 @@
+ rw_lock_debug_t* info); /* in: debug struct */
+ #endif /* UNIV_SYNC_DEBUG */
+
++#ifdef HAVE_ATOMIC_BUILTINS
++/* This value means NOT_LOCKED */
++#define RW_LOCK_BIAS 0x00100000
++#endif
++
+ /* NOTE! The structure appears here only for the compiler to know its size.
+ Do not use its fields directly! The structure used in the spin lock
+ implementation of a read-write lock. Several threads may have a shared lock
+@@ -417,9 +432,9 @@
+ field. Then no new readers are allowed in. */
+
+ struct rw_lock_struct {
+- os_event_t event; /* Used by sync0arr.c for thread queueing */
+-
+-#ifdef __WIN__
++ /* Used by sync0arr.c for thread queueing */
++ os_event_t s_event; /* Used for s_lock */
++ os_event_t x_event; /* Used for x_lock */
+ os_event_t wait_ex_event; /* This windows specific event is
+ used by the thread which has set the
+ lock state to RW_LOCK_WAIT_EX. The
+@@ -427,31 +442,35 @@
+ thread will be the next one to proceed
+ once the current the event gets
+ signalled. See LEMMA 2 in sync0sync.c */
++
++#ifdef HAVE_ATOMIC_BUILTINS
++ volatile lint lock_word; /* Used by using atomic builtin */
+ #endif
+
+- ulint reader_count; /* Number of readers who have locked this
++ volatile ulint reader_count; /* Number of readers who have locked this
+ lock in the shared mode */
+- ulint writer; /* This field is set to RW_LOCK_EX if there
++ volatile ulint writer; /* This field is set to RW_LOCK_EX if there
+ is a writer owning the lock (in exclusive
+ mode), RW_LOCK_WAIT_EX if a writer is
+ queueing for the lock, and
+ RW_LOCK_NOT_LOCKED, otherwise. */
+- os_thread_id_t writer_thread;
++ volatile os_thread_id_t writer_thread;
+ /* Thread id of a possible writer thread */
+- ulint writer_count; /* Number of times the same thread has
++ volatile ulint writer_count; /* Number of times the same thread has
+ recursively locked the lock in the exclusive
+ mode */
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_t mutex; /* The mutex protecting rw_lock_struct */
++#endif
+ ulint pass; /* Default value 0. This is set to some
+ value != 0 given by the caller of an x-lock
+ operation, if the x-lock is to be passed to
+ another thread to unlock (which happens in
+ asynchronous i/o). */
+- ulint waiters; /* This ulint is set to 1 if there are
+- waiters (readers or writers) in the global
+- wait array, waiting for this rw_lock.
+- Otherwise, == 0. */
+- ibool writer_is_wait_ex;
++ volatile ulint s_waiters; /* 1: there are waiters (s_lock) */
++ volatile ulint x_waiters; /* 1: there are waiters (x_lock) */
++ volatile ulint wait_ex_waiters; /* 1: there are waiters (wait_ex) */
++ volatile ibool writer_is_wait_ex;
+ /* This is TRUE if the writer field is
+ RW_LOCK_WAIT_EX; this field is located far
+ from the memory update hotspot fields which
+diff -r 962aec0d731c innobase/include/sync0rw.ic
+--- a/innobase/include/sync0rw.ic Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/include/sync0rw.ic Thu Oct 09 08:30:28 2008 -0700
+@@ -47,20 +47,52 @@
+ Accessor functions for rw lock. */
+ UNIV_INLINE
+ ulint
+-rw_lock_get_waiters(
++rw_lock_get_s_waiters(
+ /*================*/
+ rw_lock_t* lock)
+ {
+- return(lock->waiters);
++ return(lock->s_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_x_waiters(
++/*================*/
++ rw_lock_t* lock)
++{
++ return(lock->x_waiters);
++}
++UNIV_INLINE
++ulint
++rw_lock_get_wx_waiters(
++/*================*/
++ rw_lock_t* lock)
++{
++ return(lock->wait_ex_waiters);
+ }
+ UNIV_INLINE
+ void
+-rw_lock_set_waiters(
+-/*================*/
++rw_lock_set_s_waiters(
+ rw_lock_t* lock,
+ ulint flag)
+ {
+- lock->waiters = flag;
++ lock->s_waiters = flag;
++}
++UNIV_INLINE
++void
++rw_lock_set_x_waiters(
++ rw_lock_t* lock,
++ ulint flag)
++{
++ lock->x_waiters = flag;
++}
++UNIV_INLINE
++void
++rw_lock_set_wx_waiters(
++/*================*/
++ rw_lock_t* lock,
++ ulint flag)
++{
++ lock->wait_ex_waiters = flag;
+ }
+ UNIV_INLINE
+ ulint
+@@ -68,7 +100,19 @@
+ /*===============*/
+ rw_lock_t* lock)
+ {
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (lock->writer == RW_LOCK_NOT_LOCKED) {
++ return(RW_LOCK_NOT_LOCKED);
++ }
++
++ if (lock->writer_is_wait_ex) {
++ return(RW_LOCK_WAIT_EX);
++ } else {
++ return(RW_LOCK_EX);
++ }
++#else
+ return(lock->writer);
++#endif
+ }
+ UNIV_INLINE
+ void
+@@ -96,6 +140,7 @@
+ {
+ lock->reader_count = count;
+ }
++#ifndef HAVE_ATOMIC_BUILTINS
+ UNIV_INLINE
+ mutex_t*
+ rw_lock_get_mutex(
+@@ -104,6 +149,7 @@
+ {
+ return(&(lock->mutex));
+ }
++#endif
+
+ /**********************************************************************
+ Returns the value of writer_count for the lock. Does not reserve the lock
+@@ -133,14 +179,26 @@
+ const char* file_name, /* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+ {
+-#ifdef UNIV_SYNC_DEBUG
++#if defined(UNIV_SYNC_DEBUG) && !defined(HAVE_ATOMIC_BUILTINS)
+ ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+ #endif /* UNIV_SYNC_DEBUG */
+ /* Check if the writer field is free */
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) {
++ /* try s-lock */
++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
++ /* fail */
++ __sync_fetch_and_add(&(lock->lock_word),1);
++ return(FALSE); /* locking did not succeed */
++ }
++ /* success */
++ __sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
+ /* Set the shared lock by incrementing the reader count */
+ lock->reader_count++;
++#endif
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
+@@ -167,11 +225,15 @@
+ const char* file_name, /* in: file name where requested */
+ ulint line) /* in: line where lock requested */
+ {
+- ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
++ ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+ ut_ad(rw_lock_get_reader_count(lock) == 0);
+
+ /* Set the shared lock by incrementing the reader count */
++#ifdef HAVE_ATOMIC_BUILTINS
++ __sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ lock->reader_count++;
++#endif
+
+ lock->last_s_file_name = file_name;
+ lock->last_s_line = line;
+@@ -199,7 +261,11 @@
+
+ rw_lock_set_writer(lock, RW_LOCK_EX);
+ lock->writer_thread = os_thread_get_curr_id();
++#ifdef HAVE_ATOMIC_BUILTINS
++ __sync_fetch_and_add(&(lock->writer_count),1);
++#else
+ lock->writer_count++;
++#endif
+ lock->pass = 0;
+
+ lock->last_x_file_name = file_name;
+@@ -241,15 +307,21 @@
+ ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
+ #endif /* UNIV_SYNC_DEBUG */
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(rw_lock_get_mutex(lock));
++#endif
+
+ if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ return; /* Success */
+ } else {
+ /* Did not succeed, try spin wait */
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ rw_lock_s_lock_spin(lock, pass, file_name, line);
+
+@@ -272,11 +344,23 @@
+ {
+ ibool success = FALSE;
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++ /* try s-lock */
++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
++ /* fail */
++ __sync_fetch_and_add(&(lock->lock_word),1);
++ return(FALSE); /* locking did not succeed */
++ }
++ /* success */
++ __sync_fetch_and_add(&(lock->reader_count),1);
++#else
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ if (lock->writer == RW_LOCK_NOT_LOCKED) {
+ /* Set the shared lock by incrementing the reader count */
+ lock->reader_count++;
++#endif
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
+@@ -289,7 +373,9 @@
+ success = TRUE;
+ }
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ return(success);
+ }
+@@ -309,6 +395,55 @@
+ {
+ ibool success = FALSE;
+ os_thread_id_t curr_thread = os_thread_get_curr_id();
++#ifdef HAVE_ATOMIC_BUILTINS
++ if ((lock->lock_word == RW_LOCK_BIAS)
++ && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++ /* try x-lock */
++ if(__sync_sub_and_fetch(&(lock->lock_word),
++ RW_LOCK_BIAS) == 0) {
++ /* success */
++ /* try to lock writer */
++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++ == RW_LOCK_NOT_LOCKED) {
++ /* success */
++ lock->writer_thread = curr_thread;
++ lock->pass = 0;
++ lock->writer_is_wait_ex = FALSE;
++ /* next function may work as memory barrier */
++ relock:
++ __sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
++#endif
++
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = line;
++
++ ut_ad(rw_lock_validate(lock));
++
++ return(TRUE);
++ } else {
++ /* x-unlock */
++ __sync_fetch_and_add(&(lock->lock_word),
++ RW_LOCK_BIAS);
++ }
++ } else {
++ /* fail (x-lock) */
++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS);
++ }
++ }
++
++ if (lock->pass == 0
++ && os_thread_eq(lock->writer_thread, curr_thread)
++ && rw_lock_get_writer(lock) == RW_LOCK_EX) {
++ goto relock;
++ }
++
++ ut_ad(rw_lock_validate(lock));
++
++ return(FALSE);
++#else
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
+@@ -339,6 +474,7 @@
+ ut_ad(rw_lock_validate(lock));
+
+ return(success);
++#endif
+ }
+
+ /**********************************************************************
+@@ -354,16 +490,33 @@
+ #endif
+ )
+ {
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_t* mutex = &(lock->mutex);
+- ibool sg = FALSE;
++#endif
++ ibool x_sg = FALSE;
++ ibool wx_sg = FALSE;
++#ifdef HAVE_ATOMIC_BUILTINS
++ ibool last = FALSE;
++#endif
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ /* Acquire the mutex protecting the rw-lock fields */
+ mutex_enter(mutex);
++#endif
+
+ /* Reset the shared lock by decrementing the reader count */
+
+ ut_a(lock->reader_count > 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++ /* unlock lock_word */
++ __sync_fetch_and_add(&(lock->lock_word),1);
++
++ if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) {
++ last = TRUE;
++ }
++#else
+ lock->reader_count--;
++#endif
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
+@@ -372,20 +525,36 @@
+ /* If there may be waiters and this was the last s-lock,
+ signal the object */
+
+- if (UNIV_UNLIKELY(lock->waiters)
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (UNIV_UNLIKELY(last && lock->wait_ex_waiters)) {
++#else
++ if (UNIV_UNLIKELY(lock->wait_ex_waiters)
+ && lock->reader_count == 0) {
+- sg = TRUE;
++#endif
++ wx_sg = TRUE;
+
+- rw_lock_set_waiters(lock, 0);
++ rw_lock_set_wx_waiters(lock, 0);
++ }
++#ifdef HAVE_ATOMIC_BUILTINS
++ else if (UNIV_UNLIKELY(last && lock->x_waiters)) {
++#else
++ else if (UNIV_UNLIKELY(lock->x_waiters)
++ && lock->reader_count == 0) {
++#endif
++ x_sg = TRUE;
++
++ rw_lock_set_x_waiters(lock, 0);
+ }
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(mutex);
++#endif
+
+- if (UNIV_UNLIKELY(sg)) {
+-#ifdef __WIN__
++ if (UNIV_UNLIKELY(wx_sg)) {
+ os_event_set(lock->wait_ex_event);
+-#endif
+- os_event_set(lock->event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ } else if (UNIV_UNLIKELY(x_sg)) {
++ os_event_set(lock->x_event);
+ sync_array_object_signalled(sync_primary_wait_array);
+ }
+
+@@ -409,13 +578,22 @@
+
+ ut_ad(lock->reader_count > 0);
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ __sync_sub_and_fetch(&(lock->reader_count),1);
++#else
+ lock->reader_count--;
++#endif
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
+ #endif
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ ut_ad(!lock->s_waiters);
++ ut_ad(!lock->x_waiters);
++#else
+ ut_ad(!lock->waiters);
++#endif
+ ut_ad(rw_lock_validate(lock));
+ #ifdef UNIV_SYNC_PERF_STAT
+ rw_s_exit_count++;
+@@ -435,41 +613,81 @@
+ #endif
+ )
+ {
+- ibool sg = FALSE;
++#ifdef HAVE_ATOMIC_BUILTINS
++ ibool last = FALSE;
++#endif
++ ibool s_sg = FALSE;
++ ibool x_sg = FALSE;
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ /* Acquire the mutex protecting the rw-lock fields */
+ mutex_enter(&(lock->mutex));
++#endif
+
+ /* Reset the exclusive lock if this thread no longer has an x-mode
+ lock */
+
+ ut_ad(lock->writer_count > 0);
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++ last = TRUE;
++ }
++
++ if (last) {
++ /* unlock lock_word */
++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS);
++
++ /* FIXME: -1 is not a well-defined value for a pthread thread id,
++ but we must not keep the id of a thread that is no longer the owner. */
++ lock->writer_thread = -1;
++
++ /* An atomic operation may be safer with respect to memory ordering. */
++ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
++ __sync_synchronize();
++ }
++#else
+ lock->writer_count--;
+
+ if (lock->writer_count == 0) {
+ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ }
++#endif
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
+ #endif
+
+ /* If there may be waiters, signal the lock */
+- if (UNIV_UNLIKELY(lock->waiters)
+- && lock->writer_count == 0) {
+-
+- sg = TRUE;
+- rw_lock_set_waiters(lock, 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (last) {
++#else
++ if (lock->writer_count == 0) {
++#endif
++ if(lock->s_waiters){
++ s_sg = TRUE;
++ rw_lock_set_s_waiters(lock, 0);
++ }
++ if(lock->x_waiters){
++ x_sg = TRUE;
++ rw_lock_set_x_waiters(lock, 0);
++ }
+ }
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&(lock->mutex));
++#endif
+
+- if (UNIV_UNLIKELY(sg)) {
++ if (UNIV_UNLIKELY(s_sg)) {
++ os_event_set(lock->s_event);
++ sync_array_object_signalled(sync_primary_wait_array);
++ }
++ if (UNIV_UNLIKELY(x_sg)) {
+ #ifdef __WIN__
++ /* It is doubtful that signalling wait_ex_event here is necessary. */
+ os_event_set(lock->wait_ex_event);
+ #endif
+- os_event_set(lock->event);
++ os_event_set(lock->x_event);
+ sync_array_object_signalled(sync_primary_wait_array);
+ }
+
+@@ -494,9 +712,13 @@
+
+ ut_ad(lock->writer_count > 0);
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
++#else
+ lock->writer_count--;
+
+ if (lock->writer_count == 0) {
++#endif
+ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ }
+
+@@ -504,7 +726,12 @@
+ rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+ #endif
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ ut_ad(!lock->s_waiters);
++ ut_ad(!lock->x_waiters);
++#else
+ ut_ad(!lock->waiters);
++#endif
+ ut_ad(rw_lock_validate(lock));
+
+ #ifdef UNIV_SYNC_PERF_STAT
+diff -r 962aec0d731c innobase/sync/sync0arr.c
+--- a/innobase/sync/sync0arr.c Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/sync/sync0arr.c Thu Oct 09 08:30:28 2008 -0700
+@@ -309,13 +309,13 @@
+ {
+ if (type == SYNC_MUTEX) {
+ return(os_event_reset(((mutex_t *) object)->event));
+-#ifdef __WIN__
+ } else if (type == RW_LOCK_WAIT_EX) {
+ return(os_event_reset(
+ ((rw_lock_t *) object)->wait_ex_event));
+-#endif
+- } else {
+- return(os_event_reset(((rw_lock_t *) object)->event));
++ } else if (type == RW_LOCK_SHARED) {
++ return(os_event_reset(((rw_lock_t *) object)->s_event));
++ } else { /* RW_LOCK_EX */
++ return(os_event_reset(((rw_lock_t *) object)->x_event));
+ }
+ }
+
+@@ -415,15 +415,12 @@
+
+ if (cell->request_type == SYNC_MUTEX) {
+ event = ((mutex_t*) cell->wait_object)->event;
+-#ifdef __WIN__
+- /* On windows if the thread about to wait is the one which
+- has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
+- it waits on a special event i.e.: wait_ex_event. */
+ } else if (cell->request_type == RW_LOCK_WAIT_EX) {
+ event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
+-#endif
+- } else {
+- event = ((rw_lock_t*) cell->wait_object)->event;
++ } else if (cell->request_type == RW_LOCK_SHARED) {
++ event = ((rw_lock_t*) cell->wait_object)->s_event;
++ } else {
++ event = ((rw_lock_t*) cell->wait_object)->x_event;
+ }
+
+ cell->waiting = TRUE;
+@@ -464,6 +461,7 @@
+ mutex_t* mutex;
+ rw_lock_t* rwlock;
+ ulint type;
++ ulint writer;
+
+ type = cell->request_type;
+
+@@ -492,12 +490,10 @@
+ (ulong) mutex->waiters);
+
+ } else if (type == RW_LOCK_EX
+-#ifdef __WIN__
+ || type == RW_LOCK_WAIT_EX
+-#endif
+ || type == RW_LOCK_SHARED) {
+
+- fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
++ fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file);
+
+ rwlock = cell->old_wait_rw_lock;
+
+@@ -505,21 +501,23 @@
+ " RW-latch at %p created in file %s line %lu\n",
+ rwlock, rwlock->cfile_name,
+ (ulong) rwlock->cline);
+- if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
++ writer = rw_lock_get_writer(rwlock);
++ if (writer != RW_LOCK_NOT_LOCKED) {
+ fprintf(file,
+ "a writer (thread id %lu) has reserved it in mode %s",
+ (ulong) os_thread_pf(rwlock->writer_thread),
+- rwlock->writer == RW_LOCK_EX
++ writer == RW_LOCK_EX
+ ? " exclusive\n"
+ : " wait exclusive\n");
+ }
+
+ fprintf(file,
+- "number of readers %lu, waiters flag %lu\n"
++ "number of readers %lu, s_waiters flag %lu, x_waiters flag %lu\n"
+ "Last time read locked in file %s line %lu\n"
+ "Last time write locked in file %s line %lu\n",
+ (ulong) rwlock->reader_count,
+- (ulong) rwlock->waiters,
++ (ulong) rwlock->s_waiters,
++ (ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters),
+ rwlock->last_s_file_name,
+ (ulong) rwlock->last_s_line,
+ rwlock->last_x_file_name,
+@@ -839,11 +837,15 @@
+ /*========================*/
+ sync_array_t* arr) /* in: wait array */
+ {
++#ifdef HAVE_ATOMIC_BUILTINS
++ __sync_fetch_and_add(&(arr->sg_count),1);
++#else
+ sync_array_enter(arr);
+
+ arr->sg_count++;
+
+ sync_array_exit(arr);
++#endif
+ }
+
+ /**************************************************************************
+@@ -880,19 +882,23 @@
+
+ mutex = cell->wait_object;
+ os_event_set(mutex->event);
+-#ifdef __WIN__
+ } else if (cell->request_type
+ == RW_LOCK_WAIT_EX) {
+ rw_lock_t* lock;
+
+ lock = cell->wait_object;
+ os_event_set(lock->wait_ex_event);
+-#endif
+- } else {
++ } else if (cell->request_type
++ == RW_LOCK_SHARED) {
+ rw_lock_t* lock;
+
+ lock = cell->wait_object;
+- os_event_set(lock->event);
++ os_event_set(lock->s_event);
++ } else {
++ rw_lock_t* lock;
++
++ lock = cell->wait_object;
++ os_event_set(lock->x_event);
+ }
+ }
+ }
+diff -r 962aec0d731c innobase/sync/sync0rw.c
+--- a/innobase/sync/sync0rw.c Thu Oct 09 08:28:53 2008 -0700
++++ b/innobase/sync/sync0rw.c Thu Oct 09 08:30:28 2008 -0700
+@@ -99,6 +99,7 @@
+ object is created, then the following call initializes
+ the sync system. */
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_create(rw_lock_get_mutex(lock));
+ mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
+
+@@ -108,8 +109,14 @@
+ lock->mutex.cmutex_name = cmutex_name;
+ lock->mutex.mutex_type = 1;
+ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
++#endif /* !HAVE_ATOMIC_BUILTINS */
+
+- rw_lock_set_waiters(lock, 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++ lock->lock_word = RW_LOCK_BIAS;
++#endif
++ rw_lock_set_s_waiters(lock, 0);
++ rw_lock_set_x_waiters(lock, 0);
++ rw_lock_set_wx_waiters(lock, 0);
+ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ lock->writer_count = 0;
+ rw_lock_set_reader_count(lock, 0);
+@@ -130,11 +137,9 @@
+ lock->last_x_file_name = "not yet reserved";
+ lock->last_s_line = 0;
+ lock->last_x_line = 0;
+- lock->event = os_event_create(NULL);
+-
+-#ifdef __WIN__
++ lock->s_event = os_event_create(NULL);
++ lock->x_event = os_event_create(NULL);
+ lock->wait_ex_event = os_event_create(NULL);
+-#endif
+
+ mutex_enter(&rw_lock_list_mutex);
+
+@@ -162,19 +167,21 @@
+ ut_a(rw_lock_validate(lock));
+ #endif /* UNIV_DEBUG */
+ ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+- ut_a(rw_lock_get_waiters(lock) == 0);
++ ut_a(rw_lock_get_s_waiters(lock) == 0);
++ ut_a(rw_lock_get_x_waiters(lock) == 0);
++ ut_a(rw_lock_get_wx_waiters(lock) == 0);
+ ut_a(rw_lock_get_reader_count(lock) == 0);
+
+ lock->magic_n = 0;
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_free(rw_lock_get_mutex(lock));
++#endif
+
+ mutex_enter(&rw_lock_list_mutex);
+- os_event_free(lock->event);
+-
+-#ifdef __WIN__
++ os_event_free(lock->s_event);
++ os_event_free(lock->x_event);
+ os_event_free(lock->wait_ex_event);
+-#endif
+
+ if (UT_LIST_GET_PREV(list, lock)) {
+ ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+@@ -192,6 +199,8 @@
+ Checks that the rw-lock has been initialized and that there are no
+ simultaneous shared and exclusive locks. */
+
++/* MEMO: If HAVE_ATOMIC_BUILTINS, we should use this function statically. */
++
+ ibool
+ rw_lock_validate(
+ /*=============*/
+@@ -199,7 +208,9 @@
+ {
+ ut_a(lock);
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(rw_lock_get_mutex(lock));
++#endif
+
+ ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
+ ut_a((rw_lock_get_reader_count(lock) == 0)
+@@ -207,11 +218,17 @@
+ ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
+ || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
+ || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
+- ut_a((rw_lock_get_waiters(lock) == 0)
+- || (rw_lock_get_waiters(lock) == 1));
++ ut_a((rw_lock_get_s_waiters(lock) == 0)
++ || (rw_lock_get_s_waiters(lock) == 1));
++ ut_a((rw_lock_get_x_waiters(lock) == 0)
++ || (rw_lock_get_x_waiters(lock) == 1));
++ ut_a((rw_lock_get_wx_waiters(lock) == 0)
++ || (rw_lock_get_wx_waiters(lock) == 1));
+ ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ return(TRUE);
+ }
+@@ -237,13 +254,14 @@
+ ut_ad(rw_lock_validate(lock));
+
+ lock_loop:
++ i = 0;
++spin_loop:
+ rw_s_spin_wait_count++;
+
+ /* Spin waiting for the writer field to become free */
+- i = 0;
+
+- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
+- && i < SYNC_SPIN_ROUNDS) {
++ while (i < SYNC_SPIN_ROUNDS
++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+@@ -262,15 +280,27 @@
+ lock->cfile_name, (ulong) lock->cline, (ulong) i);
+ }
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(rw_lock_get_mutex(lock));
++#endif
+
+ /* We try once again to obtain the lock */
+
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ return; /* Success */
+ } else {
++#ifdef HAVE_ATOMIC_BUILTINS
++ /* same approach as in sync0sync.c */
++ i++;
++
++ if (i < SYNC_SPIN_ROUNDS) {
++ goto spin_loop;
++ }
++#endif
+ /* If we get here, locking did not succeed, we may
+ suspend the thread to wait in the wait array */
+
+@@ -281,9 +311,19 @@
+ file_name, line,
+ &index);
+
+- rw_lock_set_waiters(lock, 1);
++ rw_lock_set_s_waiters(lock, 1);
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ /* same approach as in sync0sync.c */
++ for (i = 0; i < 4; i++) {
++ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
++ sync_array_free_cell(sync_primary_wait_array, index);
++ return; /* Success */
++ }
++ }
++#else
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+@@ -318,13 +358,19 @@
+ {
+ ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(&(lock->mutex));
++#endif
+
+ lock->writer_thread = os_thread_get_curr_id();
+
+ lock->pass = 0;
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&(lock->mutex));
++#else
++ __sync_synchronize();
++#endif
+ }
+
+ /**********************************************************************
+@@ -342,6 +388,89 @@
+ const char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+ {
++#ifdef HAVE_ATOMIC_BUILTINS
++ os_thread_id_t curr_thread = os_thread_get_curr_id();
++
++ /* try to lock writer */
++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
++ == RW_LOCK_NOT_LOCKED) {
++ /* success */
++ /* obtain RW_LOCK_WAIT_EX right */
++ lock->writer_thread = curr_thread;
++ lock->pass = pass;
++ lock->writer_is_wait_ex = TRUE;
++ /* An atomic operation may be safer with respect to memory ordering. */
++ __sync_synchronize();
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
++ file_name, line);
++#endif
++ }
++
++ if (!os_thread_eq(lock->writer_thread, curr_thread)) {
++ return(RW_LOCK_NOT_LOCKED);
++ }
++
++ switch(rw_lock_get_writer(lock)) {
++ case RW_LOCK_WAIT_EX:
++ /* have right to try x-lock */
++ if (lock->lock_word == RW_LOCK_BIAS) {
++ /* try x-lock */
++ if(__sync_sub_and_fetch(&(lock->lock_word),
++ RW_LOCK_BIAS) == 0) {
++ /* success */
++ lock->pass = pass;
++ lock->writer_is_wait_ex = FALSE;
++ __sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++ file_name, line);
++#endif
++
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = line;
++
++ /* Locking succeeded, we may return */
++ return(RW_LOCK_EX);
++ } else {
++ /* fail */
++ __sync_fetch_and_add(&(lock->lock_word),
++ RW_LOCK_BIAS);
++ }
++ }
++ /* There are readers, we have to wait */
++ return(RW_LOCK_WAIT_EX);
++
++ break;
++
++ case RW_LOCK_EX:
++ /* already have x-lock */
++ if ((lock->pass == 0)&&(pass == 0)) {
++ __sync_fetch_and_add(&(lock->writer_count),1);
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
++ line);
++#endif
++
++ lock->last_x_file_name = file_name;
++ lock->last_x_line = line;
++
++ /* Locking succeeded, we may return */
++ return(RW_LOCK_EX);
++ }
++
++ return(RW_LOCK_NOT_LOCKED);
++
++ break;
++
++ default: /* ??? */
++ return(RW_LOCK_NOT_LOCKED);
++ }
++#else /* HAVE_ATOMIC_BUILTINS */
++
+ #ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -423,6 +552,7 @@
+ /* Locking succeeded, we may return */
+ return(RW_LOCK_EX);
+ }
++#endif /* HAVE_ATOMIC_BUILTINS */
+
+ /* Locking did not succeed */
+ return(RW_LOCK_NOT_LOCKED);
+@@ -448,19 +578,33 @@
+ ulint line) /* in: line where requested */
+ {
+ ulint index; /* index of the reserved wait cell */
+- ulint state; /* lock state acquired */
++ ulint state = RW_LOCK_NOT_LOCKED; /* lock state acquired */
++#ifdef HAVE_ATOMIC_BUILTINS
++ ulint prev_state = RW_LOCK_NOT_LOCKED;
++#endif
+ ulint i; /* spin round count */
+
+ ut_ad(rw_lock_validate(lock));
+
+ lock_loop:
++ i = 0;
++
++#ifdef HAVE_ATOMIC_BUILTINS
++ prev_state = state;
++#else
+ /* Acquire the mutex protecting the rw-lock fields */
+ mutex_enter_fast(&(lock->mutex));
++#endif
+
+ state = rw_lock_x_lock_low(lock, pass, file_name, line);
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (state != prev_state) i=0; /* if progress, reset counter. */
++#else
+ mutex_exit(&(lock->mutex));
++#endif
+
++spin_loop:
+ if (state == RW_LOCK_EX) {
+
+ return; /* Locking succeeded */
+@@ -468,10 +612,9 @@
+ } else if (state == RW_LOCK_NOT_LOCKED) {
+
+ /* Spin waiting for the writer field to become free */
+- i = 0;
+
+- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
+- && i < SYNC_SPIN_ROUNDS) {
++ while (i < SYNC_SPIN_ROUNDS
++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+ srv_spin_wait_delay));
+@@ -485,9 +628,12 @@
+ } else if (state == RW_LOCK_WAIT_EX) {
+
+ /* Spin waiting for the reader count field to become zero */
+- i = 0;
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ while (lock->lock_word != RW_LOCK_BIAS
++#else
+ while (rw_lock_get_reader_count(lock) != 0
++#endif
+ && i < SYNC_SPIN_ROUNDS) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+@@ -500,7 +646,6 @@
+ os_thread_yield();
+ }
+ } else {
+- i = 0; /* Eliminate a compiler warning */
+ ut_error;
+ }
+
+@@ -516,34 +661,69 @@
+ /* We try once again to obtain the lock. Acquire the mutex protecting
+ the rw-lock fields */
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ prev_state = state;
++#else
+ mutex_enter(rw_lock_get_mutex(lock));
++#endif
+
+ state = rw_lock_x_lock_low(lock, pass, file_name, line);
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (state != prev_state) i=0; /* if progress, reset counter. */
++#endif
++
+ if (state == RW_LOCK_EX) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ return; /* Locking succeeded */
+ }
++
++#ifdef HAVE_ATOMIC_BUILTINS
++ /* same approach as in sync0sync.c */
++ i++;
++
++ if (i < SYNC_SPIN_ROUNDS) {
++ goto spin_loop;
++ }
++#endif
+
+ rw_x_system_call_count++;
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock,
+-#ifdef __WIN__
+- /* On windows RW_LOCK_WAIT_EX signifies
+- that this thread should wait on the
+- special wait_ex_event. */
+ (state == RW_LOCK_WAIT_EX)
+ ? RW_LOCK_WAIT_EX :
+-#endif
+ RW_LOCK_EX,
+ file_name, line,
+ &index);
+
+- rw_lock_set_waiters(lock, 1);
++ if (state == RW_LOCK_WAIT_EX) {
++ rw_lock_set_wx_waiters(lock, 1);
++ } else {
++ rw_lock_set_x_waiters(lock, 1);
++ }
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ /* same approach as in sync0sync.c */
++ for (i = 0; i < 4; i++) {
++ prev_state = state;
++ state = rw_lock_x_lock_low(lock, pass, file_name, line);
++ if (state == RW_LOCK_EX) {
++ sync_array_free_cell(sync_primary_wait_array, index);
++ return; /* Locking succeeded */
++ }
++ if (state != prev_state) {
++ /* retry! */
++ sync_array_free_cell(sync_primary_wait_array, index);
++ goto lock_loop;
++ }
++ }
++#else
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+@@ -718,7 +898,9 @@
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(&(lock->mutex));
++#endif
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+
+@@ -728,7 +910,9 @@
+ && (info->pass == 0)
+ && (info->lock_type == lock_type)) {
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&(lock->mutex));
++#endif
+ /* Found! */
+
+ return(TRUE);
+@@ -736,7 +920,9 @@
+
+ info = UT_LIST_GET_NEXT(list, info);
+ }
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&(lock->mutex));
++#endif
+
+ return(FALSE);
+ }
+@@ -758,21 +944,25 @@
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(&(lock->mutex));
++#endif
+
+ if (lock_type == RW_LOCK_SHARED) {
+ if (lock->reader_count > 0) {
+ ret = TRUE;
+ }
+ } else if (lock_type == RW_LOCK_EX) {
+- if (lock->writer == RW_LOCK_EX) {
++ if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
+ ret = TRUE;
+ }
+ } else {
+ ut_error;
+ }
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&(lock->mutex));
++#endif
+
+ return(ret);
+ }
+@@ -801,16 +991,26 @@
+
+ count++;
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(&(lock->mutex));
++#endif
+
+ if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ || (rw_lock_get_reader_count(lock) != 0)
+- || (rw_lock_get_waiters(lock) != 0)) {
++ || (rw_lock_get_s_waiters(lock) != 0)
++ || (rw_lock_get_x_waiters(lock) != 0)
++ || (rw_lock_get_wx_waiters(lock) != 0)) {
+
+ fprintf(stderr, "RW-LOCK: %p ", lock);
+
+- if (rw_lock_get_waiters(lock)) {
+- fputs(" Waiters for the lock exist\n", stderr);
++ if (rw_lock_get_s_waiters(lock)) {
++ fputs(" s_waiters for the lock exist,", stderr);
++ }
++ if (rw_lock_get_x_waiters(lock)) {
++ fputs(" x_waiters for the lock exist\n", stderr);
++ }
++ if (rw_lock_get_wx_waiters(lock)) {
++ fputs(" wait_ex_waiters for the lock exist\n", stderr);
+ } else {
+ putc('\n', stderr);
+ }
+@@ -822,7 +1022,9 @@
+ }
+ }
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&(lock->mutex));
++#endif
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+@@ -847,10 +1049,18 @@
+
+ if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ || (rw_lock_get_reader_count(lock) != 0)
+- || (rw_lock_get_waiters(lock) != 0)) {
++ || (rw_lock_get_s_waiters(lock) != 0)
++ || (rw_lock_get_x_waiters(lock) != 0)
++ || (rw_lock_get_wx_waiters(lock) != 0)) {
+
+- if (rw_lock_get_waiters(lock)) {
+- fputs(" Waiters for the lock exist\n", stderr);
++ if (rw_lock_get_s_waiters(lock)) {
++ fputs(" s_waiters for the lock exist,", stderr);
++ }
++ if (rw_lock_get_x_waiters(lock)) {
++ fputs(" x_waiters for the lock exist\n", stderr);
++ }
++ if (rw_lock_get_wx_waiters(lock)) {
++ fputs(" wait_ex_waiters for the lock exist\n", stderr);
+ } else {
+ putc('\n', stderr);
+ }
+@@ -909,14 +1119,18 @@
+ lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+ while (lock != NULL) {
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(rw_lock_get_mutex(lock));
++#endif
+
+ if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+ || (rw_lock_get_reader_count(lock) != 0)) {
+ count++;
+ }
+
++#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(rw_lock_get_mutex(lock));
++#endif
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+diff -r 962aec0d731c patch_info/innodb_rw_lock.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_rw_lock.info Thu Oct 09 08:30:28 2008 -0700
+@@ -0,0 +1,6 @@
++File=innodb_rw_lock.patch
++Name=Fix of InnoDB rw_locks
++Version=1.0
++Author=Yasufumi Kinoshita
++License=BSD
++Comment=
diff --git a/mysql-innodb_show_bp.patch b/mysql-innodb_show_bp.patch
new file mode 100644
index 0000000..a56ae9a
--- /dev/null
+++ b/mysql-innodb_show_bp.patch
@@ -0,0 +1,447 @@
+diff -r fe944d2c6e1f innobase/btr/btr0btr.c
+--- a/innobase/btr/btr0btr.c Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/btr/btr0btr.c Mon Nov 10 19:48:24 2008 -0800
+@@ -2989,3 +2989,11 @@
+
+ return(TRUE);
+ }
++
++dulint
++btr_page_get_index_id_noninline(
++/*============*/ /* out: index id; non-inline wrapper around btr_page_get_index_id() */
++ page_t* page) /* in: index page */
++{
++ return btr_page_get_index_id(page);
++}
+diff -r fe944d2c6e1f innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/buf/buf0buf.c Mon Nov 10 19:48:24 2008 -0800
+@@ -2629,3 +2629,13 @@
+ buf_block_print(block);
+ }
+
++buf_block_t*
++buf_pool_get_nth_block_no_inline(
++/*===================*/
++ /* out: pointer to block */
++ buf_pool_t* buf_pool,/* in: buf_pool */
++ ulint i) /* in: index of the block */{
++
++return buf_pool_get_nth_block(buf_pool, i); /* simply forwards to buf_pool_get_nth_block() */
++
++}
+diff -r fe944d2c6e1f innobase/include/btr0btr.h
+--- a/innobase/include/btr0btr.h Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/include/btr0btr.h Mon Nov 10 19:48:24 2008 -0800
+@@ -69,6 +69,12 @@
+ UNIV_INLINE
+ dulint
+ btr_page_get_index_id(
++/*==================*/
++ /* out: index id */
++ page_t* page); /* in: index page */
++
++dulint
++btr_page_get_index_id_noninline(
+ /*==================*/
+ /* out: index id */
+ page_t* page); /* in: index page */
+diff -r fe944d2c6e1f innobase/include/buf0buf.h
+--- a/innobase/include/buf0buf.h Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/include/buf0buf.h Mon Nov 10 19:48:24 2008 -0800
+@@ -703,6 +703,8 @@
+ buf_get_free_list_len(void);
+ /*=======================*/
+
++void buf_pool_dump(void);
++buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i);
+
+
+ /* The buffer control block structure */
+diff -r fe944d2c6e1f innobase/include/page0page.h
+--- a/innobase/include/page0page.h Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/include/page0page.h Mon Nov 10 19:48:24 2008 -0800
+@@ -260,6 +260,12 @@
+ /*============*/
+ /* out: number of user records */
+ page_t* page); /* in: index page */
++
++ulint
++page_get_n_recs_noninline(
++/*============*/
++ /* out: number of user records */
++ page_t* page); /* in: index page */
+ /*******************************************************************
+ Returns the number of records before the given record in chain.
+ The number includes infimum and supremum records. */
+@@ -519,6 +525,12 @@
+ UNIV_INLINE
+ ulint
+ page_get_data_size(
++/*===============*/
++ /* out: data in bytes */
++ page_t* page); /* in: index page */
++
++ulint
++page_get_data_size_noninline(
+ /*===============*/
+ /* out: data in bytes */
+ page_t* page); /* in: index page */
+diff -r fe944d2c6e1f innobase/page/page0page.c
+--- a/innobase/page/page0page.c Mon Nov 10 19:47:27 2008 -0800
++++ b/innobase/page/page0page.c Mon Nov 10 19:48:24 2008 -0800
+@@ -1994,3 +1994,25 @@
+ page_cur_move_to_next(&cur);
+ }
+ }
++
++ulint
++page_get_n_recs_noninline(
++/*============*/
++ /* out: number of user records */
++ page_t* page) /* in: index page */
++{
++ return page_get_n_recs(page); /* simply forwards to page_get_n_recs() */
++}
++
++
++ulint
++page_get_data_size_noninline(
++/*============*/
++ /* out: data in bytes */
++ page_t* page) /* in: index page */
++{
++ return page_get_data_size(page); /* non-inline wrapper around page_get_data_size() */
++}
++
++
++
+diff -r fe944d2c6e1f mysql-test/r/information_schema.result
+--- a/mysql-test/r/information_schema.result Mon Nov 10 19:47:27 2008 -0800
++++ b/mysql-test/r/information_schema.result Mon Nov 10 19:48:25 2008 -0800
+@@ -42,6 +42,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INNODB_BUFFER_POOL_CONTENT
+ INDEX_STATISTICS
+ KEY_COLUMN_USAGE
+ PROCESSLIST
+@@ -741,7 +742,7 @@
+ CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1;
+ CREATE VIEW a2 AS SELECT t_CRASHME FROM a1;
+ count(*)
+-107
++108
+ drop view a2, a1;
+ drop table t_crashme;
+ select table_schema,table_name, column_name from
+@@ -802,6 +803,7 @@
+ TABLE_NAME COLUMN_NAME PRIVILEGES
+ COLUMNS TABLE_NAME select
+ COLUMN_PRIVILEGES TABLE_NAME select
++INNODB_BUFFER_POOL_CONTENT TABLE_NAME select
+ INDEX_STATISTICS TABLE_NAME select
+ KEY_COLUMN_USAGE TABLE_NAME select
+ STATISTICS TABLE_NAME select
+@@ -815,7 +817,7 @@
+ flush privileges;
+ SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA;
+ table_schema count(*)
+-information_schema 22
++information_schema 23
+ mysql 17
+ create table t1 (i int, j int);
+ create trigger trg1 before insert on t1 for each row
+@@ -1206,6 +1208,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME
+ COLUMNS TABLE_SCHEMA
+ COLUMN_PRIVILEGES TABLE_SCHEMA
++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA
+ INDEX_STATISTICS TABLE_SCHEMA
+ KEY_COLUMN_USAGE CONSTRAINT_SCHEMA
+ PROCESSLIST ID
+@@ -1243,6 +1246,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME
+ COLUMNS TABLE_SCHEMA
+ COLUMN_PRIVILEGES TABLE_SCHEMA
++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA
+ INDEX_STATISTICS TABLE_SCHEMA
+ KEY_COLUMN_USAGE CONSTRAINT_SCHEMA
+ PROCESSLIST ID
+@@ -1332,6 +1336,7 @@
+ COLUMNS information_schema.COLUMNS 1
+ COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1
+ INDEX_STATISTICS information_schema.INDEX_STATISTICS 1
++INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1
+ KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1
+ PROCESSLIST information_schema.PROCESSLIST 1
+ PROFILING information_schema.PROFILING 1
+diff -r fe944d2c6e1f mysql-test/r/information_schema_db.result
+--- a/mysql-test/r/information_schema_db.result Mon Nov 10 19:47:27 2008 -0800
++++ b/mysql-test/r/information_schema_db.result Mon Nov 10 19:48:25 2008 -0800
+@@ -11,6 +11,7 @@
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INNODB_BUFFER_POOL_CONTENT
+ INDEX_STATISTICS
+ KEY_COLUMN_USAGE
+ PROCESSLIST
+diff -r fe944d2c6e1f mysql-test/r/mysqlshow.result
+--- a/mysql-test/r/mysqlshow.result Mon Nov 10 19:47:27 2008 -0800
++++ b/mysql-test/r/mysqlshow.result Mon Nov 10 19:48:25 2008 -0800
+@@ -85,6 +85,7 @@
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS |
+ | COLUMN_PRIVILEGES |
++| INNODB_BUFFER_POOL_CONTENT |
+ | INDEX_STATISTICS |
+ | KEY_COLUMN_USAGE |
+ | PROCESSLIST |
+@@ -112,6 +113,7 @@
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS |
+ | COLUMN_PRIVILEGES |
++| INNODB_BUFFER_POOL_CONTENT |
+ | INDEX_STATISTICS |
+ | KEY_COLUMN_USAGE |
+ | PROCESSLIST |
+diff -r fe944d2c6e1f patch_info/innodb_show_bp.info
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/patch_info/innodb_show_bp.info Mon Nov 10 19:48:25 2008 -0800
+@@ -0,0 +1,6 @@
++File=innodb_show_bp.patch
++Name=show innodb buffer pool content
++Version=1.0
++Author=Percona <info at percona.com>
++License=GPL
++Comment=
+diff -r fe944d2c6e1f sql/ha_innodb.cc
+--- a/sql/ha_innodb.cc Mon Nov 10 19:47:27 2008 -0800
++++ b/sql/ha_innodb.cc Mon Nov 10 19:48:25 2008 -0800
+@@ -128,10 +128,12 @@
+ #include "../innobase/include/lock0lock.h"
+ #include "../innobase/include/dict0crea.h"
+ #include "../innobase/include/btr0cur.h"
++#include "../innobase/include/buf0buf.h"
+ #include "../innobase/include/btr0btr.h"
+ #include "../innobase/include/fsp0fsp.h"
+ #include "../innobase/include/sync0sync.h"
+ #include "../innobase/include/fil0fil.h"
++#include "../innobase/include/page0page.h"
+ #include "../innobase/include/trx0xa.h"
+ }
+
+@@ -6483,6 +6485,116 @@
+ DBUG_RETURN(FALSE);
+ }
+
++bool
++innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables)
++{
++ ulint size;
++ ulint i;
++ dulint id;
<Skipped 769 lines>
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/percona-server.git/commitdiff/431f68fe79a66d5dfdd53f2655709e6c925fbc22
More information about the pld-cvs-commit
mailing list